1 /* Data references and dependences detectors.
2    Copyright (C) 2003-2021 Free Software Foundation, Inc.
3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* This pass walks a given loop structure searching for array
22    references.  The information about the array accesses is recorded
23    in DATA_REFERENCE structures.
24 
25    The basic test for determining the dependences is:
26    given two access functions chrec1 and chrec2 to a same array, and
27    x and y two vectors from the iteration domain, the same element of
28    the array is accessed twice at iterations x and y if and only if:
29    |             chrec1 (x) == chrec2 (y).
30 
31    The goals of this analysis are:
32 
33    - to determine the independence: the relation between two
34      independent accesses is qualified with the chrec_known (this
35      information allows a loop parallelization),
36 
37    - when two data references access the same data, to qualify the
38      dependence relation with classic dependence representations:
39 
40        - distance vectors
41        - direction vectors
42        - loop carried level dependence
43        - polyhedron dependence
44      or with the chains of recurrences based representation,
45 
46    - to define a knowledge base for storing the data dependence
47      information,
48 
49    - to define an interface to access this data.
50 
51 
52    Definitions:
53 
54    - subscript: given two array accesses a subscript is the tuple
55    composed of the access functions for a given dimension.  Example:
56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57    (f1, g1), (f2, g2), (f3, g3).
58 
59    - Diophantine equation: an equation whose coefficients and
60    solutions are integer constants, for example the equation
61    |   3*x + 2*y = 1
62    has an integer solution x = 1 and y = -1.
63 
64    References:
65 
66    - "Advanced Compilation for High Performance Computing" by Randy
67    Allen and Ken Kennedy.
68    http://citeseer.ist.psu.edu/goff91practical.html
69 
70    - "Loop Transformations for Restructuring Compilers - The Foundations"
71    by Utpal Banerjee.
72 
73 
74 */
75 
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "builtins.h"
97 #include "tree-eh.h"
98 #include "ssa.h"
99 #include "internal-fn.h"
100 #include "vr-values.h"
101 #include "range-op.h"
102 #include "tree-ssa-loop-ivopts.h"
103 
104 static struct datadep_stats
105 {
106   int num_dependence_tests;
107   int num_dependence_dependent;
108   int num_dependence_independent;
109   int num_dependence_undetermined;
110 
111   int num_subscript_tests;
112   int num_subscript_undetermined;
113   int num_same_subscript_function;
114 
115   int num_ziv;
116   int num_ziv_independent;
117   int num_ziv_dependent;
118   int num_ziv_unimplemented;
119 
120   int num_siv;
121   int num_siv_independent;
122   int num_siv_dependent;
123   int num_siv_unimplemented;
124 
125   int num_miv;
126   int num_miv_independent;
127   int num_miv_dependent;
128   int num_miv_unimplemented;
129 } dependence_stats;
130 
131 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
132 					   unsigned int, unsigned int,
133 					   class loop *);
134 /* Returns true iff A divides B.  */
135 
136 static inline bool
tree_fold_divides_p(const_tree a,const_tree b)137 tree_fold_divides_p (const_tree a, const_tree b)
138 {
139   gcc_assert (TREE_CODE (a) == INTEGER_CST);
140   gcc_assert (TREE_CODE (b) == INTEGER_CST);
141   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
142 }
143 
144 /* Returns true iff A divides B.  */
145 
146 static inline bool
int_divides_p(lambda_int a,lambda_int b)147 int_divides_p (lambda_int a, lambda_int b)
148 {
149   return ((b % a) == 0);
150 }
151 
152 /* Return true if reference REF contains a union access.  */
153 
154 static bool
ref_contains_union_access_p(tree ref)155 ref_contains_union_access_p (tree ref)
156 {
157   while (handled_component_p (ref))
158     {
159       ref = TREE_OPERAND (ref, 0);
160       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
161 	  || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
162 	return true;
163     }
164   return false;
165 }
166 
167 
168 
169 /* Dump into FILE all the data references from DATAREFS.  */
170 
171 static void
dump_data_references(FILE * file,vec<data_reference_p> datarefs)172 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
173 {
174   for (data_reference *dr : datarefs)
175     dump_data_reference (file, dr);
176 }
177 
178 /* Unified dump into FILE all the data references from DATAREFS.  */
179 
180 DEBUG_FUNCTION void
debug(vec<data_reference_p> & ref)181 debug (vec<data_reference_p> &ref)
182 {
183   dump_data_references (stderr, ref);
184 }
185 
186 DEBUG_FUNCTION void
debug(vec<data_reference_p> * ptr)187 debug (vec<data_reference_p> *ptr)
188 {
189   if (ptr)
190     debug (*ptr);
191   else
192     fprintf (stderr, "<nil>\n");
193 }
194 
195 
196 /* Dump into STDERR all the data references from DATAREFS.  */
197 
198 DEBUG_FUNCTION void
debug_data_references(vec<data_reference_p> datarefs)199 debug_data_references (vec<data_reference_p> datarefs)
200 {
201   dump_data_references (stderr, datarefs);
202 }
203 
204 /* Print to STDERR the data_reference DR.  */
205 
206 DEBUG_FUNCTION void
debug_data_reference(struct data_reference * dr)207 debug_data_reference (struct data_reference *dr)
208 {
209   dump_data_reference (stderr, dr);
210 }
211 
212 /* Dump function for a DATA_REFERENCE structure.  */
213 
214 void
dump_data_reference(FILE * outf,struct data_reference * dr)215 dump_data_reference (FILE *outf,
216 		     struct data_reference *dr)
217 {
218   unsigned int i;
219 
220   fprintf (outf, "#(Data Ref: \n");
221   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
222   fprintf (outf, "#  stmt: ");
223   print_gimple_stmt (outf, DR_STMT (dr), 0);
224   fprintf (outf, "#  ref: ");
225   print_generic_stmt (outf, DR_REF (dr));
226   fprintf (outf, "#  base_object: ");
227   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
228 
229   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
230     {
231       fprintf (outf, "#  Access function %d: ", i);
232       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
233     }
234   fprintf (outf, "#)\n");
235 }
236 
237 /* Unified dump function for a DATA_REFERENCE structure.  */
238 
239 DEBUG_FUNCTION void
debug(data_reference & ref)240 debug (data_reference &ref)
241 {
242   dump_data_reference (stderr, &ref);
243 }
244 
245 DEBUG_FUNCTION void
debug(data_reference * ptr)246 debug (data_reference *ptr)
247 {
248   if (ptr)
249     debug (*ptr);
250   else
251     fprintf (stderr, "<nil>\n");
252 }
253 
254 
255 /* Dumps the affine function described by FN to the file OUTF.  */
256 
257 DEBUG_FUNCTION void
dump_affine_function(FILE * outf,affine_fn fn)258 dump_affine_function (FILE *outf, affine_fn fn)
259 {
260   unsigned i;
261   tree coef;
262 
263   print_generic_expr (outf, fn[0], TDF_SLIM);
264   for (i = 1; fn.iterate (i, &coef); i++)
265     {
266       fprintf (outf, " + ");
267       print_generic_expr (outf, coef, TDF_SLIM);
268       fprintf (outf, " * x_%u", i);
269     }
270 }
271 
272 /* Dumps the conflict function CF to the file OUTF.  */
273 
274 DEBUG_FUNCTION void
dump_conflict_function(FILE * outf,conflict_function * cf)275 dump_conflict_function (FILE *outf, conflict_function *cf)
276 {
277   unsigned i;
278 
279   if (cf->n == NO_DEPENDENCE)
280     fprintf (outf, "no dependence");
281   else if (cf->n == NOT_KNOWN)
282     fprintf (outf, "not known");
283   else
284     {
285       for (i = 0; i < cf->n; i++)
286 	{
287 	  if (i != 0)
288 	    fprintf (outf, " ");
289 	  fprintf (outf, "[");
290 	  dump_affine_function (outf, cf->fns[i]);
291 	  fprintf (outf, "]");
292 	}
293     }
294 }
295 
296 /* Dump function for a SUBSCRIPT structure.  */
297 
298 DEBUG_FUNCTION void
dump_subscript(FILE * outf,struct subscript * subscript)299 dump_subscript (FILE *outf, struct subscript *subscript)
300 {
301   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
302 
303   fprintf (outf, "\n (subscript \n");
304   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
305   dump_conflict_function (outf, cf);
306   if (CF_NONTRIVIAL_P (cf))
307     {
308       tree last_iteration = SUB_LAST_CONFLICT (subscript);
309       fprintf (outf, "\n  last_conflict: ");
310       print_generic_expr (outf, last_iteration);
311     }
312 
313   cf = SUB_CONFLICTS_IN_B (subscript);
314   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
315   dump_conflict_function (outf, cf);
316   if (CF_NONTRIVIAL_P (cf))
317     {
318       tree last_iteration = SUB_LAST_CONFLICT (subscript);
319       fprintf (outf, "\n  last_conflict: ");
320       print_generic_expr (outf, last_iteration);
321     }
322 
323   fprintf (outf, "\n  (Subscript distance: ");
324   print_generic_expr (outf, SUB_DISTANCE (subscript));
325   fprintf (outf, " ))\n");
326 }
327 
328 /* Print the classic direction vector DIRV to OUTF.  */
329 
330 DEBUG_FUNCTION void
print_direction_vector(FILE * outf,lambda_vector dirv,int length)331 print_direction_vector (FILE *outf,
332 			lambda_vector dirv,
333 			int length)
334 {
335   int eq;
336 
337   for (eq = 0; eq < length; eq++)
338     {
339       enum data_dependence_direction dir = ((enum data_dependence_direction)
340 					    dirv[eq]);
341 
342       switch (dir)
343 	{
344 	case dir_positive:
345 	  fprintf (outf, "    +");
346 	  break;
347 	case dir_negative:
348 	  fprintf (outf, "    -");
349 	  break;
350 	case dir_equal:
351 	  fprintf (outf, "    =");
352 	  break;
353 	case dir_positive_or_equal:
354 	  fprintf (outf, "   +=");
355 	  break;
356 	case dir_positive_or_negative:
357 	  fprintf (outf, "   +-");
358 	  break;
359 	case dir_negative_or_equal:
360 	  fprintf (outf, "   -=");
361 	  break;
362 	case dir_star:
363 	  fprintf (outf, "    *");
364 	  break;
365 	default:
366 	  fprintf (outf, "indep");
367 	  break;
368 	}
369     }
370   fprintf (outf, "\n");
371 }
372 
373 /* Print a vector of direction vectors.  */
374 
375 DEBUG_FUNCTION void
print_dir_vectors(FILE * outf,vec<lambda_vector> dir_vects,int length)376 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
377 		   int length)
378 {
379   for (lambda_vector v : dir_vects)
380     print_direction_vector (outf, v, length);
381 }
382 
383 /* Print out a vector VEC of length N to OUTFILE.  */
384 
385 DEBUG_FUNCTION void
print_lambda_vector(FILE * outfile,lambda_vector vector,int n)386 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
387 {
388   int i;
389 
390   for (i = 0; i < n; i++)
391     fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
392   fprintf (outfile, "\n");
393 }
394 
395 /* Print a vector of distance vectors.  */
396 
397 DEBUG_FUNCTION void
print_dist_vectors(FILE * outf,vec<lambda_vector> dist_vects,int length)398 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
399 		    int length)
400 {
401   for (lambda_vector v : dist_vects)
402     print_lambda_vector (outf, v, length);
403 }
404 
405 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
406 
407 DEBUG_FUNCTION void
dump_data_dependence_relation(FILE * outf,const data_dependence_relation * ddr)408 dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
409 {
410   struct data_reference *dra, *drb;
411 
412   fprintf (outf, "(Data Dep: \n");
413 
414   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
415     {
416       if (ddr)
417 	{
418 	  dra = DDR_A (ddr);
419 	  drb = DDR_B (ddr);
420 	  if (dra)
421 	    dump_data_reference (outf, dra);
422 	  else
423 	    fprintf (outf, "    (nil)\n");
424 	  if (drb)
425 	    dump_data_reference (outf, drb);
426 	  else
427 	    fprintf (outf, "    (nil)\n");
428 	}
429       fprintf (outf, "    (don't know)\n)\n");
430       return;
431     }
432 
433   dra = DDR_A (ddr);
434   drb = DDR_B (ddr);
435   dump_data_reference (outf, dra);
436   dump_data_reference (outf, drb);
437 
438   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
439     fprintf (outf, "    (no dependence)\n");
440 
441   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
442     {
443       unsigned int i;
444       class loop *loopi;
445 
446       subscript *sub;
447       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
448 	{
449 	  fprintf (outf, "  access_fn_A: ");
450 	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
451 	  fprintf (outf, "  access_fn_B: ");
452 	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
453 	  dump_subscript (outf, sub);
454 	}
455 
456       fprintf (outf, "  loop nest: (");
457       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
458 	fprintf (outf, "%d ", loopi->num);
459       fprintf (outf, ")\n");
460 
461       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
462 	{
463 	  fprintf (outf, "  distance_vector: ");
464 	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
465 			       DDR_NB_LOOPS (ddr));
466 	}
467 
468       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
469 	{
470 	  fprintf (outf, "  direction_vector: ");
471 	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
472 				  DDR_NB_LOOPS (ddr));
473 	}
474     }
475 
476   fprintf (outf, ")\n");
477 }
478 
479 /* Debug version.  */
480 
481 DEBUG_FUNCTION void
debug_data_dependence_relation(const struct data_dependence_relation * ddr)482 debug_data_dependence_relation (const struct data_dependence_relation *ddr)
483 {
484   dump_data_dependence_relation (stderr, ddr);
485 }
486 
487 /* Dump into FILE all the dependence relations from DDRS.  */
488 
489 DEBUG_FUNCTION void
dump_data_dependence_relations(FILE * file,const vec<ddr_p> & ddrs)490 dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
491 {
492   for (auto ddr : ddrs)
493     dump_data_dependence_relation (file, ddr);
494 }
495 
496 DEBUG_FUNCTION void
debug(vec<ddr_p> & ref)497 debug (vec<ddr_p> &ref)
498 {
499   dump_data_dependence_relations (stderr, ref);
500 }
501 
502 DEBUG_FUNCTION void
debug(vec<ddr_p> * ptr)503 debug (vec<ddr_p> *ptr)
504 {
505   if (ptr)
506     debug (*ptr);
507   else
508     fprintf (stderr, "<nil>\n");
509 }
510 
511 
512 /* Dump to STDERR all the dependence relations from DDRS.  */
513 
514 DEBUG_FUNCTION void
debug_data_dependence_relations(vec<ddr_p> ddrs)515 debug_data_dependence_relations (vec<ddr_p> ddrs)
516 {
517   dump_data_dependence_relations (stderr, ddrs);
518 }
519 
520 /* Dumps the distance and direction vectors in FILE.  DDRS contains
521    the dependence relations, and VECT_SIZE is the size of the
522    dependence vectors, or in other words the number of loops in the
523    considered nest.  */
524 
525 DEBUG_FUNCTION void
dump_dist_dir_vectors(FILE * file,vec<ddr_p> ddrs)526 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
527 {
528   for (data_dependence_relation *ddr : ddrs)
529     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
530       {
531 	for (lambda_vector v : DDR_DIST_VECTS (ddr))
532 	  {
533 	    fprintf (file, "DISTANCE_V (");
534 	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
535 	    fprintf (file, ")\n");
536 	  }
537 
538 	for (lambda_vector v : DDR_DIR_VECTS (ddr))
539 	  {
540 	    fprintf (file, "DIRECTION_V (");
541 	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
542 	    fprintf (file, ")\n");
543 	  }
544       }
545 
546   fprintf (file, "\n\n");
547 }
548 
549 /* Dumps the data dependence relations DDRS in FILE.  */
550 
551 DEBUG_FUNCTION void
dump_ddrs(FILE * file,vec<ddr_p> ddrs)552 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
553 {
554   for (data_dependence_relation *ddr : ddrs)
555     dump_data_dependence_relation (file, ddr);
556 
557   fprintf (file, "\n\n");
558 }
559 
560 DEBUG_FUNCTION void
debug_ddrs(vec<ddr_p> ddrs)561 debug_ddrs (vec<ddr_p> ddrs)
562 {
563   dump_ddrs (stderr, ddrs);
564 }
565 
566 /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
567    OP0 CODE OP1, where:
568 
569    - OP0 CODE OP1 has integral type TYPE
570    - the range of OP0 is given by OP0_RANGE and
571    - the range of OP1 is given by OP1_RANGE.
572 
573    Independently of RESULT_RANGE, try to compute:
574 
575      DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
576 	     - (sizetype) (OP0 CODE OP1)
577 
578    as a constant and subtract DELTA from the ssizetype constant in *OFF.
579    Return true on success, or false if DELTA is not known at compile time.
580 
581    Truncation and sign changes are known to distribute over CODE, i.e.
582 
583      (itype) (A CODE B) == (itype) A CODE (itype) B
584 
585    for any integral type ITYPE whose precision is no greater than the
586    precision of A and B.  */
587 
588 static bool
compute_distributive_range(tree type,value_range & op0_range,tree_code code,value_range & op1_range,tree * off,value_range * result_range)589 compute_distributive_range (tree type, value_range &op0_range,
590 			    tree_code code, value_range &op1_range,
591 			    tree *off, value_range *result_range)
592 {
593   gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
594   if (result_range)
595     {
596       range_operator *op = range_op_handler (code, type);
597       op->fold_range (*result_range, type, op0_range, op1_range);
598     }
599 
600   /* The distributive property guarantees that if TYPE is no narrower
601      than SIZETYPE,
602 
603        (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
604 
605      and so we can treat DELTA as zero.  */
606   if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
607     return true;
608 
609   /* If overflow is undefined, we can assume that:
610 
611        X == (ssizetype) OP0 CODE (ssizetype) OP1
612 
613      is within the range of TYPE, i.e.:
614 
615        X == (ssizetype) (TYPE) X
616 
617      Distributing the (TYPE) truncation over X gives:
618 
619        X == (ssizetype) (OP0 CODE OP1)
620 
621      Casting both sides to sizetype and distributing the sizetype cast
622      over X gives:
623 
624        (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
625 
626      and so we can treat DELTA as zero.  */
627   if (TYPE_OVERFLOW_UNDEFINED (type))
628     return true;
629 
630   /* Compute the range of:
631 
632        (ssizetype) OP0 CODE (ssizetype) OP1
633 
634      The distributive property guarantees that this has the same bitpattern as:
635 
636        (sizetype) OP0 CODE (sizetype) OP1
637 
638      but its range is more conducive to analysis.  */
639   range_cast (op0_range, ssizetype);
640   range_cast (op1_range, ssizetype);
641   value_range wide_range;
642   range_operator *op = range_op_handler (code, ssizetype);
643   bool saved_flag_wrapv = flag_wrapv;
644   flag_wrapv = 1;
645   op->fold_range (wide_range, ssizetype, op0_range, op1_range);
646   flag_wrapv = saved_flag_wrapv;
647   if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
648     return false;
649 
650   wide_int lb = wide_range.lower_bound ();
651   wide_int ub = wide_range.upper_bound ();
652 
653   /* Calculate the number of times that each end of the range overflows or
654      underflows TYPE.  We can only calculate DELTA if the numbers match.  */
655   unsigned int precision = TYPE_PRECISION (type);
656   if (!TYPE_UNSIGNED (type))
657     {
658       wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
659       lb -= type_min;
660       ub -= type_min;
661     }
662   wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
663   lb &= upper_bits;
664   ub &= upper_bits;
665   if (lb != ub)
666     return false;
667 
668   /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
669      negative values indicating underflow.  The low PRECISION bits of LB
670      are clear, so DELTA is therefore LB (== UB).  */
671   *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
672   return true;
673 }
674 
675 /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
676    given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
677    FROM_TYPE are integral types.  */
678 
679 static bool
nop_conversion_for_offset_p(tree to_type,tree from_type,value_range & range)680 nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
681 {
682   gcc_assert (INTEGRAL_TYPE_P (to_type)
683 	      && INTEGRAL_TYPE_P (from_type)
684 	      && !TYPE_OVERFLOW_TRAPS (to_type)
685 	      && !TYPE_OVERFLOW_TRAPS (from_type));
686 
687   /* Converting to something no narrower than sizetype and then to sizetype
688      is equivalent to converting directly to sizetype.  */
689   if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
690     return true;
691 
692   /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
693   if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
694       && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
695     return true;
696 
697   /* For narrowing conversions, we could in principle test whether
698      the bits in FROM_TYPE but not in TO_TYPE have a fixed value
699      and apply a constant adjustment.
700 
701      For other conversions (which involve a sign change) we could
702      check that the signs are always equal, and apply a constant
703      adjustment if the signs are negative.
704 
705      However, both cases should be rare.  */
706   return range_fits_type_p (&range, TYPE_PRECISION (to_type),
707 			    TYPE_SIGN (to_type));
708 }
709 
710 static void
711 split_constant_offset (tree type, tree *var, tree *off,
712 		       value_range *result_range,
713 		       hash_map<tree, std::pair<tree, tree> > &cache,
714 		       unsigned *limit);
715 
716 /* Helper function for split_constant_offset.  If TYPE is a pointer type,
717    try to express OP0 CODE OP1 as:
718 
719      POINTER_PLUS <*VAR, (sizetype) *OFF>
720 
721    where:
722 
723    - *VAR has type TYPE
724    - *OFF is a constant of type ssizetype.
725 
726    If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
727 
728      *VAR + (sizetype) *OFF
729 
730    where:
731 
732    - *VAR has type sizetype
733    - *OFF is a constant of type ssizetype.
734 
735    In both cases, OP0 CODE OP1 has type TYPE.
736 
737    Return true on success.  A false return value indicates that we can't
738    do better than set *OFF to zero.
739 
740    When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
741    if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
742 
743    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
744    visited.  LIMIT counts down the number of SSA names that we are
745    allowed to process before giving up.  */
746 
747 static bool
split_constant_offset_1(tree type,tree op0,enum tree_code code,tree op1,tree * var,tree * off,value_range * result_range,hash_map<tree,std::pair<tree,tree>> & cache,unsigned * limit)748 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
749 			 tree *var, tree *off, value_range *result_range,
750 			 hash_map<tree, std::pair<tree, tree> > &cache,
751 			 unsigned *limit)
752 {
753   tree var0, var1;
754   tree off0, off1;
755   value_range op0_range, op1_range;
756 
757   *var = NULL_TREE;
758   *off = NULL_TREE;
759 
760   if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
761     return false;
762 
763   switch (code)
764     {
765     case INTEGER_CST:
766       *var = size_int (0);
767       *off = fold_convert (ssizetype, op0);
768       if (result_range)
769 	result_range->set (op0, op0);
770       return true;
771 
772     case POINTER_PLUS_EXPR:
773       split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
774       split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
775       *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
776       *off = size_binop (PLUS_EXPR, off0, off1);
777       return true;
778 
779     case PLUS_EXPR:
780     case MINUS_EXPR:
781       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
782       split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
783       *off = size_binop (code, off0, off1);
784       if (!compute_distributive_range (type, op0_range, code, op1_range,
785 				       off, result_range))
786 	return false;
787       *var = fold_build2 (code, sizetype, var0, var1);
788       return true;
789 
790     case MULT_EXPR:
791       if (TREE_CODE (op1) != INTEGER_CST)
792 	return false;
793 
794       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
795       op1_range.set (op1, op1);
796       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
797       if (!compute_distributive_range (type, op0_range, code, op1_range,
798 				       off, result_range))
799 	return false;
800       *var = fold_build2 (MULT_EXPR, sizetype, var0,
801 			  fold_convert (sizetype, op1));
802       return true;
803 
804     case ADDR_EXPR:
805       {
806 	tree base, poffset;
807 	poly_int64 pbitsize, pbitpos, pbytepos;
808 	machine_mode pmode;
809 	int punsignedp, preversep, pvolatilep;
810 
811 	op0 = TREE_OPERAND (op0, 0);
812 	base
813 	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
814 				 &punsignedp, &preversep, &pvolatilep);
815 
816 	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
817 	  return false;
818 	base = build_fold_addr_expr (base);
819 	off0 = ssize_int (pbytepos);
820 
821 	if (poffset)
822 	  {
823 	    split_constant_offset (poffset, &poffset, &off1, nullptr,
824 				   cache, limit);
825 	    off0 = size_binop (PLUS_EXPR, off0, off1);
826 	    base = fold_build_pointer_plus (base, poffset);
827 	  }
828 
829 	var0 = fold_convert (type, base);
830 
831 	/* If variable length types are involved, punt, otherwise casts
832 	   might be converted into ARRAY_REFs in gimplify_conversion.
833 	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
834 	   possibly no longer appears in current GIMPLE, might resurface.
835 	   This perhaps could run
836 	   if (CONVERT_EXPR_P (var0))
837 	     {
838 	       gimplify_conversion (&var0);
839 	       // Attempt to fill in any within var0 found ARRAY_REF's
840 	       // element size from corresponding op embedded ARRAY_REF,
841 	       // if unsuccessful, just punt.
842 	     }  */
843 	while (POINTER_TYPE_P (type))
844 	  type = TREE_TYPE (type);
845 	if (int_size_in_bytes (type) < 0)
846 	  return false;
847 
848 	*var = var0;
849 	*off = off0;
850 	return true;
851       }
852 
853     case SSA_NAME:
854       {
855 	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
856 	  return false;
857 
858 	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
859 	enum tree_code subcode;
860 
861 	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
862 	  return false;
863 
864 	subcode = gimple_assign_rhs_code (def_stmt);
865 
866 	/* We are using a cache to avoid un-CSEing large amounts of code.  */
867 	bool use_cache = false;
868 	if (!has_single_use (op0)
869 	    && (subcode == POINTER_PLUS_EXPR
870 		|| subcode == PLUS_EXPR
871 		|| subcode == MINUS_EXPR
872 		|| subcode == MULT_EXPR
873 		|| subcode == ADDR_EXPR
874 		|| CONVERT_EXPR_CODE_P (subcode)))
875 	  {
876 	    use_cache = true;
877 	    bool existed;
878 	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
879 	    if (existed)
880 	      {
881 		if (integer_zerop (e.second))
882 		  return false;
883 		*var = e.first;
884 		*off = e.second;
885 		/* The caller sets the range in this case.  */
886 		return true;
887 	      }
888 	    e = std::make_pair (op0, ssize_int (0));
889 	  }
890 
891 	if (*limit == 0)
892 	  return false;
893 	--*limit;
894 
895 	var0 = gimple_assign_rhs1 (def_stmt);
896 	var1 = gimple_assign_rhs2 (def_stmt);
897 
898 	bool res = split_constant_offset_1 (type, var0, subcode, var1,
899 					    var, off, nullptr, cache, limit);
900 	if (res && use_cache)
901 	  *cache.get (op0) = std::make_pair (*var, *off);
902 	/* The caller sets the range in this case.  */
903 	return res;
904       }
905     CASE_CONVERT:
906       {
907 	/* We can only handle the following conversions:
908 
909 	   - Conversions from one pointer type to another pointer type.
910 
911 	   - Conversions from one non-trapping integral type to another
912 	     non-trapping integral type.  In this case, the recursive
913 	     call makes sure that:
914 
915 	       (sizetype) OP0
916 
917 	     can be expressed as a sizetype operation involving VAR and OFF,
918 	     and all we need to do is check whether:
919 
920 	       (sizetype) OP0 == (sizetype) (TYPE) OP0
921 
922 	   - Conversions from a non-trapping sizetype-size integral type to
923 	     a like-sized pointer type.  In this case, the recursive call
924 	     makes sure that:
925 
926 	       (sizetype) OP0 == *VAR + (sizetype) *OFF
927 
928 	     and we can convert that to:
929 
930 	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
931 
932 	   - Conversions from a sizetype-sized pointer type to a like-sized
933 	     non-trapping integral type.  In this case, the recursive call
934 	     makes sure that:
935 
936 	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
937 
938 	     where the POINTER_PLUS and *VAR have the same precision as
939 	     TYPE (and the same precision as sizetype).  Then:
940 
941 	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
942 	tree itype = TREE_TYPE (op0);
943 	if ((POINTER_TYPE_P (itype)
944 	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
945 	    && (POINTER_TYPE_P (type)
946 		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
947 	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
948 		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
949 		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
950 	  {
951 	    if (POINTER_TYPE_P (type))
952 	      {
953 		split_constant_offset (op0, var, off, nullptr, cache, limit);
954 		*var = fold_convert (type, *var);
955 	      }
956 	    else if (POINTER_TYPE_P (itype))
957 	      {
958 		split_constant_offset (op0, var, off, nullptr, cache, limit);
959 		*var = fold_convert (sizetype, *var);
960 	      }
961 	    else
962 	      {
963 		split_constant_offset (op0, var, off, &op0_range,
964 				       cache, limit);
965 		if (!nop_conversion_for_offset_p (type, itype, op0_range))
966 		  return false;
967 		if (result_range)
968 		  {
969 		    *result_range = op0_range;
970 		    range_cast (*result_range, type);
971 		  }
972 	      }
973 	    return true;
974 	  }
975 	return false;
976       }
977 
978     default:
979       return false;
980     }
981 }
982 
983 /* If EXP has pointer type, try to express it as:
984 
985      POINTER_PLUS <*VAR, (sizetype) *OFF>
986 
987    where:
988 
989    - *VAR has the same type as EXP
990    - *OFF is a constant of type ssizetype.
991 
992    If EXP has an integral type, try to express (sizetype) EXP as:
993 
994      *VAR + (sizetype) *OFF
995 
996    where:
997 
998    - *VAR has type sizetype
999    - *OFF is a constant of type ssizetype.
1000 
1001    If EXP_RANGE is nonnull, set it to the range of EXP.
1002 
1003    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1004    visited.  LIMIT counts down the number of SSA names that we are
1005    allowed to process before giving up.  */
1006 
1007 static void
split_constant_offset(tree exp,tree * var,tree * off,value_range * exp_range,hash_map<tree,std::pair<tree,tree>> & cache,unsigned * limit)1008 split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
1009 		       hash_map<tree, std::pair<tree, tree> > &cache,
1010 		       unsigned *limit)
1011 {
1012   tree type = TREE_TYPE (exp), op0, op1;
1013   enum tree_code code;
1014 
1015   code = TREE_CODE (exp);
1016   if (exp_range)
1017     {
1018       *exp_range = type;
1019       if (code == SSA_NAME)
1020 	{
1021 	  value_range vr;
1022 	  get_range_query (cfun)->range_of_expr (vr, exp);
1023 	  if (vr.undefined_p ())
1024 	    vr.set_varying (TREE_TYPE (exp));
1025 	  wide_int var_min = wi::to_wide (vr.min ());
1026 	  wide_int var_max = wi::to_wide (vr.max ());
1027 	  value_range_kind vr_kind = vr.kind ();
1028 	  wide_int var_nonzero = get_nonzero_bits (exp);
1029 	  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1030 						       &var_min, &var_max,
1031 						       var_nonzero,
1032 						       TYPE_SIGN (type));
1033 	  /* This check for VR_VARYING is here because the old code
1034 	     using get_range_info would return VR_RANGE for the entire
1035 	     domain, instead of VR_VARYING.  The new code normalizes
1036 	     full-domain ranges to VR_VARYING.  */
1037 	  if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1038 	    *exp_range = value_range (type, var_min, var_max);
1039 	}
1040     }
1041 
1042   if (!tree_is_chrec (exp)
1043       && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1044     {
1045       extract_ops_from_tree (exp, &code, &op0, &op1);
1046       if (split_constant_offset_1 (type, op0, code, op1, var, off,
1047 				   exp_range, cache, limit))
1048 	return;
1049     }
1050 
1051   *var = exp;
1052   if (INTEGRAL_TYPE_P (type))
1053     *var = fold_convert (sizetype, *var);
1054   *off = ssize_int (0);
1055 
1056   value_range r;
1057   if (exp_range && code != SSA_NAME
1058       && get_range_query (cfun)->range_of_expr (r, exp)
1059       && !r.undefined_p ())
1060     *exp_range = r;
1061 }
1062 
1063 /* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
1064    type as EXP while OFF has type ssizetype.  */
1065 
1066 void
split_constant_offset(tree exp,tree * var,tree * off)1067 split_constant_offset (tree exp, tree *var, tree *off)
1068 {
1069   unsigned limit = param_ssa_name_def_chain_limit;
1070   static hash_map<tree, std::pair<tree, tree> > *cache;
1071   if (!cache)
1072     cache = new hash_map<tree, std::pair<tree, tree> > (37);
1073   split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1074   *var = fold_convert (TREE_TYPE (exp), *var);
1075   cache->empty ();
1076 }
1077 
1078 /* Returns the address ADDR of an object in a canonical shape (without nop
1079    casts, and with type of pointer to the object).  */
1080 
1081 static tree
canonicalize_base_object_address(tree addr)1082 canonicalize_base_object_address (tree addr)
1083 {
1084   tree orig = addr;
1085 
1086   STRIP_NOPS (addr);
1087 
1088   /* The base address may be obtained by casting from integer, in that case
1089      keep the cast.  */
1090   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1091     return orig;
1092 
1093   if (TREE_CODE (addr) != ADDR_EXPR)
1094     return addr;
1095 
1096   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1097 }
1098 
1099 /* Analyze the behavior of memory reference REF within STMT.
1100    There are two modes:
1101 
1102    - BB analysis.  In this case we simply split the address into base,
1103      init and offset components, without reference to any containing loop.
1104      The resulting base and offset are general expressions and they can
1105      vary arbitrarily from one iteration of the containing loop to the next.
1106      The step is always zero.
1107 
1108    - loop analysis.  In this case we analyze the reference both wrt LOOP
1109      and on the basis that the reference occurs (is "used") in LOOP;
1110      see the comment above analyze_scalar_evolution_in_loop for more
1111      information about this distinction.  The base, init, offset and
1112      step fields are all invariant in LOOP.
1113 
1114    Perform BB analysis if LOOP is null, or if LOOP is the function's
1115    dummy outermost loop.  In other cases perform loop analysis.
1116 
1117    Return true if the analysis succeeded and store the results in DRB if so.
1118    BB analysis can only fail for bitfield or reversed-storage accesses.  */
1119 
1120 opt_result
dr_analyze_innermost(innermost_loop_behavior * drb,tree ref,class loop * loop,const gimple * stmt)1121 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1122 		      class loop *loop, const gimple *stmt)
1123 {
1124   poly_int64 pbitsize, pbitpos;
1125   tree base, poffset;
1126   machine_mode pmode;
1127   int punsignedp, preversep, pvolatilep;
1128   affine_iv base_iv, offset_iv;
1129   tree init, dinit, step;
1130   bool in_loop = (loop && loop->num);
1131 
1132   if (dump_file && (dump_flags & TDF_DETAILS))
1133     fprintf (dump_file, "analyze_innermost: ");
1134 
1135   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1136 			      &punsignedp, &preversep, &pvolatilep);
1137   gcc_assert (base != NULL_TREE);
1138 
1139   poly_int64 pbytepos;
1140   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1141     return opt_result::failure_at (stmt,
1142 				   "failed: bit offset alignment.\n");
1143 
1144   if (preversep)
1145     return opt_result::failure_at (stmt,
1146 				   "failed: reverse storage order.\n");
1147 
1148   /* Calculate the alignment and misalignment for the inner reference.  */
1149   unsigned int HOST_WIDE_INT bit_base_misalignment;
1150   unsigned int bit_base_alignment;
1151   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1152 
1153   /* There are no bitfield references remaining in BASE, so the values
1154      we got back must be whole bytes.  */
1155   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1156 	      && bit_base_misalignment % BITS_PER_UNIT == 0);
1157   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1158   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1159 
1160   if (TREE_CODE (base) == MEM_REF)
1161     {
1162       if (!integer_zerop (TREE_OPERAND (base, 1)))
1163 	{
1164 	  /* Subtract MOFF from the base and add it to POFFSET instead.
1165 	     Adjust the misalignment to reflect the amount we subtracted.  */
1166 	  poly_offset_int moff = mem_ref_offset (base);
1167 	  base_misalignment -= moff.force_shwi ();
1168 	  tree mofft = wide_int_to_tree (sizetype, moff);
1169 	  if (!poffset)
1170 	    poffset = mofft;
1171 	  else
1172 	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
1173 	}
1174       base = TREE_OPERAND (base, 0);
1175     }
1176   else
1177     base = build_fold_addr_expr (base);
1178 
1179   if (in_loop)
1180     {
1181       if (!simple_iv (loop, loop, base, &base_iv, true))
1182 	return opt_result::failure_at
1183 	  (stmt, "failed: evolution of base is not affine.\n");
1184     }
1185   else
1186     {
1187       base_iv.base = base;
1188       base_iv.step = ssize_int (0);
1189       base_iv.no_overflow = true;
1190     }
1191 
1192   if (!poffset)
1193     {
1194       offset_iv.base = ssize_int (0);
1195       offset_iv.step = ssize_int (0);
1196     }
1197   else
1198     {
1199       if (!in_loop)
1200         {
1201           offset_iv.base = poffset;
1202           offset_iv.step = ssize_int (0);
1203         }
1204       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1205 	return opt_result::failure_at
1206 	  (stmt, "failed: evolution of offset is not affine.\n");
1207     }
1208 
1209   init = ssize_int (pbytepos);
1210 
1211   /* Subtract any constant component from the base and add it to INIT instead.
1212      Adjust the misalignment to reflect the amount we subtracted.  */
1213   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1214   init = size_binop (PLUS_EXPR, init, dinit);
1215   base_misalignment -= TREE_INT_CST_LOW (dinit);
1216 
1217   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1218   init = size_binop (PLUS_EXPR, init, dinit);
1219 
1220   step = size_binop (PLUS_EXPR,
1221 		     fold_convert (ssizetype, base_iv.step),
1222 		     fold_convert (ssizetype, offset_iv.step));
1223 
1224   base = canonicalize_base_object_address (base_iv.base);
1225 
1226   /* See if get_pointer_alignment can guarantee a higher alignment than
1227      the one we calculated above.  */
1228   unsigned int HOST_WIDE_INT alt_misalignment;
1229   unsigned int alt_alignment;
1230   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1231 
1232   /* As above, these values must be whole bytes.  */
1233   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1234 	      && alt_misalignment % BITS_PER_UNIT == 0);
1235   alt_alignment /= BITS_PER_UNIT;
1236   alt_misalignment /= BITS_PER_UNIT;
1237 
1238   if (base_alignment < alt_alignment)
1239     {
1240       base_alignment = alt_alignment;
1241       base_misalignment = alt_misalignment;
1242     }
1243 
1244   drb->base_address = base;
1245   drb->offset = fold_convert (ssizetype, offset_iv.base);
1246   drb->init = init;
1247   drb->step = step;
1248   if (known_misalignment (base_misalignment, base_alignment,
1249 			  &drb->base_misalignment))
1250     drb->base_alignment = base_alignment;
1251   else
1252     {
1253       drb->base_alignment = known_alignment (base_misalignment);
1254       drb->base_misalignment = 0;
1255     }
1256   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1257   drb->step_alignment = highest_pow2_factor (step);
1258 
1259   if (dump_file && (dump_flags & TDF_DETAILS))
1260     fprintf (dump_file, "success.\n");
1261 
1262   return opt_result::success ();
1263 }
1264 
1265 /* Return true if OP is a valid component reference for a DR access
1266    function.  This accepts a subset of what handled_component_p accepts.  */
1267 
1268 static bool
access_fn_component_p(tree op)1269 access_fn_component_p (tree op)
1270 {
1271   switch (TREE_CODE (op))
1272     {
1273     case REALPART_EXPR:
1274     case IMAGPART_EXPR:
1275     case ARRAY_REF:
1276       return true;
1277 
1278     case COMPONENT_REF:
1279       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1280 
1281     default:
1282       return false;
1283     }
1284 }
1285 
1286 /* Returns whether BASE can have a access_fn_component_p with BASE
1287    as base.  */
1288 
1289 static bool
base_supports_access_fn_components_p(tree base)1290 base_supports_access_fn_components_p (tree base)
1291 {
1292   switch (TREE_CODE (TREE_TYPE (base)))
1293     {
1294     case COMPLEX_TYPE:
1295     case ARRAY_TYPE:
1296     case RECORD_TYPE:
1297       return true;
1298     default:
1299       return false;
1300     }
1301 }
1302 
1303 /* Determines the base object and the list of indices of memory reference
1304    DR, analyzed in LOOP and instantiated before NEST.  */
1305 
1306 static void
dr_analyze_indices(struct indices * dri,tree ref,edge nest,loop_p loop)1307 dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1308 {
1309   /* If analyzing a basic-block there are no indices to analyze
1310      and thus no access functions.  */
1311   if (!nest)
1312     {
1313       dri->base_object = ref;
1314       dri->access_fns.create (0);
1315       return;
1316     }
1317 
1318   vec<tree> access_fns = vNULL;
1319 
1320   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1321      into a two element array with a constant index.  The base is
1322      then just the immediate underlying object.  */
1323   if (TREE_CODE (ref) == REALPART_EXPR)
1324     {
1325       ref = TREE_OPERAND (ref, 0);
1326       access_fns.safe_push (integer_zero_node);
1327     }
1328   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1329     {
1330       ref = TREE_OPERAND (ref, 0);
1331       access_fns.safe_push (integer_one_node);
1332     }
1333 
1334   /* Analyze access functions of dimensions we know to be independent.
1335      The list of component references handled here should be kept in
1336      sync with access_fn_component_p.  */
1337   while (handled_component_p (ref))
1338     {
1339       if (TREE_CODE (ref) == ARRAY_REF)
1340 	{
1341 	  tree op = TREE_OPERAND (ref, 1);
1342 	  tree access_fn = analyze_scalar_evolution (loop, op);
1343 	  access_fn = instantiate_scev (nest, loop, access_fn);
1344 	  access_fns.safe_push (access_fn);
1345 	}
1346       else if (TREE_CODE (ref) == COMPONENT_REF
1347 	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1348 	{
1349 	  /* For COMPONENT_REFs of records (but not unions!) use the
1350 	     FIELD_DECL offset as constant access function so we can
1351 	     disambiguate a[i].f1 and a[i].f2.  */
1352 	  tree off = component_ref_field_offset (ref);
1353 	  off = size_binop (PLUS_EXPR,
1354 			    size_binop (MULT_EXPR,
1355 					fold_convert (bitsizetype, off),
1356 					bitsize_int (BITS_PER_UNIT)),
1357 			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1358 	  access_fns.safe_push (off);
1359 	}
1360       else
1361 	/* If we have an unhandled component we could not translate
1362 	   to an access function stop analyzing.  We have determined
1363 	   our base object in this case.  */
1364 	break;
1365 
1366       ref = TREE_OPERAND (ref, 0);
1367     }
1368 
1369   /* If the address operand of a MEM_REF base has an evolution in the
1370      analyzed nest, add it as an additional independent access-function.  */
1371   if (TREE_CODE (ref) == MEM_REF)
1372     {
1373       tree op = TREE_OPERAND (ref, 0);
1374       tree access_fn = analyze_scalar_evolution (loop, op);
1375       access_fn = instantiate_scev (nest, loop, access_fn);
1376       STRIP_NOPS (access_fn);
1377       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1378 	{
1379 	  tree memoff = TREE_OPERAND (ref, 1);
1380 	  tree base = initial_condition (access_fn);
1381 	  tree orig_type = TREE_TYPE (base);
1382 	  STRIP_USELESS_TYPE_CONVERSION (base);
1383 	  tree off;
1384 	  split_constant_offset (base, &base, &off);
1385 	  STRIP_USELESS_TYPE_CONVERSION (base);
1386 	  /* Fold the MEM_REF offset into the evolutions initial
1387 	     value to make more bases comparable.  */
1388 	  if (!integer_zerop (memoff))
1389 	    {
1390 	      off = size_binop (PLUS_EXPR, off,
1391 				fold_convert (ssizetype, memoff));
1392 	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
1393 	    }
1394 	  /* Adjust the offset so it is a multiple of the access type
1395 	     size and thus we separate bases that can possibly be used
1396 	     to produce partial overlaps (which the access_fn machinery
1397 	     cannot handle).  */
1398 	  wide_int rem;
1399 	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1400 	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1401 	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1402 	    rem = wi::mod_trunc
1403 	      (wi::to_wide (off),
1404 	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1405 	       SIGNED);
1406 	  else
1407 	    /* If we can't compute the remainder simply force the initial
1408 	       condition to zero.  */
1409 	    rem = wi::to_wide (off);
1410 	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1411 	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1412 	  /* And finally replace the initial condition.  */
1413 	  access_fn = chrec_replace_initial_condition
1414 	      (access_fn, fold_convert (orig_type, off));
1415 	  /* ???  This is still not a suitable base object for
1416 	     dr_may_alias_p - the base object needs to be an
1417 	     access that covers the object as whole.  With
1418 	     an evolution in the pointer this cannot be
1419 	     guaranteed.
1420 	     As a band-aid, mark the access so we can special-case
1421 	     it in dr_may_alias_p.  */
1422 	  tree old = ref;
1423 	  ref = fold_build2_loc (EXPR_LOCATION (ref),
1424 				 MEM_REF, TREE_TYPE (ref),
1425 				 base, memoff);
1426 	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1427 	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1428 	  dri->unconstrained_base = true;
1429 	  access_fns.safe_push (access_fn);
1430 	}
1431     }
1432   else if (DECL_P (ref))
1433     {
1434       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1435       ref = build2 (MEM_REF, TREE_TYPE (ref),
1436 		    build_fold_addr_expr (ref),
1437 		    build_int_cst (reference_alias_ptr_type (ref), 0));
1438     }
1439 
1440   dri->base_object = ref;
1441   dri->access_fns = access_fns;
1442 }
1443 
1444 /* Extracts the alias analysis information from the memory reference DR.  */
1445 
1446 static void
dr_analyze_alias(struct data_reference * dr)1447 dr_analyze_alias (struct data_reference *dr)
1448 {
1449   tree ref = DR_REF (dr);
1450   tree base = get_base_address (ref), addr;
1451 
1452   if (INDIRECT_REF_P (base)
1453       || TREE_CODE (base) == MEM_REF)
1454     {
1455       addr = TREE_OPERAND (base, 0);
1456       if (TREE_CODE (addr) == SSA_NAME)
1457 	DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1458     }
1459 }
1460 
1461 /* Frees data reference DR.  */
1462 
1463 void
free_data_ref(data_reference_p dr)1464 free_data_ref (data_reference_p dr)
1465 {
1466   DR_ACCESS_FNS (dr).release ();
1467   if (dr->alt_indices.base_object)
1468     dr->alt_indices.access_fns.release ();
1469   free (dr);
1470 }
1471 
1472 /* Analyze memory reference MEMREF, which is accessed in STMT.
1473    The reference is a read if IS_READ is true, otherwise it is a write.
1474    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1475    within STMT, i.e. that it might not occur even if STMT is executed
1476    and runs to completion.
1477 
1478    Return the data_reference description of MEMREF.  NEST is the outermost
1479    loop in which the reference should be instantiated, LOOP is the loop
1480    in which the data reference should be analyzed.  */
1481 
1482 struct data_reference *
create_data_ref(edge nest,loop_p loop,tree memref,gimple * stmt,bool is_read,bool is_conditional_in_stmt)1483 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1484 		 bool is_read, bool is_conditional_in_stmt)
1485 {
1486   struct data_reference *dr;
1487 
1488   if (dump_file && (dump_flags & TDF_DETAILS))
1489     {
1490       fprintf (dump_file, "Creating dr for ");
1491       print_generic_expr (dump_file, memref, TDF_SLIM);
1492       fprintf (dump_file, "\n");
1493     }
1494 
1495   dr = XCNEW (struct data_reference);
1496   DR_STMT (dr) = stmt;
1497   DR_REF (dr) = memref;
1498   DR_IS_READ (dr) = is_read;
1499   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1500 
1501   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1502 			nest != NULL ? loop : NULL, stmt);
1503   dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1504   dr_analyze_alias (dr);
1505 
1506   if (dump_file && (dump_flags & TDF_DETAILS))
1507     {
1508       unsigned i;
1509       fprintf (dump_file, "\tbase_address: ");
1510       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1511       fprintf (dump_file, "\n\toffset from base address: ");
1512       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1513       fprintf (dump_file, "\n\tconstant offset from base address: ");
1514       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1515       fprintf (dump_file, "\n\tstep: ");
1516       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1517       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1518       fprintf (dump_file, "\n\tbase misalignment: %d",
1519 	       DR_BASE_MISALIGNMENT (dr));
1520       fprintf (dump_file, "\n\toffset alignment: %d",
1521 	       DR_OFFSET_ALIGNMENT (dr));
1522       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1523       fprintf (dump_file, "\n\tbase_object: ");
1524       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1525       fprintf (dump_file, "\n");
1526       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1527 	{
1528 	  fprintf (dump_file, "\tAccess function %d: ", i);
1529 	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1530 	}
1531     }
1532 
1533   return dr;
1534 }
1535 
1536 /*  A helper function computes order between two tree expressions T1 and T2.
1537     This is used in comparator functions sorting objects based on the order
1538     of tree expressions.  The function returns -1, 0, or 1.  */
1539 
1540 int
data_ref_compare_tree(tree t1,tree t2)1541 data_ref_compare_tree (tree t1, tree t2)
1542 {
1543   int i, cmp;
1544   enum tree_code code;
1545   char tclass;
1546 
1547   if (t1 == t2)
1548     return 0;
1549   if (t1 == NULL)
1550     return -1;
1551   if (t2 == NULL)
1552     return 1;
1553 
1554   STRIP_USELESS_TYPE_CONVERSION (t1);
1555   STRIP_USELESS_TYPE_CONVERSION (t2);
1556   if (t1 == t2)
1557     return 0;
1558 
1559   if (TREE_CODE (t1) != TREE_CODE (t2)
1560       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1561     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1562 
1563   code = TREE_CODE (t1);
1564   switch (code)
1565     {
1566     case INTEGER_CST:
1567       return tree_int_cst_compare (t1, t2);
1568 
1569     case STRING_CST:
1570       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1571 	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1572       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1573 		     TREE_STRING_LENGTH (t1));
1574 
1575     case SSA_NAME:
1576       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1577 	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1578       break;
1579 
1580     default:
1581       if (POLY_INT_CST_P (t1))
1582 	return compare_sizes_for_sort (wi::to_poly_widest (t1),
1583 				       wi::to_poly_widest (t2));
1584 
1585       tclass = TREE_CODE_CLASS (code);
1586 
1587       /* For decls, compare their UIDs.  */
1588       if (tclass == tcc_declaration)
1589 	{
1590 	  if (DECL_UID (t1) != DECL_UID (t2))
1591 	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1592 	  break;
1593 	}
1594       /* For expressions, compare their operands recursively.  */
1595       else if (IS_EXPR_CODE_CLASS (tclass))
1596 	{
1597 	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1598 	    {
1599 	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1600 					   TREE_OPERAND (t2, i));
1601 	      if (cmp != 0)
1602 		return cmp;
1603 	    }
1604 	}
1605       else
1606 	gcc_unreachable ();
1607     }
1608 
1609   return 0;
1610 }
1611 
1612 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1613    check.  */
1614 
1615 opt_result
runtime_alias_check_p(ddr_p ddr,class loop * loop,bool speed_p)1616 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1617 {
1618   if (dump_enabled_p ())
1619     dump_printf (MSG_NOTE,
1620 		 "consider run-time aliasing test between %T and %T\n",
1621 		 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1622 
1623   if (!speed_p)
1624     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1625 				   "runtime alias check not supported when"
1626 				   " optimizing for size.\n");
1627 
1628   /* FORNOW: We don't support versioning with outer-loop in either
1629      vectorization or loop distribution.  */
1630   if (loop != NULL && loop->inner != NULL)
1631     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1632 				   "runtime alias check not supported for"
1633 				   " outer loop.\n");
1634 
1635   return opt_result::success ();
1636 }
1637 
1638 /* Operator == between two dr_with_seg_len objects.
1639 
1640    This equality operator is used to make sure two data refs
1641    are the same one so that we will consider to combine the
1642    aliasing checks of those two pairs of data dependent data
1643    refs.  */
1644 
1645 static bool
1646 operator == (const dr_with_seg_len& d1,
1647 	     const dr_with_seg_len& d2)
1648 {
1649   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1650 			   DR_BASE_ADDRESS (d2.dr), 0)
1651 	  && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1652 	  && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1653 	  && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1654 	  && known_eq (d1.access_size, d2.access_size)
1655 	  && d1.align == d2.align);
1656 }
1657 
1658 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1659    so that we can combine aliasing checks in one scan.  */
1660 
1661 static int
comp_dr_with_seg_len_pair(const void * pa_,const void * pb_)1662 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1663 {
1664   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1665   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1666   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1667   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1668 
1669   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1670      if a and c have the same basic address snd step, and b and d have the same
1671      address and step.  Therefore, if any a&c or b&d don't have the same address
1672      and step, we don't care the order of those two pairs after sorting.  */
1673   int comp_res;
1674 
1675   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1676 					 DR_BASE_ADDRESS (b1.dr))) != 0)
1677     return comp_res;
1678   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1679 					 DR_BASE_ADDRESS (b2.dr))) != 0)
1680     return comp_res;
1681   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1682 					 DR_STEP (b1.dr))) != 0)
1683     return comp_res;
1684   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1685 					 DR_STEP (b2.dr))) != 0)
1686     return comp_res;
1687   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1688 					 DR_OFFSET (b1.dr))) != 0)
1689     return comp_res;
1690   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1691 					 DR_INIT (b1.dr))) != 0)
1692     return comp_res;
1693   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1694 					 DR_OFFSET (b2.dr))) != 0)
1695     return comp_res;
1696   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1697 					 DR_INIT (b2.dr))) != 0)
1698     return comp_res;
1699 
1700   return 0;
1701 }
1702 
1703 /* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */
1704 
1705 static void
dump_alias_pair(dr_with_seg_len_pair_t * alias_pair,const char * indent)1706 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1707 {
1708   dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
1709 	       DR_REF (alias_pair->first.dr),
1710 	       DR_REF (alias_pair->second.dr));
1711 
1712   dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1713 	       alias_pair->first.seg_len);
1714   if (!operand_equal_p (alias_pair->first.seg_len,
1715 			alias_pair->second.seg_len, 0))
1716     dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1717 
1718   dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
1719   dump_dec (MSG_NOTE, alias_pair->first.access_size);
1720   if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1721     {
1722       dump_printf (MSG_NOTE, " vs. ");
1723       dump_dec (MSG_NOTE, alias_pair->second.access_size);
1724     }
1725 
1726   dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
1727 	       alias_pair->first.align);
1728   if (alias_pair->first.align != alias_pair->second.align)
1729     dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1730 
1731   dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
1732   if (alias_pair->flags & DR_ALIAS_RAW)
1733     dump_printf (MSG_NOTE, " RAW");
1734   if (alias_pair->flags & DR_ALIAS_WAR)
1735     dump_printf (MSG_NOTE, " WAR");
1736   if (alias_pair->flags & DR_ALIAS_WAW)
1737     dump_printf (MSG_NOTE, " WAW");
1738   if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1739     dump_printf (MSG_NOTE, " ARBITRARY");
1740   if (alias_pair->flags & DR_ALIAS_SWAPPED)
1741     dump_printf (MSG_NOTE, " SWAPPED");
1742   if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1743     dump_printf (MSG_NOTE, " UNSWAPPED");
1744   if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1745     dump_printf (MSG_NOTE, " MIXED_STEPS");
1746   if (alias_pair->flags == 0)
1747     dump_printf (MSG_NOTE, " <none>");
1748   dump_printf (MSG_NOTE, "\n");
1749 }
1750 
1751 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1752    FACTOR is number of iterations that each data reference is accessed.
1753 
1754    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1755    we create an expression:
1756 
1757    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1758    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1759 
1760    for aliasing checks.  However, in some cases we can decrease the number
1761    of checks by combining two checks into one.  For example, suppose we have
1762    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1763    condition is satisfied:
1764 
1765    load_ptr_0 < load_ptr_1  &&
1766    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1767 
1768    (this condition means, in each iteration of vectorized loop, the accessed
1769    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1770    load_ptr_1.)
1771 
1772    we then can use only the following expression to finish the alising checks
1773    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1774 
1775    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1776    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1777 
1778    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1779    basic address.  */
1780 
1781 void
prune_runtime_alias_test_list(vec<dr_with_seg_len_pair_t> * alias_pairs,poly_uint64)1782 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1783 			       poly_uint64)
1784 {
1785   if (alias_pairs->is_empty ())
1786     return;
1787 
1788   /* Canonicalize each pair so that the base components are ordered wrt
1789      data_ref_compare_tree.  This allows the loop below to merge more
1790      cases.  */
1791   unsigned int i;
1792   dr_with_seg_len_pair_t *alias_pair;
1793   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1794     {
1795       data_reference_p dr_a = alias_pair->first.dr;
1796       data_reference_p dr_b = alias_pair->second.dr;
1797       int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1798 					    DR_BASE_ADDRESS (dr_b));
1799       if (comp_res == 0)
1800 	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1801       if (comp_res == 0)
1802 	comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1803       if (comp_res > 0)
1804 	{
1805 	  std::swap (alias_pair->first, alias_pair->second);
1806 	  alias_pair->flags |= DR_ALIAS_SWAPPED;
1807 	}
1808       else
1809 	alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1810     }
1811 
1812   /* Sort the collected data ref pairs so that we can scan them once to
1813      combine all possible aliasing checks.  */
1814   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1815 
1816   /* Scan the sorted dr pairs and check if we can combine alias checks
1817      of two neighboring dr pairs.  */
1818   unsigned int last = 0;
1819   for (i = 1; i < alias_pairs->length (); ++i)
1820     {
1821       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1822       dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1823       dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1824 
1825       dr_with_seg_len *dr_a1 = &alias_pair1->first;
1826       dr_with_seg_len *dr_b1 = &alias_pair1->second;
1827       dr_with_seg_len *dr_a2 = &alias_pair2->first;
1828       dr_with_seg_len *dr_b2 = &alias_pair2->second;
1829 
1830       /* Remove duplicate data ref pairs.  */
1831       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1832 	{
1833 	  if (dump_enabled_p ())
1834 	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1835 			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1836 			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1837 	  alias_pair1->flags |= alias_pair2->flags;
1838 	  continue;
1839 	}
1840 
1841       /* Assume that we won't be able to merge the pairs, then correct
1842 	 if we do.  */
1843       last += 1;
1844       if (last != i)
1845 	(*alias_pairs)[last] = (*alias_pairs)[i];
1846 
1847       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1848 	{
1849 	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1850 	     and DR_A1 and DR_A2 are two consecutive memrefs.  */
1851 	  if (*dr_a1 == *dr_a2)
1852 	    {
1853 	      std::swap (dr_a1, dr_b1);
1854 	      std::swap (dr_a2, dr_b2);
1855 	    }
1856 
1857 	  poly_int64 init_a1, init_a2;
1858 	  /* Only consider cases in which the distance between the initial
1859 	     DR_A1 and the initial DR_A2 is known at compile time.  */
1860 	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1861 				DR_BASE_ADDRESS (dr_a2->dr), 0)
1862 	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1863 				   DR_OFFSET (dr_a2->dr), 0)
1864 	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1865 	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1866 	    continue;
1867 
1868 	  /* Don't combine if we can't tell which one comes first.  */
1869 	  if (!ordered_p (init_a1, init_a2))
1870 	    continue;
1871 
1872 	  /* Work out what the segment length would be if we did combine
1873 	     DR_A1 and DR_A2:
1874 
1875 	     - If DR_A1 and DR_A2 have equal lengths, that length is
1876 	       also the combined length.
1877 
1878 	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
1879 	       length is the lower bound on those lengths.
1880 
1881 	     - If DR_A1 and DR_A2 both have positive lengths, the combined
1882 	       length is the upper bound on those lengths.
1883 
1884 	     Other cases are unlikely to give a useful combination.
1885 
1886 	     The lengths both have sizetype, so the sign is taken from
1887 	     the step instead.  */
1888 	  poly_uint64 new_seg_len = 0;
1889 	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1890 						 dr_a2->seg_len, 0);
1891 	  if (new_seg_len_p)
1892 	    {
1893 	      poly_uint64 seg_len_a1, seg_len_a2;
1894 	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1895 		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1896 		continue;
1897 
1898 	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
1899 	      if (TREE_CODE (indicator_a) != INTEGER_CST)
1900 		continue;
1901 
1902 	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
1903 	      if (TREE_CODE (indicator_b) != INTEGER_CST)
1904 		continue;
1905 
1906 	      int sign_a = tree_int_cst_sgn (indicator_a);
1907 	      int sign_b = tree_int_cst_sgn (indicator_b);
1908 
1909 	      if (sign_a <= 0 && sign_b <= 0)
1910 		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1911 	      else if (sign_a >= 0 && sign_b >= 0)
1912 		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1913 	      else
1914 		continue;
1915 	    }
1916 	  /* At this point we're committed to merging the refs.  */
1917 
1918 	  /* Make sure dr_a1 starts left of dr_a2.  */
1919 	  if (maybe_gt (init_a1, init_a2))
1920 	    {
1921 	      std::swap (*dr_a1, *dr_a2);
1922 	      std::swap (init_a1, init_a2);
1923 	    }
1924 
1925 	  /* The DR_Bs are equal, so only the DR_As can introduce
1926 	     mixed steps.  */
1927 	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1928 	    alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1929 
1930 	  if (new_seg_len_p)
1931 	    {
1932 	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1933 					      new_seg_len);
1934 	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1935 	    }
1936 
1937 	  /* This is always positive due to the swap above.  */
1938 	  poly_uint64 diff = init_a2 - init_a1;
1939 
1940 	  /* The new check will start at DR_A1.  Make sure that its access
1941 	     size encompasses the initial DR_A2.  */
1942 	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1943 	    {
1944 	      dr_a1->access_size = upper_bound (dr_a1->access_size,
1945 						diff + dr_a2->access_size);
1946 	      unsigned int new_align = known_alignment (dr_a1->access_size);
1947 	      dr_a1->align = MIN (dr_a1->align, new_align);
1948 	    }
1949 	  if (dump_enabled_p ())
1950 	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1951 			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1952 			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1953 	  alias_pair1->flags |= alias_pair2->flags;
1954 	  last -= 1;
1955 	}
1956     }
1957   alias_pairs->truncate (last + 1);
1958 
1959   /* Try to restore the original dr_with_seg_len order within each
1960      dr_with_seg_len_pair_t.  If we ended up combining swapped and
1961      unswapped pairs into the same check, we have to invalidate any
1962      RAW, WAR and WAW information for it.  */
1963   if (dump_enabled_p ())
1964     dump_printf (MSG_NOTE, "merged alias checks:\n");
1965   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1966     {
1967       unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1968       unsigned int swapped = (alias_pair->flags & swap_mask);
1969       if (swapped == DR_ALIAS_SWAPPED)
1970 	std::swap (alias_pair->first, alias_pair->second);
1971       else if (swapped != DR_ALIAS_UNSWAPPED)
1972 	alias_pair->flags |= DR_ALIAS_ARBITRARY;
1973       alias_pair->flags &= ~swap_mask;
1974       if (dump_enabled_p ())
1975 	dump_alias_pair (alias_pair, "  ");
1976     }
1977 }
1978 
1979 /* A subroutine of create_intersect_range_checks, with a subset of the
1980    same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1981    to optimize cases in which the references form a simple RAW, WAR or
1982    WAR dependence.  */
1983 
1984 static bool
create_ifn_alias_checks(tree * cond_expr,const dr_with_seg_len_pair_t & alias_pair)1985 create_ifn_alias_checks (tree *cond_expr,
1986 			 const dr_with_seg_len_pair_t &alias_pair)
1987 {
1988   const dr_with_seg_len& dr_a = alias_pair.first;
1989   const dr_with_seg_len& dr_b = alias_pair.second;
1990 
1991   /* Check for cases in which:
1992 
1993      (a) we have a known RAW, WAR or WAR dependence
1994      (b) the accesses are well-ordered in both the original and new code
1995 	 (see the comment above the DR_ALIAS_* flags for details); and
1996      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
1997   if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
1998     return false;
1999 
2000   /* Make sure that both DRs access the same pattern of bytes,
2001      with a constant length and step.  */
2002   poly_uint64 seg_len;
2003   if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2004       || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2005       || maybe_ne (dr_a.access_size, dr_b.access_size)
2006       || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2007       || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2008     return false;
2009 
2010   unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2011   tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2012   tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2013 
2014   /* See whether the target suports what we want to do.  WAW checks are
2015      equivalent to WAR checks here.  */
2016   internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2017 		     ? IFN_CHECK_RAW_PTRS
2018 		     : IFN_CHECK_WAR_PTRS);
2019   unsigned int align = MIN (dr_a.align, dr_b.align);
2020   poly_uint64 full_length = seg_len + bytes;
2021   if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2022 					   full_length, align))
2023     {
2024       full_length = seg_len + dr_a.access_size;
2025       if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2026 					       full_length, align))
2027 	return false;
2028     }
2029 
2030   /* Commit to using this form of test.  */
2031   addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2032   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2033 
2034   addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2035   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2036 
2037   *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2038 					     ifn, boolean_type_node,
2039 					     4, addr_a, addr_b,
2040 					     size_int (full_length),
2041 					     size_int (align));
2042 
2043   if (dump_enabled_p ())
2044     {
2045       if (ifn == IFN_CHECK_RAW_PTRS)
2046 	dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2047       else
2048 	dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2049     }
2050   return true;
2051 }
2052 
2053 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2054    free of aliases, using a condition based on index values instead
2055    of a condition based on addresses.  Return true on success,
2056    storing the condition in *COND_EXPR.
2057 
2058    This can only be done if the two data references in ALIAS_PAIR access
2059    the same array object and the index is the only difference.  For example,
2060    if the two data references are DR_A and DR_B:
2061 
2062                        DR_A                           DR_B
2063       data-ref         arr[i]                         arr[j]
2064       base_object      arr                            arr
2065       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
2066 
2067    The addresses and their index are like:
2068 
2069         |<- ADDR_A    ->|          |<- ADDR_B    ->|
2070      ------------------------------------------------------->
2071         |   |   |   |   |          |   |   |   |   |
2072      ------------------------------------------------------->
2073         i_0 ...         i_0+4      j_0 ...         j_0+4
2074 
2075    We can create expression based on index rather than address:
2076 
2077      (unsigned) (i_0 - j_0 + 3) <= 6
2078 
2079    i.e. the indices are less than 4 apart.
2080 
2081    Note evolution step of index needs to be considered in comparison.  */
2082 
2083 static bool
create_intersect_range_checks_index(class loop * loop,tree * cond_expr,const dr_with_seg_len_pair_t & alias_pair)2084 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2085 				     const dr_with_seg_len_pair_t &alias_pair)
2086 {
2087   const dr_with_seg_len &dr_a = alias_pair.first;
2088   const dr_with_seg_len &dr_b = alias_pair.second;
2089   if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2090       || integer_zerop (DR_STEP (dr_a.dr))
2091       || integer_zerop (DR_STEP (dr_b.dr))
2092       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2093     return false;
2094 
2095   poly_uint64 seg_len1, seg_len2;
2096   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2097       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2098     return false;
2099 
2100   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2101     return false;
2102 
2103   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2104     return false;
2105 
2106   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2107     return false;
2108 
2109   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2110 
2111   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2112   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2113   if (neg_step)
2114     {
2115       abs_step = -abs_step;
2116       seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2117       seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2118     }
2119 
2120   /* Infer the number of iterations with which the memory segment is accessed
2121      by DR.  In other words, alias is checked if memory segment accessed by
2122      DR_A in some iterations intersect with memory segment accessed by DR_B
2123      in the same amount iterations.
2124      Note segnment length is a linear function of number of iterations with
2125      DR_STEP as the coefficient.  */
2126   poly_uint64 niter_len1, niter_len2;
2127   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2128       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2129     return false;
2130 
2131   /* Divide each access size by the byte step, rounding up.  */
2132   poly_uint64 niter_access1, niter_access2;
2133   if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2134 			abs_step, &niter_access1)
2135       || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2136 			   abs_step, &niter_access2))
2137     return false;
2138 
2139   bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2140 
2141   int found = -1;
2142   for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2143     {
2144       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2145       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2146       /* Two indices must be the same if they are not scev, or not scev wrto
2147 	 current loop being vecorized.  */
2148       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2149 	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
2150 	  || CHREC_VARIABLE (access1) != (unsigned)loop->num
2151 	  || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2152 	{
2153 	  if (operand_equal_p (access1, access2, 0))
2154 	    continue;
2155 
2156 	  return false;
2157 	}
2158       if (found >= 0)
2159 	return false;
2160       found = i;
2161     }
2162 
2163   /* Ought not to happen in practice, since if all accesses are equal then the
2164      alias should be decidable at compile time.  */
2165   if (found < 0)
2166     return false;
2167 
2168   /* The two indices must have the same step.  */
2169   tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2170   tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2171   if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2172     return false;
2173 
2174   tree idx_step = CHREC_RIGHT (access1);
2175   /* Index must have const step, otherwise DR_STEP won't be constant.  */
2176   gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2177   /* Index must evaluate in the same direction as DR.  */
2178   gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2179 
2180   tree min1 = CHREC_LEFT (access1);
2181   tree min2 = CHREC_LEFT (access2);
2182   if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2183     return false;
2184 
2185   /* Ideally, alias can be checked against loop's control IV, but we
2186      need to prove linear mapping between control IV and reference
2187      index.  Although that should be true, we check against (array)
2188      index of data reference.  Like segment length, index length is
2189      linear function of the number of iterations with index_step as
2190      the coefficient, i.e, niter_len * idx_step.  */
2191   offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2192 					      SIGNED);
2193   if (neg_step)
2194     abs_idx_step = -abs_idx_step;
2195   poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2196   poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2197   poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2198   poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2199 
2200   gcc_assert (known_ge (idx_len1, 0)
2201 	      && known_ge (idx_len2, 0)
2202 	      && known_ge (idx_access1, 0)
2203 	      && known_ge (idx_access2, 0));
2204 
2205   /* Each access has the following pattern, with lengths measured
2206      in units of INDEX:
2207 
2208 	  <-- idx_len -->
2209 	  <--- A: -ve step --->
2210 	  +-----+-------+-----+-------+-----+
2211 	  | n-1 | ..... |  0  | ..... | n-1 |
2212 	  +-----+-------+-----+-------+-----+
2213 			<--- B: +ve step --->
2214 			<-- idx_len -->
2215 			|
2216 		       min
2217 
2218      where "n" is the number of scalar iterations covered by the segment
2219      and where each access spans idx_access units.
2220 
2221      A is the range of bytes accessed when the step is negative,
2222      B is the range when the step is positive.
2223 
2224      When checking for general overlap, we need to test whether
2225      the range:
2226 
2227        [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2228 
2229      overlaps:
2230 
2231        [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2232 
2233      where:
2234 
2235 	low_offsetN = +ve step ? 0 : -idx_lenN;
2236        high_offsetN = +ve step ? idx_lenN : 0;
2237 
2238      This is equivalent to testing whether:
2239 
2240        min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2241        && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2242 
2243      Converting this into a single test, there is an overlap if:
2244 
2245        0 <= min2 - min1 + bias <= limit
2246 
2247      where  bias = high_offset2 + idx_access2 - 1 - low_offset1
2248 	   limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2249 		 + (high_offset2 - low_offset2 + idx_access2 - 1)
2250       i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2251 
2252      Combining the tests requires limit to be computable in an unsigned
2253      form of the index type; if it isn't, we fall back to the usual
2254      pointer-based checks.
2255 
2256      We can do better if DR_B is a write and if DR_A and DR_B are
2257      well-ordered in both the original and the new code (see the
2258      comment above the DR_ALIAS_* flags for details).  In this case
2259      we know that for each i in [0, n-1], the write performed by
2260      access i of DR_B occurs after access numbers j<=i of DR_A in
2261      both the original and the new code.  Any write or anti
2262      dependencies wrt those DR_A accesses are therefore maintained.
2263 
2264      We just need to make sure that each individual write in DR_B does not
2265      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2266      after the DR_B access in the original code but happen before it in
2267      the new code.
2268 
2269      We know the steps for both accesses are equal, so by induction, we
2270      just need to test whether the first write of DR_B overlaps a later
2271      access of DR_A.  In other words, we need to move min1 along by
2272      one iteration:
2273 
2274        min1' = min1 + idx_step
2275 
2276      and use the ranges:
2277 
2278        [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2279 
2280      and:
2281 
2282        [min2, min2 + idx_access2 - 1]
2283 
2284      where:
2285 
2286 	low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2287        high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
2288   if (waw_or_war_p)
2289     idx_len1 -= abs_idx_step;
2290 
2291   poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2292   if (!waw_or_war_p)
2293     limit += idx_len2;
2294 
2295   tree utype = unsigned_type_for (TREE_TYPE (min1));
2296   if (!wi::fits_to_tree_p (limit, utype))
2297     return false;
2298 
2299   poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2300   poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2301   poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2302   /* Equivalent to adding IDX_STEP to MIN1.  */
2303   if (waw_or_war_p)
2304     bias -= wi::to_offset (idx_step);
2305 
2306   tree subject = fold_build2 (MINUS_EXPR, utype,
2307 			      fold_convert (utype, min2),
2308 			      fold_convert (utype, min1));
2309   subject = fold_build2 (PLUS_EXPR, utype, subject,
2310 			 wide_int_to_tree (utype, bias));
2311   tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2312 				     wide_int_to_tree (utype, limit));
2313   if (*cond_expr)
2314     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2315 			      *cond_expr, part_cond_expr);
2316   else
2317     *cond_expr = part_cond_expr;
2318   if (dump_enabled_p ())
2319     {
2320       if (waw_or_war_p)
2321 	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2322       else
2323 	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2324     }
2325   return true;
2326 }
2327 
2328 /* A subroutine of create_intersect_range_checks, with a subset of the
2329    same arguments.  Try to optimize cases in which the second access
2330    is a write and in which some overlap is valid.  */
2331 
2332 static bool
create_waw_or_war_checks(tree * cond_expr,const dr_with_seg_len_pair_t & alias_pair)2333 create_waw_or_war_checks (tree *cond_expr,
2334 			  const dr_with_seg_len_pair_t &alias_pair)
2335 {
2336   const dr_with_seg_len& dr_a = alias_pair.first;
2337   const dr_with_seg_len& dr_b = alias_pair.second;
2338 
2339   /* Check for cases in which:
2340 
2341      (a) DR_B is always a write;
2342      (b) the accesses are well-ordered in both the original and new code
2343 	 (see the comment above the DR_ALIAS_* flags for details); and
2344      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2345   if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2346     return false;
2347 
2348   /* Check for equal (but possibly variable) steps.  */
2349   tree step = DR_STEP (dr_a.dr);
2350   if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2351     return false;
2352 
2353   /* Make sure that we can operate on sizetype without loss of precision.  */
2354   tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2355   if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2356     return false;
2357 
2358   /* All addresses involved are known to have a common alignment ALIGN.
2359      We can therefore subtract ALIGN from an exclusive endpoint to get
2360      an inclusive endpoint.  In the best (and common) case, ALIGN is the
2361      same as the access sizes of both DRs, and so subtracting ALIGN
2362      cancels out the addition of an access size.  */
2363   unsigned int align = MIN (dr_a.align, dr_b.align);
2364   poly_uint64 last_chunk_a = dr_a.access_size - align;
2365   poly_uint64 last_chunk_b = dr_b.access_size - align;
2366 
2367   /* Get a boolean expression that is true when the step is negative.  */
2368   tree indicator = dr_direction_indicator (dr_a.dr);
2369   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2370 			       fold_convert (ssizetype, indicator),
2371 			       ssize_int (0));
2372 
2373   /* Get lengths in sizetype.  */
2374   tree seg_len_a
2375     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2376   step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2377 
2378   /* Each access has the following pattern:
2379 
2380 	  <- |seg_len| ->
2381 	  <--- A: -ve step --->
2382 	  +-----+-------+-----+-------+-----+
2383 	  | n-1 | ..... |  0  | ..... | n-1 |
2384 	  +-----+-------+-----+-------+-----+
2385 			<--- B: +ve step --->
2386 			<- |seg_len| ->
2387 			|
2388 		   base address
2389 
2390      where "n" is the number of scalar iterations covered by the segment.
2391 
2392      A is the range of bytes accessed when the step is negative,
2393      B is the range when the step is positive.
2394 
2395      We know that DR_B is a write.  We also know (from checking that
2396      DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2397      the write performed by access i of DR_B occurs after access numbers
2398      j<=i of DR_A in both the original and the new code.  Any write or
2399      anti dependencies wrt those DR_A accesses are therefore maintained.
2400 
2401      We just need to make sure that each individual write in DR_B does not
2402      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2403      after the DR_B access in the original code but happen before it in
2404      the new code.
2405 
2406      We know the steps for both accesses are equal, so by induction, we
2407      just need to test whether the first write of DR_B overlaps a later
2408      access of DR_A.  In other words, we need to move addr_a along by
2409      one iteration:
2410 
2411        addr_a' = addr_a + step
2412 
2413      and check whether:
2414 
2415        [addr_b, addr_b + last_chunk_b]
2416 
2417      overlaps:
2418 
2419        [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2420 
2421      where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
2422 
2423 	low_offset_a = +ve step ? 0 : seg_len_a - step
2424        high_offset_a = +ve step ? seg_len_a - step : 0
2425 
2426      This is equivalent to testing whether:
2427 
2428        addr_a' + low_offset_a <= addr_b + last_chunk_b
2429        && addr_b <= addr_a' + high_offset_a + last_chunk_a
2430 
2431      Converting this into a single test, there is an overlap if:
2432 
2433        0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2434 
2435      where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2436 
2437      If DR_A is performed, limit + |step| - last_chunk_b is known to be
2438      less than the size of the object underlying DR_A.  We also know
2439      that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2440      guaranteed at compile time.  There can therefore be no overflow if
2441      "limit" is calculated in an unsigned type with pointer precision.  */
2442   tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2443 					 DR_OFFSET (dr_a.dr));
2444   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2445 
2446   tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2447 					 DR_OFFSET (dr_b.dr));
2448   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2449 
2450   /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
2451   addr_a = fold_build_pointer_plus (addr_a, step);
2452   tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2453 					   seg_len_a, step);
2454   if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2455     seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2456 
2457   tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2458 				   seg_len_a_minus_step, size_zero_node);
2459   if (!CONSTANT_CLASS_P (low_offset_a))
2460     low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2461 
2462   /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2463      but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
2464   tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2465 				    low_offset_a);
2466 
2467   /* The amount added to addr_b - addr_a'.  */
2468   tree bias = fold_build2 (MINUS_EXPR, sizetype,
2469 			   size_int (last_chunk_b), low_offset_a);
2470 
2471   tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2472   limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2473 		       size_int (last_chunk_a + last_chunk_b));
2474 
2475   tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2476   subject = fold_build2 (PLUS_EXPR, sizetype,
2477 			 fold_convert (sizetype, subject), bias);
2478 
2479   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2480   if (dump_enabled_p ())
2481     dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2482   return true;
2483 }
2484 
2485 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
2486    every address ADDR accessed by D:
2487 
2488      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
2489 
2490    In this case, every element accessed by D is aligned to at least
2491    ALIGN bytes.
2492 
2493    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2494 
2495      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
2496 
2497 static void
get_segment_min_max(const dr_with_seg_len & d,tree * seg_min_out,tree * seg_max_out,HOST_WIDE_INT align)2498 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2499 		     tree *seg_max_out, HOST_WIDE_INT align)
2500 {
2501   /* Each access has the following pattern:
2502 
2503 	  <- |seg_len| ->
2504 	  <--- A: -ve step --->
2505 	  +-----+-------+-----+-------+-----+
2506 	  | n-1 | ,.... |  0  | ..... | n-1 |
2507 	  +-----+-------+-----+-------+-----+
2508 			<--- B: +ve step --->
2509 			<- |seg_len| ->
2510 			|
2511 		   base address
2512 
2513      where "n" is the number of scalar iterations covered by the segment.
2514      (This should be VF for a particular pair if we know that both steps
2515      are the same, otherwise it will be the full number of scalar loop
2516      iterations.)
2517 
2518      A is the range of bytes accessed when the step is negative,
2519      B is the range when the step is positive.
2520 
2521      If the access size is "access_size" bytes, the lowest addressed byte is:
2522 
2523 	 base + (step < 0 ? seg_len : 0)   [LB]
2524 
2525      and the highest addressed byte is always below:
2526 
2527 	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]
2528 
2529      Thus:
2530 
2531 	 LB <= ADDR < UB
2532 
2533      If ALIGN is nonzero, all three values are aligned to at least ALIGN
2534      bytes, so:
2535 
2536 	 LB <= ADDR <= UB - ALIGN
2537 
2538      where "- ALIGN" folds naturally with the "+ access_size" and often
2539      cancels it out.
2540 
2541      We don't try to simplify LB and UB beyond this (e.g. by using
2542      MIN and MAX based on whether seg_len rather than the stride is
2543      negative) because it is possible for the absolute size of the
2544      segment to overflow the range of a ssize_t.
2545 
2546      Keeping the pointer_plus outside of the cond_expr should allow
2547      the cond_exprs to be shared with other alias checks.  */
2548   tree indicator = dr_direction_indicator (d.dr);
2549   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2550 			       fold_convert (ssizetype, indicator),
2551 			       ssize_int (0));
2552   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2553 					    DR_OFFSET (d.dr));
2554   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2555   tree seg_len
2556     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2557 
2558   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2559 				seg_len, size_zero_node);
2560   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2561 				size_zero_node, seg_len);
2562   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2563 			   size_int (d.access_size - align));
2564 
2565   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2566   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2567 }
2568 
2569 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2570    storing the condition in *COND_EXPR.  The fallback is to generate a
2571    a test that the two accesses do not overlap:
2572 
2573      end_a <= start_b || end_b <= start_a.  */
2574 
2575 static void
create_intersect_range_checks(class loop * loop,tree * cond_expr,const dr_with_seg_len_pair_t & alias_pair)2576 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2577 			       const dr_with_seg_len_pair_t &alias_pair)
2578 {
2579   const dr_with_seg_len& dr_a = alias_pair.first;
2580   const dr_with_seg_len& dr_b = alias_pair.second;
2581   *cond_expr = NULL_TREE;
2582   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2583     return;
2584 
2585   if (create_ifn_alias_checks (cond_expr, alias_pair))
2586     return;
2587 
2588   if (create_waw_or_war_checks (cond_expr, alias_pair))
2589     return;
2590 
2591   unsigned HOST_WIDE_INT min_align;
2592   tree_code cmp_code;
2593   /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2594      are equivalent.  This is just an optimization heuristic.  */
2595   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2596       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2597     {
2598       /* In this case adding access_size to seg_len is likely to give
2599 	 a simple X * step, where X is either the number of scalar
2600 	 iterations or the vectorization factor.  We're better off
2601 	 keeping that, rather than subtracting an alignment from it.
2602 
2603 	 In this case the maximum values are exclusive and so there is
2604 	 no alias if the maximum of one segment equals the minimum
2605 	 of another.  */
2606       min_align = 0;
2607       cmp_code = LE_EXPR;
2608     }
2609   else
2610     {
2611       /* Calculate the minimum alignment shared by all four pointers,
2612 	 then arrange for this alignment to be subtracted from the
2613 	 exclusive maximum values to get inclusive maximum values.
2614 	 This "- min_align" is cumulative with a "+ access_size"
2615 	 in the calculation of the maximum values.  In the best
2616 	 (and common) case, the two cancel each other out, leaving
2617 	 us with an inclusive bound based only on seg_len.  In the
2618 	 worst case we're simply adding a smaller number than before.
2619 
2620 	 Because the maximum values are inclusive, there is an alias
2621 	 if the maximum value of one segment is equal to the minimum
2622 	 value of the other.  */
2623       min_align = MIN (dr_a.align, dr_b.align);
2624       cmp_code = LT_EXPR;
2625     }
2626 
2627   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2628   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2629   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2630 
2631   *cond_expr
2632     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2633 	fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2634 	fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2635   if (dump_enabled_p ())
2636     dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2637 }
2638 
2639 /* Create a conditional expression that represents the run-time checks for
2640    overlapping of address ranges represented by a list of data references
2641    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2642    COND_EXPR is the conditional expression to be used in the if statement
2643    that controls which version of the loop gets executed at runtime.  */
2644 
2645 void
create_runtime_alias_checks(class loop * loop,const vec<dr_with_seg_len_pair_t> * alias_pairs,tree * cond_expr)2646 create_runtime_alias_checks (class loop *loop,
2647 			     const vec<dr_with_seg_len_pair_t> *alias_pairs,
2648 			     tree * cond_expr)
2649 {
2650   tree part_cond_expr;
2651 
2652   fold_defer_overflow_warnings ();
2653   for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2654     {
2655       gcc_assert (alias_pair.flags);
2656       if (dump_enabled_p ())
2657 	dump_printf (MSG_NOTE,
2658 		     "create runtime check for data references %T and %T\n",
2659 		     DR_REF (alias_pair.first.dr),
2660 		     DR_REF (alias_pair.second.dr));
2661 
2662       /* Create condition expression for each pair data references.  */
2663       create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2664       if (*cond_expr)
2665 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2666 				  *cond_expr, part_cond_expr);
2667       else
2668 	*cond_expr = part_cond_expr;
2669     }
2670   fold_undefer_and_ignore_overflow_warnings ();
2671 }
2672 
2673 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2674    expressions.  */
2675 static bool
dr_equal_offsets_p1(tree offset1,tree offset2)2676 dr_equal_offsets_p1 (tree offset1, tree offset2)
2677 {
2678   bool res;
2679 
2680   STRIP_NOPS (offset1);
2681   STRIP_NOPS (offset2);
2682 
2683   if (offset1 == offset2)
2684     return true;
2685 
2686   if (TREE_CODE (offset1) != TREE_CODE (offset2)
2687       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2688     return false;
2689 
2690   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2691                              TREE_OPERAND (offset2, 0));
2692 
2693   if (!res || !BINARY_CLASS_P (offset1))
2694     return res;
2695 
2696   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2697                              TREE_OPERAND (offset2, 1));
2698 
2699   return res;
2700 }
2701 
2702 /* Check if DRA and DRB have equal offsets.  */
2703 bool
dr_equal_offsets_p(struct data_reference * dra,struct data_reference * drb)2704 dr_equal_offsets_p (struct data_reference *dra,
2705                     struct data_reference *drb)
2706 {
2707   tree offset1, offset2;
2708 
2709   offset1 = DR_OFFSET (dra);
2710   offset2 = DR_OFFSET (drb);
2711 
2712   return dr_equal_offsets_p1 (offset1, offset2);
2713 }
2714 
2715 /* Returns true if FNA == FNB.  */
2716 
2717 static bool
affine_function_equal_p(affine_fn fna,affine_fn fnb)2718 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2719 {
2720   unsigned i, n = fna.length ();
2721 
2722   if (n != fnb.length ())
2723     return false;
2724 
2725   for (i = 0; i < n; i++)
2726     if (!operand_equal_p (fna[i], fnb[i], 0))
2727       return false;
2728 
2729   return true;
2730 }
2731 
2732 /* If all the functions in CF are the same, returns one of them,
2733    otherwise returns NULL.  */
2734 
2735 static affine_fn
common_affine_function(conflict_function * cf)2736 common_affine_function (conflict_function *cf)
2737 {
2738   unsigned i;
2739   affine_fn comm;
2740 
2741   if (!CF_NONTRIVIAL_P (cf))
2742     return affine_fn ();
2743 
2744   comm = cf->fns[0];
2745 
2746   for (i = 1; i < cf->n; i++)
2747     if (!affine_function_equal_p (comm, cf->fns[i]))
2748       return affine_fn ();
2749 
2750   return comm;
2751 }
2752 
2753 /* Returns the base of the affine function FN.  */
2754 
2755 static tree
affine_function_base(affine_fn fn)2756 affine_function_base (affine_fn fn)
2757 {
2758   return fn[0];
2759 }
2760 
2761 /* Returns true if FN is a constant.  */
2762 
2763 static bool
affine_function_constant_p(affine_fn fn)2764 affine_function_constant_p (affine_fn fn)
2765 {
2766   unsigned i;
2767   tree coef;
2768 
2769   for (i = 1; fn.iterate (i, &coef); i++)
2770     if (!integer_zerop (coef))
2771       return false;
2772 
2773   return true;
2774 }
2775 
2776 /* Returns true if FN is the zero constant function.  */
2777 
2778 static bool
affine_function_zero_p(affine_fn fn)2779 affine_function_zero_p (affine_fn fn)
2780 {
2781   return (integer_zerop (affine_function_base (fn))
2782 	  && affine_function_constant_p (fn));
2783 }
2784 
2785 /* Returns a signed integer type with the largest precision from TA
2786    and TB.  */
2787 
2788 static tree
signed_type_for_types(tree ta,tree tb)2789 signed_type_for_types (tree ta, tree tb)
2790 {
2791   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2792     return signed_type_for (ta);
2793   else
2794     return signed_type_for (tb);
2795 }
2796 
2797 /* Applies operation OP on affine functions FNA and FNB, and returns the
2798    result.  */
2799 
2800 static affine_fn
affine_fn_op(enum tree_code op,affine_fn fna,affine_fn fnb)2801 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2802 {
2803   unsigned i, n, m;
2804   affine_fn ret;
2805   tree coef;
2806 
2807   if (fnb.length () > fna.length ())
2808     {
2809       n = fna.length ();
2810       m = fnb.length ();
2811     }
2812   else
2813     {
2814       n = fnb.length ();
2815       m = fna.length ();
2816     }
2817 
2818   ret.create (m);
2819   for (i = 0; i < n; i++)
2820     {
2821       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2822 					 TREE_TYPE (fnb[i]));
2823       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2824     }
2825 
2826   for (; fna.iterate (i, &coef); i++)
2827     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2828 				 coef, integer_zero_node));
2829   for (; fnb.iterate (i, &coef); i++)
2830     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2831 				 integer_zero_node, coef));
2832 
2833   return ret;
2834 }
2835 
2836 /* Returns the sum of affine functions FNA and FNB.  */
2837 
2838 static affine_fn
affine_fn_plus(affine_fn fna,affine_fn fnb)2839 affine_fn_plus (affine_fn fna, affine_fn fnb)
2840 {
2841   return affine_fn_op (PLUS_EXPR, fna, fnb);
2842 }
2843 
2844 /* Returns the difference of affine functions FNA and FNB.  */
2845 
2846 static affine_fn
affine_fn_minus(affine_fn fna,affine_fn fnb)2847 affine_fn_minus (affine_fn fna, affine_fn fnb)
2848 {
2849   return affine_fn_op (MINUS_EXPR, fna, fnb);
2850 }
2851 
2852 /* Frees affine function FN.  */
2853 
2854 static void
affine_fn_free(affine_fn fn)2855 affine_fn_free (affine_fn fn)
2856 {
2857   fn.release ();
2858 }
2859 
2860 /* Determine for each subscript in the data dependence relation DDR
2861    the distance.  */
2862 
2863 static void
compute_subscript_distance(struct data_dependence_relation * ddr)2864 compute_subscript_distance (struct data_dependence_relation *ddr)
2865 {
2866   conflict_function *cf_a, *cf_b;
2867   affine_fn fn_a, fn_b, diff;
2868 
2869   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2870     {
2871       unsigned int i;
2872 
2873       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2874  	{
2875  	  struct subscript *subscript;
2876 
2877  	  subscript = DDR_SUBSCRIPT (ddr, i);
2878  	  cf_a = SUB_CONFLICTS_IN_A (subscript);
2879  	  cf_b = SUB_CONFLICTS_IN_B (subscript);
2880 
2881 	  fn_a = common_affine_function (cf_a);
2882 	  fn_b = common_affine_function (cf_b);
2883 	  if (!fn_a.exists () || !fn_b.exists ())
2884 	    {
2885 	      SUB_DISTANCE (subscript) = chrec_dont_know;
2886 	      return;
2887 	    }
2888 	  diff = affine_fn_minus (fn_a, fn_b);
2889 
2890  	  if (affine_function_constant_p (diff))
2891  	    SUB_DISTANCE (subscript) = affine_function_base (diff);
2892  	  else
2893  	    SUB_DISTANCE (subscript) = chrec_dont_know;
2894 
2895 	  affine_fn_free (diff);
2896  	}
2897     }
2898 }
2899 
2900 /* Returns the conflict function for "unknown".  */
2901 
2902 static conflict_function *
conflict_fn_not_known(void)2903 conflict_fn_not_known (void)
2904 {
2905   conflict_function *fn = XCNEW (conflict_function);
2906   fn->n = NOT_KNOWN;
2907 
2908   return fn;
2909 }
2910 
2911 /* Returns the conflict function for "independent".  */
2912 
2913 static conflict_function *
conflict_fn_no_dependence(void)2914 conflict_fn_no_dependence (void)
2915 {
2916   conflict_function *fn = XCNEW (conflict_function);
2917   fn->n = NO_DEPENDENCE;
2918 
2919   return fn;
2920 }
2921 
2922 /* Returns true if the address of OBJ is invariant in LOOP.  */
2923 
2924 static bool
object_address_invariant_in_loop_p(const class loop * loop,const_tree obj)2925 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2926 {
2927   while (handled_component_p (obj))
2928     {
2929       if (TREE_CODE (obj) == ARRAY_REF)
2930 	{
2931 	  for (int i = 1; i < 4; ++i)
2932 	    if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2933 							loop->num))
2934 	      return false;
2935 	}
2936       else if (TREE_CODE (obj) == COMPONENT_REF)
2937 	{
2938 	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2939 						      loop->num))
2940 	    return false;
2941 	}
2942       obj = TREE_OPERAND (obj, 0);
2943     }
2944 
2945   if (!INDIRECT_REF_P (obj)
2946       && TREE_CODE (obj) != MEM_REF)
2947     return true;
2948 
2949   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2950 						  loop->num);
2951 }
2952 
2953 /* Returns false if we can prove that data references A and B do not alias,
2954    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2955    considered.  */
2956 
2957 bool
dr_may_alias_p(const struct data_reference * a,const struct data_reference * b,class loop * loop_nest)2958 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2959 		class loop *loop_nest)
2960 {
2961   tree addr_a = DR_BASE_OBJECT (a);
2962   tree addr_b = DR_BASE_OBJECT (b);
2963 
2964   /* If we are not processing a loop nest but scalar code we
2965      do not need to care about possible cross-iteration dependences
2966      and thus can process the full original reference.  Do so,
2967      similar to how loop invariant motion applies extra offset-based
2968      disambiguation.  */
2969   if (!loop_nest)
2970     {
2971       aff_tree off1, off2;
2972       poly_widest_int size1, size2;
2973       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2974       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2975       aff_combination_scale (&off1, -1);
2976       aff_combination_add (&off2, &off1);
2977       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2978 	return false;
2979     }
2980 
2981   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2982       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2983       /* For cross-iteration dependences the cliques must be valid for the
2984 	 whole loop, not just individual iterations.  */
2985       && (!loop_nest
2986 	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
2987 	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
2988       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2989       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2990     return false;
2991 
2992   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2993      do not know the size of the base-object.  So we cannot do any
2994      offset/overlap based analysis but have to rely on points-to
2995      information only.  */
2996   if (TREE_CODE (addr_a) == MEM_REF
2997       && (DR_UNCONSTRAINED_BASE (a)
2998 	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2999     {
3000       /* For true dependences we can apply TBAA.  */
3001       if (flag_strict_aliasing
3002 	  && DR_IS_WRITE (a) && DR_IS_READ (b)
3003 	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3004 				     get_alias_set (DR_REF (b))))
3005 	return false;
3006       if (TREE_CODE (addr_b) == MEM_REF)
3007 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3008 				       TREE_OPERAND (addr_b, 0));
3009       else
3010 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3011 				       build_fold_addr_expr (addr_b));
3012     }
3013   else if (TREE_CODE (addr_b) == MEM_REF
3014 	   && (DR_UNCONSTRAINED_BASE (b)
3015 	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3016     {
3017       /* For true dependences we can apply TBAA.  */
3018       if (flag_strict_aliasing
3019 	  && DR_IS_WRITE (a) && DR_IS_READ (b)
3020 	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3021 				     get_alias_set (DR_REF (b))))
3022 	return false;
3023       if (TREE_CODE (addr_a) == MEM_REF)
3024 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3025 				       TREE_OPERAND (addr_b, 0));
3026       else
3027 	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3028 				       TREE_OPERAND (addr_b, 0));
3029     }
3030 
3031   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3032      that is being subsetted in the loop nest.  */
3033   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3034     return refs_output_dependent_p (addr_a, addr_b);
3035   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3036     return refs_anti_dependent_p (addr_a, addr_b);
3037   return refs_may_alias_p (addr_a, addr_b);
3038 }
3039 
3040 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
3041    if it is meaningful to compare their associated access functions
3042    when checking for dependencies.  */
3043 
3044 static bool
access_fn_components_comparable_p(tree ref_a,tree ref_b)3045 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3046 {
3047   /* Allow pairs of component refs from the following sets:
3048 
3049        { REALPART_EXPR, IMAGPART_EXPR }
3050        { COMPONENT_REF }
3051        { ARRAY_REF }.  */
3052   tree_code code_a = TREE_CODE (ref_a);
3053   tree_code code_b = TREE_CODE (ref_b);
3054   if (code_a == IMAGPART_EXPR)
3055     code_a = REALPART_EXPR;
3056   if (code_b == IMAGPART_EXPR)
3057     code_b = REALPART_EXPR;
3058   if (code_a != code_b)
3059     return false;
3060 
3061   if (TREE_CODE (ref_a) == COMPONENT_REF)
3062     /* ??? We cannot simply use the type of operand #0 of the refs here as
3063        the Fortran compiler smuggles type punning into COMPONENT_REFs.
3064        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
3065     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3066 	    == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3067 
3068   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3069 			     TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3070 }
3071 
3072 /* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
3073    is true when the main indices of A and B were not comparable so we try again
3074    with alternate indices computed on an indirect reference.  */
3075 
3076 struct data_dependence_relation *
initialize_data_dependence_relation(struct data_dependence_relation * res,vec<loop_p> loop_nest,bool use_alt_indices)3077 initialize_data_dependence_relation (struct data_dependence_relation *res,
3078 				     vec<loop_p> loop_nest,
3079 				     bool use_alt_indices)
3080 {
3081   struct data_reference *a = DDR_A (res);
3082   struct data_reference *b = DDR_B (res);
3083   unsigned int i;
3084 
3085   struct indices *indices_a = &a->indices;
3086   struct indices *indices_b = &b->indices;
3087   if (use_alt_indices)
3088     {
3089       if (TREE_CODE (DR_REF (a)) != MEM_REF)
3090 	indices_a = &a->alt_indices;
3091       if (TREE_CODE (DR_REF (b)) != MEM_REF)
3092 	indices_b = &b->alt_indices;
3093     }
3094   unsigned int num_dimensions_a = indices_a->access_fns.length ();
3095   unsigned int num_dimensions_b = indices_b->access_fns.length ();
3096   if (num_dimensions_a == 0 || num_dimensions_b == 0)
3097     {
3098       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3099       return res;
3100     }
3101 
3102   /* For unconstrained bases, the root (highest-indexed) subscript
3103      describes a variation in the base of the original DR_REF rather
3104      than a component access.  We have no type that accurately describes
3105      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3106      applying this subscript) so limit the search to the last real
3107      component access.
3108 
3109      E.g. for:
3110 
3111 	void
3112 	f (int a[][8], int b[][8])
3113 	{
3114 	  for (int i = 0; i < 8; ++i)
3115 	    a[i * 2][0] = b[i][0];
3116 	}
3117 
3118      the a and b accesses have a single ARRAY_REF component reference [0]
3119      but have two subscripts.  */
3120   if (indices_a->unconstrained_base)
3121     num_dimensions_a -= 1;
3122   if (indices_b->unconstrained_base)
3123     num_dimensions_b -= 1;
3124 
3125   /* These structures describe sequences of component references in
3126      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
3127      specific access function.  */
3128   struct {
3129     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3130        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3131        indices.  In C notation, these are the indices of the rightmost
3132        component references; e.g. for a sequence .b.c.d, the start
3133        index is for .d.  */
3134     unsigned int start_a;
3135     unsigned int start_b;
3136 
3137     /* The sequence contains LENGTH consecutive access functions from
3138        each DR.  */
3139     unsigned int length;
3140 
3141     /* The enclosing objects for the A and B sequences respectively,
3142        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3143        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
3144     tree object_a;
3145     tree object_b;
3146   } full_seq = {}, struct_seq = {};
3147 
3148   /* Before each iteration of the loop:
3149 
3150      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3151      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
3152   unsigned int index_a = 0;
3153   unsigned int index_b = 0;
3154   tree ref_a = DR_REF (a);
3155   tree ref_b = DR_REF (b);
3156 
3157   /* Now walk the component references from the final DR_REFs back up to
3158      the enclosing base objects.  Each component reference corresponds
3159      to one access function in the DR, with access function 0 being for
3160      the final DR_REF and the highest-indexed access function being the
3161      one that is applied to the base of the DR.
3162 
3163      Look for a sequence of component references whose access functions
3164      are comparable (see access_fn_components_comparable_p).  If more
3165      than one such sequence exists, pick the one nearest the base
3166      (which is the leftmost sequence in C notation).  Store this sequence
3167      in FULL_SEQ.
3168 
3169      For example, if we have:
3170 
3171 	struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3172 
3173 	A: a[0][i].s.c.d
3174 	B: __real b[0][i].s.e[i].f
3175 
3176      (where d is the same type as the real component of f) then the access
3177      functions would be:
3178 
3179 			 0   1   2   3
3180 	A:              .d  .c  .s [i]
3181 
3182 		 0   1   2   3   4   5
3183 	B:  __real  .f [i]  .e  .s [i]
3184 
3185      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3186      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
3187      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
3188      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3189      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
3190      index foo[10] arrays, so is again comparable.  The sequence is
3191      therefore:
3192 
3193         A: [1, 3]  (i.e. [i].s.c)
3194         B: [3, 5]  (i.e. [i].s.e)
3195 
3196      Also look for sequences of component references whose access
3197      functions are comparable and whose enclosing objects have the same
3198      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
3199      example, STRUCT_SEQ would be:
3200 
3201         A: [1, 2]  (i.e. s.c)
3202         B: [3, 4]  (i.e. s.e)  */
3203   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3204     {
3205       /* The alternate indices form always has a single dimension
3206 	 with unconstrained base.  */
3207       gcc_assert (!use_alt_indices);
3208 
3209       /* REF_A and REF_B must be one of the component access types
3210 	 allowed by dr_analyze_indices.  */
3211       gcc_checking_assert (access_fn_component_p (ref_a));
3212       gcc_checking_assert (access_fn_component_p (ref_b));
3213 
3214       /* Get the immediately-enclosing objects for REF_A and REF_B,
3215 	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3216 	 and DR_ACCESS_FN (B, INDEX_B).  */
3217       tree object_a = TREE_OPERAND (ref_a, 0);
3218       tree object_b = TREE_OPERAND (ref_b, 0);
3219 
3220       tree type_a = TREE_TYPE (object_a);
3221       tree type_b = TREE_TYPE (object_b);
3222       if (access_fn_components_comparable_p (ref_a, ref_b))
3223 	{
3224 	  /* This pair of component accesses is comparable for dependence
3225 	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3226 	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
3227 	  if (full_seq.start_a + full_seq.length != index_a
3228 	      || full_seq.start_b + full_seq.length != index_b)
3229 	    {
3230 	      /* The accesses don't extend the current sequence,
3231 		 so start a new one here.  */
3232 	      full_seq.start_a = index_a;
3233 	      full_seq.start_b = index_b;
3234 	      full_seq.length = 0;
3235 	    }
3236 
3237 	  /* Add this pair of references to the sequence.  */
3238 	  full_seq.length += 1;
3239 	  full_seq.object_a = object_a;
3240 	  full_seq.object_b = object_b;
3241 
3242 	  /* If the enclosing objects are structures (and thus have the
3243 	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
3244 	  if (TREE_CODE (type_a) == RECORD_TYPE)
3245 	    struct_seq = full_seq;
3246 
3247 	  /* Move to the next containing reference for both A and B.  */
3248 	  ref_a = object_a;
3249 	  ref_b = object_b;
3250 	  index_a += 1;
3251 	  index_b += 1;
3252 	  continue;
3253 	}
3254 
3255       /* Try to approach equal type sizes.  */
3256       if (!COMPLETE_TYPE_P (type_a)
3257 	  || !COMPLETE_TYPE_P (type_b)
3258 	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3259 	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3260 	break;
3261 
3262       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3263       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3264       if (size_a <= size_b)
3265 	{
3266 	  index_a += 1;
3267 	  ref_a = object_a;
3268 	}
3269       if (size_b <= size_a)
3270 	{
3271 	  index_b += 1;
3272 	  ref_b = object_b;
3273 	}
3274     }
3275 
3276   /* See whether FULL_SEQ ends at the base and whether the two bases
3277      are equal.  We do not care about TBAA or alignment info so we can
3278      use OEP_ADDRESS_OF to avoid false negatives.  */
3279   tree base_a = indices_a->base_object;
3280   tree base_b = indices_b->base_object;
3281   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3282 		      && full_seq.start_b + full_seq.length == num_dimensions_b
3283 		      && (indices_a->unconstrained_base
3284 			  == indices_b->unconstrained_base)
3285 		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3286 		      && (types_compatible_p (TREE_TYPE (base_a),
3287 					      TREE_TYPE (base_b))
3288 			  || (!base_supports_access_fn_components_p (base_a)
3289 			      && !base_supports_access_fn_components_p (base_b)
3290 			      && operand_equal_p
3291 				   (TYPE_SIZE (TREE_TYPE (base_a)),
3292 				    TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3293 		      && (!loop_nest.exists ()
3294 			  || (object_address_invariant_in_loop_p
3295 			      (loop_nest[0], base_a))));
3296 
3297   /* If the bases are the same, we can include the base variation too.
3298      E.g. the b accesses in:
3299 
3300        for (int i = 0; i < n; ++i)
3301          b[i + 4][0] = b[i][0];
3302 
3303      have a definite dependence distance of 4, while for:
3304 
3305        for (int i = 0; i < n; ++i)
3306          a[i + 4][0] = b[i][0];
3307 
3308      the dependence distance depends on the gap between a and b.
3309 
3310      If the bases are different then we can only rely on the sequence
3311      rooted at a structure access, since arrays are allowed to overlap
3312      arbitrarily and change shape arbitrarily.  E.g. we treat this as
3313      valid code:
3314 
3315        int a[256];
3316        ...
3317        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3318 
3319      where two lvalues with the same int[4][3] type overlap, and where
3320      both lvalues are distinct from the object's declared type.  */
3321   if (same_base_p)
3322     {
3323       if (indices_a->unconstrained_base)
3324 	full_seq.length += 1;
3325     }
3326   else
3327     full_seq = struct_seq;
3328 
3329   /* Punt if we didn't find a suitable sequence.  */
3330   if (full_seq.length == 0)
3331     {
3332       if (use_alt_indices
3333 	  || (TREE_CODE (DR_REF (a)) == MEM_REF
3334 	      && TREE_CODE (DR_REF (b)) == MEM_REF)
3335 	  || may_be_nonaddressable_p (DR_REF (a))
3336 	  || may_be_nonaddressable_p (DR_REF (b)))
3337 	{
3338 	  /* Fully exhausted possibilities.  */
3339 	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3340 	  return res;
3341 	}
3342 
3343       /* Try evaluating both DRs as dereferences of pointers.  */
3344       if (!a->alt_indices.base_object
3345 	  && TREE_CODE (DR_REF (a)) != MEM_REF)
3346 	{
3347 	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3348 				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3349 				 build_int_cst
3350 				   (reference_alias_ptr_type (DR_REF (a)), 0));
3351 	  dr_analyze_indices (&a->alt_indices, alt_ref,
3352 			      loop_preheader_edge (loop_nest[0]),
3353 			      loop_containing_stmt (DR_STMT (a)));
3354 	}
3355       if (!b->alt_indices.base_object
3356 	  && TREE_CODE (DR_REF (b)) != MEM_REF)
3357 	{
3358 	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3359 				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3360 				 build_int_cst
3361 				   (reference_alias_ptr_type (DR_REF (b)), 0));
3362 	  dr_analyze_indices (&b->alt_indices, alt_ref,
3363 			      loop_preheader_edge (loop_nest[0]),
3364 			      loop_containing_stmt (DR_STMT (b)));
3365 	}
3366       return initialize_data_dependence_relation (res, loop_nest, true);
3367     }
3368 
3369   if (!same_base_p)
3370     {
3371       /* Partial overlap is possible for different bases when strict aliasing
3372 	 is not in effect.  It's also possible if either base involves a union
3373 	 access; e.g. for:
3374 
3375 	   struct s1 { int a[2]; };
3376 	   struct s2 { struct s1 b; int c; };
3377 	   struct s3 { int d; struct s1 e; };
3378 	   union u { struct s2 f; struct s3 g; } *p, *q;
3379 
3380 	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3381 	 "p->g.e" (base "p->g") and might partially overlap the s1 at
3382 	 "q->g.e" (base "q->g").  */
3383       if (!flag_strict_aliasing
3384 	  || ref_contains_union_access_p (full_seq.object_a)
3385 	  || ref_contains_union_access_p (full_seq.object_b))
3386 	{
3387 	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3388 	  return res;
3389 	}
3390 
3391       DDR_COULD_BE_INDEPENDENT_P (res) = true;
3392       if (!loop_nest.exists ()
3393 	  || (object_address_invariant_in_loop_p (loop_nest[0],
3394 						  full_seq.object_a)
3395 	      && object_address_invariant_in_loop_p (loop_nest[0],
3396 						     full_seq.object_b)))
3397 	{
3398 	  DDR_OBJECT_A (res) = full_seq.object_a;
3399 	  DDR_OBJECT_B (res) = full_seq.object_b;
3400 	}
3401     }
3402 
3403   DDR_AFFINE_P (res) = true;
3404   DDR_ARE_DEPENDENT (res) = NULL_TREE;
3405   DDR_SUBSCRIPTS (res).create (full_seq.length);
3406   DDR_LOOP_NEST (res) = loop_nest;
3407   DDR_SELF_REFERENCE (res) = false;
3408 
3409   for (i = 0; i < full_seq.length; ++i)
3410     {
3411       struct subscript *subscript;
3412 
3413       subscript = XNEW (struct subscript);
3414       SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3415       SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3416       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3417       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3418       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3419       SUB_DISTANCE (subscript) = chrec_dont_know;
3420       DDR_SUBSCRIPTS (res).safe_push (subscript);
3421     }
3422 
3423   return res;
3424 }
3425 
3426 /* Initialize a data dependence relation between data accesses A and
3427    B.  NB_LOOPS is the number of loops surrounding the references: the
3428    size of the classic distance/direction vectors.  */
3429 
3430 struct data_dependence_relation *
initialize_data_dependence_relation(struct data_reference * a,struct data_reference * b,vec<loop_p> loop_nest)3431 initialize_data_dependence_relation (struct data_reference *a,
3432 				     struct data_reference *b,
3433 				     vec<loop_p> loop_nest)
3434 {
3435   data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3436   DDR_A (res) = a;
3437   DDR_B (res) = b;
3438   DDR_LOOP_NEST (res).create (0);
3439   DDR_SUBSCRIPTS (res).create (0);
3440   DDR_DIR_VECTS (res).create (0);
3441   DDR_DIST_VECTS (res).create (0);
3442 
3443   if (a == NULL || b == NULL)
3444     {
3445       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3446       return res;
3447     }
3448 
3449   /* If the data references do not alias, then they are independent.  */
3450   if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3451     {
3452       DDR_ARE_DEPENDENT (res) = chrec_known;
3453       return res;
3454     }
3455 
3456   return initialize_data_dependence_relation (res, loop_nest, false);
3457 }
3458 
3459 
3460 /* Frees memory used by the conflict function F.  */
3461 
3462 static void
free_conflict_function(conflict_function * f)3463 free_conflict_function (conflict_function *f)
3464 {
3465   unsigned i;
3466 
3467   if (CF_NONTRIVIAL_P (f))
3468     {
3469       for (i = 0; i < f->n; i++)
3470 	affine_fn_free (f->fns[i]);
3471     }
3472   free (f);
3473 }
3474 
3475 /* Frees memory used by SUBSCRIPTS.  */
3476 
3477 static void
free_subscripts(vec<subscript_p> subscripts)3478 free_subscripts (vec<subscript_p> subscripts)
3479 {
3480   for (subscript_p s : subscripts)
3481     {
3482       free_conflict_function (s->conflicting_iterations_in_a);
3483       free_conflict_function (s->conflicting_iterations_in_b);
3484       free (s);
3485     }
3486   subscripts.release ();
3487 }
3488 
3489 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3490    description.  */
3491 
3492 static inline void
finalize_ddr_dependent(struct data_dependence_relation * ddr,tree chrec)3493 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3494 			tree chrec)
3495 {
3496   DDR_ARE_DEPENDENT (ddr) = chrec;
3497   free_subscripts (DDR_SUBSCRIPTS (ddr));
3498   DDR_SUBSCRIPTS (ddr).create (0);
3499 }
3500 
3501 /* The dependence relation DDR cannot be represented by a distance
3502    vector.  */
3503 
3504 static inline void
non_affine_dependence_relation(struct data_dependence_relation * ddr)3505 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3506 {
3507   if (dump_file && (dump_flags & TDF_DETAILS))
3508     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3509 
3510   DDR_AFFINE_P (ddr) = false;
3511 }
3512 
3513 
3514 
3515 /* This section contains the classic Banerjee tests.  */
3516 
3517 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3518    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
3519 
3520 static inline bool
ziv_subscript_p(const_tree chrec_a,const_tree chrec_b)3521 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3522 {
3523   return (evolution_function_is_constant_p (chrec_a)
3524 	  && evolution_function_is_constant_p (chrec_b));
3525 }
3526 
3527 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3528    variable, i.e., if the SIV (Single Index Variable) test is true.  */
3529 
3530 static bool
siv_subscript_p(const_tree chrec_a,const_tree chrec_b)3531 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3532 {
3533   if ((evolution_function_is_constant_p (chrec_a)
3534        && evolution_function_is_univariate_p (chrec_b))
3535       || (evolution_function_is_constant_p (chrec_b)
3536 	  && evolution_function_is_univariate_p (chrec_a)))
3537     return true;
3538 
3539   if (evolution_function_is_univariate_p (chrec_a)
3540       && evolution_function_is_univariate_p (chrec_b))
3541     {
3542       switch (TREE_CODE (chrec_a))
3543 	{
3544 	case POLYNOMIAL_CHREC:
3545 	  switch (TREE_CODE (chrec_b))
3546 	    {
3547 	    case POLYNOMIAL_CHREC:
3548 	      if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3549 		return false;
3550 	      /* FALLTHRU */
3551 
3552 	    default:
3553 	      return true;
3554 	    }
3555 
3556 	default:
3557 	  return true;
3558 	}
3559     }
3560 
3561   return false;
3562 }
3563 
3564 /* Creates a conflict function with N dimensions.  The affine functions
3565    in each dimension follow.  */
3566 
3567 static conflict_function *
conflict_fn(unsigned n,...)3568 conflict_fn (unsigned n, ...)
3569 {
3570   unsigned i;
3571   conflict_function *ret = XCNEW (conflict_function);
3572   va_list ap;
3573 
3574   gcc_assert (n > 0 && n <= MAX_DIM);
3575   va_start (ap, n);
3576 
3577   ret->n = n;
3578   for (i = 0; i < n; i++)
3579     ret->fns[i] = va_arg (ap, affine_fn);
3580   va_end (ap);
3581 
3582   return ret;
3583 }
3584 
3585 /* Returns constant affine function with value CST.  */
3586 
3587 static affine_fn
affine_fn_cst(tree cst)3588 affine_fn_cst (tree cst)
3589 {
3590   affine_fn fn;
3591   fn.create (1);
3592   fn.quick_push (cst);
3593   return fn;
3594 }
3595 
3596 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
3597 
3598 static affine_fn
affine_fn_univar(tree cst,unsigned dim,tree coef)3599 affine_fn_univar (tree cst, unsigned dim, tree coef)
3600 {
3601   affine_fn fn;
3602   fn.create (dim + 1);
3603   unsigned i;
3604 
3605   gcc_assert (dim > 0);
3606   fn.quick_push (cst);
3607   for (i = 1; i < dim; i++)
3608     fn.quick_push (integer_zero_node);
3609   fn.quick_push (coef);
3610   return fn;
3611 }
3612 
3613 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
3614    *OVERLAPS_B are initialized to the functions that describe the
3615    relation between the elements accessed twice by CHREC_A and
3616    CHREC_B.  For k >= 0, the following property is verified:
3617 
3618    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3619 
3620 static void
analyze_ziv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)3621 analyze_ziv_subscript (tree chrec_a,
3622 		       tree chrec_b,
3623 		       conflict_function **overlaps_a,
3624 		       conflict_function **overlaps_b,
3625 		       tree *last_conflicts)
3626 {
3627   tree type, difference;
3628   dependence_stats.num_ziv++;
3629 
3630   if (dump_file && (dump_flags & TDF_DETAILS))
3631     fprintf (dump_file, "(analyze_ziv_subscript \n");
3632 
3633   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3634   chrec_a = chrec_convert (type, chrec_a, NULL);
3635   chrec_b = chrec_convert (type, chrec_b, NULL);
3636   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3637 
3638   switch (TREE_CODE (difference))
3639     {
3640     case INTEGER_CST:
3641       if (integer_zerop (difference))
3642 	{
3643 	  /* The difference is equal to zero: the accessed index
3644 	     overlaps for each iteration in the loop.  */
3645 	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3646 	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3647 	  *last_conflicts = chrec_dont_know;
3648 	  dependence_stats.num_ziv_dependent++;
3649 	}
3650       else
3651 	{
3652 	  /* The accesses do not overlap.  */
3653 	  *overlaps_a = conflict_fn_no_dependence ();
3654 	  *overlaps_b = conflict_fn_no_dependence ();
3655 	  *last_conflicts = integer_zero_node;
3656 	  dependence_stats.num_ziv_independent++;
3657 	}
3658       break;
3659 
3660     default:
3661       /* We're not sure whether the indexes overlap.  For the moment,
3662 	 conservatively answer "don't know".  */
3663       if (dump_file && (dump_flags & TDF_DETAILS))
3664 	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3665 
3666       *overlaps_a = conflict_fn_not_known ();
3667       *overlaps_b = conflict_fn_not_known ();
3668       *last_conflicts = chrec_dont_know;
3669       dependence_stats.num_ziv_unimplemented++;
3670       break;
3671     }
3672 
3673   if (dump_file && (dump_flags & TDF_DETAILS))
3674     fprintf (dump_file, ")\n");
3675 }
3676 
3677 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3678    and only if it fits to the int type.  If this is not the case, or the
3679    bound  on the number of iterations of LOOP could not be derived, returns
3680    chrec_dont_know.  */
3681 
3682 static tree
max_stmt_executions_tree(class loop * loop)3683 max_stmt_executions_tree (class loop *loop)
3684 {
3685   widest_int nit;
3686 
3687   if (!max_stmt_executions (loop, &nit))
3688     return chrec_dont_know;
3689 
3690   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3691     return chrec_dont_know;
3692 
3693   return wide_int_to_tree (unsigned_type_node, nit);
3694 }
3695 
3696 /* Determine whether the CHREC is always positive/negative.  If the expression
3697    cannot be statically analyzed, return false, otherwise set the answer into
3698    VALUE.  */
3699 
3700 static bool
chrec_is_positive(tree chrec,bool * value)3701 chrec_is_positive (tree chrec, bool *value)
3702 {
3703   bool value0, value1, value2;
3704   tree end_value, nb_iter;
3705 
3706   switch (TREE_CODE (chrec))
3707     {
3708     case POLYNOMIAL_CHREC:
3709       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3710 	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3711 	return false;
3712 
3713       /* FIXME -- overflows.  */
3714       if (value0 == value1)
3715 	{
3716 	  *value = value0;
3717 	  return true;
3718 	}
3719 
3720       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3721 	 and the proof consists in showing that the sign never
3722 	 changes during the execution of the loop, from 0 to
3723 	 loop->nb_iterations.  */
3724       if (!evolution_function_is_affine_p (chrec))
3725 	return false;
3726 
3727       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3728       if (chrec_contains_undetermined (nb_iter))
3729 	return false;
3730 
3731 #if 0
3732       /* TODO -- If the test is after the exit, we may decrease the number of
3733 	 iterations by one.  */
3734       if (after_exit)
3735 	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3736 #endif
3737 
3738       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3739 
3740       if (!chrec_is_positive (end_value, &value2))
3741 	return false;
3742 
3743       *value = value0;
3744       return value0 == value1;
3745 
3746     case INTEGER_CST:
3747       switch (tree_int_cst_sgn (chrec))
3748 	{
3749 	case -1:
3750 	  *value = false;
3751 	  break;
3752 	case 1:
3753 	  *value = true;
3754 	  break;
3755 	default:
3756 	  return false;
3757 	}
3758       return true;
3759 
3760     default:
3761       return false;
3762     }
3763 }
3764 
3765 
3766 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3767    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
3768    *OVERLAPS_B are initialized to the functions that describe the
3769    relation between the elements accessed twice by CHREC_A and
3770    CHREC_B.  For k >= 0, the following property is verified:
3771 
3772    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3773 
3774 static void
analyze_siv_subscript_cst_affine(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)3775 analyze_siv_subscript_cst_affine (tree chrec_a,
3776 				  tree chrec_b,
3777 				  conflict_function **overlaps_a,
3778 				  conflict_function **overlaps_b,
3779 				  tree *last_conflicts)
3780 {
3781   bool value0, value1, value2;
3782   tree type, difference, tmp;
3783 
3784   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3785   chrec_a = chrec_convert (type, chrec_a, NULL);
3786   chrec_b = chrec_convert (type, chrec_b, NULL);
3787   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3788 
3789   /* Special case overlap in the first iteration.  */
3790   if (integer_zerop (difference))
3791     {
3792       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3793       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3794       *last_conflicts = integer_one_node;
3795       return;
3796     }
3797 
3798   if (!chrec_is_positive (initial_condition (difference), &value0))
3799     {
3800       if (dump_file && (dump_flags & TDF_DETAILS))
3801 	fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3802 
3803       dependence_stats.num_siv_unimplemented++;
3804       *overlaps_a = conflict_fn_not_known ();
3805       *overlaps_b = conflict_fn_not_known ();
3806       *last_conflicts = chrec_dont_know;
3807       return;
3808     }
3809   else
3810     {
3811       if (value0 == false)
3812 	{
3813 	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3814 	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3815 	    {
3816 	      if (dump_file && (dump_flags & TDF_DETAILS))
3817 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
3818 
3819 	      *overlaps_a = conflict_fn_not_known ();
3820 	      *overlaps_b = conflict_fn_not_known ();
3821 	      *last_conflicts = chrec_dont_know;
3822 	      dependence_stats.num_siv_unimplemented++;
3823 	      return;
3824 	    }
3825 	  else
3826 	    {
3827 	      if (value1 == true)
3828 		{
3829 		  /* Example:
3830 		     chrec_a = 12
3831 		     chrec_b = {10, +, 1}
3832 		  */
3833 
3834 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3835 		    {
3836 		      HOST_WIDE_INT numiter;
3837 		      class loop *loop = get_chrec_loop (chrec_b);
3838 
3839 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3840 		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
3841 					 fold_build1 (ABS_EXPR, type, difference),
3842 					 CHREC_RIGHT (chrec_b));
3843 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3844 		      *last_conflicts = integer_one_node;
3845 
3846 
3847 		      /* Perform weak-zero siv test to see if overlap is
3848 			 outside the loop bounds.  */
3849 		      numiter = max_stmt_executions_int (loop);
3850 
3851 		      if (numiter >= 0
3852 			  && compare_tree_int (tmp, numiter) > 0)
3853 			{
3854 			  free_conflict_function (*overlaps_a);
3855 			  free_conflict_function (*overlaps_b);
3856 			  *overlaps_a = conflict_fn_no_dependence ();
3857 			  *overlaps_b = conflict_fn_no_dependence ();
3858 			  *last_conflicts = integer_zero_node;
3859 			  dependence_stats.num_siv_independent++;
3860 			  return;
3861 			}
3862 		      dependence_stats.num_siv_dependent++;
3863 		      return;
3864 		    }
3865 
3866 		  /* When the step does not divide the difference, there are
3867 		     no overlaps.  */
3868 		  else
3869 		    {
3870 		      *overlaps_a = conflict_fn_no_dependence ();
3871 		      *overlaps_b = conflict_fn_no_dependence ();
3872 		      *last_conflicts = integer_zero_node;
3873 		      dependence_stats.num_siv_independent++;
3874 		      return;
3875 		    }
3876 		}
3877 
3878 	      else
3879 		{
3880 		  /* Example:
3881 		     chrec_a = 12
3882 		     chrec_b = {10, +, -1}
3883 
3884 		     In this case, chrec_a will not overlap with chrec_b.  */
3885 		  *overlaps_a = conflict_fn_no_dependence ();
3886 		  *overlaps_b = conflict_fn_no_dependence ();
3887 		  *last_conflicts = integer_zero_node;
3888 		  dependence_stats.num_siv_independent++;
3889 		  return;
3890 		}
3891 	    }
3892 	}
3893       else
3894 	{
3895 	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3896 	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3897 	    {
3898 	      if (dump_file && (dump_flags & TDF_DETAILS))
3899 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
3900 
3901 	      *overlaps_a = conflict_fn_not_known ();
3902 	      *overlaps_b = conflict_fn_not_known ();
3903 	      *last_conflicts = chrec_dont_know;
3904 	      dependence_stats.num_siv_unimplemented++;
3905 	      return;
3906 	    }
3907 	  else
3908 	    {
3909 	      if (value2 == false)
3910 		{
3911 		  /* Example:
3912 		     chrec_a = 3
3913 		     chrec_b = {10, +, -1}
3914 		  */
3915 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3916 		    {
3917 		      HOST_WIDE_INT numiter;
3918 		      class loop *loop = get_chrec_loop (chrec_b);
3919 
3920 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3921 		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3922 					 CHREC_RIGHT (chrec_b));
3923 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3924 		      *last_conflicts = integer_one_node;
3925 
3926 		      /* Perform weak-zero siv test to see if overlap is
3927 			 outside the loop bounds.  */
3928 		      numiter = max_stmt_executions_int (loop);
3929 
3930 		      if (numiter >= 0
3931 			  && compare_tree_int (tmp, numiter) > 0)
3932 			{
3933 			  free_conflict_function (*overlaps_a);
3934 			  free_conflict_function (*overlaps_b);
3935 			  *overlaps_a = conflict_fn_no_dependence ();
3936 			  *overlaps_b = conflict_fn_no_dependence ();
3937 			  *last_conflicts = integer_zero_node;
3938 			  dependence_stats.num_siv_independent++;
3939 			  return;
3940 			}
3941 		      dependence_stats.num_siv_dependent++;
3942 		      return;
3943 		    }
3944 
3945 		  /* When the step does not divide the difference, there
3946 		     are no overlaps.  */
3947 		  else
3948 		    {
3949 		      *overlaps_a = conflict_fn_no_dependence ();
3950 		      *overlaps_b = conflict_fn_no_dependence ();
3951 		      *last_conflicts = integer_zero_node;
3952 		      dependence_stats.num_siv_independent++;
3953 		      return;
3954 		    }
3955 		}
3956 	      else
3957 		{
3958 		  /* Example:
3959 		     chrec_a = 3
3960 		     chrec_b = {4, +, 1}
3961 
3962 		     In this case, chrec_a will not overlap with chrec_b.  */
3963 		  *overlaps_a = conflict_fn_no_dependence ();
3964 		  *overlaps_b = conflict_fn_no_dependence ();
3965 		  *last_conflicts = integer_zero_node;
3966 		  dependence_stats.num_siv_independent++;
3967 		  return;
3968 		}
3969 	    }
3970 	}
3971     }
3972 }
3973 
3974 /* Helper recursive function for initializing the matrix A.  Returns
3975    the initial value of CHREC.  */
3976 
3977 static tree
initialize_matrix_A(lambda_matrix A,tree chrec,unsigned index,int mult)3978 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3979 {
3980   gcc_assert (chrec);
3981 
3982   switch (TREE_CODE (chrec))
3983     {
3984     case POLYNOMIAL_CHREC:
3985       HOST_WIDE_INT chrec_right;
3986       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
3987 	return chrec_dont_know;
3988       chrec_right = int_cst_value (CHREC_RIGHT (chrec));
3989       /* We want to be able to negate without overflow.  */
3990       if (chrec_right == HOST_WIDE_INT_MIN)
3991 	return chrec_dont_know;
3992       A[index][0] = mult * chrec_right;
3993       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3994 
3995     case PLUS_EXPR:
3996     case MULT_EXPR:
3997     case MINUS_EXPR:
3998       {
3999 	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4000 	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4001 
4002 	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4003       }
4004 
4005     CASE_CONVERT:
4006       {
4007 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4008 	return chrec_convert (chrec_type (chrec), op, NULL);
4009       }
4010 
4011     case BIT_NOT_EXPR:
4012       {
4013 	/* Handle ~X as -1 - X.  */
4014 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4015 	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4016 			      build_int_cst (TREE_TYPE (chrec), -1), op);
4017       }
4018 
4019     case INTEGER_CST:
4020       return chrec;
4021 
4022     default:
4023       gcc_unreachable ();
4024       return NULL_TREE;
4025     }
4026 }
4027 
4028 #define FLOOR_DIV(x,y) ((x) / (y))
4029 
4030 /* Solves the special case of the Diophantine equation:
4031    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4032 
4033    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
4034    number of iterations that loops X and Y run.  The overlaps will be
4035    constructed as evolutions in dimension DIM.  */
4036 
4037 static void
compute_overlap_steps_for_affine_univar(HOST_WIDE_INT niter,HOST_WIDE_INT step_a,HOST_WIDE_INT step_b,affine_fn * overlaps_a,affine_fn * overlaps_b,tree * last_conflicts,int dim)4038 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4039 					 HOST_WIDE_INT step_a,
4040 					 HOST_WIDE_INT step_b,
4041 					 affine_fn *overlaps_a,
4042 					 affine_fn *overlaps_b,
4043 					 tree *last_conflicts, int dim)
4044 {
4045   if (((step_a > 0 && step_b > 0)
4046        || (step_a < 0 && step_b < 0)))
4047     {
4048       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4049       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4050 
4051       gcd_steps_a_b = gcd (step_a, step_b);
4052       step_overlaps_a = step_b / gcd_steps_a_b;
4053       step_overlaps_b = step_a / gcd_steps_a_b;
4054 
4055       if (niter > 0)
4056 	{
4057 	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
4058 	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4059 	  last_conflict = tau2;
4060 	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4061 	}
4062       else
4063 	*last_conflicts = chrec_dont_know;
4064 
4065       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4066 				      build_int_cst (NULL_TREE,
4067 						     step_overlaps_a));
4068       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4069 				      build_int_cst (NULL_TREE,
4070 						     step_overlaps_b));
4071     }
4072 
4073   else
4074     {
4075       *overlaps_a = affine_fn_cst (integer_zero_node);
4076       *overlaps_b = affine_fn_cst (integer_zero_node);
4077       *last_conflicts = integer_zero_node;
4078     }
4079 }
4080 
4081 /* Solves the special case of a Diophantine equation where CHREC_A is
4082    an affine bivariate function, and CHREC_B is an affine univariate
4083    function.  For example,
4084 
4085    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4086 
4087    has the following overlapping functions:
4088 
4089    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4090    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4091    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4092 
4093    FORNOW: This is a specialized implementation for a case occurring in
4094    a common benchmark.  Implement the general algorithm.  */
4095 
4096 static void
compute_overlap_steps_for_affine_1_2(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)4097 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4098 				      conflict_function **overlaps_a,
4099 				      conflict_function **overlaps_b,
4100 				      tree *last_conflicts)
4101 {
4102   bool xz_p, yz_p, xyz_p;
4103   HOST_WIDE_INT step_x, step_y, step_z;
4104   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4105   affine_fn overlaps_a_xz, overlaps_b_xz;
4106   affine_fn overlaps_a_yz, overlaps_b_yz;
4107   affine_fn overlaps_a_xyz, overlaps_b_xyz;
4108   affine_fn ova1, ova2, ovb;
4109   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4110 
4111   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4112   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4113   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4114 
4115   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4116   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4117   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4118 
4119   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4120     {
4121       if (dump_file && (dump_flags & TDF_DETAILS))
4122 	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4123 
4124       *overlaps_a = conflict_fn_not_known ();
4125       *overlaps_b = conflict_fn_not_known ();
4126       *last_conflicts = chrec_dont_know;
4127       return;
4128     }
4129 
4130   niter = MIN (niter_x, niter_z);
4131   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4132 					   &overlaps_a_xz,
4133 					   &overlaps_b_xz,
4134 					   &last_conflicts_xz, 1);
4135   niter = MIN (niter_y, niter_z);
4136   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4137 					   &overlaps_a_yz,
4138 					   &overlaps_b_yz,
4139 					   &last_conflicts_yz, 2);
4140   niter = MIN (niter_x, niter_z);
4141   niter = MIN (niter_y, niter);
4142   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4143 					   &overlaps_a_xyz,
4144 					   &overlaps_b_xyz,
4145 					   &last_conflicts_xyz, 3);
4146 
4147   xz_p = !integer_zerop (last_conflicts_xz);
4148   yz_p = !integer_zerop (last_conflicts_yz);
4149   xyz_p = !integer_zerop (last_conflicts_xyz);
4150 
4151   if (xz_p || yz_p || xyz_p)
4152     {
4153       ova1 = affine_fn_cst (integer_zero_node);
4154       ova2 = affine_fn_cst (integer_zero_node);
4155       ovb = affine_fn_cst (integer_zero_node);
4156       if (xz_p)
4157 	{
4158 	  affine_fn t0 = ova1;
4159 	  affine_fn t2 = ovb;
4160 
4161 	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4162 	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
4163 	  affine_fn_free (t0);
4164 	  affine_fn_free (t2);
4165 	  *last_conflicts = last_conflicts_xz;
4166 	}
4167       if (yz_p)
4168 	{
4169 	  affine_fn t0 = ova2;
4170 	  affine_fn t2 = ovb;
4171 
4172 	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4173 	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
4174 	  affine_fn_free (t0);
4175 	  affine_fn_free (t2);
4176 	  *last_conflicts = last_conflicts_yz;
4177 	}
4178       if (xyz_p)
4179 	{
4180 	  affine_fn t0 = ova1;
4181 	  affine_fn t2 = ova2;
4182 	  affine_fn t4 = ovb;
4183 
4184 	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4185 	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4186 	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4187 	  affine_fn_free (t0);
4188 	  affine_fn_free (t2);
4189 	  affine_fn_free (t4);
4190 	  *last_conflicts = last_conflicts_xyz;
4191 	}
4192       *overlaps_a = conflict_fn (2, ova1, ova2);
4193       *overlaps_b = conflict_fn (1, ovb);
4194     }
4195   else
4196     {
4197       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4198       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4199       *last_conflicts = integer_zero_node;
4200     }
4201 
4202   affine_fn_free (overlaps_a_xz);
4203   affine_fn_free (overlaps_b_xz);
4204   affine_fn_free (overlaps_a_yz);
4205   affine_fn_free (overlaps_b_yz);
4206   affine_fn_free (overlaps_a_xyz);
4207   affine_fn_free (overlaps_b_xyz);
4208 }
4209 
4210 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
4211 
4212 static void
lambda_vector_copy(lambda_vector vec1,lambda_vector vec2,int size)4213 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4214 		    int size)
4215 {
4216   memcpy (vec2, vec1, size * sizeof (*vec1));
4217 }
4218 
4219 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
4220 
4221 static void
lambda_matrix_copy(lambda_matrix mat1,lambda_matrix mat2,int m,int n)4222 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4223 		    int m, int n)
4224 {
4225   int i;
4226 
4227   for (i = 0; i < m; i++)
4228     lambda_vector_copy (mat1[i], mat2[i], n);
4229 }
4230 
4231 /* Store the N x N identity matrix in MAT.  */
4232 
4233 static void
lambda_matrix_id(lambda_matrix mat,int size)4234 lambda_matrix_id (lambda_matrix mat, int size)
4235 {
4236   int i, j;
4237 
4238   for (i = 0; i < size; i++)
4239     for (j = 0; j < size; j++)
4240       mat[i][j] = (i == j) ? 1 : 0;
4241 }
4242 
4243 /* Return the index of the first nonzero element of vector VEC1 between
4244    START and N.  We must have START <= N.
4245    Returns N if VEC1 is the zero vector.  */
4246 
4247 static int
lambda_vector_first_nz(lambda_vector vec1,int n,int start)4248 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4249 {
4250   int j = start;
4251   while (j < n && vec1[j] == 0)
4252     j++;
4253   return j;
4254 }
4255 
4256 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4257    R2 = R2 + CONST1 * R1.  */
4258 
4259 static bool
lambda_matrix_row_add(lambda_matrix mat,int n,int r1,int r2,lambda_int const1)4260 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4261 		       lambda_int const1)
4262 {
4263   int i;
4264 
4265   if (const1 == 0)
4266     return true;
4267 
4268   for (i = 0; i < n; i++)
4269     {
4270       bool ovf;
4271       lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4272       if (ovf)
4273 	return false;
4274       lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4275       if (ovf || tem2 == HOST_WIDE_INT_MIN)
4276 	return false;
4277       mat[r2][i] = tem2;
4278     }
4279 
4280   return true;
4281 }
4282 
4283 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4284    and store the result in VEC2.  */
4285 
4286 static void
lambda_vector_mult_const(lambda_vector vec1,lambda_vector vec2,int size,lambda_int const1)4287 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4288 			  int size, lambda_int const1)
4289 {
4290   int i;
4291 
4292   if (const1 == 0)
4293     lambda_vector_clear (vec2, size);
4294   else
4295     for (i = 0; i < size; i++)
4296       vec2[i] = const1 * vec1[i];
4297 }
4298 
4299 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
4300 
4301 static void
lambda_vector_negate(lambda_vector vec1,lambda_vector vec2,int size)4302 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4303 		      int size)
4304 {
4305   lambda_vector_mult_const (vec1, vec2, size, -1);
4306 }
4307 
4308 /* Negate row R1 of matrix MAT which has N columns.  */
4309 
4310 static void
lambda_matrix_row_negate(lambda_matrix mat,int n,int r1)4311 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4312 {
4313   lambda_vector_negate (mat[r1], mat[r1], n);
4314 }
4315 
4316 /* Return true if two vectors are equal.  */
4317 
4318 static bool
lambda_vector_equal(lambda_vector vec1,lambda_vector vec2,int size)4319 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4320 {
4321   int i;
4322   for (i = 0; i < size; i++)
4323     if (vec1[i] != vec2[i])
4324       return false;
4325   return true;
4326 }
4327 
4328 /* Given an M x N integer matrix A, this function determines an M x
4329    M unimodular matrix U, and an M x N echelon matrix S such that
4330    "U.A = S".  This decomposition is also known as "right Hermite".
4331 
4332    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4333    Restructuring Compilers" Utpal Banerjee.  */
4334 
4335 static bool
lambda_matrix_right_hermite(lambda_matrix A,int m,int n,lambda_matrix S,lambda_matrix U)4336 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4337 			     lambda_matrix S, lambda_matrix U)
4338 {
4339   int i, j, i0 = 0;
4340 
4341   lambda_matrix_copy (A, S, m, n);
4342   lambda_matrix_id (U, m);
4343 
4344   for (j = 0; j < n; j++)
4345     {
4346       if (lambda_vector_first_nz (S[j], m, i0) < m)
4347 	{
4348 	  ++i0;
4349 	  for (i = m - 1; i >= i0; i--)
4350 	    {
4351 	      while (S[i][j] != 0)
4352 		{
4353 		  lambda_int factor, a, b;
4354 
4355 		  a = S[i-1][j];
4356 		  b = S[i][j];
4357 		  gcc_assert (a != HOST_WIDE_INT_MIN);
4358 		  factor = a / b;
4359 
4360 		  if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4361 		    return false;
4362 		  std::swap (S[i], S[i-1]);
4363 
4364 		  if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4365 		    return false;
4366 		  std::swap (U[i], U[i-1]);
4367 		}
4368 	    }
4369 	}
4370     }
4371 
4372   return true;
4373 }
4374 
4375 /* Determines the overlapping elements due to accesses CHREC_A and
4376    CHREC_B, that are affine functions.  This function cannot handle
4377    symbolic evolution functions, ie. when initial conditions are
4378    parameters, because it uses lambda matrices of integers.  */
4379 
4380 static void
analyze_subscript_affine_affine(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)4381 analyze_subscript_affine_affine (tree chrec_a,
4382 				 tree chrec_b,
4383 				 conflict_function **overlaps_a,
4384 				 conflict_function **overlaps_b,
4385 				 tree *last_conflicts)
4386 {
4387   unsigned nb_vars_a, nb_vars_b, dim;
4388   lambda_int gamma, gcd_alpha_beta;
4389   lambda_matrix A, U, S;
4390   struct obstack scratch_obstack;
4391 
4392   if (eq_evolutions_p (chrec_a, chrec_b))
4393     {
4394       /* The accessed index overlaps for each iteration in the
4395 	 loop.  */
4396       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4397       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4398       *last_conflicts = chrec_dont_know;
4399       return;
4400     }
4401   if (dump_file && (dump_flags & TDF_DETAILS))
4402     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4403 
4404   /* For determining the initial intersection, we have to solve a
4405      Diophantine equation.  This is the most time consuming part.
4406 
4407      For answering to the question: "Is there a dependence?" we have
4408      to prove that there exists a solution to the Diophantine
4409      equation, and that the solution is in the iteration domain,
4410      i.e. the solution is positive or zero, and that the solution
4411      happens before the upper bound loop.nb_iterations.  Otherwise
4412      there is no dependence.  This function outputs a description of
4413      the iterations that hold the intersections.  */
4414 
4415   nb_vars_a = nb_vars_in_chrec (chrec_a);
4416   nb_vars_b = nb_vars_in_chrec (chrec_b);
4417 
4418   gcc_obstack_init (&scratch_obstack);
4419 
4420   dim = nb_vars_a + nb_vars_b;
4421   U = lambda_matrix_new (dim, dim, &scratch_obstack);
4422   A = lambda_matrix_new (dim, 1, &scratch_obstack);
4423   S = lambda_matrix_new (dim, 1, &scratch_obstack);
4424 
4425   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4426   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4427   if (init_a == chrec_dont_know
4428       || init_b == chrec_dont_know)
4429     {
4430       if (dump_file && (dump_flags & TDF_DETAILS))
4431 	fprintf (dump_file, "affine-affine test failed: "
4432 		 "representation issue.\n");
4433       *overlaps_a = conflict_fn_not_known ();
4434       *overlaps_b = conflict_fn_not_known ();
4435       *last_conflicts = chrec_dont_know;
4436       goto end_analyze_subs_aa;
4437     }
4438   gamma = int_cst_value (init_b) - int_cst_value (init_a);
4439 
4440   /* Don't do all the hard work of solving the Diophantine equation
4441      when we already know the solution: for example,
4442      | {3, +, 1}_1
4443      | {3, +, 4}_2
4444      | gamma = 3 - 3 = 0.
4445      Then the first overlap occurs during the first iterations:
4446      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4447   */
4448   if (gamma == 0)
4449     {
4450       if (nb_vars_a == 1 && nb_vars_b == 1)
4451 	{
4452 	  HOST_WIDE_INT step_a, step_b;
4453 	  HOST_WIDE_INT niter, niter_a, niter_b;
4454 	  affine_fn ova, ovb;
4455 
4456 	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4457 	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4458 	  niter = MIN (niter_a, niter_b);
4459 	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4460 	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4461 
4462 	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4463 						   &ova, &ovb,
4464 						   last_conflicts, 1);
4465 	  *overlaps_a = conflict_fn (1, ova);
4466 	  *overlaps_b = conflict_fn (1, ovb);
4467 	}
4468 
4469       else if (nb_vars_a == 2 && nb_vars_b == 1)
4470 	compute_overlap_steps_for_affine_1_2
4471 	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4472 
4473       else if (nb_vars_a == 1 && nb_vars_b == 2)
4474 	compute_overlap_steps_for_affine_1_2
4475 	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4476 
4477       else
4478 	{
4479 	  if (dump_file && (dump_flags & TDF_DETAILS))
4480 	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4481 	  *overlaps_a = conflict_fn_not_known ();
4482 	  *overlaps_b = conflict_fn_not_known ();
4483 	  *last_conflicts = chrec_dont_know;
4484 	}
4485       goto end_analyze_subs_aa;
4486     }
4487 
4488   /* U.A = S */
4489   if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4490     {
4491       *overlaps_a = conflict_fn_not_known ();
4492       *overlaps_b = conflict_fn_not_known ();
4493       *last_conflicts = chrec_dont_know;
4494       goto end_analyze_subs_aa;
4495     }
4496 
4497   if (S[0][0] < 0)
4498     {
4499       S[0][0] *= -1;
4500       lambda_matrix_row_negate (U, dim, 0);
4501     }
4502   gcd_alpha_beta = S[0][0];
4503 
4504   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4505      but that is a quite strange case.  Instead of ICEing, answer
4506      don't know.  */
4507   if (gcd_alpha_beta == 0)
4508     {
4509       *overlaps_a = conflict_fn_not_known ();
4510       *overlaps_b = conflict_fn_not_known ();
4511       *last_conflicts = chrec_dont_know;
4512       goto end_analyze_subs_aa;
4513     }
4514 
4515   /* The classic "gcd-test".  */
4516   if (!int_divides_p (gcd_alpha_beta, gamma))
4517     {
4518       /* The "gcd-test" has determined that there is no integer
4519 	 solution, i.e. there is no dependence.  */
4520       *overlaps_a = conflict_fn_no_dependence ();
4521       *overlaps_b = conflict_fn_no_dependence ();
4522       *last_conflicts = integer_zero_node;
4523     }
4524 
4525   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
4526   else if (nb_vars_a == 1 && nb_vars_b == 1)
4527     {
4528       /* Both functions should have the same evolution sign.  */
4529       if (((A[0][0] > 0 && -A[1][0] > 0)
4530 	   || (A[0][0] < 0 && -A[1][0] < 0)))
4531 	{
4532 	  /* The solutions are given by:
4533 	     |
4534 	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
4535 	     |                           [u21 u22]    [y0]
4536 
4537 	     For a given integer t.  Using the following variables,
4538 
4539 	     | i0 = u11 * gamma / gcd_alpha_beta
4540 	     | j0 = u12 * gamma / gcd_alpha_beta
4541 	     | i1 = u21
4542 	     | j1 = u22
4543 
4544 	     the solutions are:
4545 
4546 	     | x0 = i0 + i1 * t,
4547 	     | y0 = j0 + j1 * t.  */
4548       	  HOST_WIDE_INT i0, j0, i1, j1;
4549 
4550 	  i0 = U[0][0] * gamma / gcd_alpha_beta;
4551 	  j0 = U[0][1] * gamma / gcd_alpha_beta;
4552 	  i1 = U[1][0];
4553 	  j1 = U[1][1];
4554 
4555 	  if ((i1 == 0 && i0 < 0)
4556 	      || (j1 == 0 && j0 < 0))
4557 	    {
4558 	      /* There is no solution.
4559 		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4560 		 falls in here, but for the moment we don't look at the
4561 		 upper bound of the iteration domain.  */
4562 	      *overlaps_a = conflict_fn_no_dependence ();
4563 	      *overlaps_b = conflict_fn_no_dependence ();
4564 	      *last_conflicts = integer_zero_node;
4565 	      goto end_analyze_subs_aa;
4566 	    }
4567 
4568 	  if (i1 > 0 && j1 > 0)
4569 	    {
4570 	      HOST_WIDE_INT niter_a
4571 		= max_stmt_executions_int (get_chrec_loop (chrec_a));
4572 	      HOST_WIDE_INT niter_b
4573 		= max_stmt_executions_int (get_chrec_loop (chrec_b));
4574 	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4575 
4576 	      /* (X0, Y0) is a solution of the Diophantine equation:
4577 		 "chrec_a (X0) = chrec_b (Y0)".  */
4578 	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4579 					CEIL (-j0, j1));
4580 	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
4581 	      HOST_WIDE_INT y0 = j1 * tau1 + j0;
4582 
4583 	      /* (X1, Y1) is the smallest positive solution of the eq
4584 		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4585 		 first conflict occurs.  */
4586 	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4587 	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4588 	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4589 
4590 	      if (niter > 0)
4591 		{
4592 		  /* If the overlap occurs outside of the bounds of the
4593 		     loop, there is no dependence.  */
4594 		  if (x1 >= niter_a || y1 >= niter_b)
4595 		    {
4596 		      *overlaps_a = conflict_fn_no_dependence ();
4597 		      *overlaps_b = conflict_fn_no_dependence ();
4598 		      *last_conflicts = integer_zero_node;
4599 		      goto end_analyze_subs_aa;
4600 		    }
4601 
4602 		  /* max stmt executions can get quite large, avoid
4603 		     overflows by using wide ints here.  */
4604 		  widest_int tau2
4605 		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4606 				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4607 		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4608 		  if (wi::min_precision (last_conflict, SIGNED)
4609 		      <= TYPE_PRECISION (integer_type_node))
4610 		    *last_conflicts
4611 		       = build_int_cst (integer_type_node,
4612 					last_conflict.to_shwi ());
4613 		  else
4614 		    *last_conflicts = chrec_dont_know;
4615 		}
4616 	      else
4617 		*last_conflicts = chrec_dont_know;
4618 
4619 	      *overlaps_a
4620 		= conflict_fn (1,
4621 			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
4622 						 1,
4623 						 build_int_cst (NULL_TREE, i1)));
4624 	      *overlaps_b
4625 		= conflict_fn (1,
4626 			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
4627 						 1,
4628 						 build_int_cst (NULL_TREE, j1)));
4629 	    }
4630 	  else
4631 	    {
4632 	      /* FIXME: For the moment, the upper bound of the
4633 		 iteration domain for i and j is not checked.  */
4634 	      if (dump_file && (dump_flags & TDF_DETAILS))
4635 		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4636 	      *overlaps_a = conflict_fn_not_known ();
4637 	      *overlaps_b = conflict_fn_not_known ();
4638 	      *last_conflicts = chrec_dont_know;
4639 	    }
4640 	}
4641       else
4642 	{
4643 	  if (dump_file && (dump_flags & TDF_DETAILS))
4644 	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4645 	  *overlaps_a = conflict_fn_not_known ();
4646 	  *overlaps_b = conflict_fn_not_known ();
4647 	  *last_conflicts = chrec_dont_know;
4648 	}
4649     }
4650   else
4651     {
4652       if (dump_file && (dump_flags & TDF_DETAILS))
4653 	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4654       *overlaps_a = conflict_fn_not_known ();
4655       *overlaps_b = conflict_fn_not_known ();
4656       *last_conflicts = chrec_dont_know;
4657     }
4658 
4659 end_analyze_subs_aa:
4660   obstack_free (&scratch_obstack, NULL);
4661   if (dump_file && (dump_flags & TDF_DETAILS))
4662     {
4663       fprintf (dump_file, "  (overlaps_a = ");
4664       dump_conflict_function (dump_file, *overlaps_a);
4665       fprintf (dump_file, ")\n  (overlaps_b = ");
4666       dump_conflict_function (dump_file, *overlaps_b);
4667       fprintf (dump_file, "))\n");
4668     }
4669 }
4670 
4671 /* Returns true when analyze_subscript_affine_affine can be used for
4672    determining the dependence relation between chrec_a and chrec_b,
4673    that contain symbols.  This function modifies chrec_a and chrec_b
4674    such that the analysis result is the same, and such that they don't
4675    contain symbols, and then can safely be passed to the analyzer.
4676 
4677    Example: The analysis of the following tuples of evolutions produce
4678    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4679    vs. {0, +, 1}_1
4680 
4681    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4682    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4683 */
4684 
4685 static bool
can_use_analyze_subscript_affine_affine(tree * chrec_a,tree * chrec_b)4686 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4687 {
4688   tree diff, type, left_a, left_b, right_b;
4689 
4690   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4691       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4692     /* FIXME: For the moment not handled.  Might be refined later.  */
4693     return false;
4694 
4695   type = chrec_type (*chrec_a);
4696   left_a = CHREC_LEFT (*chrec_a);
4697   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4698   diff = chrec_fold_minus (type, left_a, left_b);
4699 
4700   if (!evolution_function_is_constant_p (diff))
4701     return false;
4702 
4703   if (dump_file && (dump_flags & TDF_DETAILS))
4704     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4705 
4706   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4707 				     diff, CHREC_RIGHT (*chrec_a));
4708   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4709   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4710 				     build_int_cst (type, 0),
4711 				     right_b);
4712   return true;
4713 }
4714 
4715 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
4716    *OVERLAPS_B are initialized to the functions that describe the
4717    relation between the elements accessed twice by CHREC_A and
4718    CHREC_B.  For k >= 0, the following property is verified:
4719 
4720    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4721 
4722 static void
analyze_siv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts,int loop_nest_num)4723 analyze_siv_subscript (tree chrec_a,
4724 		       tree chrec_b,
4725 		       conflict_function **overlaps_a,
4726 		       conflict_function **overlaps_b,
4727 		       tree *last_conflicts,
4728 		       int loop_nest_num)
4729 {
4730   dependence_stats.num_siv++;
4731 
4732   if (dump_file && (dump_flags & TDF_DETAILS))
4733     fprintf (dump_file, "(analyze_siv_subscript \n");
4734 
4735   if (evolution_function_is_constant_p (chrec_a)
4736       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4737     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4738 				      overlaps_a, overlaps_b, last_conflicts);
4739 
4740   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4741 	   && evolution_function_is_constant_p (chrec_b))
4742     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4743 				      overlaps_b, overlaps_a, last_conflicts);
4744 
4745   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4746 	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4747     {
4748       if (!chrec_contains_symbols (chrec_a)
4749 	  && !chrec_contains_symbols (chrec_b))
4750 	{
4751 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
4752 					   overlaps_a, overlaps_b,
4753 					   last_conflicts);
4754 
4755 	  if (CF_NOT_KNOWN_P (*overlaps_a)
4756 	      || CF_NOT_KNOWN_P (*overlaps_b))
4757 	    dependence_stats.num_siv_unimplemented++;
4758 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4759 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
4760 	    dependence_stats.num_siv_independent++;
4761 	  else
4762 	    dependence_stats.num_siv_dependent++;
4763 	}
4764       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4765 							&chrec_b))
4766 	{
4767 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
4768 					   overlaps_a, overlaps_b,
4769 					   last_conflicts);
4770 
4771 	  if (CF_NOT_KNOWN_P (*overlaps_a)
4772 	      || CF_NOT_KNOWN_P (*overlaps_b))
4773 	    dependence_stats.num_siv_unimplemented++;
4774 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4775 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
4776 	    dependence_stats.num_siv_independent++;
4777 	  else
4778 	    dependence_stats.num_siv_dependent++;
4779 	}
4780       else
4781 	goto siv_subscript_dontknow;
4782     }
4783 
4784   else
4785     {
4786     siv_subscript_dontknow:;
4787       if (dump_file && (dump_flags & TDF_DETAILS))
4788 	fprintf (dump_file, "  siv test failed: unimplemented");
4789       *overlaps_a = conflict_fn_not_known ();
4790       *overlaps_b = conflict_fn_not_known ();
4791       *last_conflicts = chrec_dont_know;
4792       dependence_stats.num_siv_unimplemented++;
4793     }
4794 
4795   if (dump_file && (dump_flags & TDF_DETAILS))
4796     fprintf (dump_file, ")\n");
4797 }
4798 
4799 /* Returns false if we can prove that the greatest common divisor of the steps
4800    of CHREC does not divide CST, false otherwise.  */
4801 
4802 static bool
gcd_of_steps_may_divide_p(const_tree chrec,const_tree cst)4803 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4804 {
4805   HOST_WIDE_INT cd = 0, val;
4806   tree step;
4807 
4808   if (!tree_fits_shwi_p (cst))
4809     return true;
4810   val = tree_to_shwi (cst);
4811 
4812   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4813     {
4814       step = CHREC_RIGHT (chrec);
4815       if (!tree_fits_shwi_p (step))
4816 	return true;
4817       cd = gcd (cd, tree_to_shwi (step));
4818       chrec = CHREC_LEFT (chrec);
4819     }
4820 
4821   return val % cd == 0;
4822 }
4823 
4824 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4825    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
4826    functions that describe the relation between the elements accessed
4827    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4828    is verified:
4829 
4830    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4831 
4832 static void
analyze_miv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts,class loop * loop_nest)4833 analyze_miv_subscript (tree chrec_a,
4834 		       tree chrec_b,
4835 		       conflict_function **overlaps_a,
4836 		       conflict_function **overlaps_b,
4837 		       tree *last_conflicts,
4838 		       class loop *loop_nest)
4839 {
4840   tree type, difference;
4841 
4842   dependence_stats.num_miv++;
4843   if (dump_file && (dump_flags & TDF_DETAILS))
4844     fprintf (dump_file, "(analyze_miv_subscript \n");
4845 
4846   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4847   chrec_a = chrec_convert (type, chrec_a, NULL);
4848   chrec_b = chrec_convert (type, chrec_b, NULL);
4849   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4850 
4851   if (eq_evolutions_p (chrec_a, chrec_b))
4852     {
4853       /* Access functions are the same: all the elements are accessed
4854 	 in the same order.  */
4855       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4856       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4857       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4858       dependence_stats.num_miv_dependent++;
4859     }
4860 
4861   else if (evolution_function_is_constant_p (difference)
4862 	   && evolution_function_is_affine_multivariate_p (chrec_a,
4863 							   loop_nest->num)
4864 	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
4865     {
4866       /* testsuite/.../ssa-chrec-33.c
4867 	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4868 
4869 	 The difference is 1, and all the evolution steps are multiples
4870 	 of 2, consequently there are no overlapping elements.  */
4871       *overlaps_a = conflict_fn_no_dependence ();
4872       *overlaps_b = conflict_fn_no_dependence ();
4873       *last_conflicts = integer_zero_node;
4874       dependence_stats.num_miv_independent++;
4875     }
4876 
4877   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4878 	   && !chrec_contains_symbols (chrec_a, loop_nest)
4879 	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4880 	   && !chrec_contains_symbols (chrec_b, loop_nest))
4881     {
4882       /* testsuite/.../ssa-chrec-35.c
4883 	 {0, +, 1}_2  vs.  {0, +, 1}_3
4884 	 the overlapping elements are respectively located at iterations:
4885 	 {0, +, 1}_x and {0, +, 1}_x,
4886 	 in other words, we have the equality:
4887 	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4888 
4889 	 Other examples:
4890 	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4891 	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4892 
4893 	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4894 	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4895       */
4896       analyze_subscript_affine_affine (chrec_a, chrec_b,
4897 				       overlaps_a, overlaps_b, last_conflicts);
4898 
4899       if (CF_NOT_KNOWN_P (*overlaps_a)
4900  	  || CF_NOT_KNOWN_P (*overlaps_b))
4901 	dependence_stats.num_miv_unimplemented++;
4902       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4903 	       || CF_NO_DEPENDENCE_P (*overlaps_b))
4904 	dependence_stats.num_miv_independent++;
4905       else
4906 	dependence_stats.num_miv_dependent++;
4907     }
4908 
4909   else
4910     {
4911       /* When the analysis is too difficult, answer "don't know".  */
4912       if (dump_file && (dump_flags & TDF_DETAILS))
4913 	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4914 
4915       *overlaps_a = conflict_fn_not_known ();
4916       *overlaps_b = conflict_fn_not_known ();
4917       *last_conflicts = chrec_dont_know;
4918       dependence_stats.num_miv_unimplemented++;
4919     }
4920 
4921   if (dump_file && (dump_flags & TDF_DETAILS))
4922     fprintf (dump_file, ")\n");
4923 }
4924 
4925 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4926    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4927    OVERLAP_ITERATIONS_B are initialized with two functions that
4928    describe the iterations that contain conflicting elements.
4929 
4930    Remark: For an integer k >= 0, the following equality is true:
4931 
4932    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4933 */
4934 
4935 static void
analyze_overlapping_iterations(tree chrec_a,tree chrec_b,conflict_function ** overlap_iterations_a,conflict_function ** overlap_iterations_b,tree * last_conflicts,class loop * loop_nest)4936 analyze_overlapping_iterations (tree chrec_a,
4937 				tree chrec_b,
4938 				conflict_function **overlap_iterations_a,
4939 				conflict_function **overlap_iterations_b,
4940 				tree *last_conflicts, class loop *loop_nest)
4941 {
4942   unsigned int lnn = loop_nest->num;
4943 
4944   dependence_stats.num_subscript_tests++;
4945 
4946   if (dump_file && (dump_flags & TDF_DETAILS))
4947     {
4948       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4949       fprintf (dump_file, "  (chrec_a = ");
4950       print_generic_expr (dump_file, chrec_a);
4951       fprintf (dump_file, ")\n  (chrec_b = ");
4952       print_generic_expr (dump_file, chrec_b);
4953       fprintf (dump_file, ")\n");
4954     }
4955 
4956   if (chrec_a == NULL_TREE
4957       || chrec_b == NULL_TREE
4958       || chrec_contains_undetermined (chrec_a)
4959       || chrec_contains_undetermined (chrec_b))
4960     {
4961       dependence_stats.num_subscript_undetermined++;
4962 
4963       *overlap_iterations_a = conflict_fn_not_known ();
4964       *overlap_iterations_b = conflict_fn_not_known ();
4965     }
4966 
4967   /* If they are the same chrec, and are affine, they overlap
4968      on every iteration.  */
4969   else if (eq_evolutions_p (chrec_a, chrec_b)
4970 	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4971 	       || operand_equal_p (chrec_a, chrec_b, 0)))
4972     {
4973       dependence_stats.num_same_subscript_function++;
4974       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4975       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4976       *last_conflicts = chrec_dont_know;
4977     }
4978 
4979   /* If they aren't the same, and aren't affine, we can't do anything
4980      yet.  */
4981   else if ((chrec_contains_symbols (chrec_a)
4982 	    || chrec_contains_symbols (chrec_b))
4983 	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4984 	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4985     {
4986       dependence_stats.num_subscript_undetermined++;
4987       *overlap_iterations_a = conflict_fn_not_known ();
4988       *overlap_iterations_b = conflict_fn_not_known ();
4989     }
4990 
4991   else if (ziv_subscript_p (chrec_a, chrec_b))
4992     analyze_ziv_subscript (chrec_a, chrec_b,
4993 			   overlap_iterations_a, overlap_iterations_b,
4994 			   last_conflicts);
4995 
4996   else if (siv_subscript_p (chrec_a, chrec_b))
4997     analyze_siv_subscript (chrec_a, chrec_b,
4998 			   overlap_iterations_a, overlap_iterations_b,
4999 			   last_conflicts, lnn);
5000 
5001   else
5002     analyze_miv_subscript (chrec_a, chrec_b,
5003 			   overlap_iterations_a, overlap_iterations_b,
5004 			   last_conflicts, loop_nest);
5005 
5006   if (dump_file && (dump_flags & TDF_DETAILS))
5007     {
5008       fprintf (dump_file, "  (overlap_iterations_a = ");
5009       dump_conflict_function (dump_file, *overlap_iterations_a);
5010       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
5011       dump_conflict_function (dump_file, *overlap_iterations_b);
5012       fprintf (dump_file, "))\n");
5013     }
5014 }
5015 
5016 /* Helper function for uniquely inserting distance vectors.  */
5017 
5018 static void
save_dist_v(struct data_dependence_relation * ddr,lambda_vector dist_v)5019 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5020 {
5021   for (lambda_vector v : DDR_DIST_VECTS (ddr))
5022     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5023       return;
5024 
5025   DDR_DIST_VECTS (ddr).safe_push (dist_v);
5026 }
5027 
5028 /* Helper function for uniquely inserting direction vectors.  */
5029 
5030 static void
save_dir_v(struct data_dependence_relation * ddr,lambda_vector dir_v)5031 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5032 {
5033   for (lambda_vector v : DDR_DIR_VECTS (ddr))
5034     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5035       return;
5036 
5037   DDR_DIR_VECTS (ddr).safe_push (dir_v);
5038 }
5039 
5040 /* Add a distance of 1 on all the loops outer than INDEX.  If we
5041    haven't yet determined a distance for this outer loop, push a new
5042    distance vector composed of the previous distance, and a distance
5043    of 1 for this outer loop.  Example:
5044 
5045    | loop_1
5046    |   loop_2
5047    |     A[10]
5048    |   endloop_2
5049    | endloop_1
5050 
5051    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
5052    save (0, 1), then we have to save (1, 0).  */
5053 
5054 static void
add_outer_distances(struct data_dependence_relation * ddr,lambda_vector dist_v,int index)5055 add_outer_distances (struct data_dependence_relation *ddr,
5056 		     lambda_vector dist_v, int index)
5057 {
5058   /* For each outer loop where init_v is not set, the accesses are
5059      in dependence of distance 1 in the loop.  */
5060   while (--index >= 0)
5061     {
5062       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5063       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5064       save_v[index] = 1;
5065       save_dist_v (ddr, save_v);
5066     }
5067 }
5068 
5069 /* Return false when fail to represent the data dependence as a
5070    distance vector.  A_INDEX is the index of the first reference
5071    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5072    second reference.  INIT_B is set to true when a component has been
5073    added to the distance vector DIST_V.  INDEX_CARRY is then set to
5074    the index in DIST_V that carries the dependence.  */
5075 
5076 static bool
build_classic_dist_vector_1(struct data_dependence_relation * ddr,unsigned int a_index,unsigned int b_index,lambda_vector dist_v,bool * init_b,int * index_carry)5077 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5078 			     unsigned int a_index, unsigned int b_index,
5079 			     lambda_vector dist_v, bool *init_b,
5080 			     int *index_carry)
5081 {
5082   unsigned i;
5083   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5084   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5085 
5086   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5087     {
5088       tree access_fn_a, access_fn_b;
5089       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5090 
5091       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5092 	{
5093 	  non_affine_dependence_relation (ddr);
5094 	  return false;
5095 	}
5096 
5097       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5098       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5099 
5100       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5101 	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5102 	{
5103 	  HOST_WIDE_INT dist;
5104 	  int index;
5105 	  int var_a = CHREC_VARIABLE (access_fn_a);
5106 	  int var_b = CHREC_VARIABLE (access_fn_b);
5107 
5108 	  if (var_a != var_b
5109 	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5110 	    {
5111 	      non_affine_dependence_relation (ddr);
5112 	      return false;
5113 	    }
5114 
5115 	  /* When data references are collected in a loop while data
5116 	     dependences are analyzed in loop nest nested in the loop, we
5117 	     would have more number of access functions than number of
5118 	     loops.  Skip access functions of loops not in the loop nest.
5119 
5120 	     See PR89725 for more information.  */
5121 	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5122 	    continue;
5123 
5124 	  dist = int_cst_value (SUB_DISTANCE (subscript));
5125 	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5126 	  *index_carry = MIN (index, *index_carry);
5127 
5128 	  /* This is the subscript coupling test.  If we have already
5129 	     recorded a distance for this loop (a distance coming from
5130 	     another subscript), it should be the same.  For example,
5131 	     in the following code, there is no dependence:
5132 
5133 	     | loop i = 0, N, 1
5134 	     |   T[i+1][i] = ...
5135 	     |   ... = T[i][i]
5136 	     | endloop
5137 	  */
5138 	  if (init_v[index] != 0 && dist_v[index] != dist)
5139 	    {
5140 	      finalize_ddr_dependent (ddr, chrec_known);
5141 	      return false;
5142 	    }
5143 
5144 	  dist_v[index] = dist;
5145 	  init_v[index] = 1;
5146 	  *init_b = true;
5147 	}
5148       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5149 	{
5150 	  /* This can be for example an affine vs. constant dependence
5151 	     (T[i] vs. T[3]) that is not an affine dependence and is
5152 	     not representable as a distance vector.  */
5153 	  non_affine_dependence_relation (ddr);
5154 	  return false;
5155 	}
5156       else
5157 	*init_b = true;
5158     }
5159 
5160   return true;
5161 }
5162 
5163 /* Return true when the DDR contains only invariant access functions wrto. loop
5164    number LNUM.  */
5165 
5166 static bool
invariant_access_functions(const struct data_dependence_relation * ddr,int lnum)5167 invariant_access_functions (const struct data_dependence_relation *ddr,
5168 			    int lnum)
5169 {
5170   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5171     if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5172 	|| !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5173       return false;
5174 
5175   return true;
5176 }
5177 
5178 /* Helper function for the case where DDR_A and DDR_B are the same
5179    multivariate access function with a constant step.  For an example
5180    see pr34635-1.c.  */
5181 
5182 static void
add_multivariate_self_dist(struct data_dependence_relation * ddr,tree c_2)5183 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5184 {
5185   int x_1, x_2;
5186   tree c_1 = CHREC_LEFT (c_2);
5187   tree c_0 = CHREC_LEFT (c_1);
5188   lambda_vector dist_v;
5189   HOST_WIDE_INT v1, v2, cd;
5190 
5191   /* Polynomials with more than 2 variables are not handled yet.  When
5192      the evolution steps are parameters, it is not possible to
5193      represent the dependence using classical distance vectors.  */
5194   if (TREE_CODE (c_0) != INTEGER_CST
5195       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5196       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5197     {
5198       DDR_AFFINE_P (ddr) = false;
5199       return;
5200     }
5201 
5202   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5203   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5204 
5205   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
5206   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5207   v1 = int_cst_value (CHREC_RIGHT (c_1));
5208   v2 = int_cst_value (CHREC_RIGHT (c_2));
5209   cd = gcd (v1, v2);
5210   v1 /= cd;
5211   v2 /= cd;
5212 
5213   if (v2 < 0)
5214     {
5215       v2 = -v2;
5216       v1 = -v1;
5217     }
5218 
5219   dist_v[x_1] = v2;
5220   dist_v[x_2] = -v1;
5221   save_dist_v (ddr, dist_v);
5222 
5223   add_outer_distances (ddr, dist_v, x_1);
5224 }
5225 
5226 /* Helper function for the case where DDR_A and DDR_B are the same
5227    access functions.  */
5228 
5229 static void
add_other_self_distances(struct data_dependence_relation * ddr)5230 add_other_self_distances (struct data_dependence_relation *ddr)
5231 {
5232   lambda_vector dist_v;
5233   unsigned i;
5234   int index_carry = DDR_NB_LOOPS (ddr);
5235   subscript *sub;
5236   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5237 
5238   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5239     {
5240       tree access_fun = SUB_ACCESS_FN (sub, 0);
5241 
5242       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5243 	{
5244 	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
5245 	    {
5246 	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5247 		{
5248 		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5249 		  return;
5250 		}
5251 
5252 	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5253 
5254 	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5255 		add_multivariate_self_dist (ddr, access_fun);
5256 	      else
5257 		/* The evolution step is not constant: it varies in
5258 		   the outer loop, so this cannot be represented by a
5259 		   distance vector.  For example in pr34635.c the
5260 		   evolution is {0, +, {0, +, 4}_1}_2.  */
5261 		DDR_AFFINE_P (ddr) = false;
5262 
5263 	      return;
5264 	    }
5265 
5266 	  /* When data references are collected in a loop while data
5267 	     dependences are analyzed in loop nest nested in the loop, we
5268 	     would have more number of access functions than number of
5269 	     loops.  Skip access functions of loops not in the loop nest.
5270 
5271 	     See PR89725 for more information.  */
5272 	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5273 				  loop))
5274 	    continue;
5275 
5276 	  index_carry = MIN (index_carry,
5277 			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
5278 						 DDR_LOOP_NEST (ddr)));
5279 	}
5280     }
5281 
5282   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5283   add_outer_distances (ddr, dist_v, index_carry);
5284 }
5285 
5286 static void
insert_innermost_unit_dist_vector(struct data_dependence_relation * ddr)5287 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5288 {
5289   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5290 
5291   dist_v[0] = 1;
5292   save_dist_v (ddr, dist_v);
5293 }
5294 
5295 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
5296    is the case for example when access functions are the same and
5297    equal to a constant, as in:
5298 
5299    | loop_1
5300    |   A[3] = ...
5301    |   ... = A[3]
5302    | endloop_1
5303 
5304    in which case the distance vectors are (0) and (1).  */
5305 
5306 static void
add_distance_for_zero_overlaps(struct data_dependence_relation * ddr)5307 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5308 {
5309   unsigned i, j;
5310 
5311   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5312     {
5313       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5314       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5315       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5316 
5317       for (j = 0; j < ca->n; j++)
5318 	if (affine_function_zero_p (ca->fns[j]))
5319 	  {
5320 	    insert_innermost_unit_dist_vector (ddr);
5321 	    return;
5322 	  }
5323 
5324       for (j = 0; j < cb->n; j++)
5325 	if (affine_function_zero_p (cb->fns[j]))
5326 	  {
5327 	    insert_innermost_unit_dist_vector (ddr);
5328 	    return;
5329 	  }
5330     }
5331 }
5332 
5333 /* Return true when the DDR contains two data references that have the
5334    same access functions.  */
5335 
5336 static inline bool
same_access_functions(const struct data_dependence_relation * ddr)5337 same_access_functions (const struct data_dependence_relation *ddr)
5338 {
5339   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5340     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5341 			  SUB_ACCESS_FN (sub, 1)))
5342       return false;
5343 
5344   return true;
5345 }
5346 
5347 /* Compute the classic per loop distance vector.  DDR is the data
5348    dependence relation to build a vector from.  Return false when fail
5349    to represent the data dependence as a distance vector.  */
5350 
5351 static bool
build_classic_dist_vector(struct data_dependence_relation * ddr,class loop * loop_nest)5352 build_classic_dist_vector (struct data_dependence_relation *ddr,
5353 			   class loop *loop_nest)
5354 {
5355   bool init_b = false;
5356   int index_carry = DDR_NB_LOOPS (ddr);
5357   lambda_vector dist_v;
5358 
5359   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5360     return false;
5361 
5362   if (same_access_functions (ddr))
5363     {
5364       /* Save the 0 vector.  */
5365       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5366       save_dist_v (ddr, dist_v);
5367 
5368       if (invariant_access_functions (ddr, loop_nest->num))
5369 	add_distance_for_zero_overlaps (ddr);
5370 
5371       if (DDR_NB_LOOPS (ddr) > 1)
5372 	add_other_self_distances (ddr);
5373 
5374       return true;
5375     }
5376 
5377   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5378   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5379     return false;
5380 
5381   /* Save the distance vector if we initialized one.  */
5382   if (init_b)
5383     {
5384       /* Verify a basic constraint: classic distance vectors should
5385 	 always be lexicographically positive.
5386 
5387 	 Data references are collected in the order of execution of
5388 	 the program, thus for the following loop
5389 
5390 	 | for (i = 1; i < 100; i++)
5391 	 |   for (j = 1; j < 100; j++)
5392 	 |     {
5393 	 |       t = T[j+1][i-1];  // A
5394 	 |       T[j][i] = t + 2;  // B
5395 	 |     }
5396 
5397 	 references are collected following the direction of the wind:
5398 	 A then B.  The data dependence tests are performed also
5399 	 following this order, such that we're looking at the distance
5400 	 separating the elements accessed by A from the elements later
5401 	 accessed by B.  But in this example, the distance returned by
5402 	 test_dep (A, B) is lexicographically negative (-1, 1), that
5403 	 means that the access A occurs later than B with respect to
5404 	 the outer loop, ie. we're actually looking upwind.  In this
5405 	 case we solve test_dep (B, A) looking downwind to the
5406 	 lexicographically positive solution, that returns the
5407 	 distance vector (1, -1).  */
5408       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5409 	{
5410 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5411 	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5412 	    return false;
5413 	  compute_subscript_distance (ddr);
5414 	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5415 					    &index_carry))
5416 	    return false;
5417 	  save_dist_v (ddr, save_v);
5418 	  DDR_REVERSED_P (ddr) = true;
5419 
5420 	  /* In this case there is a dependence forward for all the
5421 	     outer loops:
5422 
5423 	     | for (k = 1; k < 100; k++)
5424 	     |  for (i = 1; i < 100; i++)
5425 	     |   for (j = 1; j < 100; j++)
5426 	     |     {
5427 	     |       t = T[j+1][i-1];  // A
5428 	     |       T[j][i] = t + 2;  // B
5429 	     |     }
5430 
5431 	     the vectors are:
5432 	     (0,  1, -1)
5433 	     (1,  1, -1)
5434 	     (1, -1,  1)
5435 	  */
5436 	  if (DDR_NB_LOOPS (ddr) > 1)
5437 	    {
5438  	      add_outer_distances (ddr, save_v, index_carry);
5439 	      add_outer_distances (ddr, dist_v, index_carry);
5440 	    }
5441 	}
5442       else
5443 	{
5444 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5445 	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5446 
5447 	  if (DDR_NB_LOOPS (ddr) > 1)
5448 	    {
5449 	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5450 
5451 	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5452 		return false;
5453 	      compute_subscript_distance (ddr);
5454 	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5455 						&index_carry))
5456 		return false;
5457 
5458 	      save_dist_v (ddr, save_v);
5459 	      add_outer_distances (ddr, dist_v, index_carry);
5460 	      add_outer_distances (ddr, opposite_v, index_carry);
5461 	    }
5462 	  else
5463 	    save_dist_v (ddr, save_v);
5464 	}
5465     }
5466   else
5467     {
5468       /* There is a distance of 1 on all the outer loops: Example:
5469 	 there is a dependence of distance 1 on loop_1 for the array A.
5470 
5471 	 | loop_1
5472 	 |   A[5] = ...
5473 	 | endloop
5474       */
5475       add_outer_distances (ddr, dist_v,
5476 			   lambda_vector_first_nz (dist_v,
5477 						   DDR_NB_LOOPS (ddr), 0));
5478     }
5479 
5480   if (dump_file && (dump_flags & TDF_DETAILS))
5481     {
5482       unsigned i;
5483 
5484       fprintf (dump_file, "(build_classic_dist_vector\n");
5485       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5486 	{
5487 	  fprintf (dump_file, "  dist_vector = (");
5488 	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5489 			       DDR_NB_LOOPS (ddr));
5490 	  fprintf (dump_file, "  )\n");
5491 	}
5492       fprintf (dump_file, ")\n");
5493     }
5494 
5495   return true;
5496 }
5497 
5498 /* Return the direction for a given distance.
5499    FIXME: Computing dir this way is suboptimal, since dir can catch
5500    cases that dist is unable to represent.  */
5501 
5502 static inline enum data_dependence_direction
dir_from_dist(int dist)5503 dir_from_dist (int dist)
5504 {
5505   if (dist > 0)
5506     return dir_positive;
5507   else if (dist < 0)
5508     return dir_negative;
5509   else
5510     return dir_equal;
5511 }
5512 
5513 /* Compute the classic per loop direction vector.  DDR is the data
5514    dependence relation to build a vector from.  */
5515 
5516 static void
build_classic_dir_vector(struct data_dependence_relation * ddr)5517 build_classic_dir_vector (struct data_dependence_relation *ddr)
5518 {
5519   unsigned i, j;
5520   lambda_vector dist_v;
5521 
5522   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5523     {
5524       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5525 
5526       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5527 	dir_v[j] = dir_from_dist (dist_v[j]);
5528 
5529       save_dir_v (ddr, dir_v);
5530     }
5531 }
5532 
5533 /* Helper function.  Returns true when there is a dependence between the
5534    data references.  A_INDEX is the index of the first reference (0 for
5535    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
5536 
5537 static bool
subscript_dependence_tester_1(struct data_dependence_relation * ddr,unsigned int a_index,unsigned int b_index,class loop * loop_nest)5538 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5539 			       unsigned int a_index, unsigned int b_index,
5540 			       class loop *loop_nest)
5541 {
5542   unsigned int i;
5543   tree last_conflicts;
5544   struct subscript *subscript;
5545   tree res = NULL_TREE;
5546 
5547   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5548     {
5549       conflict_function *overlaps_a, *overlaps_b;
5550 
5551       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5552 				      SUB_ACCESS_FN (subscript, b_index),
5553 				      &overlaps_a, &overlaps_b,
5554 				      &last_conflicts, loop_nest);
5555 
5556       if (SUB_CONFLICTS_IN_A (subscript))
5557 	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5558       if (SUB_CONFLICTS_IN_B (subscript))
5559 	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5560 
5561       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5562       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5563       SUB_LAST_CONFLICT (subscript) = last_conflicts;
5564 
5565       /* If there is any undetermined conflict function we have to
5566          give a conservative answer in case we cannot prove that
5567 	 no dependence exists when analyzing another subscript.  */
5568       if (CF_NOT_KNOWN_P (overlaps_a)
5569  	  || CF_NOT_KNOWN_P (overlaps_b))
5570  	{
5571 	  res = chrec_dont_know;
5572 	  continue;
5573  	}
5574 
5575       /* When there is a subscript with no dependence we can stop.  */
5576       else if (CF_NO_DEPENDENCE_P (overlaps_a)
5577  	       || CF_NO_DEPENDENCE_P (overlaps_b))
5578  	{
5579 	  res = chrec_known;
5580 	  break;
5581  	}
5582     }
5583 
5584   if (res == NULL_TREE)
5585     return true;
5586 
5587   if (res == chrec_known)
5588     dependence_stats.num_dependence_independent++;
5589   else
5590     dependence_stats.num_dependence_undetermined++;
5591   finalize_ddr_dependent (ddr, res);
5592   return false;
5593 }
5594 
5595 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
5596 
5597 static void
subscript_dependence_tester(struct data_dependence_relation * ddr,class loop * loop_nest)5598 subscript_dependence_tester (struct data_dependence_relation *ddr,
5599 			     class loop *loop_nest)
5600 {
5601   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5602     dependence_stats.num_dependence_dependent++;
5603 
5604   compute_subscript_distance (ddr);
5605   if (build_classic_dist_vector (ddr, loop_nest))
5606     build_classic_dir_vector (ddr);
5607 }
5608 
5609 /* Returns true when all the access functions of A are affine or
5610    constant with respect to LOOP_NEST.  */
5611 
5612 static bool
access_functions_are_affine_or_constant_p(const struct data_reference * a,const class loop * loop_nest)5613 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5614 					   const class loop *loop_nest)
5615 {
5616   vec<tree> fns = DR_ACCESS_FNS (a);
5617   for (tree t : fns)
5618     if (!evolution_function_is_invariant_p (t, loop_nest->num)
5619 	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5620       return false;
5621 
5622   return true;
5623 }
5624 
5625 /* This computes the affine dependence relation between A and B with
5626    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
5627    independence between two accesses, while CHREC_DONT_KNOW is used
5628    for representing the unknown relation.
5629 
5630    Note that it is possible to stop the computation of the dependence
5631    relation the first time we detect a CHREC_KNOWN element for a given
5632    subscript.  */
5633 
5634 void
compute_affine_dependence(struct data_dependence_relation * ddr,class loop * loop_nest)5635 compute_affine_dependence (struct data_dependence_relation *ddr,
5636 			   class loop *loop_nest)
5637 {
5638   struct data_reference *dra = DDR_A (ddr);
5639   struct data_reference *drb = DDR_B (ddr);
5640 
5641   if (dump_file && (dump_flags & TDF_DETAILS))
5642     {
5643       fprintf (dump_file, "(compute_affine_dependence\n");
5644       fprintf (dump_file, "  ref_a: ");
5645       print_generic_expr (dump_file, DR_REF (dra));
5646       fprintf (dump_file, ", stmt_a: ");
5647       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5648       fprintf (dump_file, "  ref_b: ");
5649       print_generic_expr (dump_file, DR_REF (drb));
5650       fprintf (dump_file, ", stmt_b: ");
5651       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5652     }
5653 
5654   /* Analyze only when the dependence relation is not yet known.  */
5655   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5656     {
5657       dependence_stats.num_dependence_tests++;
5658 
5659       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5660 	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
5661 	subscript_dependence_tester (ddr, loop_nest);
5662 
5663       /* As a last case, if the dependence cannot be determined, or if
5664 	 the dependence is considered too difficult to determine, answer
5665 	 "don't know".  */
5666       else
5667 	{
5668 	  dependence_stats.num_dependence_undetermined++;
5669 
5670 	  if (dump_file && (dump_flags & TDF_DETAILS))
5671 	    {
5672 	      fprintf (dump_file, "Data ref a:\n");
5673 	      dump_data_reference (dump_file, dra);
5674 	      fprintf (dump_file, "Data ref b:\n");
5675 	      dump_data_reference (dump_file, drb);
5676 	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5677 	    }
5678 	  finalize_ddr_dependent (ddr, chrec_dont_know);
5679 	}
5680     }
5681 
5682   if (dump_file && (dump_flags & TDF_DETAILS))
5683     {
5684       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5685 	fprintf (dump_file, ") -> no dependence\n");
5686       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5687 	fprintf (dump_file, ") -> dependence analysis failed\n");
5688       else
5689 	fprintf (dump_file, ")\n");
5690     }
5691 }
5692 
5693 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5694    the data references in DATAREFS, in the LOOP_NEST.  When
5695    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5696    relations.  Return true when successful, i.e. data references number
5697    is small enough to be handled.  */
5698 
5699 bool
compute_all_dependences(const vec<data_reference_p> & datarefs,vec<ddr_p> * dependence_relations,const vec<loop_p> & loop_nest,bool compute_self_and_rr)5700 compute_all_dependences (const vec<data_reference_p> &datarefs,
5701 			 vec<ddr_p> *dependence_relations,
5702 			 const vec<loop_p> &loop_nest,
5703 			 bool compute_self_and_rr)
5704 {
5705   struct data_dependence_relation *ddr;
5706   struct data_reference *a, *b;
5707   unsigned int i, j;
5708 
5709   if ((int) datarefs.length ()
5710       > param_loop_max_datarefs_for_datadeps)
5711     {
5712       struct data_dependence_relation *ddr;
5713 
5714       /* Insert a single relation into dependence_relations:
5715 	 chrec_dont_know.  */
5716       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5717       dependence_relations->safe_push (ddr);
5718       return false;
5719     }
5720 
5721   FOR_EACH_VEC_ELT (datarefs, i, a)
5722     for (j = i + 1; datarefs.iterate (j, &b); j++)
5723       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5724 	{
5725 	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
5726 	  dependence_relations->safe_push (ddr);
5727           if (loop_nest.exists ())
5728    	    compute_affine_dependence (ddr, loop_nest[0]);
5729 	}
5730 
5731   if (compute_self_and_rr)
5732     FOR_EACH_VEC_ELT (datarefs, i, a)
5733       {
5734 	ddr = initialize_data_dependence_relation (a, a, loop_nest);
5735 	dependence_relations->safe_push (ddr);
5736         if (loop_nest.exists ())
5737    	  compute_affine_dependence (ddr, loop_nest[0]);
5738       }
5739 
5740   return true;
5741 }
5742 
5743 /* Describes a location of a memory reference.  */
5744 
5745 struct data_ref_loc
5746 {
5747   /* The memory reference.  */
5748   tree ref;
5749 
5750   /* True if the memory reference is read.  */
5751   bool is_read;
5752 
5753   /* True if the data reference is conditional within the containing
5754      statement, i.e. if it might not occur even when the statement
5755      is executed and runs to completion.  */
5756   bool is_conditional_in_stmt;
5757 };
5758 
5759 
5760 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
5761    true if STMT clobbers memory, false otherwise.  */
5762 
5763 static bool
get_references_in_stmt(gimple * stmt,vec<data_ref_loc,va_heap> * references)5764 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5765 {
5766   bool clobbers_memory = false;
5767   data_ref_loc ref;
5768   tree op0, op1;
5769   enum gimple_code stmt_code = gimple_code (stmt);
5770 
5771   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5772      As we cannot model data-references to not spelled out
5773      accesses give up if they may occur.  */
5774   if (stmt_code == GIMPLE_CALL
5775       && !(gimple_call_flags (stmt) & ECF_CONST))
5776     {
5777       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
5778       if (gimple_call_internal_p (stmt))
5779 	switch (gimple_call_internal_fn (stmt))
5780 	  {
5781 	  case IFN_GOMP_SIMD_LANE:
5782 	    {
5783 	      class loop *loop = gimple_bb (stmt)->loop_father;
5784 	      tree uid = gimple_call_arg (stmt, 0);
5785 	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
5786 	      if (loop == NULL
5787 		  || loop->simduid != SSA_NAME_VAR (uid))
5788 		clobbers_memory = true;
5789 	      break;
5790 	    }
5791 	  case IFN_MASK_LOAD:
5792 	  case IFN_MASK_STORE:
5793 	    break;
5794 	  default:
5795 	    clobbers_memory = true;
5796 	    break;
5797 	  }
5798       else
5799 	clobbers_memory = true;
5800     }
5801   else if (stmt_code == GIMPLE_ASM
5802 	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5803 	       || gimple_vuse (stmt)))
5804     clobbers_memory = true;
5805 
5806   if (!gimple_vuse (stmt))
5807     return clobbers_memory;
5808 
5809   if (stmt_code == GIMPLE_ASSIGN)
5810     {
5811       tree base;
5812       op0 = gimple_assign_lhs (stmt);
5813       op1 = gimple_assign_rhs1 (stmt);
5814 
5815       if (DECL_P (op1)
5816 	  || (REFERENCE_CLASS_P (op1)
5817 	      && (base = get_base_address (op1))
5818 	      && TREE_CODE (base) != SSA_NAME
5819 	      && !is_gimple_min_invariant (base)))
5820 	{
5821 	  ref.ref = op1;
5822 	  ref.is_read = true;
5823 	  ref.is_conditional_in_stmt = false;
5824 	  references->safe_push (ref);
5825 	}
5826     }
5827   else if (stmt_code == GIMPLE_CALL)
5828     {
5829       unsigned i, n;
5830       tree ptr, type;
5831       unsigned int align;
5832 
5833       ref.is_read = false;
5834       if (gimple_call_internal_p (stmt))
5835 	switch (gimple_call_internal_fn (stmt))
5836 	  {
5837 	  case IFN_MASK_LOAD:
5838 	    if (gimple_call_lhs (stmt) == NULL_TREE)
5839 	      break;
5840 	    ref.is_read = true;
5841 	    /* FALLTHRU */
5842 	  case IFN_MASK_STORE:
5843 	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5844 	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
5845 	    if (ref.is_read)
5846 	      type = TREE_TYPE (gimple_call_lhs (stmt));
5847 	    else
5848 	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
5849 	    if (TYPE_ALIGN (type) != align)
5850 	      type = build_aligned_type (type, align);
5851 	    ref.is_conditional_in_stmt = true;
5852 	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5853 				   ptr);
5854 	    references->safe_push (ref);
5855 	    return false;
5856 	  default:
5857 	    break;
5858 	  }
5859 
5860       op0 = gimple_call_lhs (stmt);
5861       n = gimple_call_num_args (stmt);
5862       for (i = 0; i < n; i++)
5863 	{
5864 	  op1 = gimple_call_arg (stmt, i);
5865 
5866 	  if (DECL_P (op1)
5867 	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5868 	    {
5869 	      ref.ref = op1;
5870 	      ref.is_read = true;
5871 	      ref.is_conditional_in_stmt = false;
5872 	      references->safe_push (ref);
5873 	    }
5874 	}
5875     }
5876   else
5877     return clobbers_memory;
5878 
5879   if (op0
5880       && (DECL_P (op0)
5881 	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5882     {
5883       ref.ref = op0;
5884       ref.is_read = false;
5885       ref.is_conditional_in_stmt = false;
5886       references->safe_push (ref);
5887     }
5888   return clobbers_memory;
5889 }
5890 
5891 
5892 /* Returns true if the loop-nest has any data reference.  */
5893 
5894 bool
loop_nest_has_data_refs(loop_p loop)5895 loop_nest_has_data_refs (loop_p loop)
5896 {
5897   basic_block *bbs = get_loop_body (loop);
5898   auto_vec<data_ref_loc, 3> references;
5899 
5900   for (unsigned i = 0; i < loop->num_nodes; i++)
5901     {
5902       basic_block bb = bbs[i];
5903       gimple_stmt_iterator bsi;
5904 
5905       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5906 	{
5907 	  gimple *stmt = gsi_stmt (bsi);
5908 	  get_references_in_stmt (stmt, &references);
5909 	  if (references.length ())
5910 	    {
5911 	      free (bbs);
5912 	      return true;
5913 	    }
5914 	}
5915     }
5916   free (bbs);
5917   return false;
5918 }
5919 
5920 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5921    reference, returns false, otherwise returns true.  NEST is the outermost
5922    loop of the loop nest in which the references should be analyzed.  */
5923 
5924 opt_result
find_data_references_in_stmt(class loop * nest,gimple * stmt,vec<data_reference_p> * datarefs)5925 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5926 			      vec<data_reference_p> *datarefs)
5927 {
5928   auto_vec<data_ref_loc, 2> references;
5929   data_reference_p dr;
5930 
5931   if (get_references_in_stmt (stmt, &references))
5932     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5933 				   stmt);
5934 
5935   for (const data_ref_loc &ref : references)
5936     {
5937       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5938 			    loop_containing_stmt (stmt), ref.ref,
5939 			    stmt, ref.is_read, ref.is_conditional_in_stmt);
5940       gcc_assert (dr != NULL);
5941       datarefs->safe_push (dr);
5942     }
5943 
5944   return opt_result::success ();
5945 }
5946 
5947 /* Stores the data references in STMT to DATAREFS.  If there is an
5948    unanalyzable reference, returns false, otherwise returns true.
5949    NEST is the outermost loop of the loop nest in which the references
5950    should be instantiated, LOOP is the loop in which the references
5951    should be analyzed.  */
5952 
5953 bool
graphite_find_data_references_in_stmt(edge nest,loop_p loop,gimple * stmt,vec<data_reference_p> * datarefs)5954 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5955 				       vec<data_reference_p> *datarefs)
5956 {
5957   auto_vec<data_ref_loc, 2> references;
5958   bool ret = true;
5959   data_reference_p dr;
5960 
5961   if (get_references_in_stmt (stmt, &references))
5962     return false;
5963 
5964   for (const data_ref_loc &ref : references)
5965     {
5966       dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
5967 			    ref.is_conditional_in_stmt);
5968       gcc_assert (dr != NULL);
5969       datarefs->safe_push (dr);
5970     }
5971 
5972   return ret;
5973 }
5974 
5975 /* Search the data references in LOOP, and record the information into
5976    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5977    difficult case, returns NULL_TREE otherwise.  */
5978 
5979 tree
find_data_references_in_bb(class loop * loop,basic_block bb,vec<data_reference_p> * datarefs)5980 find_data_references_in_bb (class loop *loop, basic_block bb,
5981                             vec<data_reference_p> *datarefs)
5982 {
5983   gimple_stmt_iterator bsi;
5984 
5985   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5986     {
5987       gimple *stmt = gsi_stmt (bsi);
5988 
5989       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5990         {
5991           struct data_reference *res;
5992           res = XCNEW (struct data_reference);
5993           datarefs->safe_push (res);
5994 
5995           return chrec_dont_know;
5996         }
5997     }
5998 
5999   return NULL_TREE;
6000 }
6001 
6002 /* Search the data references in LOOP, and record the information into
6003    DATAREFS.  Returns chrec_dont_know when failing to analyze a
6004    difficult case, returns NULL_TREE otherwise.
6005 
6006    TODO: This function should be made smarter so that it can handle address
6007    arithmetic as if they were array accesses, etc.  */
6008 
6009 tree
find_data_references_in_loop(class loop * loop,vec<data_reference_p> * datarefs)6010 find_data_references_in_loop (class loop *loop,
6011 			      vec<data_reference_p> *datarefs)
6012 {
6013   basic_block bb, *bbs;
6014   unsigned int i;
6015 
6016   bbs = get_loop_body_in_dom_order (loop);
6017 
6018   for (i = 0; i < loop->num_nodes; i++)
6019     {
6020       bb = bbs[i];
6021 
6022       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6023         {
6024           free (bbs);
6025           return chrec_dont_know;
6026         }
6027     }
6028   free (bbs);
6029 
6030   return NULL_TREE;
6031 }
6032 
6033 /* Return the alignment in bytes that DRB is guaranteed to have at all
6034    times.  */
6035 
6036 unsigned int
dr_alignment(innermost_loop_behavior * drb)6037 dr_alignment (innermost_loop_behavior *drb)
6038 {
6039   /* Get the alignment of BASE_ADDRESS + INIT.  */
6040   unsigned int alignment = drb->base_alignment;
6041   unsigned int misalignment = (drb->base_misalignment
6042 			       + TREE_INT_CST_LOW (drb->init));
6043   if (misalignment != 0)
6044     alignment = MIN (alignment, misalignment & -misalignment);
6045 
6046   /* Cap it to the alignment of OFFSET.  */
6047   if (!integer_zerop (drb->offset))
6048     alignment = MIN (alignment, drb->offset_alignment);
6049 
6050   /* Cap it to the alignment of STEP.  */
6051   if (!integer_zerop (drb->step))
6052     alignment = MIN (alignment, drb->step_alignment);
6053 
6054   return alignment;
6055 }
6056 
6057 /* If BASE is a pointer-typed SSA name, try to find the object that it
6058    is based on.  Return this object X on success and store the alignment
6059    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
6060 
6061 static tree
get_base_for_alignment_1(tree base,unsigned int * alignment_out)6062 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6063 {
6064   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6065     return NULL_TREE;
6066 
6067   gimple *def = SSA_NAME_DEF_STMT (base);
6068   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
6069 
6070   /* Peel chrecs and record the minimum alignment preserved by
6071      all steps.  */
6072   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6073   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6074     {
6075       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6076       alignment = MIN (alignment, step_alignment);
6077       base = CHREC_LEFT (base);
6078     }
6079 
6080   /* Punt if the expression is too complicated to handle.  */
6081   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6082     return NULL_TREE;
6083 
6084   /* The only useful cases are those for which a dereference folds to something
6085      other than an INDIRECT_REF.  */
6086   tree ref_type = TREE_TYPE (TREE_TYPE (base));
6087   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6088   if (!ref)
6089     return NULL_TREE;
6090 
6091   /* Analyze the base to which the steps we peeled were applied.  */
6092   poly_int64 bitsize, bitpos, bytepos;
6093   machine_mode mode;
6094   int unsignedp, reversep, volatilep;
6095   tree offset;
6096   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6097 			      &unsignedp, &reversep, &volatilep);
6098   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
6099     return NULL_TREE;
6100 
6101   /* Restrict the alignment to that guaranteed by the offsets.  */
6102   unsigned int bytepos_alignment = known_alignment (bytepos);
6103   if (bytepos_alignment != 0)
6104     alignment = MIN (alignment, bytepos_alignment);
6105   if (offset)
6106     {
6107       unsigned int offset_alignment = highest_pow2_factor (offset);
6108       alignment = MIN (alignment, offset_alignment);
6109     }
6110 
6111   *alignment_out = alignment;
6112   return base;
6113 }
6114 
6115 /* Return the object whose alignment would need to be changed in order
6116    to increase the alignment of ADDR.  Store the maximum achievable
6117    alignment in *MAX_ALIGNMENT.  */
6118 
6119 tree
get_base_for_alignment(tree addr,unsigned int * max_alignment)6120 get_base_for_alignment (tree addr, unsigned int *max_alignment)
6121 {
6122   tree base = get_base_for_alignment_1 (addr, max_alignment);
6123   if (base)
6124     return base;
6125 
6126   if (TREE_CODE (addr) == ADDR_EXPR)
6127     addr = TREE_OPERAND (addr, 0);
6128   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6129   return addr;
6130 }
6131 
6132 /* Recursive helper function.  */
6133 
6134 static bool
find_loop_nest_1(class loop * loop,vec<loop_p> * loop_nest)6135 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6136 {
6137   /* Inner loops of the nest should not contain siblings.  Example:
6138      when there are two consecutive loops,
6139 
6140      | loop_0
6141      |   loop_1
6142      |     A[{0, +, 1}_1]
6143      |   endloop_1
6144      |   loop_2
6145      |     A[{0, +, 1}_2]
6146      |   endloop_2
6147      | endloop_0
6148 
6149      the dependence relation cannot be captured by the distance
6150      abstraction.  */
6151   if (loop->next)
6152     return false;
6153 
6154   loop_nest->safe_push (loop);
6155   if (loop->inner)
6156     return find_loop_nest_1 (loop->inner, loop_nest);
6157   return true;
6158 }
6159 
6160 /* Return false when the LOOP is not well nested.  Otherwise return
6161    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
6162    contain the loops from the outermost to the innermost, as they will
6163    appear in the classic distance vector.  */
6164 
6165 bool
find_loop_nest(class loop * loop,vec<loop_p> * loop_nest)6166 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6167 {
6168   loop_nest->safe_push (loop);
6169   if (loop->inner)
6170     return find_loop_nest_1 (loop->inner, loop_nest);
6171   return true;
6172 }
6173 
6174 /* Returns true when the data dependences have been computed, false otherwise.
6175    Given a loop nest LOOP, the following vectors are returned:
6176    DATAREFS is initialized to all the array elements contained in this loop,
6177    DEPENDENCE_RELATIONS contains the relations between the data references.
6178    Compute read-read and self relations if
6179    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
6180 
6181 bool
compute_data_dependences_for_loop(class loop * loop,bool compute_self_and_read_read_dependences,vec<loop_p> * loop_nest,vec<data_reference_p> * datarefs,vec<ddr_p> * dependence_relations)6182 compute_data_dependences_for_loop (class loop *loop,
6183 				   bool compute_self_and_read_read_dependences,
6184 				   vec<loop_p> *loop_nest,
6185 				   vec<data_reference_p> *datarefs,
6186 				   vec<ddr_p> *dependence_relations)
6187 {
6188   bool res = true;
6189 
6190   memset (&dependence_stats, 0, sizeof (dependence_stats));
6191 
6192   /* If the loop nest is not well formed, or one of the data references
6193      is not computable, give up without spending time to compute other
6194      dependences.  */
6195   if (!loop
6196       || !find_loop_nest (loop, loop_nest)
6197       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6198       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6199 				   compute_self_and_read_read_dependences))
6200     res = false;
6201 
6202   if (dump_file && (dump_flags & TDF_STATS))
6203     {
6204       fprintf (dump_file, "Dependence tester statistics:\n");
6205 
6206       fprintf (dump_file, "Number of dependence tests: %d\n",
6207 	       dependence_stats.num_dependence_tests);
6208       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6209 	       dependence_stats.num_dependence_dependent);
6210       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6211 	       dependence_stats.num_dependence_independent);
6212       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6213 	       dependence_stats.num_dependence_undetermined);
6214 
6215       fprintf (dump_file, "Number of subscript tests: %d\n",
6216 	       dependence_stats.num_subscript_tests);
6217       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6218 	       dependence_stats.num_subscript_undetermined);
6219       fprintf (dump_file, "Number of same subscript function: %d\n",
6220 	       dependence_stats.num_same_subscript_function);
6221 
6222       fprintf (dump_file, "Number of ziv tests: %d\n",
6223 	       dependence_stats.num_ziv);
6224       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6225 	       dependence_stats.num_ziv_dependent);
6226       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6227 	       dependence_stats.num_ziv_independent);
6228       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6229 	       dependence_stats.num_ziv_unimplemented);
6230 
6231       fprintf (dump_file, "Number of siv tests: %d\n",
6232 	       dependence_stats.num_siv);
6233       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6234 	       dependence_stats.num_siv_dependent);
6235       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6236 	       dependence_stats.num_siv_independent);
6237       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6238 	       dependence_stats.num_siv_unimplemented);
6239 
6240       fprintf (dump_file, "Number of miv tests: %d\n",
6241 	       dependence_stats.num_miv);
6242       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6243 	       dependence_stats.num_miv_dependent);
6244       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6245 	       dependence_stats.num_miv_independent);
6246       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6247 	       dependence_stats.num_miv_unimplemented);
6248     }
6249 
6250   return res;
6251 }
6252 
6253 /* Free the memory used by a data dependence relation DDR.  */
6254 
6255 void
free_dependence_relation(struct data_dependence_relation * ddr)6256 free_dependence_relation (struct data_dependence_relation *ddr)
6257 {
6258   if (ddr == NULL)
6259     return;
6260 
6261   if (DDR_SUBSCRIPTS (ddr).exists ())
6262     free_subscripts (DDR_SUBSCRIPTS (ddr));
6263   DDR_DIST_VECTS (ddr).release ();
6264   DDR_DIR_VECTS (ddr).release ();
6265 
6266   free (ddr);
6267 }
6268 
6269 /* Free the memory used by the data dependence relations from
6270    DEPENDENCE_RELATIONS.  */
6271 
6272 void
free_dependence_relations(vec<ddr_p> & dependence_relations)6273 free_dependence_relations (vec<ddr_p>& dependence_relations)
6274 {
6275   for (data_dependence_relation *ddr : dependence_relations)
6276     if (ddr)
6277       free_dependence_relation (ddr);
6278 
6279   dependence_relations.release ();
6280 }
6281 
6282 /* Free the memory used by the data references from DATAREFS.  */
6283 
6284 void
free_data_refs(vec<data_reference_p> & datarefs)6285 free_data_refs (vec<data_reference_p>& datarefs)
6286 {
6287   for (data_reference *dr : datarefs)
6288     free_data_ref (dr);
6289   datarefs.release ();
6290 }
6291 
6292 /* Common routine implementing both dr_direction_indicator and
6293    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
6294    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6295    Return the step as the indicator otherwise.  */
6296 
6297 static tree
dr_step_indicator(struct data_reference * dr,int useful_min)6298 dr_step_indicator (struct data_reference *dr, int useful_min)
6299 {
6300   tree step = DR_STEP (dr);
6301   if (!step)
6302     return NULL_TREE;
6303   STRIP_NOPS (step);
6304   /* Look for cases where the step is scaled by a positive constant
6305      integer, which will often be the access size.  If the multiplication
6306      doesn't change the sign (due to overflow effects) then we can
6307      test the unscaled value instead.  */
6308   if (TREE_CODE (step) == MULT_EXPR
6309       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6310       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6311     {
6312       tree factor = TREE_OPERAND (step, 1);
6313       step = TREE_OPERAND (step, 0);
6314 
6315       /* Strip widening and truncating conversions as well as nops.  */
6316       if (CONVERT_EXPR_P (step)
6317 	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6318 	step = TREE_OPERAND (step, 0);
6319       tree type = TREE_TYPE (step);
6320 
6321       /* Get the range of step values that would not cause overflow.  */
6322       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6323 			 / wi::to_widest (factor));
6324       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6325 			 / wi::to_widest (factor));
6326 
6327       /* Get the range of values that the unconverted step actually has.  */
6328       wide_int step_min, step_max;
6329       value_range vr;
6330       if (TREE_CODE (step) != SSA_NAME
6331 	  || !get_range_query (cfun)->range_of_expr (vr, step)
6332 	  || vr.kind () != VR_RANGE)
6333 	{
6334 	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6335 	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6336 	}
6337       else
6338 	{
6339 	  step_min = vr.lower_bound ();
6340 	  step_max = vr.upper_bound ();
6341 	}
6342 
6343       /* Check whether the unconverted step has an acceptable range.  */
6344       signop sgn = TYPE_SIGN (type);
6345       if (wi::les_p (minv, widest_int::from (step_min, sgn))
6346 	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6347 	{
6348 	  if (wi::ge_p (step_min, useful_min, sgn))
6349 	    return ssize_int (useful_min);
6350 	  else if (wi::lt_p (step_max, 0, sgn))
6351 	    return ssize_int (-1);
6352 	  else
6353 	    return fold_convert (ssizetype, step);
6354 	}
6355     }
6356   return DR_STEP (dr);
6357 }
6358 
6359 /* Return a value that is negative iff DR has a negative step.  */
6360 
6361 tree
dr_direction_indicator(struct data_reference * dr)6362 dr_direction_indicator (struct data_reference *dr)
6363 {
6364   return dr_step_indicator (dr, 0);
6365 }
6366 
6367 /* Return a value that is zero iff DR has a zero step.  */
6368 
6369 tree
dr_zero_step_indicator(struct data_reference * dr)6370 dr_zero_step_indicator (struct data_reference *dr)
6371 {
6372   return dr_step_indicator (dr, 1);
6373 }
6374 
6375 /* Return true if DR is known to have a nonnegative (but possibly zero)
6376    step.  */
6377 
6378 bool
dr_known_forward_stride_p(struct data_reference * dr)6379 dr_known_forward_stride_p (struct data_reference *dr)
6380 {
6381   tree indicator = dr_direction_indicator (dr);
6382   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6383 				   fold_convert (ssizetype, indicator),
6384 				   ssize_int (0));
6385   return neg_step_val && integer_zerop (neg_step_val);
6386 }
6387