1 /* Data references and dependences detectors.
2    Copyright (C) 2003-2020 Free Software Foundation, Inc.
3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* This pass walks a given loop structure searching for array
22    references.  The information about the array accesses is recorded
23    in DATA_REFERENCE structures.
24 
25    The basic test for determining the dependences is:
26    given two access functions chrec1 and chrec2 to a same array, and
27    x and y two vectors from the iteration domain, the same element of
28    the array is accessed twice at iterations x and y if and only if:
29    |             chrec1 (x) == chrec2 (y).
30 
31    The goals of this analysis are:
32 
33    - to determine the independence: the relation between two
34      independent accesses is qualified with the chrec_known (this
35      information allows a loop parallelization),
36 
37    - when two data references access the same data, to qualify the
38      dependence relation with classic dependence representations:
39 
40        - distance vectors
41        - direction vectors
42        - loop carried level dependence
43        - polyhedron dependence
44      or with the chains of recurrences based representation,
45 
46    - to define a knowledge base for storing the data dependence
47      information,
48 
49    - to define an interface to access this data.
50 
51 
52    Definitions:
53 
54    - subscript: given two array accesses a subscript is the tuple
55    composed of the access functions for a given dimension.  Example:
56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57    (f1, g1), (f2, g2), (f3, g3).
58 
59    - Diophantine equation: an equation whose coefficients and
60    solutions are integer constants, for example the equation
61    |   3*x + 2*y = 1
62    has an integer solution x = 1 and y = -1.
63 
64    References:
65 
66    - "Advanced Compilation for High Performance Computing" by Randy
67    Allen and Ken Kennedy.
68    http://citeseer.ist.psu.edu/goff91practical.html
69 
70    - "Loop Transformations for Restructuring Compilers - The Foundations"
71    by Utpal Banerjee.
72 
73 
74 */
75 
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "builtins.h"
97 #include "tree-eh.h"
98 #include "ssa.h"
99 #include "internal-fn.h"
100 
101 static struct datadep_stats
102 {
103   int num_dependence_tests;
104   int num_dependence_dependent;
105   int num_dependence_independent;
106   int num_dependence_undetermined;
107 
108   int num_subscript_tests;
109   int num_subscript_undetermined;
110   int num_same_subscript_function;
111 
112   int num_ziv;
113   int num_ziv_independent;
114   int num_ziv_dependent;
115   int num_ziv_unimplemented;
116 
117   int num_siv;
118   int num_siv_independent;
119   int num_siv_dependent;
120   int num_siv_unimplemented;
121 
122   int num_miv;
123   int num_miv_independent;
124   int num_miv_dependent;
125   int num_miv_unimplemented;
126 } dependence_stats;
127 
128 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
129 					   unsigned int, unsigned int,
130 					   class loop *);
131 /* Returns true iff A divides B.  */
132 
133 static inline bool
tree_fold_divides_p(const_tree a,const_tree b)134 tree_fold_divides_p (const_tree a, const_tree b)
135 {
136   gcc_assert (TREE_CODE (a) == INTEGER_CST);
137   gcc_assert (TREE_CODE (b) == INTEGER_CST);
138   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
139 }
140 
141 /* Returns true iff A divides B.  */
142 
143 static inline bool
int_divides_p(lambda_int a,lambda_int b)144 int_divides_p (lambda_int a, lambda_int b)
145 {
146   return ((b % a) == 0);
147 }
148 
149 /* Return true if reference REF contains a union access.  */
150 
151 static bool
ref_contains_union_access_p(tree ref)152 ref_contains_union_access_p (tree ref)
153 {
154   while (handled_component_p (ref))
155     {
156       ref = TREE_OPERAND (ref, 0);
157       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
158 	  || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
159 	return true;
160     }
161   return false;
162 }
163 
164 
165 
166 /* Dump into FILE all the data references from DATAREFS.  */
167 
168 static void
dump_data_references(FILE * file,vec<data_reference_p> datarefs)169 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
170 {
171   unsigned int i;
172   struct data_reference *dr;
173 
174   FOR_EACH_VEC_ELT (datarefs, i, dr)
175     dump_data_reference (file, dr);
176 }
177 
178 /* Unified dump into FILE all the data references from DATAREFS.  */
179 
180 DEBUG_FUNCTION void
debug(vec<data_reference_p> & ref)181 debug (vec<data_reference_p> &ref)
182 {
183   dump_data_references (stderr, ref);
184 }
185 
186 DEBUG_FUNCTION void
debug(vec<data_reference_p> * ptr)187 debug (vec<data_reference_p> *ptr)
188 {
189   if (ptr)
190     debug (*ptr);
191   else
192     fprintf (stderr, "<nil>\n");
193 }
194 
195 
196 /* Dump into STDERR all the data references from DATAREFS.  */
197 
198 DEBUG_FUNCTION void
debug_data_references(vec<data_reference_p> datarefs)199 debug_data_references (vec<data_reference_p> datarefs)
200 {
201   dump_data_references (stderr, datarefs);
202 }
203 
204 /* Print to STDERR the data_reference DR.  */
205 
206 DEBUG_FUNCTION void
debug_data_reference(struct data_reference * dr)207 debug_data_reference (struct data_reference *dr)
208 {
209   dump_data_reference (stderr, dr);
210 }
211 
212 /* Dump function for a DATA_REFERENCE structure.  */
213 
214 void
dump_data_reference(FILE * outf,struct data_reference * dr)215 dump_data_reference (FILE *outf,
216 		     struct data_reference *dr)
217 {
218   unsigned int i;
219 
220   fprintf (outf, "#(Data Ref: \n");
221   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
222   fprintf (outf, "#  stmt: ");
223   print_gimple_stmt (outf, DR_STMT (dr), 0);
224   fprintf (outf, "#  ref: ");
225   print_generic_stmt (outf, DR_REF (dr));
226   fprintf (outf, "#  base_object: ");
227   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
228 
229   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
230     {
231       fprintf (outf, "#  Access function %d: ", i);
232       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
233     }
234   fprintf (outf, "#)\n");
235 }
236 
237 /* Unified dump function for a DATA_REFERENCE structure.  */
238 
239 DEBUG_FUNCTION void
debug(data_reference & ref)240 debug (data_reference &ref)
241 {
242   dump_data_reference (stderr, &ref);
243 }
244 
245 DEBUG_FUNCTION void
debug(data_reference * ptr)246 debug (data_reference *ptr)
247 {
248   if (ptr)
249     debug (*ptr);
250   else
251     fprintf (stderr, "<nil>\n");
252 }
253 
254 
255 /* Dumps the affine function described by FN to the file OUTF.  */
256 
257 DEBUG_FUNCTION void
dump_affine_function(FILE * outf,affine_fn fn)258 dump_affine_function (FILE *outf, affine_fn fn)
259 {
260   unsigned i;
261   tree coef;
262 
263   print_generic_expr (outf, fn[0], TDF_SLIM);
264   for (i = 1; fn.iterate (i, &coef); i++)
265     {
266       fprintf (outf, " + ");
267       print_generic_expr (outf, coef, TDF_SLIM);
268       fprintf (outf, " * x_%u", i);
269     }
270 }
271 
272 /* Dumps the conflict function CF to the file OUTF.  */
273 
274 DEBUG_FUNCTION void
dump_conflict_function(FILE * outf,conflict_function * cf)275 dump_conflict_function (FILE *outf, conflict_function *cf)
276 {
277   unsigned i;
278 
279   if (cf->n == NO_DEPENDENCE)
280     fprintf (outf, "no dependence");
281   else if (cf->n == NOT_KNOWN)
282     fprintf (outf, "not known");
283   else
284     {
285       for (i = 0; i < cf->n; i++)
286 	{
287 	  if (i != 0)
288 	    fprintf (outf, " ");
289 	  fprintf (outf, "[");
290 	  dump_affine_function (outf, cf->fns[i]);
291 	  fprintf (outf, "]");
292 	}
293     }
294 }
295 
296 /* Dump function for a SUBSCRIPT structure.  */
297 
298 DEBUG_FUNCTION void
dump_subscript(FILE * outf,struct subscript * subscript)299 dump_subscript (FILE *outf, struct subscript *subscript)
300 {
301   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
302 
303   fprintf (outf, "\n (subscript \n");
304   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
305   dump_conflict_function (outf, cf);
306   if (CF_NONTRIVIAL_P (cf))
307     {
308       tree last_iteration = SUB_LAST_CONFLICT (subscript);
309       fprintf (outf, "\n  last_conflict: ");
310       print_generic_expr (outf, last_iteration);
311     }
312 
313   cf = SUB_CONFLICTS_IN_B (subscript);
314   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
315   dump_conflict_function (outf, cf);
316   if (CF_NONTRIVIAL_P (cf))
317     {
318       tree last_iteration = SUB_LAST_CONFLICT (subscript);
319       fprintf (outf, "\n  last_conflict: ");
320       print_generic_expr (outf, last_iteration);
321     }
322 
323   fprintf (outf, "\n  (Subscript distance: ");
324   print_generic_expr (outf, SUB_DISTANCE (subscript));
325   fprintf (outf, " ))\n");
326 }
327 
328 /* Print the classic direction vector DIRV to OUTF.  */
329 
330 DEBUG_FUNCTION void
print_direction_vector(FILE * outf,lambda_vector dirv,int length)331 print_direction_vector (FILE *outf,
332 			lambda_vector dirv,
333 			int length)
334 {
335   int eq;
336 
337   for (eq = 0; eq < length; eq++)
338     {
339       enum data_dependence_direction dir = ((enum data_dependence_direction)
340 					    dirv[eq]);
341 
342       switch (dir)
343 	{
344 	case dir_positive:
345 	  fprintf (outf, "    +");
346 	  break;
347 	case dir_negative:
348 	  fprintf (outf, "    -");
349 	  break;
350 	case dir_equal:
351 	  fprintf (outf, "    =");
352 	  break;
353 	case dir_positive_or_equal:
354 	  fprintf (outf, "   +=");
355 	  break;
356 	case dir_positive_or_negative:
357 	  fprintf (outf, "   +-");
358 	  break;
359 	case dir_negative_or_equal:
360 	  fprintf (outf, "   -=");
361 	  break;
362 	case dir_star:
363 	  fprintf (outf, "    *");
364 	  break;
365 	default:
366 	  fprintf (outf, "indep");
367 	  break;
368 	}
369     }
370   fprintf (outf, "\n");
371 }
372 
373 /* Print a vector of direction vectors.  */
374 
375 DEBUG_FUNCTION void
print_dir_vectors(FILE * outf,vec<lambda_vector> dir_vects,int length)376 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
377 		   int length)
378 {
379   unsigned j;
380   lambda_vector v;
381 
382   FOR_EACH_VEC_ELT (dir_vects, j, v)
383     print_direction_vector (outf, v, length);
384 }
385 
386 /* Print out a vector VEC of length N to OUTFILE.  */
387 
388 DEBUG_FUNCTION void
print_lambda_vector(FILE * outfile,lambda_vector vector,int n)389 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
390 {
391   int i;
392 
393   for (i = 0; i < n; i++)
394     fprintf (outfile, "%3d ", (int)vector[i]);
395   fprintf (outfile, "\n");
396 }
397 
398 /* Print a vector of distance vectors.  */
399 
400 DEBUG_FUNCTION void
print_dist_vectors(FILE * outf,vec<lambda_vector> dist_vects,int length)401 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
402 		    int length)
403 {
404   unsigned j;
405   lambda_vector v;
406 
407   FOR_EACH_VEC_ELT (dist_vects, j, v)
408     print_lambda_vector (outf, v, length);
409 }
410 
411 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
412 
413 DEBUG_FUNCTION void
dump_data_dependence_relation(FILE * outf,struct data_dependence_relation * ddr)414 dump_data_dependence_relation (FILE *outf,
415 			       struct data_dependence_relation *ddr)
416 {
417   struct data_reference *dra, *drb;
418 
419   fprintf (outf, "(Data Dep: \n");
420 
421   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
422     {
423       if (ddr)
424 	{
425 	  dra = DDR_A (ddr);
426 	  drb = DDR_B (ddr);
427 	  if (dra)
428 	    dump_data_reference (outf, dra);
429 	  else
430 	    fprintf (outf, "    (nil)\n");
431 	  if (drb)
432 	    dump_data_reference (outf, drb);
433 	  else
434 	    fprintf (outf, "    (nil)\n");
435 	}
436       fprintf (outf, "    (don't know)\n)\n");
437       return;
438     }
439 
440   dra = DDR_A (ddr);
441   drb = DDR_B (ddr);
442   dump_data_reference (outf, dra);
443   dump_data_reference (outf, drb);
444 
445   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
446     fprintf (outf, "    (no dependence)\n");
447 
448   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
449     {
450       unsigned int i;
451       class loop *loopi;
452 
453       subscript *sub;
454       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
455 	{
456 	  fprintf (outf, "  access_fn_A: ");
457 	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
458 	  fprintf (outf, "  access_fn_B: ");
459 	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
460 	  dump_subscript (outf, sub);
461 	}
462 
463       fprintf (outf, "  loop nest: (");
464       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
465 	fprintf (outf, "%d ", loopi->num);
466       fprintf (outf, ")\n");
467 
468       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
469 	{
470 	  fprintf (outf, "  distance_vector: ");
471 	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
472 			       DDR_NB_LOOPS (ddr));
473 	}
474 
475       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
476 	{
477 	  fprintf (outf, "  direction_vector: ");
478 	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
479 				  DDR_NB_LOOPS (ddr));
480 	}
481     }
482 
483   fprintf (outf, ")\n");
484 }
485 
486 /* Debug version.  */
487 
488 DEBUG_FUNCTION void
debug_data_dependence_relation(struct data_dependence_relation * ddr)489 debug_data_dependence_relation (struct data_dependence_relation *ddr)
490 {
491   dump_data_dependence_relation (stderr, ddr);
492 }
493 
494 /* Dump into FILE all the dependence relations from DDRS.  */
495 
496 DEBUG_FUNCTION void
dump_data_dependence_relations(FILE * file,vec<ddr_p> ddrs)497 dump_data_dependence_relations (FILE *file,
498 				vec<ddr_p> ddrs)
499 {
500   unsigned int i;
501   struct data_dependence_relation *ddr;
502 
503   FOR_EACH_VEC_ELT (ddrs, i, ddr)
504     dump_data_dependence_relation (file, ddr);
505 }
506 
507 DEBUG_FUNCTION void
debug(vec<ddr_p> & ref)508 debug (vec<ddr_p> &ref)
509 {
510   dump_data_dependence_relations (stderr, ref);
511 }
512 
513 DEBUG_FUNCTION void
debug(vec<ddr_p> * ptr)514 debug (vec<ddr_p> *ptr)
515 {
516   if (ptr)
517     debug (*ptr);
518   else
519     fprintf (stderr, "<nil>\n");
520 }
521 
522 
523 /* Dump to STDERR all the dependence relations from DDRS.  */
524 
525 DEBUG_FUNCTION void
debug_data_dependence_relations(vec<ddr_p> ddrs)526 debug_data_dependence_relations (vec<ddr_p> ddrs)
527 {
528   dump_data_dependence_relations (stderr, ddrs);
529 }
530 
531 /* Dumps the distance and direction vectors in FILE.  DDRS contains
532    the dependence relations, and VECT_SIZE is the size of the
533    dependence vectors, or in other words the number of loops in the
534    considered nest.  */
535 
536 DEBUG_FUNCTION void
dump_dist_dir_vectors(FILE * file,vec<ddr_p> ddrs)537 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
538 {
539   unsigned int i, j;
540   struct data_dependence_relation *ddr;
541   lambda_vector v;
542 
543   FOR_EACH_VEC_ELT (ddrs, i, ddr)
544     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
545       {
546 	FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
547 	  {
548 	    fprintf (file, "DISTANCE_V (");
549 	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
550 	    fprintf (file, ")\n");
551 	  }
552 
553 	FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
554 	  {
555 	    fprintf (file, "DIRECTION_V (");
556 	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
557 	    fprintf (file, ")\n");
558 	  }
559       }
560 
561   fprintf (file, "\n\n");
562 }
563 
564 /* Dumps the data dependence relations DDRS in FILE.  */
565 
566 DEBUG_FUNCTION void
dump_ddrs(FILE * file,vec<ddr_p> ddrs)567 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
568 {
569   unsigned int i;
570   struct data_dependence_relation *ddr;
571 
572   FOR_EACH_VEC_ELT (ddrs, i, ddr)
573     dump_data_dependence_relation (file, ddr);
574 
575   fprintf (file, "\n\n");
576 }
577 
578 DEBUG_FUNCTION void
debug_ddrs(vec<ddr_p> ddrs)579 debug_ddrs (vec<ddr_p> ddrs)
580 {
581   dump_ddrs (stderr, ddrs);
582 }
583 
584 static void
585 split_constant_offset (tree exp, tree *var, tree *off,
586 		       hash_map<tree, std::pair<tree, tree> > &cache,
587 		       unsigned *limit);
588 
589 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
590    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
591    constant of type ssizetype, and returns true.  If we cannot do this
592    with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
593    is returned.  */
594 
595 static bool
split_constant_offset_1(tree type,tree op0,enum tree_code code,tree op1,tree * var,tree * off,hash_map<tree,std::pair<tree,tree>> & cache,unsigned * limit)596 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
597 			 tree *var, tree *off,
598 			 hash_map<tree, std::pair<tree, tree> > &cache,
599 			 unsigned *limit)
600 {
601   tree var0, var1;
602   tree off0, off1;
603   enum tree_code ocode = code;
604 
605   *var = NULL_TREE;
606   *off = NULL_TREE;
607 
608   switch (code)
609     {
610     case INTEGER_CST:
611       *var = build_int_cst (type, 0);
612       *off = fold_convert (ssizetype, op0);
613       return true;
614 
615     case POINTER_PLUS_EXPR:
616       ocode = PLUS_EXPR;
617       /* FALLTHROUGH */
618     case PLUS_EXPR:
619     case MINUS_EXPR:
620       if (TREE_CODE (op1) == INTEGER_CST)
621 	{
622 	  split_constant_offset (op0, &var0, &off0, cache, limit);
623 	  *var = var0;
624 	  *off = size_binop (ocode, off0, fold_convert (ssizetype, op1));
625 	  return true;
626 	}
627       split_constant_offset (op0, &var0, &off0, cache, limit);
628       split_constant_offset (op1, &var1, &off1, cache, limit);
629       *var = fold_build2 (code, type, var0, var1);
630       *off = size_binop (ocode, off0, off1);
631       return true;
632 
633     case MULT_EXPR:
634       if (TREE_CODE (op1) != INTEGER_CST)
635 	return false;
636 
637       split_constant_offset (op0, &var0, &off0, cache, limit);
638       *var = fold_build2 (MULT_EXPR, type, var0, op1);
639       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
640       return true;
641 
642     case ADDR_EXPR:
643       {
644 	tree base, poffset;
645 	poly_int64 pbitsize, pbitpos, pbytepos;
646 	machine_mode pmode;
647 	int punsignedp, preversep, pvolatilep;
648 
649 	op0 = TREE_OPERAND (op0, 0);
650 	base
651 	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
652 				 &punsignedp, &preversep, &pvolatilep);
653 
654 	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
655 	  return false;
656 	base = build_fold_addr_expr (base);
657 	off0 = ssize_int (pbytepos);
658 
659 	if (poffset)
660 	  {
661 	    split_constant_offset (poffset, &poffset, &off1, cache, limit);
662 	    off0 = size_binop (PLUS_EXPR, off0, off1);
663 	    if (POINTER_TYPE_P (TREE_TYPE (base)))
664 	      base = fold_build_pointer_plus (base, poffset);
665 	    else
666 	      base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
667 				  fold_convert (TREE_TYPE (base), poffset));
668 	  }
669 
670 	var0 = fold_convert (type, base);
671 
672 	/* If variable length types are involved, punt, otherwise casts
673 	   might be converted into ARRAY_REFs in gimplify_conversion.
674 	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
675 	   possibly no longer appears in current GIMPLE, might resurface.
676 	   This perhaps could run
677 	   if (CONVERT_EXPR_P (var0))
678 	     {
679 	       gimplify_conversion (&var0);
680 	       // Attempt to fill in any within var0 found ARRAY_REF's
681 	       // element size from corresponding op embedded ARRAY_REF,
682 	       // if unsuccessful, just punt.
683 	     }  */
684 	while (POINTER_TYPE_P (type))
685 	  type = TREE_TYPE (type);
686 	if (int_size_in_bytes (type) < 0)
687 	  return false;
688 
689 	*var = var0;
690 	*off = off0;
691 	return true;
692       }
693 
694     case SSA_NAME:
695       {
696 	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
697 	  return false;
698 
699 	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
700 	enum tree_code subcode;
701 
702 	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
703 	  return false;
704 
705 	subcode = gimple_assign_rhs_code (def_stmt);
706 
707 	/* We are using a cache to avoid un-CSEing large amounts of code.  */
708 	bool use_cache = false;
709 	if (!has_single_use (op0)
710 	    && (subcode == POINTER_PLUS_EXPR
711 		|| subcode == PLUS_EXPR
712 		|| subcode == MINUS_EXPR
713 		|| subcode == MULT_EXPR
714 		|| subcode == ADDR_EXPR
715 		|| CONVERT_EXPR_CODE_P (subcode)))
716 	  {
717 	    use_cache = true;
718 	    bool existed;
719 	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
720 	    if (existed)
721 	      {
722 		if (integer_zerop (e.second))
723 		  return false;
724 		*var = e.first;
725 		*off = e.second;
726 		return true;
727 	      }
728 	    e = std::make_pair (op0, ssize_int (0));
729 	  }
730 
731 	if (*limit == 0)
732 	  return false;
733 	--*limit;
734 
735 	var0 = gimple_assign_rhs1 (def_stmt);
736 	var1 = gimple_assign_rhs2 (def_stmt);
737 
738 	bool res = split_constant_offset_1 (type, var0, subcode, var1,
739 					    var, off, cache, limit);
740 	if (res && use_cache)
741 	  *cache.get (op0) = std::make_pair (*var, *off);
742 	return res;
743       }
744     CASE_CONVERT:
745       {
746 	/* We must not introduce undefined overflow, and we must not change
747 	   the value.  Hence we're okay if the inner type doesn't overflow
748 	   to start with (pointer or signed), the outer type also is an
749 	   integer or pointer and the outer precision is at least as large
750 	   as the inner.  */
751 	tree itype = TREE_TYPE (op0);
752 	if ((POINTER_TYPE_P (itype)
753 	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
754 	    && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
755 	    && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
756 	  {
757 	    if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
758 	      {
759 		/* Split the unconverted operand and try to prove that
760 		   wrapping isn't a problem.  */
761 		tree tmp_var, tmp_off;
762 		split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);
763 
764 		/* See whether we have an SSA_NAME whose range is known
765 		   to be [A, B].  */
766 		if (TREE_CODE (tmp_var) != SSA_NAME)
767 		  return false;
768 		wide_int var_min, var_max;
769 		value_range_kind vr_type = get_range_info (tmp_var, &var_min,
770 							   &var_max);
771 		wide_int var_nonzero = get_nonzero_bits (tmp_var);
772 		signop sgn = TYPE_SIGN (itype);
773 		if (intersect_range_with_nonzero_bits (vr_type, &var_min,
774 						       &var_max, var_nonzero,
775 						       sgn) != VR_RANGE)
776 		  return false;
777 
778 		/* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
779 		   is known to be [A + TMP_OFF, B + TMP_OFF], with all
780 		   operations done in ITYPE.  The addition must overflow
781 		   at both ends of the range or at neither.  */
782 		wi::overflow_type overflow[2];
783 		unsigned int prec = TYPE_PRECISION (itype);
784 		wide_int woff = wi::to_wide (tmp_off, prec);
785 		wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
786 		wi::add (var_max, woff, sgn, &overflow[1]);
787 		if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
788 		  return false;
789 
790 		/* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
791 		widest_int diff = (widest_int::from (op0_min, sgn)
792 				   - widest_int::from (var_min, sgn));
793 		var0 = tmp_var;
794 		*off = wide_int_to_tree (ssizetype, diff);
795 	      }
796 	    else
797 	      split_constant_offset (op0, &var0, off, cache, limit);
798 	    *var = fold_convert (type, var0);
799 	    return true;
800 	  }
801 	return false;
802       }
803 
804     default:
805       return false;
806     }
807 }
808 
809 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
810    will be ssizetype.  */
811 
812 static void
split_constant_offset(tree exp,tree * var,tree * off,hash_map<tree,std::pair<tree,tree>> & cache,unsigned * limit)813 split_constant_offset (tree exp, tree *var, tree *off,
814 		       hash_map<tree, std::pair<tree, tree> > &cache,
815 		       unsigned *limit)
816 {
817   tree type = TREE_TYPE (exp), op0, op1, e, o;
818   enum tree_code code;
819 
820   *var = exp;
821   *off = ssize_int (0);
822 
823   if (tree_is_chrec (exp)
824       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
825     return;
826 
827   code = TREE_CODE (exp);
828   extract_ops_from_tree (exp, &code, &op0, &op1);
829   if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit))
830     {
831       *var = e;
832       *off = o;
833     }
834 }
835 
836 void
split_constant_offset(tree exp,tree * var,tree * off)837 split_constant_offset (tree exp, tree *var, tree *off)
838 {
839   unsigned limit = param_ssa_name_def_chain_limit;
840   static hash_map<tree, std::pair<tree, tree> > *cache;
841   if (!cache)
842     cache = new hash_map<tree, std::pair<tree, tree> > (37);
843   split_constant_offset (exp, var, off, *cache, &limit);
844   cache->empty ();
845 }
846 
847 /* Returns the address ADDR of an object in a canonical shape (without nop
848    casts, and with type of pointer to the object).  */
849 
850 static tree
canonicalize_base_object_address(tree addr)851 canonicalize_base_object_address (tree addr)
852 {
853   tree orig = addr;
854 
855   STRIP_NOPS (addr);
856 
857   /* The base address may be obtained by casting from integer, in that case
858      keep the cast.  */
859   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
860     return orig;
861 
862   if (TREE_CODE (addr) != ADDR_EXPR)
863     return addr;
864 
865   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
866 }
867 
868 /* Analyze the behavior of memory reference REF within STMT.
869    There are two modes:
870 
871    - BB analysis.  In this case we simply split the address into base,
872      init and offset components, without reference to any containing loop.
873      The resulting base and offset are general expressions and they can
874      vary arbitrarily from one iteration of the containing loop to the next.
875      The step is always zero.
876 
877    - loop analysis.  In this case we analyze the reference both wrt LOOP
878      and on the basis that the reference occurs (is "used") in LOOP;
879      see the comment above analyze_scalar_evolution_in_loop for more
880      information about this distinction.  The base, init, offset and
881      step fields are all invariant in LOOP.
882 
883    Perform BB analysis if LOOP is null, or if LOOP is the function's
884    dummy outermost loop.  In other cases perform loop analysis.
885 
886    Return true if the analysis succeeded and store the results in DRB if so.
887    BB analysis can only fail for bitfield or reversed-storage accesses.  */
888 
889 opt_result
dr_analyze_innermost(innermost_loop_behavior * drb,tree ref,class loop * loop,const gimple * stmt)890 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
891 		      class loop *loop, const gimple *stmt)
892 {
893   poly_int64 pbitsize, pbitpos;
894   tree base, poffset;
895   machine_mode pmode;
896   int punsignedp, preversep, pvolatilep;
897   affine_iv base_iv, offset_iv;
898   tree init, dinit, step;
899   bool in_loop = (loop && loop->num);
900 
901   if (dump_file && (dump_flags & TDF_DETAILS))
902     fprintf (dump_file, "analyze_innermost: ");
903 
904   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
905 			      &punsignedp, &preversep, &pvolatilep);
906   gcc_assert (base != NULL_TREE);
907 
908   poly_int64 pbytepos;
909   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
910     return opt_result::failure_at (stmt,
911 				   "failed: bit offset alignment.\n");
912 
913   if (preversep)
914     return opt_result::failure_at (stmt,
915 				   "failed: reverse storage order.\n");
916 
917   /* Calculate the alignment and misalignment for the inner reference.  */
918   unsigned int HOST_WIDE_INT bit_base_misalignment;
919   unsigned int bit_base_alignment;
920   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
921 
922   /* There are no bitfield references remaining in BASE, so the values
923      we got back must be whole bytes.  */
924   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
925 	      && bit_base_misalignment % BITS_PER_UNIT == 0);
926   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
927   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
928 
929   if (TREE_CODE (base) == MEM_REF)
930     {
931       if (!integer_zerop (TREE_OPERAND (base, 1)))
932 	{
933 	  /* Subtract MOFF from the base and add it to POFFSET instead.
934 	     Adjust the misalignment to reflect the amount we subtracted.  */
935 	  poly_offset_int moff = mem_ref_offset (base);
936 	  base_misalignment -= moff.force_shwi ();
937 	  tree mofft = wide_int_to_tree (sizetype, moff);
938 	  if (!poffset)
939 	    poffset = mofft;
940 	  else
941 	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
942 	}
943       base = TREE_OPERAND (base, 0);
944     }
945   else
946     base = build_fold_addr_expr (base);
947 
948   if (in_loop)
949     {
950       if (!simple_iv (loop, loop, base, &base_iv, true))
951 	return opt_result::failure_at
952 	  (stmt, "failed: evolution of base is not affine.\n");
953     }
954   else
955     {
956       base_iv.base = base;
957       base_iv.step = ssize_int (0);
958       base_iv.no_overflow = true;
959     }
960 
961   if (!poffset)
962     {
963       offset_iv.base = ssize_int (0);
964       offset_iv.step = ssize_int (0);
965     }
966   else
967     {
968       if (!in_loop)
969         {
970           offset_iv.base = poffset;
971           offset_iv.step = ssize_int (0);
972         }
973       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
974 	return opt_result::failure_at
975 	  (stmt, "failed: evolution of offset is not affine.\n");
976     }
977 
978   init = ssize_int (pbytepos);
979 
980   /* Subtract any constant component from the base and add it to INIT instead.
981      Adjust the misalignment to reflect the amount we subtracted.  */
982   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
983   init = size_binop (PLUS_EXPR, init, dinit);
984   base_misalignment -= TREE_INT_CST_LOW (dinit);
985 
986   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
987   init = size_binop (PLUS_EXPR, init, dinit);
988 
989   step = size_binop (PLUS_EXPR,
990 		     fold_convert (ssizetype, base_iv.step),
991 		     fold_convert (ssizetype, offset_iv.step));
992 
993   base = canonicalize_base_object_address (base_iv.base);
994 
995   /* See if get_pointer_alignment can guarantee a higher alignment than
996      the one we calculated above.  */
997   unsigned int HOST_WIDE_INT alt_misalignment;
998   unsigned int alt_alignment;
999   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1000 
1001   /* As above, these values must be whole bytes.  */
1002   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1003 	      && alt_misalignment % BITS_PER_UNIT == 0);
1004   alt_alignment /= BITS_PER_UNIT;
1005   alt_misalignment /= BITS_PER_UNIT;
1006 
1007   if (base_alignment < alt_alignment)
1008     {
1009       base_alignment = alt_alignment;
1010       base_misalignment = alt_misalignment;
1011     }
1012 
1013   drb->base_address = base;
1014   drb->offset = fold_convert (ssizetype, offset_iv.base);
1015   drb->init = init;
1016   drb->step = step;
1017   if (known_misalignment (base_misalignment, base_alignment,
1018 			  &drb->base_misalignment))
1019     drb->base_alignment = base_alignment;
1020   else
1021     {
1022       drb->base_alignment = known_alignment (base_misalignment);
1023       drb->base_misalignment = 0;
1024     }
1025   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1026   drb->step_alignment = highest_pow2_factor (step);
1027 
1028   if (dump_file && (dump_flags & TDF_DETAILS))
1029     fprintf (dump_file, "success.\n");
1030 
1031   return opt_result::success ();
1032 }
1033 
1034 /* Return true if OP is a valid component reference for a DR access
1035    function.  This accepts a subset of what handled_component_p accepts.  */
1036 
1037 static bool
access_fn_component_p(tree op)1038 access_fn_component_p (tree op)
1039 {
1040   switch (TREE_CODE (op))
1041     {
1042     case REALPART_EXPR:
1043     case IMAGPART_EXPR:
1044     case ARRAY_REF:
1045       return true;
1046 
1047     case COMPONENT_REF:
1048       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1049 
1050     default:
1051       return false;
1052     }
1053 }
1054 
1055 /* Determines the base object and the list of indices of memory reference
1056    DR, analyzed in LOOP and instantiated before NEST.  */
1057 
1058 static void
dr_analyze_indices(struct data_reference * dr,edge nest,loop_p loop)1059 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
1060 {
1061   vec<tree> access_fns = vNULL;
1062   tree ref, op;
1063   tree base, off, access_fn;
1064 
1065   /* If analyzing a basic-block there are no indices to analyze
1066      and thus no access functions.  */
1067   if (!nest)
1068     {
1069       DR_BASE_OBJECT (dr) = DR_REF (dr);
1070       DR_ACCESS_FNS (dr).create (0);
1071       return;
1072     }
1073 
1074   ref = DR_REF (dr);
1075 
1076   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1077      into a two element array with a constant index.  The base is
1078      then just the immediate underlying object.  */
1079   if (TREE_CODE (ref) == REALPART_EXPR)
1080     {
1081       ref = TREE_OPERAND (ref, 0);
1082       access_fns.safe_push (integer_zero_node);
1083     }
1084   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1085     {
1086       ref = TREE_OPERAND (ref, 0);
1087       access_fns.safe_push (integer_one_node);
1088     }
1089 
1090   /* Analyze access functions of dimensions we know to be independent.
1091      The list of component references handled here should be kept in
1092      sync with access_fn_component_p.  */
1093   while (handled_component_p (ref))
1094     {
1095       if (TREE_CODE (ref) == ARRAY_REF)
1096 	{
1097 	  op = TREE_OPERAND (ref, 1);
1098 	  access_fn = analyze_scalar_evolution (loop, op);
1099 	  access_fn = instantiate_scev (nest, loop, access_fn);
1100 	  access_fns.safe_push (access_fn);
1101 	}
1102       else if (TREE_CODE (ref) == COMPONENT_REF
1103 	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1104 	{
1105 	  /* For COMPONENT_REFs of records (but not unions!) use the
1106 	     FIELD_DECL offset as constant access function so we can
1107 	     disambiguate a[i].f1 and a[i].f2.  */
1108 	  tree off = component_ref_field_offset (ref);
1109 	  off = size_binop (PLUS_EXPR,
1110 			    size_binop (MULT_EXPR,
1111 					fold_convert (bitsizetype, off),
1112 					bitsize_int (BITS_PER_UNIT)),
1113 			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1114 	  access_fns.safe_push (off);
1115 	}
1116       else
1117 	/* If we have an unhandled component we could not translate
1118 	   to an access function stop analyzing.  We have determined
1119 	   our base object in this case.  */
1120 	break;
1121 
1122       ref = TREE_OPERAND (ref, 0);
1123     }
1124 
1125   /* If the address operand of a MEM_REF base has an evolution in the
1126      analyzed nest, add it as an additional independent access-function.  */
1127   if (TREE_CODE (ref) == MEM_REF)
1128     {
1129       op = TREE_OPERAND (ref, 0);
1130       access_fn = analyze_scalar_evolution (loop, op);
1131       access_fn = instantiate_scev (nest, loop, access_fn);
1132       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1133 	{
1134 	  tree orig_type;
1135 	  tree memoff = TREE_OPERAND (ref, 1);
1136 	  base = initial_condition (access_fn);
1137 	  orig_type = TREE_TYPE (base);
1138 	  STRIP_USELESS_TYPE_CONVERSION (base);
1139 	  split_constant_offset (base, &base, &off);
1140 	  STRIP_USELESS_TYPE_CONVERSION (base);
1141 	  /* Fold the MEM_REF offset into the evolutions initial
1142 	     value to make more bases comparable.  */
1143 	  if (!integer_zerop (memoff))
1144 	    {
1145 	      off = size_binop (PLUS_EXPR, off,
1146 				fold_convert (ssizetype, memoff));
1147 	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
1148 	    }
1149 	  /* Adjust the offset so it is a multiple of the access type
1150 	     size and thus we separate bases that can possibly be used
1151 	     to produce partial overlaps (which the access_fn machinery
1152 	     cannot handle).  */
1153 	  wide_int rem;
1154 	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1155 	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1156 	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1157 	    rem = wi::mod_trunc
1158 	      (wi::to_wide (off),
1159 	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1160 	       SIGNED);
1161 	  else
1162 	    /* If we can't compute the remainder simply force the initial
1163 	       condition to zero.  */
1164 	    rem = wi::to_wide (off);
1165 	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1166 	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1167 	  /* And finally replace the initial condition.  */
1168 	  access_fn = chrec_replace_initial_condition
1169 	      (access_fn, fold_convert (orig_type, off));
1170 	  /* ???  This is still not a suitable base object for
1171 	     dr_may_alias_p - the base object needs to be an
1172 	     access that covers the object as whole.  With
1173 	     an evolution in the pointer this cannot be
1174 	     guaranteed.
1175 	     As a band-aid, mark the access so we can special-case
1176 	     it in dr_may_alias_p.  */
1177 	  tree old = ref;
1178 	  ref = fold_build2_loc (EXPR_LOCATION (ref),
1179 				 MEM_REF, TREE_TYPE (ref),
1180 				 base, memoff);
1181 	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1182 	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1183 	  DR_UNCONSTRAINED_BASE (dr) = true;
1184 	  access_fns.safe_push (access_fn);
1185 	}
1186     }
1187   else if (DECL_P (ref))
1188     {
1189       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1190       ref = build2 (MEM_REF, TREE_TYPE (ref),
1191 		    build_fold_addr_expr (ref),
1192 		    build_int_cst (reference_alias_ptr_type (ref), 0));
1193     }
1194 
1195   DR_BASE_OBJECT (dr) = ref;
1196   DR_ACCESS_FNS (dr) = access_fns;
1197 }
1198 
1199 /* Extracts the alias analysis information from the memory reference DR.  */
1200 
1201 static void
dr_analyze_alias(struct data_reference * dr)1202 dr_analyze_alias (struct data_reference *dr)
1203 {
1204   tree ref = DR_REF (dr);
1205   tree base = get_base_address (ref), addr;
1206 
1207   if (INDIRECT_REF_P (base)
1208       || TREE_CODE (base) == MEM_REF)
1209     {
1210       addr = TREE_OPERAND (base, 0);
1211       if (TREE_CODE (addr) == SSA_NAME)
1212 	DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1213     }
1214 }
1215 
1216 /* Frees data reference DR.  */
1217 
1218 void
free_data_ref(data_reference_p dr)1219 free_data_ref (data_reference_p dr)
1220 {
1221   DR_ACCESS_FNS (dr).release ();
1222   free (dr);
1223 }
1224 
1225 /* Analyze memory reference MEMREF, which is accessed in STMT.
1226    The reference is a read if IS_READ is true, otherwise it is a write.
1227    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1228    within STMT, i.e. that it might not occur even if STMT is executed
1229    and runs to completion.
1230 
1231    Return the data_reference description of MEMREF.  NEST is the outermost
1232    loop in which the reference should be instantiated, LOOP is the loop
1233    in which the data reference should be analyzed.  */
1234 
1235 struct data_reference *
create_data_ref(edge nest,loop_p loop,tree memref,gimple * stmt,bool is_read,bool is_conditional_in_stmt)1236 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1237 		 bool is_read, bool is_conditional_in_stmt)
1238 {
1239   struct data_reference *dr;
1240 
1241   if (dump_file && (dump_flags & TDF_DETAILS))
1242     {
1243       fprintf (dump_file, "Creating dr for ");
1244       print_generic_expr (dump_file, memref, TDF_SLIM);
1245       fprintf (dump_file, "\n");
1246     }
1247 
1248   dr = XCNEW (struct data_reference);
1249   DR_STMT (dr) = stmt;
1250   DR_REF (dr) = memref;
1251   DR_IS_READ (dr) = is_read;
1252   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1253 
1254   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1255 			nest != NULL ? loop : NULL, stmt);
1256   dr_analyze_indices (dr, nest, loop);
1257   dr_analyze_alias (dr);
1258 
1259   if (dump_file && (dump_flags & TDF_DETAILS))
1260     {
1261       unsigned i;
1262       fprintf (dump_file, "\tbase_address: ");
1263       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1264       fprintf (dump_file, "\n\toffset from base address: ");
1265       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1266       fprintf (dump_file, "\n\tconstant offset from base address: ");
1267       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1268       fprintf (dump_file, "\n\tstep: ");
1269       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1270       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1271       fprintf (dump_file, "\n\tbase misalignment: %d",
1272 	       DR_BASE_MISALIGNMENT (dr));
1273       fprintf (dump_file, "\n\toffset alignment: %d",
1274 	       DR_OFFSET_ALIGNMENT (dr));
1275       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1276       fprintf (dump_file, "\n\tbase_object: ");
1277       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1278       fprintf (dump_file, "\n");
1279       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1280 	{
1281 	  fprintf (dump_file, "\tAccess function %d: ", i);
1282 	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1283 	}
1284     }
1285 
1286   return dr;
1287 }
1288 
1289 /*  A helper function computes order between two tree expressions T1 and T2.
1290     This is used in comparator functions sorting objects based on the order
1291     of tree expressions.  The function returns -1, 0, or 1.  */
1292 
1293 int
data_ref_compare_tree(tree t1,tree t2)1294 data_ref_compare_tree (tree t1, tree t2)
1295 {
1296   int i, cmp;
1297   enum tree_code code;
1298   char tclass;
1299 
1300   if (t1 == t2)
1301     return 0;
1302   if (t1 == NULL)
1303     return -1;
1304   if (t2 == NULL)
1305     return 1;
1306 
1307   STRIP_USELESS_TYPE_CONVERSION (t1);
1308   STRIP_USELESS_TYPE_CONVERSION (t2);
1309   if (t1 == t2)
1310     return 0;
1311 
1312   if (TREE_CODE (t1) != TREE_CODE (t2)
1313       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1314     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1315 
1316   code = TREE_CODE (t1);
1317   switch (code)
1318     {
1319     case INTEGER_CST:
1320       return tree_int_cst_compare (t1, t2);
1321 
1322     case STRING_CST:
1323       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1324 	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1325       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1326 		     TREE_STRING_LENGTH (t1));
1327 
1328     case SSA_NAME:
1329       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1330 	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1331       break;
1332 
1333     default:
1334       if (POLY_INT_CST_P (t1))
1335 	return compare_sizes_for_sort (wi::to_poly_widest (t1),
1336 				       wi::to_poly_widest (t2));
1337 
1338       tclass = TREE_CODE_CLASS (code);
1339 
1340       /* For decls, compare their UIDs.  */
1341       if (tclass == tcc_declaration)
1342 	{
1343 	  if (DECL_UID (t1) != DECL_UID (t2))
1344 	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1345 	  break;
1346 	}
1347       /* For expressions, compare their operands recursively.  */
1348       else if (IS_EXPR_CODE_CLASS (tclass))
1349 	{
1350 	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1351 	    {
1352 	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1353 					   TREE_OPERAND (t2, i));
1354 	      if (cmp != 0)
1355 		return cmp;
1356 	    }
1357 	}
1358       else
1359 	gcc_unreachable ();
1360     }
1361 
1362   return 0;
1363 }
1364 
1365 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1366    check.  */
1367 
1368 opt_result
runtime_alias_check_p(ddr_p ddr,class loop * loop,bool speed_p)1369 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1370 {
1371   if (dump_enabled_p ())
1372     dump_printf (MSG_NOTE,
1373 		 "consider run-time aliasing test between %T and %T\n",
1374 		 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1375 
1376   if (!speed_p)
1377     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1378 				   "runtime alias check not supported when"
1379 				   " optimizing for size.\n");
1380 
1381   /* FORNOW: We don't support versioning with outer-loop in either
1382      vectorization or loop distribution.  */
1383   if (loop != NULL && loop->inner != NULL)
1384     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1385 				   "runtime alias check not supported for"
1386 				   " outer loop.\n");
1387 
1388   return opt_result::success ();
1389 }
1390 
1391 /* Operator == between two dr_with_seg_len objects.
1392 
1393    This equality operator is used to make sure two data refs
1394    are the same one so that we will consider to combine the
1395    aliasing checks of those two pairs of data dependent data
1396    refs.  */
1397 
1398 static bool
1399 operator == (const dr_with_seg_len& d1,
1400 	     const dr_with_seg_len& d2)
1401 {
1402   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1403 			   DR_BASE_ADDRESS (d2.dr), 0)
1404 	  && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1405 	  && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1406 	  && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1407 	  && known_eq (d1.access_size, d2.access_size)
1408 	  && d1.align == d2.align);
1409 }
1410 
1411 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1412    so that we can combine aliasing checks in one scan.  */
1413 
1414 static int
comp_dr_with_seg_len_pair(const void * pa_,const void * pb_)1415 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1416 {
1417   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1418   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1419   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1420   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1421 
1422   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1423      if a and c have the same basic address snd step, and b and d have the same
1424      address and step.  Therefore, if any a&c or b&d don't have the same address
1425      and step, we don't care the order of those two pairs after sorting.  */
1426   int comp_res;
1427 
1428   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1429 					 DR_BASE_ADDRESS (b1.dr))) != 0)
1430     return comp_res;
1431   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1432 					 DR_BASE_ADDRESS (b2.dr))) != 0)
1433     return comp_res;
1434   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1435 					 DR_STEP (b1.dr))) != 0)
1436     return comp_res;
1437   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1438 					 DR_STEP (b2.dr))) != 0)
1439     return comp_res;
1440   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1441 					 DR_OFFSET (b1.dr))) != 0)
1442     return comp_res;
1443   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1444 					 DR_INIT (b1.dr))) != 0)
1445     return comp_res;
1446   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1447 					 DR_OFFSET (b2.dr))) != 0)
1448     return comp_res;
1449   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1450 					 DR_INIT (b2.dr))) != 0)
1451     return comp_res;
1452 
1453   return 0;
1454 }
1455 
1456 /* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */
1457 
1458 static void
dump_alias_pair(dr_with_seg_len_pair_t * alias_pair,const char * indent)1459 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1460 {
1461   dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
1462 	       DR_REF (alias_pair->first.dr),
1463 	       DR_REF (alias_pair->second.dr));
1464 
1465   dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1466 	       alias_pair->first.seg_len);
1467   if (!operand_equal_p (alias_pair->first.seg_len,
1468 			alias_pair->second.seg_len, 0))
1469     dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1470 
1471   dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
1472   dump_dec (MSG_NOTE, alias_pair->first.access_size);
1473   if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1474     {
1475       dump_printf (MSG_NOTE, " vs. ");
1476       dump_dec (MSG_NOTE, alias_pair->second.access_size);
1477     }
1478 
1479   dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
1480 	       alias_pair->first.align);
1481   if (alias_pair->first.align != alias_pair->second.align)
1482     dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1483 
1484   dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
1485   if (alias_pair->flags & DR_ALIAS_RAW)
1486     dump_printf (MSG_NOTE, " RAW");
1487   if (alias_pair->flags & DR_ALIAS_WAR)
1488     dump_printf (MSG_NOTE, " WAR");
1489   if (alias_pair->flags & DR_ALIAS_WAW)
1490     dump_printf (MSG_NOTE, " WAW");
1491   if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1492     dump_printf (MSG_NOTE, " ARBITRARY");
1493   if (alias_pair->flags & DR_ALIAS_SWAPPED)
1494     dump_printf (MSG_NOTE, " SWAPPED");
1495   if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1496     dump_printf (MSG_NOTE, " UNSWAPPED");
1497   if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1498     dump_printf (MSG_NOTE, " MIXED_STEPS");
1499   if (alias_pair->flags == 0)
1500     dump_printf (MSG_NOTE, " <none>");
1501   dump_printf (MSG_NOTE, "\n");
1502 }
1503 
1504 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1505    FACTOR is number of iterations that each data reference is accessed.
1506 
1507    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1508    we create an expression:
1509 
1510    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1511    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1512 
1513    for aliasing checks.  However, in some cases we can decrease the number
1514    of checks by combining two checks into one.  For example, suppose we have
1515    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1516    condition is satisfied:
1517 
1518    load_ptr_0 < load_ptr_1  &&
1519    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1520 
1521    (this condition means, in each iteration of vectorized loop, the accessed
1522    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1523    load_ptr_1.)
1524 
1525    we then can use only the following expression to finish the alising checks
1526    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1527 
1528    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1529    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1530 
1531    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1532    basic address.  */
1533 
1534 void
prune_runtime_alias_test_list(vec<dr_with_seg_len_pair_t> * alias_pairs,poly_uint64)1535 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1536 			       poly_uint64)
1537 {
1538   if (alias_pairs->is_empty ())
1539     return;
1540 
1541   /* Canonicalize each pair so that the base components are ordered wrt
1542      data_ref_compare_tree.  This allows the loop below to merge more
1543      cases.  */
1544   unsigned int i;
1545   dr_with_seg_len_pair_t *alias_pair;
1546   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1547     {
1548       data_reference_p dr_a = alias_pair->first.dr;
1549       data_reference_p dr_b = alias_pair->second.dr;
1550       int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1551 					    DR_BASE_ADDRESS (dr_b));
1552       if (comp_res == 0)
1553 	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1554       if (comp_res == 0)
1555 	comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1556       if (comp_res > 0)
1557 	{
1558 	  std::swap (alias_pair->first, alias_pair->second);
1559 	  alias_pair->flags |= DR_ALIAS_SWAPPED;
1560 	}
1561       else
1562 	alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1563     }
1564 
1565   /* Sort the collected data ref pairs so that we can scan them once to
1566      combine all possible aliasing checks.  */
1567   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1568 
1569   /* Scan the sorted dr pairs and check if we can combine alias checks
1570      of two neighboring dr pairs.  */
1571   unsigned int last = 0;
1572   for (i = 1; i < alias_pairs->length (); ++i)
1573     {
1574       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1575       dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1576       dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1577 
1578       dr_with_seg_len *dr_a1 = &alias_pair1->first;
1579       dr_with_seg_len *dr_b1 = &alias_pair1->second;
1580       dr_with_seg_len *dr_a2 = &alias_pair2->first;
1581       dr_with_seg_len *dr_b2 = &alias_pair2->second;
1582 
1583       /* Remove duplicate data ref pairs.  */
1584       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1585 	{
1586 	  if (dump_enabled_p ())
1587 	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1588 			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1589 			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1590 	  alias_pair1->flags |= alias_pair2->flags;
1591 	  continue;
1592 	}
1593 
1594       /* Assume that we won't be able to merge the pairs, then correct
1595 	 if we do.  */
1596       last += 1;
1597       if (last != i)
1598 	(*alias_pairs)[last] = (*alias_pairs)[i];
1599 
1600       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1601 	{
1602 	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1603 	     and DR_A1 and DR_A2 are two consecutive memrefs.  */
1604 	  if (*dr_a1 == *dr_a2)
1605 	    {
1606 	      std::swap (dr_a1, dr_b1);
1607 	      std::swap (dr_a2, dr_b2);
1608 	    }
1609 
1610 	  poly_int64 init_a1, init_a2;
1611 	  /* Only consider cases in which the distance between the initial
1612 	     DR_A1 and the initial DR_A2 is known at compile time.  */
1613 	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1614 				DR_BASE_ADDRESS (dr_a2->dr), 0)
1615 	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1616 				   DR_OFFSET (dr_a2->dr), 0)
1617 	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1618 	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1619 	    continue;
1620 
1621 	  /* Don't combine if we can't tell which one comes first.  */
1622 	  if (!ordered_p (init_a1, init_a2))
1623 	    continue;
1624 
1625 	  /* Work out what the segment length would be if we did combine
1626 	     DR_A1 and DR_A2:
1627 
1628 	     - If DR_A1 and DR_A2 have equal lengths, that length is
1629 	       also the combined length.
1630 
1631 	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
1632 	       length is the lower bound on those lengths.
1633 
1634 	     - If DR_A1 and DR_A2 both have positive lengths, the combined
1635 	       length is the upper bound on those lengths.
1636 
1637 	     Other cases are unlikely to give a useful combination.
1638 
1639 	     The lengths both have sizetype, so the sign is taken from
1640 	     the step instead.  */
1641 	  poly_uint64 new_seg_len = 0;
1642 	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1643 						 dr_a2->seg_len, 0);
1644 	  if (new_seg_len_p)
1645 	    {
1646 	      poly_uint64 seg_len_a1, seg_len_a2;
1647 	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1648 		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1649 		continue;
1650 
1651 	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
1652 	      if (TREE_CODE (indicator_a) != INTEGER_CST)
1653 		continue;
1654 
1655 	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
1656 	      if (TREE_CODE (indicator_b) != INTEGER_CST)
1657 		continue;
1658 
1659 	      int sign_a = tree_int_cst_sgn (indicator_a);
1660 	      int sign_b = tree_int_cst_sgn (indicator_b);
1661 
1662 	      if (sign_a <= 0 && sign_b <= 0)
1663 		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1664 	      else if (sign_a >= 0 && sign_b >= 0)
1665 		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1666 	      else
1667 		continue;
1668 	    }
1669 	  /* At this point we're committed to merging the refs.  */
1670 
1671 	  /* Make sure dr_a1 starts left of dr_a2.  */
1672 	  if (maybe_gt (init_a1, init_a2))
1673 	    {
1674 	      std::swap (*dr_a1, *dr_a2);
1675 	      std::swap (init_a1, init_a2);
1676 	    }
1677 
1678 	  /* The DR_Bs are equal, so only the DR_As can introduce
1679 	     mixed steps.  */
1680 	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1681 	    alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1682 
1683 	  if (new_seg_len_p)
1684 	    {
1685 	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1686 					      new_seg_len);
1687 	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1688 	    }
1689 
1690 	  /* This is always positive due to the swap above.  */
1691 	  poly_uint64 diff = init_a2 - init_a1;
1692 
1693 	  /* The new check will start at DR_A1.  Make sure that its access
1694 	     size encompasses the initial DR_A2.  */
1695 	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1696 	    {
1697 	      dr_a1->access_size = upper_bound (dr_a1->access_size,
1698 						diff + dr_a2->access_size);
1699 	      unsigned int new_align = known_alignment (dr_a1->access_size);
1700 	      dr_a1->align = MIN (dr_a1->align, new_align);
1701 	    }
1702 	  if (dump_enabled_p ())
1703 	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1704 			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1705 			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1706 	  alias_pair1->flags |= alias_pair2->flags;
1707 	  last -= 1;
1708 	}
1709     }
1710   alias_pairs->truncate (last + 1);
1711 
1712   /* Try to restore the original dr_with_seg_len order within each
1713      dr_with_seg_len_pair_t.  If we ended up combining swapped and
1714      unswapped pairs into the same check, we have to invalidate any
1715      RAW, WAR and WAW information for it.  */
1716   if (dump_enabled_p ())
1717     dump_printf (MSG_NOTE, "merged alias checks:\n");
1718   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1719     {
1720       unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1721       unsigned int swapped = (alias_pair->flags & swap_mask);
1722       if (swapped == DR_ALIAS_SWAPPED)
1723 	std::swap (alias_pair->first, alias_pair->second);
1724       else if (swapped != DR_ALIAS_UNSWAPPED)
1725 	alias_pair->flags |= DR_ALIAS_ARBITRARY;
1726       alias_pair->flags &= ~swap_mask;
1727       if (dump_enabled_p ())
1728 	dump_alias_pair (alias_pair, "  ");
1729     }
1730 }
1731 
1732 /* A subroutine of create_intersect_range_checks, with a subset of the
1733    same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1734    to optimize cases in which the references form a simple RAW, WAR or
1735    WAR dependence.  */
1736 
1737 static bool
create_ifn_alias_checks(tree * cond_expr,const dr_with_seg_len_pair_t & alias_pair)1738 create_ifn_alias_checks (tree *cond_expr,
1739 			 const dr_with_seg_len_pair_t &alias_pair)
1740 {
1741   const dr_with_seg_len& dr_a = alias_pair.first;
1742   const dr_with_seg_len& dr_b = alias_pair.second;
1743 
1744   /* Check for cases in which:
1745 
1746      (a) we have a known RAW, WAR or WAR dependence
1747      (b) the accesses are well-ordered in both the original and new code
1748 	 (see the comment above the DR_ALIAS_* flags for details); and
1749      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
1750   if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
1751     return false;
1752 
1753   /* Make sure that both DRs access the same pattern of bytes,
1754      with a constant length and step.  */
1755   poly_uint64 seg_len;
1756   if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
1757       || !poly_int_tree_p (dr_a.seg_len, &seg_len)
1758       || maybe_ne (dr_a.access_size, dr_b.access_size)
1759       || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
1760       || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
1761     return false;
1762 
1763   unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
1764   tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
1765   tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
1766 
1767   /* See whether the target suports what we want to do.  WAW checks are
1768      equivalent to WAR checks here.  */
1769   internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
1770 		     ? IFN_CHECK_RAW_PTRS
1771 		     : IFN_CHECK_WAR_PTRS);
1772   unsigned int align = MIN (dr_a.align, dr_b.align);
1773   poly_uint64 full_length = seg_len + bytes;
1774   if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
1775 					   full_length, align))
1776     {
1777       full_length = seg_len + dr_a.access_size;
1778       if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
1779 					       full_length, align))
1780 	return false;
1781     }
1782 
1783   /* Commit to using this form of test.  */
1784   addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
1785   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
1786 
1787   addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
1788   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
1789 
1790   *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
1791 					     ifn, boolean_type_node,
1792 					     4, addr_a, addr_b,
1793 					     size_int (full_length),
1794 					     size_int (align));
1795 
1796   if (dump_enabled_p ())
1797     {
1798       if (ifn == IFN_CHECK_RAW_PTRS)
1799 	dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
1800       else
1801 	dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
1802     }
1803   return true;
1804 }
1805 
1806 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
1807    free of aliases, using a condition based on index values instead
1808    of a condition based on addresses.  Return true on success,
1809    storing the condition in *COND_EXPR.
1810 
1811    This can only be done if the two data references in ALIAS_PAIR access
1812    the same array object and the index is the only difference.  For example,
1813    if the two data references are DR_A and DR_B:
1814 
1815                        DR_A                           DR_B
1816       data-ref         arr[i]                         arr[j]
1817       base_object      arr                            arr
1818       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
1819 
1820    The addresses and their index are like:
1821 
1822         |<- ADDR_A    ->|          |<- ADDR_B    ->|
1823      ------------------------------------------------------->
1824         |   |   |   |   |          |   |   |   |   |
1825      ------------------------------------------------------->
1826         i_0 ...         i_0+4      j_0 ...         j_0+4
1827 
1828    We can create expression based on index rather than address:
1829 
1830      (unsigned) (i_0 - j_0 + 3) <= 6
1831 
1832    i.e. the indices are less than 4 apart.
1833 
1834    Note evolution step of index needs to be considered in comparison.  */
1835 
1836 static bool
create_intersect_range_checks_index(class loop * loop,tree * cond_expr,const dr_with_seg_len_pair_t & alias_pair)1837 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
1838 				     const dr_with_seg_len_pair_t &alias_pair)
1839 {
1840   const dr_with_seg_len &dr_a = alias_pair.first;
1841   const dr_with_seg_len &dr_b = alias_pair.second;
1842   if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
1843       || integer_zerop (DR_STEP (dr_a.dr))
1844       || integer_zerop (DR_STEP (dr_b.dr))
1845       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1846     return false;
1847 
1848   poly_uint64 seg_len1, seg_len2;
1849   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1850       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1851     return false;
1852 
1853   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1854     return false;
1855 
1856   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1857     return false;
1858 
1859   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1860     return false;
1861 
1862   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1863 
1864   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1865   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1866   if (neg_step)
1867     {
1868       abs_step = -abs_step;
1869       seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
1870       seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
1871     }
1872 
1873   /* Infer the number of iterations with which the memory segment is accessed
1874      by DR.  In other words, alias is checked if memory segment accessed by
1875      DR_A in some iterations intersect with memory segment accessed by DR_B
1876      in the same amount iterations.
1877      Note segnment length is a linear function of number of iterations with
1878      DR_STEP as the coefficient.  */
1879   poly_uint64 niter_len1, niter_len2;
1880   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1881       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1882     return false;
1883 
1884   /* Divide each access size by the byte step, rounding up.  */
1885   poly_uint64 niter_access1, niter_access2;
1886   if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
1887 			abs_step, &niter_access1)
1888       || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1889 			   abs_step, &niter_access2))
1890     return false;
1891 
1892   bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
1893 
1894   unsigned int i;
1895   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1896     {
1897       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1898       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1899       /* Two indices must be the same if they are not scev, or not scev wrto
1900 	 current loop being vecorized.  */
1901       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1902 	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
1903 	  || CHREC_VARIABLE (access1) != (unsigned)loop->num
1904 	  || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1905 	{
1906 	  if (operand_equal_p (access1, access2, 0))
1907 	    continue;
1908 
1909 	  return false;
1910 	}
1911       /* The two indices must have the same step.  */
1912       if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1913 	return false;
1914 
1915       tree idx_step = CHREC_RIGHT (access1);
1916       /* Index must have const step, otherwise DR_STEP won't be constant.  */
1917       gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1918       /* Index must evaluate in the same direction as DR.  */
1919       gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1920 
1921       tree min1 = CHREC_LEFT (access1);
1922       tree min2 = CHREC_LEFT (access2);
1923       if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1924 	return false;
1925 
1926       /* Ideally, alias can be checked against loop's control IV, but we
1927 	 need to prove linear mapping between control IV and reference
1928 	 index.  Although that should be true, we check against (array)
1929 	 index of data reference.  Like segment length, index length is
1930 	 linear function of the number of iterations with index_step as
1931 	 the coefficient, i.e, niter_len * idx_step.  */
1932       offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
1933 						  SIGNED);
1934       if (neg_step)
1935 	abs_idx_step = -abs_idx_step;
1936       poly_offset_int idx_len1 = abs_idx_step * niter_len1;
1937       poly_offset_int idx_len2 = abs_idx_step * niter_len2;
1938       poly_offset_int idx_access1 = abs_idx_step * niter_access1;
1939       poly_offset_int idx_access2 = abs_idx_step * niter_access2;
1940 
1941       gcc_assert (known_ge (idx_len1, 0)
1942 		  && known_ge (idx_len2, 0)
1943 		  && known_ge (idx_access1, 0)
1944 		  && known_ge (idx_access2, 0));
1945 
1946       /* Each access has the following pattern, with lengths measured
1947 	 in units of INDEX:
1948 
1949 	      <-- idx_len -->
1950 	      <--- A: -ve step --->
1951 	      +-----+-------+-----+-------+-----+
1952 	      | n-1 | ..... |  0  | ..... | n-1 |
1953 	      +-----+-------+-----+-------+-----+
1954 			    <--- B: +ve step --->
1955 			    <-- idx_len -->
1956 			    |
1957 			   min
1958 
1959 	 where "n" is the number of scalar iterations covered by the segment
1960 	 and where each access spans idx_access units.
1961 
1962 	 A is the range of bytes accessed when the step is negative,
1963 	 B is the range when the step is positive.
1964 
1965 	 When checking for general overlap, we need to test whether
1966 	 the range:
1967 
1968 	   [min1 + low_offset1, min2 + high_offset1 + idx_access1 - 1]
1969 
1970 	 overlaps:
1971 
1972 	   [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
1973 
1974 	 where:
1975 
1976 	    low_offsetN = +ve step ? 0 : -idx_lenN;
1977 	   high_offsetN = +ve step ? idx_lenN : 0;
1978 
1979 	 This is equivalent to testing whether:
1980 
1981 	   min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
1982 	   && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
1983 
1984 	 Converting this into a single test, there is an overlap if:
1985 
1986 	   0 <= min2 - min1 + bias <= limit
1987 
1988 	 where  bias = high_offset2 + idx_access2 - 1 - low_offset1
1989 	       limit = (high_offset1 - low_offset1 + idx_access1 - 1)
1990 		     + (high_offset2 - low_offset2 + idx_access2 - 1)
1991 	  i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
1992 
1993 	 Combining the tests requires limit to be computable in an unsigned
1994 	 form of the index type; if it isn't, we fall back to the usual
1995 	 pointer-based checks.
1996 
1997 	 We can do better if DR_B is a write and if DR_A and DR_B are
1998 	 well-ordered in both the original and the new code (see the
1999 	 comment above the DR_ALIAS_* flags for details).  In this case
2000 	 we know that for each i in [0, n-1], the write performed by
2001 	 access i of DR_B occurs after access numbers j<=i of DR_A in
2002 	 both the original and the new code.  Any write or anti
2003 	 dependencies wrt those DR_A accesses are therefore maintained.
2004 
2005 	 We just need to make sure that each individual write in DR_B does not
2006 	 overlap any higher-indexed access in DR_A; such DR_A accesses happen
2007 	 after the DR_B access in the original code but happen before it in
2008 	 the new code.
2009 
2010 	 We know the steps for both accesses are equal, so by induction, we
2011 	 just need to test whether the first write of DR_B overlaps a later
2012 	 access of DR_A.  In other words, we need to move min1 along by
2013 	 one iteration:
2014 
2015 	   min1' = min1 + idx_step
2016 
2017 	 and use the ranges:
2018 
2019 	   [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2020 
2021 	 and:
2022 
2023 	   [min2, min2 + idx_access2 - 1]
2024 
2025 	 where:
2026 
2027 	    low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2028 	   high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
2029       if (waw_or_war_p)
2030 	idx_len1 -= abs_idx_step;
2031 
2032       poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2033       if (!waw_or_war_p)
2034 	limit += idx_len2;
2035 
2036       tree utype = unsigned_type_for (TREE_TYPE (min1));
2037       if (!wi::fits_to_tree_p (limit, utype))
2038 	return false;
2039 
2040       poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2041       poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2042       poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2043       /* Equivalent to adding IDX_STEP to MIN1.  */
2044       if (waw_or_war_p)
2045 	bias -= wi::to_offset (idx_step);
2046 
2047       tree subject = fold_build2 (MINUS_EXPR, utype,
2048 				  fold_convert (utype, min2),
2049 				  fold_convert (utype, min1));
2050       subject = fold_build2 (PLUS_EXPR, utype, subject,
2051 			     wide_int_to_tree (utype, bias));
2052       tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2053 					 wide_int_to_tree (utype, limit));
2054       if (*cond_expr)
2055 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2056 				  *cond_expr, part_cond_expr);
2057       else
2058 	*cond_expr = part_cond_expr;
2059     }
2060   if (dump_enabled_p ())
2061     {
2062       if (waw_or_war_p)
2063 	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2064       else
2065 	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2066     }
2067   return true;
2068 }
2069 
2070 /* A subroutine of create_intersect_range_checks, with a subset of the
2071    same arguments.  Try to optimize cases in which the second access
2072    is a write and in which some overlap is valid.  */
2073 
2074 static bool
create_waw_or_war_checks(tree * cond_expr,const dr_with_seg_len_pair_t & alias_pair)2075 create_waw_or_war_checks (tree *cond_expr,
2076 			  const dr_with_seg_len_pair_t &alias_pair)
2077 {
2078   const dr_with_seg_len& dr_a = alias_pair.first;
2079   const dr_with_seg_len& dr_b = alias_pair.second;
2080 
2081   /* Check for cases in which:
2082 
2083      (a) DR_B is always a write;
2084      (b) the accesses are well-ordered in both the original and new code
2085 	 (see the comment above the DR_ALIAS_* flags for details); and
2086      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2087   if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2088     return false;
2089 
2090   /* Check for equal (but possibly variable) steps.  */
2091   tree step = DR_STEP (dr_a.dr);
2092   if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2093     return false;
2094 
2095   /* Make sure that we can operate on sizetype without loss of precision.  */
2096   tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2097   if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2098     return false;
2099 
2100   /* All addresses involved are known to have a common alignment ALIGN.
2101      We can therefore subtract ALIGN from an exclusive endpoint to get
2102      an inclusive endpoint.  In the best (and common) case, ALIGN is the
2103      same as the access sizes of both DRs, and so subtracting ALIGN
2104      cancels out the addition of an access size.  */
2105   unsigned int align = MIN (dr_a.align, dr_b.align);
2106   poly_uint64 last_chunk_a = dr_a.access_size - align;
2107   poly_uint64 last_chunk_b = dr_b.access_size - align;
2108 
2109   /* Get a boolean expression that is true when the step is negative.  */
2110   tree indicator = dr_direction_indicator (dr_a.dr);
2111   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2112 			       fold_convert (ssizetype, indicator),
2113 			       ssize_int (0));
2114 
2115   /* Get lengths in sizetype.  */
2116   tree seg_len_a
2117     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2118   step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2119 
2120   /* Each access has the following pattern:
2121 
2122 	  <- |seg_len| ->
2123 	  <--- A: -ve step --->
2124 	  +-----+-------+-----+-------+-----+
2125 	  | n-1 | ..... |  0  | ..... | n-1 |
2126 	  +-----+-------+-----+-------+-----+
2127 			<--- B: +ve step --->
2128 			<- |seg_len| ->
2129 			|
2130 		   base address
2131 
2132      where "n" is the number of scalar iterations covered by the segment.
2133 
2134      A is the range of bytes accessed when the step is negative,
2135      B is the range when the step is positive.
2136 
2137      We know that DR_B is a write.  We also know (from checking that
2138      DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2139      the write performed by access i of DR_B occurs after access numbers
2140      j<=i of DR_A in both the original and the new code.  Any write or
2141      anti dependencies wrt those DR_A accesses are therefore maintained.
2142 
2143      We just need to make sure that each individual write in DR_B does not
2144      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2145      after the DR_B access in the original code but happen before it in
2146      the new code.
2147 
2148      We know the steps for both accesses are equal, so by induction, we
2149      just need to test whether the first write of DR_B overlaps a later
2150      access of DR_A.  In other words, we need to move addr_a along by
2151      one iteration:
2152 
2153        addr_a' = addr_a + step
2154 
2155      and check whether:
2156 
2157        [addr_b, addr_b + last_chunk_b]
2158 
2159      overlaps:
2160 
2161        [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2162 
2163      where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
2164 
2165 	low_offset_a = +ve step ? 0 : seg_len_a - step
2166        high_offset_a = +ve step ? seg_len_a - step : 0
2167 
2168      This is equivalent to testing whether:
2169 
2170        addr_a' + low_offset_a <= addr_b + last_chunk_b
2171        && addr_b <= addr_a' + high_offset_a + last_chunk_a
2172 
2173      Converting this into a single test, there is an overlap if:
2174 
2175        0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2176 
2177      where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2178 
2179      If DR_A is performed, limit + |step| - last_chunk_b is known to be
2180      less than the size of the object underlying DR_A.  We also know
2181      that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2182      guaranteed at compile time.  There can therefore be no overflow if
2183      "limit" is calculated in an unsigned type with pointer precision.  */
2184   tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2185 					 DR_OFFSET (dr_a.dr));
2186   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2187 
2188   tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2189 					 DR_OFFSET (dr_b.dr));
2190   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2191 
2192   /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
2193   addr_a = fold_build_pointer_plus (addr_a, step);
2194   tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2195 					   seg_len_a, step);
2196   if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2197     seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2198 
2199   tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2200 				   seg_len_a_minus_step, size_zero_node);
2201   if (!CONSTANT_CLASS_P (low_offset_a))
2202     low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2203 
2204   /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2205      but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
2206   tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2207 				    low_offset_a);
2208 
2209   /* The amount added to addr_b - addr_a'.  */
2210   tree bias = fold_build2 (MINUS_EXPR, sizetype,
2211 			   size_int (last_chunk_b), low_offset_a);
2212 
2213   tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2214   limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2215 		       size_int (last_chunk_a + last_chunk_b));
2216 
2217   tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2218   subject = fold_build2 (PLUS_EXPR, sizetype,
2219 			 fold_convert (sizetype, subject), bias);
2220 
2221   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2222   if (dump_enabled_p ())
2223     dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2224   return true;
2225 }
2226 
2227 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
2228    every address ADDR accessed by D:
2229 
2230      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
2231 
2232    In this case, every element accessed by D is aligned to at least
2233    ALIGN bytes.
2234 
2235    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2236 
2237      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
2238 
2239 static void
get_segment_min_max(const dr_with_seg_len & d,tree * seg_min_out,tree * seg_max_out,HOST_WIDE_INT align)2240 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2241 		     tree *seg_max_out, HOST_WIDE_INT align)
2242 {
2243   /* Each access has the following pattern:
2244 
2245 	  <- |seg_len| ->
2246 	  <--- A: -ve step --->
2247 	  +-----+-------+-----+-------+-----+
2248 	  | n-1 | ,.... |  0  | ..... | n-1 |
2249 	  +-----+-------+-----+-------+-----+
2250 			<--- B: +ve step --->
2251 			<- |seg_len| ->
2252 			|
2253 		   base address
2254 
2255      where "n" is the number of scalar iterations covered by the segment.
2256      (This should be VF for a particular pair if we know that both steps
2257      are the same, otherwise it will be the full number of scalar loop
2258      iterations.)
2259 
2260      A is the range of bytes accessed when the step is negative,
2261      B is the range when the step is positive.
2262 
2263      If the access size is "access_size" bytes, the lowest addressed byte is:
2264 
2265 	 base + (step < 0 ? seg_len : 0)   [LB]
2266 
2267      and the highest addressed byte is always below:
2268 
2269 	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]
2270 
2271      Thus:
2272 
2273 	 LB <= ADDR < UB
2274 
2275      If ALIGN is nonzero, all three values are aligned to at least ALIGN
2276      bytes, so:
2277 
2278 	 LB <= ADDR <= UB - ALIGN
2279 
2280      where "- ALIGN" folds naturally with the "+ access_size" and often
2281      cancels it out.
2282 
2283      We don't try to simplify LB and UB beyond this (e.g. by using
2284      MIN and MAX based on whether seg_len rather than the stride is
2285      negative) because it is possible for the absolute size of the
2286      segment to overflow the range of a ssize_t.
2287 
2288      Keeping the pointer_plus outside of the cond_expr should allow
2289      the cond_exprs to be shared with other alias checks.  */
2290   tree indicator = dr_direction_indicator (d.dr);
2291   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2292 			       fold_convert (ssizetype, indicator),
2293 			       ssize_int (0));
2294   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2295 					    DR_OFFSET (d.dr));
2296   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2297   tree seg_len
2298     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2299 
2300   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2301 				seg_len, size_zero_node);
2302   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2303 				size_zero_node, seg_len);
2304   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2305 			   size_int (d.access_size - align));
2306 
2307   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2308   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2309 }
2310 
2311 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2312    storing the condition in *COND_EXPR.  The fallback is to generate a
2313    a test that the two accesses do not overlap:
2314 
2315      end_a <= start_b || end_b <= start_a.  */
2316 
2317 static void
create_intersect_range_checks(class loop * loop,tree * cond_expr,const dr_with_seg_len_pair_t & alias_pair)2318 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2319 			       const dr_with_seg_len_pair_t &alias_pair)
2320 {
2321   const dr_with_seg_len& dr_a = alias_pair.first;
2322   const dr_with_seg_len& dr_b = alias_pair.second;
2323   *cond_expr = NULL_TREE;
2324   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2325     return;
2326 
2327   if (create_ifn_alias_checks (cond_expr, alias_pair))
2328     return;
2329 
2330   if (create_waw_or_war_checks (cond_expr, alias_pair))
2331     return;
2332 
2333   unsigned HOST_WIDE_INT min_align;
2334   tree_code cmp_code;
2335   /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2336      are equivalent.  This is just an optimization heuristic.  */
2337   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2338       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2339     {
2340       /* In this case adding access_size to seg_len is likely to give
2341 	 a simple X * step, where X is either the number of scalar
2342 	 iterations or the vectorization factor.  We're better off
2343 	 keeping that, rather than subtracting an alignment from it.
2344 
2345 	 In this case the maximum values are exclusive and so there is
2346 	 no alias if the maximum of one segment equals the minimum
2347 	 of another.  */
2348       min_align = 0;
2349       cmp_code = LE_EXPR;
2350     }
2351   else
2352     {
2353       /* Calculate the minimum alignment shared by all four pointers,
2354 	 then arrange for this alignment to be subtracted from the
2355 	 exclusive maximum values to get inclusive maximum values.
2356 	 This "- min_align" is cumulative with a "+ access_size"
2357 	 in the calculation of the maximum values.  In the best
2358 	 (and common) case, the two cancel each other out, leaving
2359 	 us with an inclusive bound based only on seg_len.  In the
2360 	 worst case we're simply adding a smaller number than before.
2361 
2362 	 Because the maximum values are inclusive, there is an alias
2363 	 if the maximum value of one segment is equal to the minimum
2364 	 value of the other.  */
2365       min_align = MIN (dr_a.align, dr_b.align);
2366       cmp_code = LT_EXPR;
2367     }
2368 
2369   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2370   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2371   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2372 
2373   *cond_expr
2374     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2375 	fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2376 	fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2377   if (dump_enabled_p ())
2378     dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2379 }
2380 
2381 /* Create a conditional expression that represents the run-time checks for
2382    overlapping of address ranges represented by a list of data references
2383    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2384    COND_EXPR is the conditional expression to be used in the if statement
2385    that controls which version of the loop gets executed at runtime.  */
2386 
2387 void
create_runtime_alias_checks(class loop * loop,vec<dr_with_seg_len_pair_t> * alias_pairs,tree * cond_expr)2388 create_runtime_alias_checks (class loop *loop,
2389 			     vec<dr_with_seg_len_pair_t> *alias_pairs,
2390 			     tree * cond_expr)
2391 {
2392   tree part_cond_expr;
2393 
2394   fold_defer_overflow_warnings ();
2395   dr_with_seg_len_pair_t *alias_pair;
2396   unsigned int i;
2397   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
2398     {
2399       gcc_assert (alias_pair->flags);
2400       if (dump_enabled_p ())
2401 	dump_printf (MSG_NOTE,
2402 		     "create runtime check for data references %T and %T\n",
2403 		     DR_REF (alias_pair->first.dr),
2404 		     DR_REF (alias_pair->second.dr));
2405 
2406       /* Create condition expression for each pair data references.  */
2407       create_intersect_range_checks (loop, &part_cond_expr, *alias_pair);
2408       if (*cond_expr)
2409 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2410 				  *cond_expr, part_cond_expr);
2411       else
2412 	*cond_expr = part_cond_expr;
2413     }
2414   fold_undefer_and_ignore_overflow_warnings ();
2415 }
2416 
2417 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2418    expressions.  */
2419 static bool
dr_equal_offsets_p1(tree offset1,tree offset2)2420 dr_equal_offsets_p1 (tree offset1, tree offset2)
2421 {
2422   bool res;
2423 
2424   STRIP_NOPS (offset1);
2425   STRIP_NOPS (offset2);
2426 
2427   if (offset1 == offset2)
2428     return true;
2429 
2430   if (TREE_CODE (offset1) != TREE_CODE (offset2)
2431       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2432     return false;
2433 
2434   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2435                              TREE_OPERAND (offset2, 0));
2436 
2437   if (!res || !BINARY_CLASS_P (offset1))
2438     return res;
2439 
2440   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2441                              TREE_OPERAND (offset2, 1));
2442 
2443   return res;
2444 }
2445 
2446 /* Check if DRA and DRB have equal offsets.  */
2447 bool
dr_equal_offsets_p(struct data_reference * dra,struct data_reference * drb)2448 dr_equal_offsets_p (struct data_reference *dra,
2449                     struct data_reference *drb)
2450 {
2451   tree offset1, offset2;
2452 
2453   offset1 = DR_OFFSET (dra);
2454   offset2 = DR_OFFSET (drb);
2455 
2456   return dr_equal_offsets_p1 (offset1, offset2);
2457 }
2458 
2459 /* Returns true if FNA == FNB.  */
2460 
2461 static bool
affine_function_equal_p(affine_fn fna,affine_fn fnb)2462 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2463 {
2464   unsigned i, n = fna.length ();
2465 
2466   if (n != fnb.length ())
2467     return false;
2468 
2469   for (i = 0; i < n; i++)
2470     if (!operand_equal_p (fna[i], fnb[i], 0))
2471       return false;
2472 
2473   return true;
2474 }
2475 
2476 /* If all the functions in CF are the same, returns one of them,
2477    otherwise returns NULL.  */
2478 
2479 static affine_fn
common_affine_function(conflict_function * cf)2480 common_affine_function (conflict_function *cf)
2481 {
2482   unsigned i;
2483   affine_fn comm;
2484 
2485   if (!CF_NONTRIVIAL_P (cf))
2486     return affine_fn ();
2487 
2488   comm = cf->fns[0];
2489 
2490   for (i = 1; i < cf->n; i++)
2491     if (!affine_function_equal_p (comm, cf->fns[i]))
2492       return affine_fn ();
2493 
2494   return comm;
2495 }
2496 
2497 /* Returns the base of the affine function FN.  */
2498 
2499 static tree
affine_function_base(affine_fn fn)2500 affine_function_base (affine_fn fn)
2501 {
2502   return fn[0];
2503 }
2504 
2505 /* Returns true if FN is a constant.  */
2506 
2507 static bool
affine_function_constant_p(affine_fn fn)2508 affine_function_constant_p (affine_fn fn)
2509 {
2510   unsigned i;
2511   tree coef;
2512 
2513   for (i = 1; fn.iterate (i, &coef); i++)
2514     if (!integer_zerop (coef))
2515       return false;
2516 
2517   return true;
2518 }
2519 
2520 /* Returns true if FN is the zero constant function.  */
2521 
2522 static bool
affine_function_zero_p(affine_fn fn)2523 affine_function_zero_p (affine_fn fn)
2524 {
2525   return (integer_zerop (affine_function_base (fn))
2526 	  && affine_function_constant_p (fn));
2527 }
2528 
2529 /* Returns a signed integer type with the largest precision from TA
2530    and TB.  */
2531 
2532 static tree
signed_type_for_types(tree ta,tree tb)2533 signed_type_for_types (tree ta, tree tb)
2534 {
2535   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2536     return signed_type_for (ta);
2537   else
2538     return signed_type_for (tb);
2539 }
2540 
2541 /* Applies operation OP on affine functions FNA and FNB, and returns the
2542    result.  */
2543 
2544 static affine_fn
affine_fn_op(enum tree_code op,affine_fn fna,affine_fn fnb)2545 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2546 {
2547   unsigned i, n, m;
2548   affine_fn ret;
2549   tree coef;
2550 
2551   if (fnb.length () > fna.length ())
2552     {
2553       n = fna.length ();
2554       m = fnb.length ();
2555     }
2556   else
2557     {
2558       n = fnb.length ();
2559       m = fna.length ();
2560     }
2561 
2562   ret.create (m);
2563   for (i = 0; i < n; i++)
2564     {
2565       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2566 					 TREE_TYPE (fnb[i]));
2567       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2568     }
2569 
2570   for (; fna.iterate (i, &coef); i++)
2571     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2572 				 coef, integer_zero_node));
2573   for (; fnb.iterate (i, &coef); i++)
2574     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2575 				 integer_zero_node, coef));
2576 
2577   return ret;
2578 }
2579 
2580 /* Returns the sum of affine functions FNA and FNB.  */
2581 
2582 static affine_fn
affine_fn_plus(affine_fn fna,affine_fn fnb)2583 affine_fn_plus (affine_fn fna, affine_fn fnb)
2584 {
2585   return affine_fn_op (PLUS_EXPR, fna, fnb);
2586 }
2587 
2588 /* Returns the difference of affine functions FNA and FNB.  */
2589 
2590 static affine_fn
affine_fn_minus(affine_fn fna,affine_fn fnb)2591 affine_fn_minus (affine_fn fna, affine_fn fnb)
2592 {
2593   return affine_fn_op (MINUS_EXPR, fna, fnb);
2594 }
2595 
2596 /* Frees affine function FN.  */
2597 
2598 static void
affine_fn_free(affine_fn fn)2599 affine_fn_free (affine_fn fn)
2600 {
2601   fn.release ();
2602 }
2603 
2604 /* Determine for each subscript in the data dependence relation DDR
2605    the distance.  */
2606 
2607 static void
compute_subscript_distance(struct data_dependence_relation * ddr)2608 compute_subscript_distance (struct data_dependence_relation *ddr)
2609 {
2610   conflict_function *cf_a, *cf_b;
2611   affine_fn fn_a, fn_b, diff;
2612 
2613   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2614     {
2615       unsigned int i;
2616 
2617       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2618  	{
2619  	  struct subscript *subscript;
2620 
2621  	  subscript = DDR_SUBSCRIPT (ddr, i);
2622  	  cf_a = SUB_CONFLICTS_IN_A (subscript);
2623  	  cf_b = SUB_CONFLICTS_IN_B (subscript);
2624 
2625 	  fn_a = common_affine_function (cf_a);
2626 	  fn_b = common_affine_function (cf_b);
2627 	  if (!fn_a.exists () || !fn_b.exists ())
2628 	    {
2629 	      SUB_DISTANCE (subscript) = chrec_dont_know;
2630 	      return;
2631 	    }
2632 	  diff = affine_fn_minus (fn_a, fn_b);
2633 
2634  	  if (affine_function_constant_p (diff))
2635  	    SUB_DISTANCE (subscript) = affine_function_base (diff);
2636  	  else
2637  	    SUB_DISTANCE (subscript) = chrec_dont_know;
2638 
2639 	  affine_fn_free (diff);
2640  	}
2641     }
2642 }
2643 
2644 /* Returns the conflict function for "unknown".  */
2645 
2646 static conflict_function *
conflict_fn_not_known(void)2647 conflict_fn_not_known (void)
2648 {
2649   conflict_function *fn = XCNEW (conflict_function);
2650   fn->n = NOT_KNOWN;
2651 
2652   return fn;
2653 }
2654 
2655 /* Returns the conflict function for "independent".  */
2656 
2657 static conflict_function *
conflict_fn_no_dependence(void)2658 conflict_fn_no_dependence (void)
2659 {
2660   conflict_function *fn = XCNEW (conflict_function);
2661   fn->n = NO_DEPENDENCE;
2662 
2663   return fn;
2664 }
2665 
2666 /* Returns true if the address of OBJ is invariant in LOOP.  */
2667 
2668 static bool
object_address_invariant_in_loop_p(const class loop * loop,const_tree obj)2669 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2670 {
2671   while (handled_component_p (obj))
2672     {
2673       if (TREE_CODE (obj) == ARRAY_REF)
2674 	{
2675 	  for (int i = 1; i < 4; ++i)
2676 	    if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2677 							loop->num))
2678 	      return false;
2679 	}
2680       else if (TREE_CODE (obj) == COMPONENT_REF)
2681 	{
2682 	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2683 						      loop->num))
2684 	    return false;
2685 	}
2686       obj = TREE_OPERAND (obj, 0);
2687     }
2688 
2689   if (!INDIRECT_REF_P (obj)
2690       && TREE_CODE (obj) != MEM_REF)
2691     return true;
2692 
2693   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2694 						  loop->num);
2695 }
2696 
2697 /* Returns false if we can prove that data references A and B do not alias,
2698    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2699    considered.  */
2700 
2701 bool
dr_may_alias_p(const struct data_reference * a,const struct data_reference * b,class loop * loop_nest)2702 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2703 		class loop *loop_nest)
2704 {
2705   tree addr_a = DR_BASE_OBJECT (a);
2706   tree addr_b = DR_BASE_OBJECT (b);
2707 
2708   /* If we are not processing a loop nest but scalar code we
2709      do not need to care about possible cross-iteration dependences
2710      and thus can process the full original reference.  Do so,
2711      similar to how loop invariant motion applies extra offset-based
2712      disambiguation.  */
2713   if (!loop_nest)
2714     {
2715       aff_tree off1, off2;
2716       poly_widest_int size1, size2;
2717       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2718       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2719       aff_combination_scale (&off1, -1);
2720       aff_combination_add (&off2, &off1);
2721       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2722 	return false;
2723     }
2724 
2725   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2726       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2727       /* For cross-iteration dependences the cliques must be valid for the
2728 	 whole loop, not just individual iterations.  */
2729       && (!loop_nest
2730 	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
2731 	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
2732       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2733       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2734     return false;
2735 
2736   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2737      do not know the size of the base-object.  So we cannot do any
2738      offset/overlap based analysis but have to rely on points-to
2739      information only.  */
2740   if (TREE_CODE (addr_a) == MEM_REF
2741       && (DR_UNCONSTRAINED_BASE (a)
2742 	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2743     {
2744       /* For true dependences we can apply TBAA.  */
2745       if (flag_strict_aliasing
2746 	  && DR_IS_WRITE (a) && DR_IS_READ (b)
2747 	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2748 				     get_alias_set (DR_REF (b))))
2749 	return false;
2750       if (TREE_CODE (addr_b) == MEM_REF)
2751 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2752 				       TREE_OPERAND (addr_b, 0));
2753       else
2754 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2755 				       build_fold_addr_expr (addr_b));
2756     }
2757   else if (TREE_CODE (addr_b) == MEM_REF
2758 	   && (DR_UNCONSTRAINED_BASE (b)
2759 	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2760     {
2761       /* For true dependences we can apply TBAA.  */
2762       if (flag_strict_aliasing
2763 	  && DR_IS_WRITE (a) && DR_IS_READ (b)
2764 	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2765 				     get_alias_set (DR_REF (b))))
2766 	return false;
2767       if (TREE_CODE (addr_a) == MEM_REF)
2768 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2769 				       TREE_OPERAND (addr_b, 0));
2770       else
2771 	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2772 				       TREE_OPERAND (addr_b, 0));
2773     }
2774 
2775   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2776      that is being subsetted in the loop nest.  */
2777   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2778     return refs_output_dependent_p (addr_a, addr_b);
2779   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2780     return refs_anti_dependent_p (addr_a, addr_b);
2781   return refs_may_alias_p (addr_a, addr_b);
2782 }
2783 
2784 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
2785    if it is meaningful to compare their associated access functions
2786    when checking for dependencies.  */
2787 
2788 static bool
access_fn_components_comparable_p(tree ref_a,tree ref_b)2789 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2790 {
2791   /* Allow pairs of component refs from the following sets:
2792 
2793        { REALPART_EXPR, IMAGPART_EXPR }
2794        { COMPONENT_REF }
2795        { ARRAY_REF }.  */
2796   tree_code code_a = TREE_CODE (ref_a);
2797   tree_code code_b = TREE_CODE (ref_b);
2798   if (code_a == IMAGPART_EXPR)
2799     code_a = REALPART_EXPR;
2800   if (code_b == IMAGPART_EXPR)
2801     code_b = REALPART_EXPR;
2802   if (code_a != code_b)
2803     return false;
2804 
2805   if (TREE_CODE (ref_a) == COMPONENT_REF)
2806     /* ??? We cannot simply use the type of operand #0 of the refs here as
2807        the Fortran compiler smuggles type punning into COMPONENT_REFs.
2808        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
2809     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2810 	    == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2811 
2812   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2813 			     TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2814 }
2815 
2816 /* Initialize a data dependence relation between data accesses A and
2817    B.  NB_LOOPS is the number of loops surrounding the references: the
2818    size of the classic distance/direction vectors.  */
2819 
2820 struct data_dependence_relation *
initialize_data_dependence_relation(struct data_reference * a,struct data_reference * b,vec<loop_p> loop_nest)2821 initialize_data_dependence_relation (struct data_reference *a,
2822 				     struct data_reference *b,
2823  				     vec<loop_p> loop_nest)
2824 {
2825   struct data_dependence_relation *res;
2826   unsigned int i;
2827 
2828   res = XCNEW (struct data_dependence_relation);
2829   DDR_A (res) = a;
2830   DDR_B (res) = b;
2831   DDR_LOOP_NEST (res).create (0);
2832   DDR_SUBSCRIPTS (res).create (0);
2833   DDR_DIR_VECTS (res).create (0);
2834   DDR_DIST_VECTS (res).create (0);
2835 
2836   if (a == NULL || b == NULL)
2837     {
2838       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2839       return res;
2840     }
2841 
2842   /* If the data references do not alias, then they are independent.  */
2843   if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
2844     {
2845       DDR_ARE_DEPENDENT (res) = chrec_known;
2846       return res;
2847     }
2848 
2849   unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2850   unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2851   if (num_dimensions_a == 0 || num_dimensions_b == 0)
2852     {
2853       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2854       return res;
2855     }
2856 
2857   /* For unconstrained bases, the root (highest-indexed) subscript
2858      describes a variation in the base of the original DR_REF rather
2859      than a component access.  We have no type that accurately describes
2860      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2861      applying this subscript) so limit the search to the last real
2862      component access.
2863 
2864      E.g. for:
2865 
2866 	void
2867 	f (int a[][8], int b[][8])
2868 	{
2869 	  for (int i = 0; i < 8; ++i)
2870 	    a[i * 2][0] = b[i][0];
2871 	}
2872 
2873      the a and b accesses have a single ARRAY_REF component reference [0]
2874      but have two subscripts.  */
2875   if (DR_UNCONSTRAINED_BASE (a))
2876     num_dimensions_a -= 1;
2877   if (DR_UNCONSTRAINED_BASE (b))
2878     num_dimensions_b -= 1;
2879 
2880   /* These structures describe sequences of component references in
2881      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
2882      specific access function.  */
2883   struct {
2884     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2885        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2886        indices.  In C notation, these are the indices of the rightmost
2887        component references; e.g. for a sequence .b.c.d, the start
2888        index is for .d.  */
2889     unsigned int start_a;
2890     unsigned int start_b;
2891 
2892     /* The sequence contains LENGTH consecutive access functions from
2893        each DR.  */
2894     unsigned int length;
2895 
2896     /* The enclosing objects for the A and B sequences respectively,
2897        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2898        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
2899     tree object_a;
2900     tree object_b;
2901   } full_seq = {}, struct_seq = {};
2902 
2903   /* Before each iteration of the loop:
2904 
2905      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2906      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
2907   unsigned int index_a = 0;
2908   unsigned int index_b = 0;
2909   tree ref_a = DR_REF (a);
2910   tree ref_b = DR_REF (b);
2911 
2912   /* Now walk the component references from the final DR_REFs back up to
2913      the enclosing base objects.  Each component reference corresponds
2914      to one access function in the DR, with access function 0 being for
2915      the final DR_REF and the highest-indexed access function being the
2916      one that is applied to the base of the DR.
2917 
2918      Look for a sequence of component references whose access functions
2919      are comparable (see access_fn_components_comparable_p).  If more
2920      than one such sequence exists, pick the one nearest the base
2921      (which is the leftmost sequence in C notation).  Store this sequence
2922      in FULL_SEQ.
2923 
2924      For example, if we have:
2925 
2926 	struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2927 
2928 	A: a[0][i].s.c.d
2929 	B: __real b[0][i].s.e[i].f
2930 
2931      (where d is the same type as the real component of f) then the access
2932      functions would be:
2933 
2934 			 0   1   2   3
2935 	A:              .d  .c  .s [i]
2936 
2937 		 0   1   2   3   4   5
2938 	B:  __real  .f [i]  .e  .s [i]
2939 
2940      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2941      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
2942      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
2943      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2944      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
2945      index foo[10] arrays, so is again comparable.  The sequence is
2946      therefore:
2947 
2948         A: [1, 3]  (i.e. [i].s.c)
2949         B: [3, 5]  (i.e. [i].s.e)
2950 
2951      Also look for sequences of component references whose access
2952      functions are comparable and whose enclosing objects have the same
2953      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
2954      example, STRUCT_SEQ would be:
2955 
2956         A: [1, 2]  (i.e. s.c)
2957         B: [3, 4]  (i.e. s.e)  */
2958   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2959     {
2960       /* REF_A and REF_B must be one of the component access types
2961 	 allowed by dr_analyze_indices.  */
2962       gcc_checking_assert (access_fn_component_p (ref_a));
2963       gcc_checking_assert (access_fn_component_p (ref_b));
2964 
2965       /* Get the immediately-enclosing objects for REF_A and REF_B,
2966 	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2967 	 and DR_ACCESS_FN (B, INDEX_B).  */
2968       tree object_a = TREE_OPERAND (ref_a, 0);
2969       tree object_b = TREE_OPERAND (ref_b, 0);
2970 
2971       tree type_a = TREE_TYPE (object_a);
2972       tree type_b = TREE_TYPE (object_b);
2973       if (access_fn_components_comparable_p (ref_a, ref_b))
2974 	{
2975 	  /* This pair of component accesses is comparable for dependence
2976 	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2977 	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
2978 	  if (full_seq.start_a + full_seq.length != index_a
2979 	      || full_seq.start_b + full_seq.length != index_b)
2980 	    {
2981 	      /* The accesses don't extend the current sequence,
2982 		 so start a new one here.  */
2983 	      full_seq.start_a = index_a;
2984 	      full_seq.start_b = index_b;
2985 	      full_seq.length = 0;
2986 	    }
2987 
2988 	  /* Add this pair of references to the sequence.  */
2989 	  full_seq.length += 1;
2990 	  full_seq.object_a = object_a;
2991 	  full_seq.object_b = object_b;
2992 
2993 	  /* If the enclosing objects are structures (and thus have the
2994 	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
2995 	  if (TREE_CODE (type_a) == RECORD_TYPE)
2996 	    struct_seq = full_seq;
2997 
2998 	  /* Move to the next containing reference for both A and B.  */
2999 	  ref_a = object_a;
3000 	  ref_b = object_b;
3001 	  index_a += 1;
3002 	  index_b += 1;
3003 	  continue;
3004 	}
3005 
3006       /* Try to approach equal type sizes.  */
3007       if (!COMPLETE_TYPE_P (type_a)
3008 	  || !COMPLETE_TYPE_P (type_b)
3009 	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3010 	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3011 	break;
3012 
3013       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3014       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3015       if (size_a <= size_b)
3016 	{
3017 	  index_a += 1;
3018 	  ref_a = object_a;
3019 	}
3020       if (size_b <= size_a)
3021 	{
3022 	  index_b += 1;
3023 	  ref_b = object_b;
3024 	}
3025     }
3026 
3027   /* See whether FULL_SEQ ends at the base and whether the two bases
3028      are equal.  We do not care about TBAA or alignment info so we can
3029      use OEP_ADDRESS_OF to avoid false negatives.  */
3030   tree base_a = DR_BASE_OBJECT (a);
3031   tree base_b = DR_BASE_OBJECT (b);
3032   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3033 		      && full_seq.start_b + full_seq.length == num_dimensions_b
3034 		      && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
3035 		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3036 		      && types_compatible_p (TREE_TYPE (base_a),
3037 					     TREE_TYPE (base_b))
3038 		      && (!loop_nest.exists ()
3039 			  || (object_address_invariant_in_loop_p
3040 			      (loop_nest[0], base_a))));
3041 
3042   /* If the bases are the same, we can include the base variation too.
3043      E.g. the b accesses in:
3044 
3045        for (int i = 0; i < n; ++i)
3046          b[i + 4][0] = b[i][0];
3047 
3048      have a definite dependence distance of 4, while for:
3049 
3050        for (int i = 0; i < n; ++i)
3051          a[i + 4][0] = b[i][0];
3052 
3053      the dependence distance depends on the gap between a and b.
3054 
3055      If the bases are different then we can only rely on the sequence
3056      rooted at a structure access, since arrays are allowed to overlap
3057      arbitrarily and change shape arbitrarily.  E.g. we treat this as
3058      valid code:
3059 
3060        int a[256];
3061        ...
3062        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3063 
3064      where two lvalues with the same int[4][3] type overlap, and where
3065      both lvalues are distinct from the object's declared type.  */
3066   if (same_base_p)
3067     {
3068       if (DR_UNCONSTRAINED_BASE (a))
3069 	full_seq.length += 1;
3070     }
3071   else
3072     full_seq = struct_seq;
3073 
3074   /* Punt if we didn't find a suitable sequence.  */
3075   if (full_seq.length == 0)
3076     {
3077       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3078       return res;
3079     }
3080 
3081   if (!same_base_p)
3082     {
3083       /* Partial overlap is possible for different bases when strict aliasing
3084 	 is not in effect.  It's also possible if either base involves a union
3085 	 access; e.g. for:
3086 
3087 	   struct s1 { int a[2]; };
3088 	   struct s2 { struct s1 b; int c; };
3089 	   struct s3 { int d; struct s1 e; };
3090 	   union u { struct s2 f; struct s3 g; } *p, *q;
3091 
3092 	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3093 	 "p->g.e" (base "p->g") and might partially overlap the s1 at
3094 	 "q->g.e" (base "q->g").  */
3095       if (!flag_strict_aliasing
3096 	  || ref_contains_union_access_p (full_seq.object_a)
3097 	  || ref_contains_union_access_p (full_seq.object_b))
3098 	{
3099 	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3100 	  return res;
3101 	}
3102 
3103       DDR_COULD_BE_INDEPENDENT_P (res) = true;
3104       if (!loop_nest.exists ()
3105 	  || (object_address_invariant_in_loop_p (loop_nest[0],
3106 						  full_seq.object_a)
3107 	      && object_address_invariant_in_loop_p (loop_nest[0],
3108 						     full_seq.object_b)))
3109 	{
3110 	  DDR_OBJECT_A (res) = full_seq.object_a;
3111 	  DDR_OBJECT_B (res) = full_seq.object_b;
3112 	}
3113     }
3114 
3115   DDR_AFFINE_P (res) = true;
3116   DDR_ARE_DEPENDENT (res) = NULL_TREE;
3117   DDR_SUBSCRIPTS (res).create (full_seq.length);
3118   DDR_LOOP_NEST (res) = loop_nest;
3119   DDR_SELF_REFERENCE (res) = false;
3120 
3121   for (i = 0; i < full_seq.length; ++i)
3122     {
3123       struct subscript *subscript;
3124 
3125       subscript = XNEW (struct subscript);
3126       SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
3127       SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
3128       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3129       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3130       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3131       SUB_DISTANCE (subscript) = chrec_dont_know;
3132       DDR_SUBSCRIPTS (res).safe_push (subscript);
3133     }
3134 
3135   return res;
3136 }
3137 
3138 /* Frees memory used by the conflict function F.  */
3139 
3140 static void
free_conflict_function(conflict_function * f)3141 free_conflict_function (conflict_function *f)
3142 {
3143   unsigned i;
3144 
3145   if (CF_NONTRIVIAL_P (f))
3146     {
3147       for (i = 0; i < f->n; i++)
3148 	affine_fn_free (f->fns[i]);
3149     }
3150   free (f);
3151 }
3152 
3153 /* Frees memory used by SUBSCRIPTS.  */
3154 
3155 static void
free_subscripts(vec<subscript_p> subscripts)3156 free_subscripts (vec<subscript_p> subscripts)
3157 {
3158   unsigned i;
3159   subscript_p s;
3160 
3161   FOR_EACH_VEC_ELT (subscripts, i, s)
3162     {
3163       free_conflict_function (s->conflicting_iterations_in_a);
3164       free_conflict_function (s->conflicting_iterations_in_b);
3165       free (s);
3166     }
3167   subscripts.release ();
3168 }
3169 
3170 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3171    description.  */
3172 
3173 static inline void
finalize_ddr_dependent(struct data_dependence_relation * ddr,tree chrec)3174 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3175 			tree chrec)
3176 {
3177   DDR_ARE_DEPENDENT (ddr) = chrec;
3178   free_subscripts (DDR_SUBSCRIPTS (ddr));
3179   DDR_SUBSCRIPTS (ddr).create (0);
3180 }
3181 
3182 /* The dependence relation DDR cannot be represented by a distance
3183    vector.  */
3184 
3185 static inline void
non_affine_dependence_relation(struct data_dependence_relation * ddr)3186 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3187 {
3188   if (dump_file && (dump_flags & TDF_DETAILS))
3189     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3190 
3191   DDR_AFFINE_P (ddr) = false;
3192 }
3193 
3194 
3195 
3196 /* This section contains the classic Banerjee tests.  */
3197 
3198 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3199    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
3200 
3201 static inline bool
ziv_subscript_p(const_tree chrec_a,const_tree chrec_b)3202 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3203 {
3204   return (evolution_function_is_constant_p (chrec_a)
3205 	  && evolution_function_is_constant_p (chrec_b));
3206 }
3207 
3208 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3209    variable, i.e., if the SIV (Single Index Variable) test is true.  */
3210 
3211 static bool
siv_subscript_p(const_tree chrec_a,const_tree chrec_b)3212 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3213 {
3214   if ((evolution_function_is_constant_p (chrec_a)
3215        && evolution_function_is_univariate_p (chrec_b))
3216       || (evolution_function_is_constant_p (chrec_b)
3217 	  && evolution_function_is_univariate_p (chrec_a)))
3218     return true;
3219 
3220   if (evolution_function_is_univariate_p (chrec_a)
3221       && evolution_function_is_univariate_p (chrec_b))
3222     {
3223       switch (TREE_CODE (chrec_a))
3224 	{
3225 	case POLYNOMIAL_CHREC:
3226 	  switch (TREE_CODE (chrec_b))
3227 	    {
3228 	    case POLYNOMIAL_CHREC:
3229 	      if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3230 		return false;
3231 	      /* FALLTHRU */
3232 
3233 	    default:
3234 	      return true;
3235 	    }
3236 
3237 	default:
3238 	  return true;
3239 	}
3240     }
3241 
3242   return false;
3243 }
3244 
3245 /* Creates a conflict function with N dimensions.  The affine functions
3246    in each dimension follow.  */
3247 
3248 static conflict_function *
conflict_fn(unsigned n,...)3249 conflict_fn (unsigned n, ...)
3250 {
3251   unsigned i;
3252   conflict_function *ret = XCNEW (conflict_function);
3253   va_list ap;
3254 
3255   gcc_assert (n > 0 && n <= MAX_DIM);
3256   va_start (ap, n);
3257 
3258   ret->n = n;
3259   for (i = 0; i < n; i++)
3260     ret->fns[i] = va_arg (ap, affine_fn);
3261   va_end (ap);
3262 
3263   return ret;
3264 }
3265 
3266 /* Returns constant affine function with value CST.  */
3267 
3268 static affine_fn
affine_fn_cst(tree cst)3269 affine_fn_cst (tree cst)
3270 {
3271   affine_fn fn;
3272   fn.create (1);
3273   fn.quick_push (cst);
3274   return fn;
3275 }
3276 
3277 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
3278 
3279 static affine_fn
affine_fn_univar(tree cst,unsigned dim,tree coef)3280 affine_fn_univar (tree cst, unsigned dim, tree coef)
3281 {
3282   affine_fn fn;
3283   fn.create (dim + 1);
3284   unsigned i;
3285 
3286   gcc_assert (dim > 0);
3287   fn.quick_push (cst);
3288   for (i = 1; i < dim; i++)
3289     fn.quick_push (integer_zero_node);
3290   fn.quick_push (coef);
3291   return fn;
3292 }
3293 
3294 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
3295    *OVERLAPS_B are initialized to the functions that describe the
3296    relation between the elements accessed twice by CHREC_A and
3297    CHREC_B.  For k >= 0, the following property is verified:
3298 
3299    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3300 
3301 static void
analyze_ziv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)3302 analyze_ziv_subscript (tree chrec_a,
3303 		       tree chrec_b,
3304 		       conflict_function **overlaps_a,
3305 		       conflict_function **overlaps_b,
3306 		       tree *last_conflicts)
3307 {
3308   tree type, difference;
3309   dependence_stats.num_ziv++;
3310 
3311   if (dump_file && (dump_flags & TDF_DETAILS))
3312     fprintf (dump_file, "(analyze_ziv_subscript \n");
3313 
3314   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3315   chrec_a = chrec_convert (type, chrec_a, NULL);
3316   chrec_b = chrec_convert (type, chrec_b, NULL);
3317   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3318 
3319   switch (TREE_CODE (difference))
3320     {
3321     case INTEGER_CST:
3322       if (integer_zerop (difference))
3323 	{
3324 	  /* The difference is equal to zero: the accessed index
3325 	     overlaps for each iteration in the loop.  */
3326 	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3327 	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3328 	  *last_conflicts = chrec_dont_know;
3329 	  dependence_stats.num_ziv_dependent++;
3330 	}
3331       else
3332 	{
3333 	  /* The accesses do not overlap.  */
3334 	  *overlaps_a = conflict_fn_no_dependence ();
3335 	  *overlaps_b = conflict_fn_no_dependence ();
3336 	  *last_conflicts = integer_zero_node;
3337 	  dependence_stats.num_ziv_independent++;
3338 	}
3339       break;
3340 
3341     default:
3342       /* We're not sure whether the indexes overlap.  For the moment,
3343 	 conservatively answer "don't know".  */
3344       if (dump_file && (dump_flags & TDF_DETAILS))
3345 	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3346 
3347       *overlaps_a = conflict_fn_not_known ();
3348       *overlaps_b = conflict_fn_not_known ();
3349       *last_conflicts = chrec_dont_know;
3350       dependence_stats.num_ziv_unimplemented++;
3351       break;
3352     }
3353 
3354   if (dump_file && (dump_flags & TDF_DETAILS))
3355     fprintf (dump_file, ")\n");
3356 }
3357 
3358 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3359    and only if it fits to the int type.  If this is not the case, or the
3360    bound  on the number of iterations of LOOP could not be derived, returns
3361    chrec_dont_know.  */
3362 
3363 static tree
max_stmt_executions_tree(class loop * loop)3364 max_stmt_executions_tree (class loop *loop)
3365 {
3366   widest_int nit;
3367 
3368   if (!max_stmt_executions (loop, &nit))
3369     return chrec_dont_know;
3370 
3371   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3372     return chrec_dont_know;
3373 
3374   return wide_int_to_tree (unsigned_type_node, nit);
3375 }
3376 
3377 /* Determine whether the CHREC is always positive/negative.  If the expression
3378    cannot be statically analyzed, return false, otherwise set the answer into
3379    VALUE.  */
3380 
3381 static bool
chrec_is_positive(tree chrec,bool * value)3382 chrec_is_positive (tree chrec, bool *value)
3383 {
3384   bool value0, value1, value2;
3385   tree end_value, nb_iter;
3386 
3387   switch (TREE_CODE (chrec))
3388     {
3389     case POLYNOMIAL_CHREC:
3390       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3391 	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3392 	return false;
3393 
3394       /* FIXME -- overflows.  */
3395       if (value0 == value1)
3396 	{
3397 	  *value = value0;
3398 	  return true;
3399 	}
3400 
3401       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3402 	 and the proof consists in showing that the sign never
3403 	 changes during the execution of the loop, from 0 to
3404 	 loop->nb_iterations.  */
3405       if (!evolution_function_is_affine_p (chrec))
3406 	return false;
3407 
3408       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3409       if (chrec_contains_undetermined (nb_iter))
3410 	return false;
3411 
3412 #if 0
3413       /* TODO -- If the test is after the exit, we may decrease the number of
3414 	 iterations by one.  */
3415       if (after_exit)
3416 	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3417 #endif
3418 
3419       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3420 
3421       if (!chrec_is_positive (end_value, &value2))
3422 	return false;
3423 
3424       *value = value0;
3425       return value0 == value1;
3426 
3427     case INTEGER_CST:
3428       switch (tree_int_cst_sgn (chrec))
3429 	{
3430 	case -1:
3431 	  *value = false;
3432 	  break;
3433 	case 1:
3434 	  *value = true;
3435 	  break;
3436 	default:
3437 	  return false;
3438 	}
3439       return true;
3440 
3441     default:
3442       return false;
3443     }
3444 }
3445 
3446 
3447 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3448    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
3449    *OVERLAPS_B are initialized to the functions that describe the
3450    relation between the elements accessed twice by CHREC_A and
3451    CHREC_B.  For k >= 0, the following property is verified:
3452 
3453    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3454 
3455 static void
analyze_siv_subscript_cst_affine(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)3456 analyze_siv_subscript_cst_affine (tree chrec_a,
3457 				  tree chrec_b,
3458 				  conflict_function **overlaps_a,
3459 				  conflict_function **overlaps_b,
3460 				  tree *last_conflicts)
3461 {
3462   bool value0, value1, value2;
3463   tree type, difference, tmp;
3464 
3465   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3466   chrec_a = chrec_convert (type, chrec_a, NULL);
3467   chrec_b = chrec_convert (type, chrec_b, NULL);
3468   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3469 
3470   /* Special case overlap in the first iteration.  */
3471   if (integer_zerop (difference))
3472     {
3473       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3474       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3475       *last_conflicts = integer_one_node;
3476       return;
3477     }
3478 
3479   if (!chrec_is_positive (initial_condition (difference), &value0))
3480     {
3481       if (dump_file && (dump_flags & TDF_DETAILS))
3482 	fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3483 
3484       dependence_stats.num_siv_unimplemented++;
3485       *overlaps_a = conflict_fn_not_known ();
3486       *overlaps_b = conflict_fn_not_known ();
3487       *last_conflicts = chrec_dont_know;
3488       return;
3489     }
3490   else
3491     {
3492       if (value0 == false)
3493 	{
3494 	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3495 	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3496 	    {
3497 	      if (dump_file && (dump_flags & TDF_DETAILS))
3498 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
3499 
3500 	      *overlaps_a = conflict_fn_not_known ();
3501 	      *overlaps_b = conflict_fn_not_known ();
3502 	      *last_conflicts = chrec_dont_know;
3503 	      dependence_stats.num_siv_unimplemented++;
3504 	      return;
3505 	    }
3506 	  else
3507 	    {
3508 	      if (value1 == true)
3509 		{
3510 		  /* Example:
3511 		     chrec_a = 12
3512 		     chrec_b = {10, +, 1}
3513 		  */
3514 
3515 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3516 		    {
3517 		      HOST_WIDE_INT numiter;
3518 		      class loop *loop = get_chrec_loop (chrec_b);
3519 
3520 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3521 		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
3522 					 fold_build1 (ABS_EXPR, type, difference),
3523 					 CHREC_RIGHT (chrec_b));
3524 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3525 		      *last_conflicts = integer_one_node;
3526 
3527 
3528 		      /* Perform weak-zero siv test to see if overlap is
3529 			 outside the loop bounds.  */
3530 		      numiter = max_stmt_executions_int (loop);
3531 
3532 		      if (numiter >= 0
3533 			  && compare_tree_int (tmp, numiter) > 0)
3534 			{
3535 			  free_conflict_function (*overlaps_a);
3536 			  free_conflict_function (*overlaps_b);
3537 			  *overlaps_a = conflict_fn_no_dependence ();
3538 			  *overlaps_b = conflict_fn_no_dependence ();
3539 			  *last_conflicts = integer_zero_node;
3540 			  dependence_stats.num_siv_independent++;
3541 			  return;
3542 			}
3543 		      dependence_stats.num_siv_dependent++;
3544 		      return;
3545 		    }
3546 
3547 		  /* When the step does not divide the difference, there are
3548 		     no overlaps.  */
3549 		  else
3550 		    {
3551 		      *overlaps_a = conflict_fn_no_dependence ();
3552 		      *overlaps_b = conflict_fn_no_dependence ();
3553 		      *last_conflicts = integer_zero_node;
3554 		      dependence_stats.num_siv_independent++;
3555 		      return;
3556 		    }
3557 		}
3558 
3559 	      else
3560 		{
3561 		  /* Example:
3562 		     chrec_a = 12
3563 		     chrec_b = {10, +, -1}
3564 
3565 		     In this case, chrec_a will not overlap with chrec_b.  */
3566 		  *overlaps_a = conflict_fn_no_dependence ();
3567 		  *overlaps_b = conflict_fn_no_dependence ();
3568 		  *last_conflicts = integer_zero_node;
3569 		  dependence_stats.num_siv_independent++;
3570 		  return;
3571 		}
3572 	    }
3573 	}
3574       else
3575 	{
3576 	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3577 	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3578 	    {
3579 	      if (dump_file && (dump_flags & TDF_DETAILS))
3580 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
3581 
3582 	      *overlaps_a = conflict_fn_not_known ();
3583 	      *overlaps_b = conflict_fn_not_known ();
3584 	      *last_conflicts = chrec_dont_know;
3585 	      dependence_stats.num_siv_unimplemented++;
3586 	      return;
3587 	    }
3588 	  else
3589 	    {
3590 	      if (value2 == false)
3591 		{
3592 		  /* Example:
3593 		     chrec_a = 3
3594 		     chrec_b = {10, +, -1}
3595 		  */
3596 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3597 		    {
3598 		      HOST_WIDE_INT numiter;
3599 		      class loop *loop = get_chrec_loop (chrec_b);
3600 
3601 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3602 		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3603 					 CHREC_RIGHT (chrec_b));
3604 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3605 		      *last_conflicts = integer_one_node;
3606 
3607 		      /* Perform weak-zero siv test to see if overlap is
3608 			 outside the loop bounds.  */
3609 		      numiter = max_stmt_executions_int (loop);
3610 
3611 		      if (numiter >= 0
3612 			  && compare_tree_int (tmp, numiter) > 0)
3613 			{
3614 			  free_conflict_function (*overlaps_a);
3615 			  free_conflict_function (*overlaps_b);
3616 			  *overlaps_a = conflict_fn_no_dependence ();
3617 			  *overlaps_b = conflict_fn_no_dependence ();
3618 			  *last_conflicts = integer_zero_node;
3619 			  dependence_stats.num_siv_independent++;
3620 			  return;
3621 			}
3622 		      dependence_stats.num_siv_dependent++;
3623 		      return;
3624 		    }
3625 
3626 		  /* When the step does not divide the difference, there
3627 		     are no overlaps.  */
3628 		  else
3629 		    {
3630 		      *overlaps_a = conflict_fn_no_dependence ();
3631 		      *overlaps_b = conflict_fn_no_dependence ();
3632 		      *last_conflicts = integer_zero_node;
3633 		      dependence_stats.num_siv_independent++;
3634 		      return;
3635 		    }
3636 		}
3637 	      else
3638 		{
3639 		  /* Example:
3640 		     chrec_a = 3
3641 		     chrec_b = {4, +, 1}
3642 
3643 		     In this case, chrec_a will not overlap with chrec_b.  */
3644 		  *overlaps_a = conflict_fn_no_dependence ();
3645 		  *overlaps_b = conflict_fn_no_dependence ();
3646 		  *last_conflicts = integer_zero_node;
3647 		  dependence_stats.num_siv_independent++;
3648 		  return;
3649 		}
3650 	    }
3651 	}
3652     }
3653 }
3654 
3655 /* Helper recursive function for initializing the matrix A.  Returns
3656    the initial value of CHREC.  */
3657 
3658 static tree
initialize_matrix_A(lambda_matrix A,tree chrec,unsigned index,int mult)3659 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3660 {
3661   gcc_assert (chrec);
3662 
3663   switch (TREE_CODE (chrec))
3664     {
3665     case POLYNOMIAL_CHREC:
3666       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
3667 	return chrec_dont_know;
3668       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3669       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3670 
3671     case PLUS_EXPR:
3672     case MULT_EXPR:
3673     case MINUS_EXPR:
3674       {
3675 	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3676 	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3677 
3678 	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3679       }
3680 
3681     CASE_CONVERT:
3682       {
3683 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3684 	return chrec_convert (chrec_type (chrec), op, NULL);
3685       }
3686 
3687     case BIT_NOT_EXPR:
3688       {
3689 	/* Handle ~X as -1 - X.  */
3690 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3691 	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3692 			      build_int_cst (TREE_TYPE (chrec), -1), op);
3693       }
3694 
3695     case INTEGER_CST:
3696       return chrec;
3697 
3698     default:
3699       gcc_unreachable ();
3700       return NULL_TREE;
3701     }
3702 }
3703 
3704 #define FLOOR_DIV(x,y) ((x) / (y))
3705 
3706 /* Solves the special case of the Diophantine equation:
3707    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3708 
3709    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
3710    number of iterations that loops X and Y run.  The overlaps will be
3711    constructed as evolutions in dimension DIM.  */
3712 
3713 static void
compute_overlap_steps_for_affine_univar(HOST_WIDE_INT niter,HOST_WIDE_INT step_a,HOST_WIDE_INT step_b,affine_fn * overlaps_a,affine_fn * overlaps_b,tree * last_conflicts,int dim)3714 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3715 					 HOST_WIDE_INT step_a,
3716 					 HOST_WIDE_INT step_b,
3717 					 affine_fn *overlaps_a,
3718 					 affine_fn *overlaps_b,
3719 					 tree *last_conflicts, int dim)
3720 {
3721   if (((step_a > 0 && step_b > 0)
3722        || (step_a < 0 && step_b < 0)))
3723     {
3724       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3725       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3726 
3727       gcd_steps_a_b = gcd (step_a, step_b);
3728       step_overlaps_a = step_b / gcd_steps_a_b;
3729       step_overlaps_b = step_a / gcd_steps_a_b;
3730 
3731       if (niter > 0)
3732 	{
3733 	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
3734 	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3735 	  last_conflict = tau2;
3736 	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3737 	}
3738       else
3739 	*last_conflicts = chrec_dont_know;
3740 
3741       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3742 				      build_int_cst (NULL_TREE,
3743 						     step_overlaps_a));
3744       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3745 				      build_int_cst (NULL_TREE,
3746 						     step_overlaps_b));
3747     }
3748 
3749   else
3750     {
3751       *overlaps_a = affine_fn_cst (integer_zero_node);
3752       *overlaps_b = affine_fn_cst (integer_zero_node);
3753       *last_conflicts = integer_zero_node;
3754     }
3755 }
3756 
3757 /* Solves the special case of a Diophantine equation where CHREC_A is
3758    an affine bivariate function, and CHREC_B is an affine univariate
3759    function.  For example,
3760 
3761    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3762 
3763    has the following overlapping functions:
3764 
3765    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3766    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3767    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3768 
3769    FORNOW: This is a specialized implementation for a case occurring in
3770    a common benchmark.  Implement the general algorithm.  */
3771 
3772 static void
compute_overlap_steps_for_affine_1_2(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)3773 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3774 				      conflict_function **overlaps_a,
3775 				      conflict_function **overlaps_b,
3776 				      tree *last_conflicts)
3777 {
3778   bool xz_p, yz_p, xyz_p;
3779   HOST_WIDE_INT step_x, step_y, step_z;
3780   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3781   affine_fn overlaps_a_xz, overlaps_b_xz;
3782   affine_fn overlaps_a_yz, overlaps_b_yz;
3783   affine_fn overlaps_a_xyz, overlaps_b_xyz;
3784   affine_fn ova1, ova2, ovb;
3785   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
3786 
3787   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3788   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3789   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3790 
3791   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3792   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3793   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
3794 
3795   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3796     {
3797       if (dump_file && (dump_flags & TDF_DETAILS))
3798 	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3799 
3800       *overlaps_a = conflict_fn_not_known ();
3801       *overlaps_b = conflict_fn_not_known ();
3802       *last_conflicts = chrec_dont_know;
3803       return;
3804     }
3805 
3806   niter = MIN (niter_x, niter_z);
3807   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3808 					   &overlaps_a_xz,
3809 					   &overlaps_b_xz,
3810 					   &last_conflicts_xz, 1);
3811   niter = MIN (niter_y, niter_z);
3812   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3813 					   &overlaps_a_yz,
3814 					   &overlaps_b_yz,
3815 					   &last_conflicts_yz, 2);
3816   niter = MIN (niter_x, niter_z);
3817   niter = MIN (niter_y, niter);
3818   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3819 					   &overlaps_a_xyz,
3820 					   &overlaps_b_xyz,
3821 					   &last_conflicts_xyz, 3);
3822 
3823   xz_p = !integer_zerop (last_conflicts_xz);
3824   yz_p = !integer_zerop (last_conflicts_yz);
3825   xyz_p = !integer_zerop (last_conflicts_xyz);
3826 
3827   if (xz_p || yz_p || xyz_p)
3828     {
3829       ova1 = affine_fn_cst (integer_zero_node);
3830       ova2 = affine_fn_cst (integer_zero_node);
3831       ovb = affine_fn_cst (integer_zero_node);
3832       if (xz_p)
3833 	{
3834 	  affine_fn t0 = ova1;
3835 	  affine_fn t2 = ovb;
3836 
3837 	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3838 	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
3839 	  affine_fn_free (t0);
3840 	  affine_fn_free (t2);
3841 	  *last_conflicts = last_conflicts_xz;
3842 	}
3843       if (yz_p)
3844 	{
3845 	  affine_fn t0 = ova2;
3846 	  affine_fn t2 = ovb;
3847 
3848 	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3849 	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
3850 	  affine_fn_free (t0);
3851 	  affine_fn_free (t2);
3852 	  *last_conflicts = last_conflicts_yz;
3853 	}
3854       if (xyz_p)
3855 	{
3856 	  affine_fn t0 = ova1;
3857 	  affine_fn t2 = ova2;
3858 	  affine_fn t4 = ovb;
3859 
3860 	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3861 	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3862 	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3863 	  affine_fn_free (t0);
3864 	  affine_fn_free (t2);
3865 	  affine_fn_free (t4);
3866 	  *last_conflicts = last_conflicts_xyz;
3867 	}
3868       *overlaps_a = conflict_fn (2, ova1, ova2);
3869       *overlaps_b = conflict_fn (1, ovb);
3870     }
3871   else
3872     {
3873       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3874       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3875       *last_conflicts = integer_zero_node;
3876     }
3877 
3878   affine_fn_free (overlaps_a_xz);
3879   affine_fn_free (overlaps_b_xz);
3880   affine_fn_free (overlaps_a_yz);
3881   affine_fn_free (overlaps_b_yz);
3882   affine_fn_free (overlaps_a_xyz);
3883   affine_fn_free (overlaps_b_xyz);
3884 }
3885 
3886 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
3887 
3888 static void
lambda_vector_copy(lambda_vector vec1,lambda_vector vec2,int size)3889 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3890 		    int size)
3891 {
3892   memcpy (vec2, vec1, size * sizeof (*vec1));
3893 }
3894 
3895 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
3896 
3897 static void
lambda_matrix_copy(lambda_matrix mat1,lambda_matrix mat2,int m,int n)3898 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3899 		    int m, int n)
3900 {
3901   int i;
3902 
3903   for (i = 0; i < m; i++)
3904     lambda_vector_copy (mat1[i], mat2[i], n);
3905 }
3906 
3907 /* Store the N x N identity matrix in MAT.  */
3908 
3909 static void
lambda_matrix_id(lambda_matrix mat,int size)3910 lambda_matrix_id (lambda_matrix mat, int size)
3911 {
3912   int i, j;
3913 
3914   for (i = 0; i < size; i++)
3915     for (j = 0; j < size; j++)
3916       mat[i][j] = (i == j) ? 1 : 0;
3917 }
3918 
3919 /* Return the index of the first nonzero element of vector VEC1 between
3920    START and N.  We must have START <= N.
3921    Returns N if VEC1 is the zero vector.  */
3922 
3923 static int
lambda_vector_first_nz(lambda_vector vec1,int n,int start)3924 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3925 {
3926   int j = start;
3927   while (j < n && vec1[j] == 0)
3928     j++;
3929   return j;
3930 }
3931 
3932 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3933    R2 = R2 + CONST1 * R1.  */
3934 
3935 static void
lambda_matrix_row_add(lambda_matrix mat,int n,int r1,int r2,lambda_int const1)3936 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
3937 		       lambda_int const1)
3938 {
3939   int i;
3940 
3941   if (const1 == 0)
3942     return;
3943 
3944   for (i = 0; i < n; i++)
3945     mat[r2][i] += const1 * mat[r1][i];
3946 }
3947 
3948 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3949    and store the result in VEC2.  */
3950 
3951 static void
lambda_vector_mult_const(lambda_vector vec1,lambda_vector vec2,int size,lambda_int const1)3952 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3953 			  int size, lambda_int const1)
3954 {
3955   int i;
3956 
3957   if (const1 == 0)
3958     lambda_vector_clear (vec2, size);
3959   else
3960     for (i = 0; i < size; i++)
3961       vec2[i] = const1 * vec1[i];
3962 }
3963 
3964 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
3965 
3966 static void
lambda_vector_negate(lambda_vector vec1,lambda_vector vec2,int size)3967 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3968 		      int size)
3969 {
3970   lambda_vector_mult_const (vec1, vec2, size, -1);
3971 }
3972 
3973 /* Negate row R1 of matrix MAT which has N columns.  */
3974 
3975 static void
lambda_matrix_row_negate(lambda_matrix mat,int n,int r1)3976 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3977 {
3978   lambda_vector_negate (mat[r1], mat[r1], n);
3979 }
3980 
3981 /* Return true if two vectors are equal.  */
3982 
3983 static bool
lambda_vector_equal(lambda_vector vec1,lambda_vector vec2,int size)3984 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3985 {
3986   int i;
3987   for (i = 0; i < size; i++)
3988     if (vec1[i] != vec2[i])
3989       return false;
3990   return true;
3991 }
3992 
3993 /* Given an M x N integer matrix A, this function determines an M x
3994    M unimodular matrix U, and an M x N echelon matrix S such that
3995    "U.A = S".  This decomposition is also known as "right Hermite".
3996 
3997    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3998    Restructuring Compilers" Utpal Banerjee.  */
3999 
4000 static void
lambda_matrix_right_hermite(lambda_matrix A,int m,int n,lambda_matrix S,lambda_matrix U)4001 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4002 			     lambda_matrix S, lambda_matrix U)
4003 {
4004   int i, j, i0 = 0;
4005 
4006   lambda_matrix_copy (A, S, m, n);
4007   lambda_matrix_id (U, m);
4008 
4009   for (j = 0; j < n; j++)
4010     {
4011       if (lambda_vector_first_nz (S[j], m, i0) < m)
4012 	{
4013 	  ++i0;
4014 	  for (i = m - 1; i >= i0; i--)
4015 	    {
4016 	      while (S[i][j] != 0)
4017 		{
4018 		  lambda_int sigma, factor, a, b;
4019 
4020 		  a = S[i-1][j];
4021 		  b = S[i][j];
4022 		  sigma = ((a < 0) ^ (b < 0)) ? -1: 1;
4023 		  unsigned HOST_WIDE_INT abs_a = absu_hwi (a);
4024 		  unsigned HOST_WIDE_INT abs_b = absu_hwi (b);
4025 		  factor = sigma * (lambda_int)(abs_a / abs_b);
4026 
4027 		  lambda_matrix_row_add (S, n, i, i-1, -factor);
4028 		  std::swap (S[i], S[i-1]);
4029 
4030 		  lambda_matrix_row_add (U, m, i, i-1, -factor);
4031 		  std::swap (U[i], U[i-1]);
4032 		}
4033 	    }
4034 	}
4035     }
4036 }
4037 
4038 /* Determines the overlapping elements due to accesses CHREC_A and
4039    CHREC_B, that are affine functions.  This function cannot handle
4040    symbolic evolution functions, ie. when initial conditions are
4041    parameters, because it uses lambda matrices of integers.  */
4042 
4043 static void
analyze_subscript_affine_affine(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)4044 analyze_subscript_affine_affine (tree chrec_a,
4045 				 tree chrec_b,
4046 				 conflict_function **overlaps_a,
4047 				 conflict_function **overlaps_b,
4048 				 tree *last_conflicts)
4049 {
4050   unsigned nb_vars_a, nb_vars_b, dim;
4051   lambda_int gamma, gcd_alpha_beta;
4052   lambda_matrix A, U, S;
4053   struct obstack scratch_obstack;
4054 
4055   if (eq_evolutions_p (chrec_a, chrec_b))
4056     {
4057       /* The accessed index overlaps for each iteration in the
4058 	 loop.  */
4059       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4060       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4061       *last_conflicts = chrec_dont_know;
4062       return;
4063     }
4064   if (dump_file && (dump_flags & TDF_DETAILS))
4065     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4066 
4067   /* For determining the initial intersection, we have to solve a
4068      Diophantine equation.  This is the most time consuming part.
4069 
4070      For answering to the question: "Is there a dependence?" we have
4071      to prove that there exists a solution to the Diophantine
4072      equation, and that the solution is in the iteration domain,
4073      i.e. the solution is positive or zero, and that the solution
4074      happens before the upper bound loop.nb_iterations.  Otherwise
4075      there is no dependence.  This function outputs a description of
4076      the iterations that hold the intersections.  */
4077 
4078   nb_vars_a = nb_vars_in_chrec (chrec_a);
4079   nb_vars_b = nb_vars_in_chrec (chrec_b);
4080 
4081   gcc_obstack_init (&scratch_obstack);
4082 
4083   dim = nb_vars_a + nb_vars_b;
4084   U = lambda_matrix_new (dim, dim, &scratch_obstack);
4085   A = lambda_matrix_new (dim, 1, &scratch_obstack);
4086   S = lambda_matrix_new (dim, 1, &scratch_obstack);
4087 
4088   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4089   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4090   if (init_a == chrec_dont_know
4091       || init_b == chrec_dont_know)
4092     {
4093       if (dump_file && (dump_flags & TDF_DETAILS))
4094 	fprintf (dump_file, "affine-affine test failed: "
4095 		 "representation issue.\n");
4096       *overlaps_a = conflict_fn_not_known ();
4097       *overlaps_b = conflict_fn_not_known ();
4098       *last_conflicts = chrec_dont_know;
4099       goto end_analyze_subs_aa;
4100     }
4101   gamma = int_cst_value (init_b) - int_cst_value (init_a);
4102 
4103   /* Don't do all the hard work of solving the Diophantine equation
4104      when we already know the solution: for example,
4105      | {3, +, 1}_1
4106      | {3, +, 4}_2
4107      | gamma = 3 - 3 = 0.
4108      Then the first overlap occurs during the first iterations:
4109      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4110   */
4111   if (gamma == 0)
4112     {
4113       if (nb_vars_a == 1 && nb_vars_b == 1)
4114 	{
4115 	  HOST_WIDE_INT step_a, step_b;
4116 	  HOST_WIDE_INT niter, niter_a, niter_b;
4117 	  affine_fn ova, ovb;
4118 
4119 	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4120 	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4121 	  niter = MIN (niter_a, niter_b);
4122 	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4123 	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4124 
4125 	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4126 						   &ova, &ovb,
4127 						   last_conflicts, 1);
4128 	  *overlaps_a = conflict_fn (1, ova);
4129 	  *overlaps_b = conflict_fn (1, ovb);
4130 	}
4131 
4132       else if (nb_vars_a == 2 && nb_vars_b == 1)
4133 	compute_overlap_steps_for_affine_1_2
4134 	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4135 
4136       else if (nb_vars_a == 1 && nb_vars_b == 2)
4137 	compute_overlap_steps_for_affine_1_2
4138 	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4139 
4140       else
4141 	{
4142 	  if (dump_file && (dump_flags & TDF_DETAILS))
4143 	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4144 	  *overlaps_a = conflict_fn_not_known ();
4145 	  *overlaps_b = conflict_fn_not_known ();
4146 	  *last_conflicts = chrec_dont_know;
4147 	}
4148       goto end_analyze_subs_aa;
4149     }
4150 
4151   /* U.A = S */
4152   lambda_matrix_right_hermite (A, dim, 1, S, U);
4153 
4154   if (S[0][0] < 0)
4155     {
4156       S[0][0] *= -1;
4157       lambda_matrix_row_negate (U, dim, 0);
4158     }
4159   gcd_alpha_beta = S[0][0];
4160 
4161   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4162      but that is a quite strange case.  Instead of ICEing, answer
4163      don't know.  */
4164   if (gcd_alpha_beta == 0)
4165     {
4166       *overlaps_a = conflict_fn_not_known ();
4167       *overlaps_b = conflict_fn_not_known ();
4168       *last_conflicts = chrec_dont_know;
4169       goto end_analyze_subs_aa;
4170     }
4171 
4172   /* The classic "gcd-test".  */
4173   if (!int_divides_p (gcd_alpha_beta, gamma))
4174     {
4175       /* The "gcd-test" has determined that there is no integer
4176 	 solution, i.e. there is no dependence.  */
4177       *overlaps_a = conflict_fn_no_dependence ();
4178       *overlaps_b = conflict_fn_no_dependence ();
4179       *last_conflicts = integer_zero_node;
4180     }
4181 
4182   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
4183   else if (nb_vars_a == 1 && nb_vars_b == 1)
4184     {
4185       /* Both functions should have the same evolution sign.  */
4186       if (((A[0][0] > 0 && -A[1][0] > 0)
4187 	   || (A[0][0] < 0 && -A[1][0] < 0)))
4188 	{
4189 	  /* The solutions are given by:
4190 	     |
4191 	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
4192 	     |                           [u21 u22]    [y0]
4193 
4194 	     For a given integer t.  Using the following variables,
4195 
4196 	     | i0 = u11 * gamma / gcd_alpha_beta
4197 	     | j0 = u12 * gamma / gcd_alpha_beta
4198 	     | i1 = u21
4199 	     | j1 = u22
4200 
4201 	     the solutions are:
4202 
4203 	     | x0 = i0 + i1 * t,
4204 	     | y0 = j0 + j1 * t.  */
4205       	  HOST_WIDE_INT i0, j0, i1, j1;
4206 
4207 	  i0 = U[0][0] * gamma / gcd_alpha_beta;
4208 	  j0 = U[0][1] * gamma / gcd_alpha_beta;
4209 	  i1 = U[1][0];
4210 	  j1 = U[1][1];
4211 
4212 	  if ((i1 == 0 && i0 < 0)
4213 	      || (j1 == 0 && j0 < 0))
4214 	    {
4215 	      /* There is no solution.
4216 		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4217 		 falls in here, but for the moment we don't look at the
4218 		 upper bound of the iteration domain.  */
4219 	      *overlaps_a = conflict_fn_no_dependence ();
4220 	      *overlaps_b = conflict_fn_no_dependence ();
4221 	      *last_conflicts = integer_zero_node;
4222 	      goto end_analyze_subs_aa;
4223 	    }
4224 
4225 	  if (i1 > 0 && j1 > 0)
4226 	    {
4227 	      HOST_WIDE_INT niter_a
4228 		= max_stmt_executions_int (get_chrec_loop (chrec_a));
4229 	      HOST_WIDE_INT niter_b
4230 		= max_stmt_executions_int (get_chrec_loop (chrec_b));
4231 	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4232 
4233 	      /* (X0, Y0) is a solution of the Diophantine equation:
4234 		 "chrec_a (X0) = chrec_b (Y0)".  */
4235 	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4236 					CEIL (-j0, j1));
4237 	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
4238 	      HOST_WIDE_INT y0 = j1 * tau1 + j0;
4239 
4240 	      /* (X1, Y1) is the smallest positive solution of the eq
4241 		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4242 		 first conflict occurs.  */
4243 	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4244 	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4245 	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4246 
4247 	      if (niter > 0)
4248 		{
4249 		  /* If the overlap occurs outside of the bounds of the
4250 		     loop, there is no dependence.  */
4251 		  if (x1 >= niter_a || y1 >= niter_b)
4252 		    {
4253 		      *overlaps_a = conflict_fn_no_dependence ();
4254 		      *overlaps_b = conflict_fn_no_dependence ();
4255 		      *last_conflicts = integer_zero_node;
4256 		      goto end_analyze_subs_aa;
4257 		    }
4258 
4259 		  /* max stmt executions can get quite large, avoid
4260 		     overflows by using wide ints here.  */
4261 		  widest_int tau2
4262 		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4263 				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4264 		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4265 		  if (wi::min_precision (last_conflict, SIGNED)
4266 		      <= TYPE_PRECISION (integer_type_node))
4267 		    *last_conflicts
4268 		       = build_int_cst (integer_type_node,
4269 					last_conflict.to_shwi ());
4270 		  else
4271 		    *last_conflicts = chrec_dont_know;
4272 		}
4273 	      else
4274 		*last_conflicts = chrec_dont_know;
4275 
4276 	      *overlaps_a
4277 		= conflict_fn (1,
4278 			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
4279 						 1,
4280 						 build_int_cst (NULL_TREE, i1)));
4281 	      *overlaps_b
4282 		= conflict_fn (1,
4283 			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
4284 						 1,
4285 						 build_int_cst (NULL_TREE, j1)));
4286 	    }
4287 	  else
4288 	    {
4289 	      /* FIXME: For the moment, the upper bound of the
4290 		 iteration domain for i and j is not checked.  */
4291 	      if (dump_file && (dump_flags & TDF_DETAILS))
4292 		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4293 	      *overlaps_a = conflict_fn_not_known ();
4294 	      *overlaps_b = conflict_fn_not_known ();
4295 	      *last_conflicts = chrec_dont_know;
4296 	    }
4297 	}
4298       else
4299 	{
4300 	  if (dump_file && (dump_flags & TDF_DETAILS))
4301 	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4302 	  *overlaps_a = conflict_fn_not_known ();
4303 	  *overlaps_b = conflict_fn_not_known ();
4304 	  *last_conflicts = chrec_dont_know;
4305 	}
4306     }
4307   else
4308     {
4309       if (dump_file && (dump_flags & TDF_DETAILS))
4310 	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4311       *overlaps_a = conflict_fn_not_known ();
4312       *overlaps_b = conflict_fn_not_known ();
4313       *last_conflicts = chrec_dont_know;
4314     }
4315 
4316 end_analyze_subs_aa:
4317   obstack_free (&scratch_obstack, NULL);
4318   if (dump_file && (dump_flags & TDF_DETAILS))
4319     {
4320       fprintf (dump_file, "  (overlaps_a = ");
4321       dump_conflict_function (dump_file, *overlaps_a);
4322       fprintf (dump_file, ")\n  (overlaps_b = ");
4323       dump_conflict_function (dump_file, *overlaps_b);
4324       fprintf (dump_file, "))\n");
4325     }
4326 }
4327 
4328 /* Returns true when analyze_subscript_affine_affine can be used for
4329    determining the dependence relation between chrec_a and chrec_b,
4330    that contain symbols.  This function modifies chrec_a and chrec_b
4331    such that the analysis result is the same, and such that they don't
4332    contain symbols, and then can safely be passed to the analyzer.
4333 
4334    Example: The analysis of the following tuples of evolutions produce
4335    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4336    vs. {0, +, 1}_1
4337 
4338    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4339    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4340 */
4341 
4342 static bool
can_use_analyze_subscript_affine_affine(tree * chrec_a,tree * chrec_b)4343 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4344 {
4345   tree diff, type, left_a, left_b, right_b;
4346 
4347   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4348       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4349     /* FIXME: For the moment not handled.  Might be refined later.  */
4350     return false;
4351 
4352   type = chrec_type (*chrec_a);
4353   left_a = CHREC_LEFT (*chrec_a);
4354   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4355   diff = chrec_fold_minus (type, left_a, left_b);
4356 
4357   if (!evolution_function_is_constant_p (diff))
4358     return false;
4359 
4360   if (dump_file && (dump_flags & TDF_DETAILS))
4361     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4362 
4363   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4364 				     diff, CHREC_RIGHT (*chrec_a));
4365   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4366   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4367 				     build_int_cst (type, 0),
4368 				     right_b);
4369   return true;
4370 }
4371 
4372 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
4373    *OVERLAPS_B are initialized to the functions that describe the
4374    relation between the elements accessed twice by CHREC_A and
4375    CHREC_B.  For k >= 0, the following property is verified:
4376 
4377    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4378 
4379 static void
analyze_siv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts,int loop_nest_num)4380 analyze_siv_subscript (tree chrec_a,
4381 		       tree chrec_b,
4382 		       conflict_function **overlaps_a,
4383 		       conflict_function **overlaps_b,
4384 		       tree *last_conflicts,
4385 		       int loop_nest_num)
4386 {
4387   dependence_stats.num_siv++;
4388 
4389   if (dump_file && (dump_flags & TDF_DETAILS))
4390     fprintf (dump_file, "(analyze_siv_subscript \n");
4391 
4392   if (evolution_function_is_constant_p (chrec_a)
4393       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4394     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4395 				      overlaps_a, overlaps_b, last_conflicts);
4396 
4397   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4398 	   && evolution_function_is_constant_p (chrec_b))
4399     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4400 				      overlaps_b, overlaps_a, last_conflicts);
4401 
4402   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4403 	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4404     {
4405       if (!chrec_contains_symbols (chrec_a)
4406 	  && !chrec_contains_symbols (chrec_b))
4407 	{
4408 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
4409 					   overlaps_a, overlaps_b,
4410 					   last_conflicts);
4411 
4412 	  if (CF_NOT_KNOWN_P (*overlaps_a)
4413 	      || CF_NOT_KNOWN_P (*overlaps_b))
4414 	    dependence_stats.num_siv_unimplemented++;
4415 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4416 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
4417 	    dependence_stats.num_siv_independent++;
4418 	  else
4419 	    dependence_stats.num_siv_dependent++;
4420 	}
4421       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4422 							&chrec_b))
4423 	{
4424 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
4425 					   overlaps_a, overlaps_b,
4426 					   last_conflicts);
4427 
4428 	  if (CF_NOT_KNOWN_P (*overlaps_a)
4429 	      || CF_NOT_KNOWN_P (*overlaps_b))
4430 	    dependence_stats.num_siv_unimplemented++;
4431 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4432 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
4433 	    dependence_stats.num_siv_independent++;
4434 	  else
4435 	    dependence_stats.num_siv_dependent++;
4436 	}
4437       else
4438 	goto siv_subscript_dontknow;
4439     }
4440 
4441   else
4442     {
4443     siv_subscript_dontknow:;
4444       if (dump_file && (dump_flags & TDF_DETAILS))
4445 	fprintf (dump_file, "  siv test failed: unimplemented");
4446       *overlaps_a = conflict_fn_not_known ();
4447       *overlaps_b = conflict_fn_not_known ();
4448       *last_conflicts = chrec_dont_know;
4449       dependence_stats.num_siv_unimplemented++;
4450     }
4451 
4452   if (dump_file && (dump_flags & TDF_DETAILS))
4453     fprintf (dump_file, ")\n");
4454 }
4455 
4456 /* Returns false if we can prove that the greatest common divisor of the steps
4457    of CHREC does not divide CST, false otherwise.  */
4458 
4459 static bool
gcd_of_steps_may_divide_p(const_tree chrec,const_tree cst)4460 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4461 {
4462   HOST_WIDE_INT cd = 0, val;
4463   tree step;
4464 
4465   if (!tree_fits_shwi_p (cst))
4466     return true;
4467   val = tree_to_shwi (cst);
4468 
4469   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4470     {
4471       step = CHREC_RIGHT (chrec);
4472       if (!tree_fits_shwi_p (step))
4473 	return true;
4474       cd = gcd (cd, tree_to_shwi (step));
4475       chrec = CHREC_LEFT (chrec);
4476     }
4477 
4478   return val % cd == 0;
4479 }
4480 
4481 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4482    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
4483    functions that describe the relation between the elements accessed
4484    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4485    is verified:
4486 
4487    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4488 
4489 static void
analyze_miv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts,class loop * loop_nest)4490 analyze_miv_subscript (tree chrec_a,
4491 		       tree chrec_b,
4492 		       conflict_function **overlaps_a,
4493 		       conflict_function **overlaps_b,
4494 		       tree *last_conflicts,
4495 		       class loop *loop_nest)
4496 {
4497   tree type, difference;
4498 
4499   dependence_stats.num_miv++;
4500   if (dump_file && (dump_flags & TDF_DETAILS))
4501     fprintf (dump_file, "(analyze_miv_subscript \n");
4502 
4503   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4504   chrec_a = chrec_convert (type, chrec_a, NULL);
4505   chrec_b = chrec_convert (type, chrec_b, NULL);
4506   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4507 
4508   if (eq_evolutions_p (chrec_a, chrec_b))
4509     {
4510       /* Access functions are the same: all the elements are accessed
4511 	 in the same order.  */
4512       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4513       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4514       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4515       dependence_stats.num_miv_dependent++;
4516     }
4517 
4518   else if (evolution_function_is_constant_p (difference)
4519 	   && evolution_function_is_affine_multivariate_p (chrec_a,
4520 							   loop_nest->num)
4521 	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
4522     {
4523       /* testsuite/.../ssa-chrec-33.c
4524 	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4525 
4526 	 The difference is 1, and all the evolution steps are multiples
4527 	 of 2, consequently there are no overlapping elements.  */
4528       *overlaps_a = conflict_fn_no_dependence ();
4529       *overlaps_b = conflict_fn_no_dependence ();
4530       *last_conflicts = integer_zero_node;
4531       dependence_stats.num_miv_independent++;
4532     }
4533 
4534   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4535 	   && !chrec_contains_symbols (chrec_a, loop_nest)
4536 	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4537 	   && !chrec_contains_symbols (chrec_b, loop_nest))
4538     {
4539       /* testsuite/.../ssa-chrec-35.c
4540 	 {0, +, 1}_2  vs.  {0, +, 1}_3
4541 	 the overlapping elements are respectively located at iterations:
4542 	 {0, +, 1}_x and {0, +, 1}_x,
4543 	 in other words, we have the equality:
4544 	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4545 
4546 	 Other examples:
4547 	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4548 	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4549 
4550 	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4551 	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4552       */
4553       analyze_subscript_affine_affine (chrec_a, chrec_b,
4554 				       overlaps_a, overlaps_b, last_conflicts);
4555 
4556       if (CF_NOT_KNOWN_P (*overlaps_a)
4557  	  || CF_NOT_KNOWN_P (*overlaps_b))
4558 	dependence_stats.num_miv_unimplemented++;
4559       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4560 	       || CF_NO_DEPENDENCE_P (*overlaps_b))
4561 	dependence_stats.num_miv_independent++;
4562       else
4563 	dependence_stats.num_miv_dependent++;
4564     }
4565 
4566   else
4567     {
4568       /* When the analysis is too difficult, answer "don't know".  */
4569       if (dump_file && (dump_flags & TDF_DETAILS))
4570 	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4571 
4572       *overlaps_a = conflict_fn_not_known ();
4573       *overlaps_b = conflict_fn_not_known ();
4574       *last_conflicts = chrec_dont_know;
4575       dependence_stats.num_miv_unimplemented++;
4576     }
4577 
4578   if (dump_file && (dump_flags & TDF_DETAILS))
4579     fprintf (dump_file, ")\n");
4580 }
4581 
4582 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4583    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4584    OVERLAP_ITERATIONS_B are initialized with two functions that
4585    describe the iterations that contain conflicting elements.
4586 
4587    Remark: For an integer k >= 0, the following equality is true:
4588 
4589    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4590 */
4591 
4592 static void
analyze_overlapping_iterations(tree chrec_a,tree chrec_b,conflict_function ** overlap_iterations_a,conflict_function ** overlap_iterations_b,tree * last_conflicts,class loop * loop_nest)4593 analyze_overlapping_iterations (tree chrec_a,
4594 				tree chrec_b,
4595 				conflict_function **overlap_iterations_a,
4596 				conflict_function **overlap_iterations_b,
4597 				tree *last_conflicts, class loop *loop_nest)
4598 {
4599   unsigned int lnn = loop_nest->num;
4600 
4601   dependence_stats.num_subscript_tests++;
4602 
4603   if (dump_file && (dump_flags & TDF_DETAILS))
4604     {
4605       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4606       fprintf (dump_file, "  (chrec_a = ");
4607       print_generic_expr (dump_file, chrec_a);
4608       fprintf (dump_file, ")\n  (chrec_b = ");
4609       print_generic_expr (dump_file, chrec_b);
4610       fprintf (dump_file, ")\n");
4611     }
4612 
4613   if (chrec_a == NULL_TREE
4614       || chrec_b == NULL_TREE
4615       || chrec_contains_undetermined (chrec_a)
4616       || chrec_contains_undetermined (chrec_b))
4617     {
4618       dependence_stats.num_subscript_undetermined++;
4619 
4620       *overlap_iterations_a = conflict_fn_not_known ();
4621       *overlap_iterations_b = conflict_fn_not_known ();
4622     }
4623 
4624   /* If they are the same chrec, and are affine, they overlap
4625      on every iteration.  */
4626   else if (eq_evolutions_p (chrec_a, chrec_b)
4627 	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4628 	       || operand_equal_p (chrec_a, chrec_b, 0)))
4629     {
4630       dependence_stats.num_same_subscript_function++;
4631       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4632       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4633       *last_conflicts = chrec_dont_know;
4634     }
4635 
4636   /* If they aren't the same, and aren't affine, we can't do anything
4637      yet.  */
4638   else if ((chrec_contains_symbols (chrec_a)
4639 	    || chrec_contains_symbols (chrec_b))
4640 	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4641 	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4642     {
4643       dependence_stats.num_subscript_undetermined++;
4644       *overlap_iterations_a = conflict_fn_not_known ();
4645       *overlap_iterations_b = conflict_fn_not_known ();
4646     }
4647 
4648   else if (ziv_subscript_p (chrec_a, chrec_b))
4649     analyze_ziv_subscript (chrec_a, chrec_b,
4650 			   overlap_iterations_a, overlap_iterations_b,
4651 			   last_conflicts);
4652 
4653   else if (siv_subscript_p (chrec_a, chrec_b))
4654     analyze_siv_subscript (chrec_a, chrec_b,
4655 			   overlap_iterations_a, overlap_iterations_b,
4656 			   last_conflicts, lnn);
4657 
4658   else
4659     analyze_miv_subscript (chrec_a, chrec_b,
4660 			   overlap_iterations_a, overlap_iterations_b,
4661 			   last_conflicts, loop_nest);
4662 
4663   if (dump_file && (dump_flags & TDF_DETAILS))
4664     {
4665       fprintf (dump_file, "  (overlap_iterations_a = ");
4666       dump_conflict_function (dump_file, *overlap_iterations_a);
4667       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
4668       dump_conflict_function (dump_file, *overlap_iterations_b);
4669       fprintf (dump_file, "))\n");
4670     }
4671 }
4672 
4673 /* Helper function for uniquely inserting distance vectors.  */
4674 
4675 static void
save_dist_v(struct data_dependence_relation * ddr,lambda_vector dist_v)4676 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4677 {
4678   unsigned i;
4679   lambda_vector v;
4680 
4681   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4682     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4683       return;
4684 
4685   DDR_DIST_VECTS (ddr).safe_push (dist_v);
4686 }
4687 
4688 /* Helper function for uniquely inserting direction vectors.  */
4689 
4690 static void
save_dir_v(struct data_dependence_relation * ddr,lambda_vector dir_v)4691 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4692 {
4693   unsigned i;
4694   lambda_vector v;
4695 
4696   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4697     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4698       return;
4699 
4700   DDR_DIR_VECTS (ddr).safe_push (dir_v);
4701 }
4702 
4703 /* Add a distance of 1 on all the loops outer than INDEX.  If we
4704    haven't yet determined a distance for this outer loop, push a new
4705    distance vector composed of the previous distance, and a distance
4706    of 1 for this outer loop.  Example:
4707 
4708    | loop_1
4709    |   loop_2
4710    |     A[10]
4711    |   endloop_2
4712    | endloop_1
4713 
4714    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
4715    save (0, 1), then we have to save (1, 0).  */
4716 
4717 static void
add_outer_distances(struct data_dependence_relation * ddr,lambda_vector dist_v,int index)4718 add_outer_distances (struct data_dependence_relation *ddr,
4719 		     lambda_vector dist_v, int index)
4720 {
4721   /* For each outer loop where init_v is not set, the accesses are
4722      in dependence of distance 1 in the loop.  */
4723   while (--index >= 0)
4724     {
4725       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4726       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4727       save_v[index] = 1;
4728       save_dist_v (ddr, save_v);
4729     }
4730 }
4731 
4732 /* Return false when fail to represent the data dependence as a
4733    distance vector.  A_INDEX is the index of the first reference
4734    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4735    second reference.  INIT_B is set to true when a component has been
4736    added to the distance vector DIST_V.  INDEX_CARRY is then set to
4737    the index in DIST_V that carries the dependence.  */
4738 
4739 static bool
build_classic_dist_vector_1(struct data_dependence_relation * ddr,unsigned int a_index,unsigned int b_index,lambda_vector dist_v,bool * init_b,int * index_carry)4740 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4741 			     unsigned int a_index, unsigned int b_index,
4742 			     lambda_vector dist_v, bool *init_b,
4743 			     int *index_carry)
4744 {
4745   unsigned i;
4746   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4747   class loop *loop = DDR_LOOP_NEST (ddr)[0];
4748 
4749   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4750     {
4751       tree access_fn_a, access_fn_b;
4752       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4753 
4754       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4755 	{
4756 	  non_affine_dependence_relation (ddr);
4757 	  return false;
4758 	}
4759 
4760       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4761       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4762 
4763       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4764 	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4765 	{
4766 	  HOST_WIDE_INT dist;
4767 	  int index;
4768 	  int var_a = CHREC_VARIABLE (access_fn_a);
4769 	  int var_b = CHREC_VARIABLE (access_fn_b);
4770 
4771 	  if (var_a != var_b
4772 	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4773 	    {
4774 	      non_affine_dependence_relation (ddr);
4775 	      return false;
4776 	    }
4777 
4778 	  /* When data references are collected in a loop while data
4779 	     dependences are analyzed in loop nest nested in the loop, we
4780 	     would have more number of access functions than number of
4781 	     loops.  Skip access functions of loops not in the loop nest.
4782 
4783 	     See PR89725 for more information.  */
4784 	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
4785 	    continue;
4786 
4787 	  dist = int_cst_value (SUB_DISTANCE (subscript));
4788 	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4789 	  *index_carry = MIN (index, *index_carry);
4790 
4791 	  /* This is the subscript coupling test.  If we have already
4792 	     recorded a distance for this loop (a distance coming from
4793 	     another subscript), it should be the same.  For example,
4794 	     in the following code, there is no dependence:
4795 
4796 	     | loop i = 0, N, 1
4797 	     |   T[i+1][i] = ...
4798 	     |   ... = T[i][i]
4799 	     | endloop
4800 	  */
4801 	  if (init_v[index] != 0 && dist_v[index] != dist)
4802 	    {
4803 	      finalize_ddr_dependent (ddr, chrec_known);
4804 	      return false;
4805 	    }
4806 
4807 	  dist_v[index] = dist;
4808 	  init_v[index] = 1;
4809 	  *init_b = true;
4810 	}
4811       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4812 	{
4813 	  /* This can be for example an affine vs. constant dependence
4814 	     (T[i] vs. T[3]) that is not an affine dependence and is
4815 	     not representable as a distance vector.  */
4816 	  non_affine_dependence_relation (ddr);
4817 	  return false;
4818 	}
4819     }
4820 
4821   return true;
4822 }
4823 
4824 /* Return true when the DDR contains only invariant access functions wrto. loop
4825    number LNUM.  */
4826 
4827 static bool
invariant_access_functions(const struct data_dependence_relation * ddr,int lnum)4828 invariant_access_functions (const struct data_dependence_relation *ddr,
4829 			    int lnum)
4830 {
4831   unsigned i;
4832   subscript *sub;
4833 
4834   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4835     if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
4836 	|| !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
4837       return false;
4838 
4839   return true;
4840 }
4841 
4842 /* Helper function for the case where DDR_A and DDR_B are the same
4843    multivariate access function with a constant step.  For an example
4844    see pr34635-1.c.  */
4845 
4846 static void
add_multivariate_self_dist(struct data_dependence_relation * ddr,tree c_2)4847 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4848 {
4849   int x_1, x_2;
4850   tree c_1 = CHREC_LEFT (c_2);
4851   tree c_0 = CHREC_LEFT (c_1);
4852   lambda_vector dist_v;
4853   HOST_WIDE_INT v1, v2, cd;
4854 
4855   /* Polynomials with more than 2 variables are not handled yet.  When
4856      the evolution steps are parameters, it is not possible to
4857      represent the dependence using classical distance vectors.  */
4858   if (TREE_CODE (c_0) != INTEGER_CST
4859       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4860       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4861     {
4862       DDR_AFFINE_P (ddr) = false;
4863       return;
4864     }
4865 
4866   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4867   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4868 
4869   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
4870   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4871   v1 = int_cst_value (CHREC_RIGHT (c_1));
4872   v2 = int_cst_value (CHREC_RIGHT (c_2));
4873   cd = gcd (v1, v2);
4874   v1 /= cd;
4875   v2 /= cd;
4876 
4877   if (v2 < 0)
4878     {
4879       v2 = -v2;
4880       v1 = -v1;
4881     }
4882 
4883   dist_v[x_1] = v2;
4884   dist_v[x_2] = -v1;
4885   save_dist_v (ddr, dist_v);
4886 
4887   add_outer_distances (ddr, dist_v, x_1);
4888 }
4889 
4890 /* Helper function for the case where DDR_A and DDR_B are the same
4891    access functions.  */
4892 
4893 static void
add_other_self_distances(struct data_dependence_relation * ddr)4894 add_other_self_distances (struct data_dependence_relation *ddr)
4895 {
4896   lambda_vector dist_v;
4897   unsigned i;
4898   int index_carry = DDR_NB_LOOPS (ddr);
4899   subscript *sub;
4900   class loop *loop = DDR_LOOP_NEST (ddr)[0];
4901 
4902   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4903     {
4904       tree access_fun = SUB_ACCESS_FN (sub, 0);
4905 
4906       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4907 	{
4908 	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
4909 	    {
4910 	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4911 		{
4912 		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4913 		  return;
4914 		}
4915 
4916 	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4917 
4918 	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4919 		add_multivariate_self_dist (ddr, access_fun);
4920 	      else
4921 		/* The evolution step is not constant: it varies in
4922 		   the outer loop, so this cannot be represented by a
4923 		   distance vector.  For example in pr34635.c the
4924 		   evolution is {0, +, {0, +, 4}_1}_2.  */
4925 		DDR_AFFINE_P (ddr) = false;
4926 
4927 	      return;
4928 	    }
4929 
4930 	  /* When data references are collected in a loop while data
4931 	     dependences are analyzed in loop nest nested in the loop, we
4932 	     would have more number of access functions than number of
4933 	     loops.  Skip access functions of loops not in the loop nest.
4934 
4935 	     See PR89725 for more information.  */
4936 	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
4937 				  loop))
4938 	    continue;
4939 
4940 	  index_carry = MIN (index_carry,
4941 			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
4942 						 DDR_LOOP_NEST (ddr)));
4943 	}
4944     }
4945 
4946   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4947   add_outer_distances (ddr, dist_v, index_carry);
4948 }
4949 
4950 static void
insert_innermost_unit_dist_vector(struct data_dependence_relation * ddr)4951 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4952 {
4953   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4954 
4955   dist_v[0] = 1;
4956   save_dist_v (ddr, dist_v);
4957 }
4958 
4959 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
4960    is the case for example when access functions are the same and
4961    equal to a constant, as in:
4962 
4963    | loop_1
4964    |   A[3] = ...
4965    |   ... = A[3]
4966    | endloop_1
4967 
4968    in which case the distance vectors are (0) and (1).  */
4969 
4970 static void
add_distance_for_zero_overlaps(struct data_dependence_relation * ddr)4971 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4972 {
4973   unsigned i, j;
4974 
4975   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4976     {
4977       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4978       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4979       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4980 
4981       for (j = 0; j < ca->n; j++)
4982 	if (affine_function_zero_p (ca->fns[j]))
4983 	  {
4984 	    insert_innermost_unit_dist_vector (ddr);
4985 	    return;
4986 	  }
4987 
4988       for (j = 0; j < cb->n; j++)
4989 	if (affine_function_zero_p (cb->fns[j]))
4990 	  {
4991 	    insert_innermost_unit_dist_vector (ddr);
4992 	    return;
4993 	  }
4994     }
4995 }
4996 
4997 /* Return true when the DDR contains two data references that have the
4998    same access functions.  */
4999 
5000 static inline bool
same_access_functions(const struct data_dependence_relation * ddr)5001 same_access_functions (const struct data_dependence_relation *ddr)
5002 {
5003   unsigned i;
5004   subscript *sub;
5005 
5006   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5007     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5008 			  SUB_ACCESS_FN (sub, 1)))
5009       return false;
5010 
5011   return true;
5012 }
5013 
5014 /* Compute the classic per loop distance vector.  DDR is the data
5015    dependence relation to build a vector from.  Return false when fail
5016    to represent the data dependence as a distance vector.  */
5017 
5018 static bool
build_classic_dist_vector(struct data_dependence_relation * ddr,class loop * loop_nest)5019 build_classic_dist_vector (struct data_dependence_relation *ddr,
5020 			   class loop *loop_nest)
5021 {
5022   bool init_b = false;
5023   int index_carry = DDR_NB_LOOPS (ddr);
5024   lambda_vector dist_v;
5025 
5026   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5027     return false;
5028 
5029   if (same_access_functions (ddr))
5030     {
5031       /* Save the 0 vector.  */
5032       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5033       save_dist_v (ddr, dist_v);
5034 
5035       if (invariant_access_functions (ddr, loop_nest->num))
5036 	add_distance_for_zero_overlaps (ddr);
5037 
5038       if (DDR_NB_LOOPS (ddr) > 1)
5039 	add_other_self_distances (ddr);
5040 
5041       return true;
5042     }
5043 
5044   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5045   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5046     return false;
5047 
5048   /* Save the distance vector if we initialized one.  */
5049   if (init_b)
5050     {
5051       /* Verify a basic constraint: classic distance vectors should
5052 	 always be lexicographically positive.
5053 
5054 	 Data references are collected in the order of execution of
5055 	 the program, thus for the following loop
5056 
5057 	 | for (i = 1; i < 100; i++)
5058 	 |   for (j = 1; j < 100; j++)
5059 	 |     {
5060 	 |       t = T[j+1][i-1];  // A
5061 	 |       T[j][i] = t + 2;  // B
5062 	 |     }
5063 
5064 	 references are collected following the direction of the wind:
5065 	 A then B.  The data dependence tests are performed also
5066 	 following this order, such that we're looking at the distance
5067 	 separating the elements accessed by A from the elements later
5068 	 accessed by B.  But in this example, the distance returned by
5069 	 test_dep (A, B) is lexicographically negative (-1, 1), that
5070 	 means that the access A occurs later than B with respect to
5071 	 the outer loop, ie. we're actually looking upwind.  In this
5072 	 case we solve test_dep (B, A) looking downwind to the
5073 	 lexicographically positive solution, that returns the
5074 	 distance vector (1, -1).  */
5075       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5076 	{
5077 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5078 	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5079 	    return false;
5080 	  compute_subscript_distance (ddr);
5081 	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5082 					    &index_carry))
5083 	    return false;
5084 	  save_dist_v (ddr, save_v);
5085 	  DDR_REVERSED_P (ddr) = true;
5086 
5087 	  /* In this case there is a dependence forward for all the
5088 	     outer loops:
5089 
5090 	     | for (k = 1; k < 100; k++)
5091 	     |  for (i = 1; i < 100; i++)
5092 	     |   for (j = 1; j < 100; j++)
5093 	     |     {
5094 	     |       t = T[j+1][i-1];  // A
5095 	     |       T[j][i] = t + 2;  // B
5096 	     |     }
5097 
5098 	     the vectors are:
5099 	     (0,  1, -1)
5100 	     (1,  1, -1)
5101 	     (1, -1,  1)
5102 	  */
5103 	  if (DDR_NB_LOOPS (ddr) > 1)
5104 	    {
5105  	      add_outer_distances (ddr, save_v, index_carry);
5106 	      add_outer_distances (ddr, dist_v, index_carry);
5107 	    }
5108 	}
5109       else
5110 	{
5111 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5112 	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5113 
5114 	  if (DDR_NB_LOOPS (ddr) > 1)
5115 	    {
5116 	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5117 
5118 	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5119 		return false;
5120 	      compute_subscript_distance (ddr);
5121 	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5122 						&index_carry))
5123 		return false;
5124 
5125 	      save_dist_v (ddr, save_v);
5126 	      add_outer_distances (ddr, dist_v, index_carry);
5127 	      add_outer_distances (ddr, opposite_v, index_carry);
5128 	    }
5129 	  else
5130 	    save_dist_v (ddr, save_v);
5131 	}
5132     }
5133   else
5134     {
5135       /* There is a distance of 1 on all the outer loops: Example:
5136 	 there is a dependence of distance 1 on loop_1 for the array A.
5137 
5138 	 | loop_1
5139 	 |   A[5] = ...
5140 	 | endloop
5141       */
5142       add_outer_distances (ddr, dist_v,
5143 			   lambda_vector_first_nz (dist_v,
5144 						   DDR_NB_LOOPS (ddr), 0));
5145     }
5146 
5147   if (dump_file && (dump_flags & TDF_DETAILS))
5148     {
5149       unsigned i;
5150 
5151       fprintf (dump_file, "(build_classic_dist_vector\n");
5152       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5153 	{
5154 	  fprintf (dump_file, "  dist_vector = (");
5155 	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5156 			       DDR_NB_LOOPS (ddr));
5157 	  fprintf (dump_file, "  )\n");
5158 	}
5159       fprintf (dump_file, ")\n");
5160     }
5161 
5162   return true;
5163 }
5164 
5165 /* Return the direction for a given distance.
5166    FIXME: Computing dir this way is suboptimal, since dir can catch
5167    cases that dist is unable to represent.  */
5168 
5169 static inline enum data_dependence_direction
dir_from_dist(int dist)5170 dir_from_dist (int dist)
5171 {
5172   if (dist > 0)
5173     return dir_positive;
5174   else if (dist < 0)
5175     return dir_negative;
5176   else
5177     return dir_equal;
5178 }
5179 
5180 /* Compute the classic per loop direction vector.  DDR is the data
5181    dependence relation to build a vector from.  */
5182 
5183 static void
build_classic_dir_vector(struct data_dependence_relation * ddr)5184 build_classic_dir_vector (struct data_dependence_relation *ddr)
5185 {
5186   unsigned i, j;
5187   lambda_vector dist_v;
5188 
5189   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5190     {
5191       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5192 
5193       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5194 	dir_v[j] = dir_from_dist (dist_v[j]);
5195 
5196       save_dir_v (ddr, dir_v);
5197     }
5198 }
5199 
5200 /* Helper function.  Returns true when there is a dependence between the
5201    data references.  A_INDEX is the index of the first reference (0 for
5202    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
5203 
5204 static bool
subscript_dependence_tester_1(struct data_dependence_relation * ddr,unsigned int a_index,unsigned int b_index,class loop * loop_nest)5205 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5206 			       unsigned int a_index, unsigned int b_index,
5207 			       class loop *loop_nest)
5208 {
5209   unsigned int i;
5210   tree last_conflicts;
5211   struct subscript *subscript;
5212   tree res = NULL_TREE;
5213 
5214   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5215     {
5216       conflict_function *overlaps_a, *overlaps_b;
5217 
5218       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5219 				      SUB_ACCESS_FN (subscript, b_index),
5220 				      &overlaps_a, &overlaps_b,
5221 				      &last_conflicts, loop_nest);
5222 
5223       if (SUB_CONFLICTS_IN_A (subscript))
5224 	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5225       if (SUB_CONFLICTS_IN_B (subscript))
5226 	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5227 
5228       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5229       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5230       SUB_LAST_CONFLICT (subscript) = last_conflicts;
5231 
5232       /* If there is any undetermined conflict function we have to
5233          give a conservative answer in case we cannot prove that
5234 	 no dependence exists when analyzing another subscript.  */
5235       if (CF_NOT_KNOWN_P (overlaps_a)
5236  	  || CF_NOT_KNOWN_P (overlaps_b))
5237  	{
5238 	  res = chrec_dont_know;
5239 	  continue;
5240  	}
5241 
5242       /* When there is a subscript with no dependence we can stop.  */
5243       else if (CF_NO_DEPENDENCE_P (overlaps_a)
5244  	       || CF_NO_DEPENDENCE_P (overlaps_b))
5245  	{
5246 	  res = chrec_known;
5247 	  break;
5248  	}
5249     }
5250 
5251   if (res == NULL_TREE)
5252     return true;
5253 
5254   if (res == chrec_known)
5255     dependence_stats.num_dependence_independent++;
5256   else
5257     dependence_stats.num_dependence_undetermined++;
5258   finalize_ddr_dependent (ddr, res);
5259   return false;
5260 }
5261 
5262 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
5263 
5264 static void
subscript_dependence_tester(struct data_dependence_relation * ddr,class loop * loop_nest)5265 subscript_dependence_tester (struct data_dependence_relation *ddr,
5266 			     class loop *loop_nest)
5267 {
5268   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5269     dependence_stats.num_dependence_dependent++;
5270 
5271   compute_subscript_distance (ddr);
5272   if (build_classic_dist_vector (ddr, loop_nest))
5273     build_classic_dir_vector (ddr);
5274 }
5275 
5276 /* Returns true when all the access functions of A are affine or
5277    constant with respect to LOOP_NEST.  */
5278 
5279 static bool
access_functions_are_affine_or_constant_p(const struct data_reference * a,const class loop * loop_nest)5280 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5281 					   const class loop *loop_nest)
5282 {
5283   unsigned int i;
5284   vec<tree> fns = DR_ACCESS_FNS (a);
5285   tree t;
5286 
5287   FOR_EACH_VEC_ELT (fns, i, t)
5288     if (!evolution_function_is_invariant_p (t, loop_nest->num)
5289 	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5290       return false;
5291 
5292   return true;
5293 }
5294 
5295 /* This computes the affine dependence relation between A and B with
5296    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
5297    independence between two accesses, while CHREC_DONT_KNOW is used
5298    for representing the unknown relation.
5299 
5300    Note that it is possible to stop the computation of the dependence
5301    relation the first time we detect a CHREC_KNOWN element for a given
5302    subscript.  */
5303 
5304 void
compute_affine_dependence(struct data_dependence_relation * ddr,class loop * loop_nest)5305 compute_affine_dependence (struct data_dependence_relation *ddr,
5306 			   class loop *loop_nest)
5307 {
5308   struct data_reference *dra = DDR_A (ddr);
5309   struct data_reference *drb = DDR_B (ddr);
5310 
5311   if (dump_file && (dump_flags & TDF_DETAILS))
5312     {
5313       fprintf (dump_file, "(compute_affine_dependence\n");
5314       fprintf (dump_file, "  stmt_a: ");
5315       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5316       fprintf (dump_file, "  stmt_b: ");
5317       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5318     }
5319 
5320   /* Analyze only when the dependence relation is not yet known.  */
5321   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5322     {
5323       dependence_stats.num_dependence_tests++;
5324 
5325       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5326 	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
5327 	subscript_dependence_tester (ddr, loop_nest);
5328 
5329       /* As a last case, if the dependence cannot be determined, or if
5330 	 the dependence is considered too difficult to determine, answer
5331 	 "don't know".  */
5332       else
5333 	{
5334 	  dependence_stats.num_dependence_undetermined++;
5335 
5336 	  if (dump_file && (dump_flags & TDF_DETAILS))
5337 	    {
5338 	      fprintf (dump_file, "Data ref a:\n");
5339 	      dump_data_reference (dump_file, dra);
5340 	      fprintf (dump_file, "Data ref b:\n");
5341 	      dump_data_reference (dump_file, drb);
5342 	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5343 	    }
5344 	  finalize_ddr_dependent (ddr, chrec_dont_know);
5345 	}
5346     }
5347 
5348   if (dump_file && (dump_flags & TDF_DETAILS))
5349     {
5350       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5351 	fprintf (dump_file, ") -> no dependence\n");
5352       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5353 	fprintf (dump_file, ") -> dependence analysis failed\n");
5354       else
5355 	fprintf (dump_file, ")\n");
5356     }
5357 }
5358 
5359 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5360    the data references in DATAREFS, in the LOOP_NEST.  When
5361    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5362    relations.  Return true when successful, i.e. data references number
5363    is small enough to be handled.  */
5364 
5365 bool
compute_all_dependences(vec<data_reference_p> datarefs,vec<ddr_p> * dependence_relations,vec<loop_p> loop_nest,bool compute_self_and_rr)5366 compute_all_dependences (vec<data_reference_p> datarefs,
5367 			 vec<ddr_p> *dependence_relations,
5368 			 vec<loop_p> loop_nest,
5369 			 bool compute_self_and_rr)
5370 {
5371   struct data_dependence_relation *ddr;
5372   struct data_reference *a, *b;
5373   unsigned int i, j;
5374 
5375   if ((int) datarefs.length ()
5376       > param_loop_max_datarefs_for_datadeps)
5377     {
5378       struct data_dependence_relation *ddr;
5379 
5380       /* Insert a single relation into dependence_relations:
5381 	 chrec_dont_know.  */
5382       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5383       dependence_relations->safe_push (ddr);
5384       return false;
5385     }
5386 
5387   FOR_EACH_VEC_ELT (datarefs, i, a)
5388     for (j = i + 1; datarefs.iterate (j, &b); j++)
5389       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5390 	{
5391 	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
5392 	  dependence_relations->safe_push (ddr);
5393           if (loop_nest.exists ())
5394    	    compute_affine_dependence (ddr, loop_nest[0]);
5395 	}
5396 
5397   if (compute_self_and_rr)
5398     FOR_EACH_VEC_ELT (datarefs, i, a)
5399       {
5400 	ddr = initialize_data_dependence_relation (a, a, loop_nest);
5401 	dependence_relations->safe_push (ddr);
5402         if (loop_nest.exists ())
5403    	  compute_affine_dependence (ddr, loop_nest[0]);
5404       }
5405 
5406   return true;
5407 }
5408 
5409 /* Describes a location of a memory reference.  */
5410 
5411 struct data_ref_loc
5412 {
5413   /* The memory reference.  */
5414   tree ref;
5415 
5416   /* True if the memory reference is read.  */
5417   bool is_read;
5418 
5419   /* True if the data reference is conditional within the containing
5420      statement, i.e. if it might not occur even when the statement
5421      is executed and runs to completion.  */
5422   bool is_conditional_in_stmt;
5423 };
5424 
5425 
5426 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
5427    true if STMT clobbers memory, false otherwise.  */
5428 
5429 static bool
get_references_in_stmt(gimple * stmt,vec<data_ref_loc,va_heap> * references)5430 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5431 {
5432   bool clobbers_memory = false;
5433   data_ref_loc ref;
5434   tree op0, op1;
5435   enum gimple_code stmt_code = gimple_code (stmt);
5436 
5437   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5438      As we cannot model data-references to not spelled out
5439      accesses give up if they may occur.  */
5440   if (stmt_code == GIMPLE_CALL
5441       && !(gimple_call_flags (stmt) & ECF_CONST))
5442     {
5443       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
5444       if (gimple_call_internal_p (stmt))
5445 	switch (gimple_call_internal_fn (stmt))
5446 	  {
5447 	  case IFN_GOMP_SIMD_LANE:
5448 	    {
5449 	      class loop *loop = gimple_bb (stmt)->loop_father;
5450 	      tree uid = gimple_call_arg (stmt, 0);
5451 	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
5452 	      if (loop == NULL
5453 		  || loop->simduid != SSA_NAME_VAR (uid))
5454 		clobbers_memory = true;
5455 	      break;
5456 	    }
5457 	  case IFN_MASK_LOAD:
5458 	  case IFN_MASK_STORE:
5459 	    break;
5460 	  default:
5461 	    clobbers_memory = true;
5462 	    break;
5463 	  }
5464       else
5465 	clobbers_memory = true;
5466     }
5467   else if (stmt_code == GIMPLE_ASM
5468 	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5469 	       || gimple_vuse (stmt)))
5470     clobbers_memory = true;
5471 
5472   if (!gimple_vuse (stmt))
5473     return clobbers_memory;
5474 
5475   if (stmt_code == GIMPLE_ASSIGN)
5476     {
5477       tree base;
5478       op0 = gimple_assign_lhs (stmt);
5479       op1 = gimple_assign_rhs1 (stmt);
5480 
5481       if (DECL_P (op1)
5482 	  || (REFERENCE_CLASS_P (op1)
5483 	      && (base = get_base_address (op1))
5484 	      && TREE_CODE (base) != SSA_NAME
5485 	      && !is_gimple_min_invariant (base)))
5486 	{
5487 	  ref.ref = op1;
5488 	  ref.is_read = true;
5489 	  ref.is_conditional_in_stmt = false;
5490 	  references->safe_push (ref);
5491 	}
5492     }
5493   else if (stmt_code == GIMPLE_CALL)
5494     {
5495       unsigned i, n;
5496       tree ptr, type;
5497       unsigned int align;
5498 
5499       ref.is_read = false;
5500       if (gimple_call_internal_p (stmt))
5501 	switch (gimple_call_internal_fn (stmt))
5502 	  {
5503 	  case IFN_MASK_LOAD:
5504 	    if (gimple_call_lhs (stmt) == NULL_TREE)
5505 	      break;
5506 	    ref.is_read = true;
5507 	    /* FALLTHRU */
5508 	  case IFN_MASK_STORE:
5509 	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5510 	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
5511 	    if (ref.is_read)
5512 	      type = TREE_TYPE (gimple_call_lhs (stmt));
5513 	    else
5514 	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
5515 	    if (TYPE_ALIGN (type) != align)
5516 	      type = build_aligned_type (type, align);
5517 	    ref.is_conditional_in_stmt = true;
5518 	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5519 				   ptr);
5520 	    references->safe_push (ref);
5521 	    return false;
5522 	  default:
5523 	    break;
5524 	  }
5525 
5526       op0 = gimple_call_lhs (stmt);
5527       n = gimple_call_num_args (stmt);
5528       for (i = 0; i < n; i++)
5529 	{
5530 	  op1 = gimple_call_arg (stmt, i);
5531 
5532 	  if (DECL_P (op1)
5533 	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5534 	    {
5535 	      ref.ref = op1;
5536 	      ref.is_read = true;
5537 	      ref.is_conditional_in_stmt = false;
5538 	      references->safe_push (ref);
5539 	    }
5540 	}
5541     }
5542   else
5543     return clobbers_memory;
5544 
5545   if (op0
5546       && (DECL_P (op0)
5547 	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5548     {
5549       ref.ref = op0;
5550       ref.is_read = false;
5551       ref.is_conditional_in_stmt = false;
5552       references->safe_push (ref);
5553     }
5554   return clobbers_memory;
5555 }
5556 
5557 
5558 /* Returns true if the loop-nest has any data reference.  */
5559 
5560 bool
loop_nest_has_data_refs(loop_p loop)5561 loop_nest_has_data_refs (loop_p loop)
5562 {
5563   basic_block *bbs = get_loop_body (loop);
5564   auto_vec<data_ref_loc, 3> references;
5565 
5566   for (unsigned i = 0; i < loop->num_nodes; i++)
5567     {
5568       basic_block bb = bbs[i];
5569       gimple_stmt_iterator bsi;
5570 
5571       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5572 	{
5573 	  gimple *stmt = gsi_stmt (bsi);
5574 	  get_references_in_stmt (stmt, &references);
5575 	  if (references.length ())
5576 	    {
5577 	      free (bbs);
5578 	      return true;
5579 	    }
5580 	}
5581     }
5582   free (bbs);
5583   return false;
5584 }
5585 
5586 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5587    reference, returns false, otherwise returns true.  NEST is the outermost
5588    loop of the loop nest in which the references should be analyzed.  */
5589 
5590 opt_result
find_data_references_in_stmt(class loop * nest,gimple * stmt,vec<data_reference_p> * datarefs)5591 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5592 			      vec<data_reference_p> *datarefs)
5593 {
5594   unsigned i;
5595   auto_vec<data_ref_loc, 2> references;
5596   data_ref_loc *ref;
5597   data_reference_p dr;
5598 
5599   if (get_references_in_stmt (stmt, &references))
5600     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5601 				   stmt);
5602 
5603   FOR_EACH_VEC_ELT (references, i, ref)
5604     {
5605       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5606 			    loop_containing_stmt (stmt), ref->ref,
5607 			    stmt, ref->is_read, ref->is_conditional_in_stmt);
5608       gcc_assert (dr != NULL);
5609       datarefs->safe_push (dr);
5610     }
5611 
5612   return opt_result::success ();
5613 }
5614 
5615 /* Stores the data references in STMT to DATAREFS.  If there is an
5616    unanalyzable reference, returns false, otherwise returns true.
5617    NEST is the outermost loop of the loop nest in which the references
5618    should be instantiated, LOOP is the loop in which the references
5619    should be analyzed.  */
5620 
5621 bool
graphite_find_data_references_in_stmt(edge nest,loop_p loop,gimple * stmt,vec<data_reference_p> * datarefs)5622 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5623 				       vec<data_reference_p> *datarefs)
5624 {
5625   unsigned i;
5626   auto_vec<data_ref_loc, 2> references;
5627   data_ref_loc *ref;
5628   bool ret = true;
5629   data_reference_p dr;
5630 
5631   if (get_references_in_stmt (stmt, &references))
5632     return false;
5633 
5634   FOR_EACH_VEC_ELT (references, i, ref)
5635     {
5636       dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5637 			    ref->is_conditional_in_stmt);
5638       gcc_assert (dr != NULL);
5639       datarefs->safe_push (dr);
5640     }
5641 
5642   return ret;
5643 }
5644 
5645 /* Search the data references in LOOP, and record the information into
5646    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5647    difficult case, returns NULL_TREE otherwise.  */
5648 
5649 tree
find_data_references_in_bb(class loop * loop,basic_block bb,vec<data_reference_p> * datarefs)5650 find_data_references_in_bb (class loop *loop, basic_block bb,
5651                             vec<data_reference_p> *datarefs)
5652 {
5653   gimple_stmt_iterator bsi;
5654 
5655   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5656     {
5657       gimple *stmt = gsi_stmt (bsi);
5658 
5659       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5660         {
5661           struct data_reference *res;
5662           res = XCNEW (struct data_reference);
5663           datarefs->safe_push (res);
5664 
5665           return chrec_dont_know;
5666         }
5667     }
5668 
5669   return NULL_TREE;
5670 }
5671 
5672 /* Search the data references in LOOP, and record the information into
5673    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5674    difficult case, returns NULL_TREE otherwise.
5675 
5676    TODO: This function should be made smarter so that it can handle address
5677    arithmetic as if they were array accesses, etc.  */
5678 
5679 tree
find_data_references_in_loop(class loop * loop,vec<data_reference_p> * datarefs)5680 find_data_references_in_loop (class loop *loop,
5681 			      vec<data_reference_p> *datarefs)
5682 {
5683   basic_block bb, *bbs;
5684   unsigned int i;
5685 
5686   bbs = get_loop_body_in_dom_order (loop);
5687 
5688   for (i = 0; i < loop->num_nodes; i++)
5689     {
5690       bb = bbs[i];
5691 
5692       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5693         {
5694           free (bbs);
5695           return chrec_dont_know;
5696         }
5697     }
5698   free (bbs);
5699 
5700   return NULL_TREE;
5701 }
5702 
5703 /* Return the alignment in bytes that DRB is guaranteed to have at all
5704    times.  */
5705 
5706 unsigned int
dr_alignment(innermost_loop_behavior * drb)5707 dr_alignment (innermost_loop_behavior *drb)
5708 {
5709   /* Get the alignment of BASE_ADDRESS + INIT.  */
5710   unsigned int alignment = drb->base_alignment;
5711   unsigned int misalignment = (drb->base_misalignment
5712 			       + TREE_INT_CST_LOW (drb->init));
5713   if (misalignment != 0)
5714     alignment = MIN (alignment, misalignment & -misalignment);
5715 
5716   /* Cap it to the alignment of OFFSET.  */
5717   if (!integer_zerop (drb->offset))
5718     alignment = MIN (alignment, drb->offset_alignment);
5719 
5720   /* Cap it to the alignment of STEP.  */
5721   if (!integer_zerop (drb->step))
5722     alignment = MIN (alignment, drb->step_alignment);
5723 
5724   return alignment;
5725 }
5726 
5727 /* If BASE is a pointer-typed SSA name, try to find the object that it
5728    is based on.  Return this object X on success and store the alignment
5729    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
5730 
5731 static tree
get_base_for_alignment_1(tree base,unsigned int * alignment_out)5732 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
5733 {
5734   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
5735     return NULL_TREE;
5736 
5737   gimple *def = SSA_NAME_DEF_STMT (base);
5738   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
5739 
5740   /* Peel chrecs and record the minimum alignment preserved by
5741      all steps.  */
5742   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5743   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
5744     {
5745       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
5746       alignment = MIN (alignment, step_alignment);
5747       base = CHREC_LEFT (base);
5748     }
5749 
5750   /* Punt if the expression is too complicated to handle.  */
5751   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
5752     return NULL_TREE;
5753 
5754   /* The only useful cases are those for which a dereference folds to something
5755      other than an INDIRECT_REF.  */
5756   tree ref_type = TREE_TYPE (TREE_TYPE (base));
5757   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
5758   if (!ref)
5759     return NULL_TREE;
5760 
5761   /* Analyze the base to which the steps we peeled were applied.  */
5762   poly_int64 bitsize, bitpos, bytepos;
5763   machine_mode mode;
5764   int unsignedp, reversep, volatilep;
5765   tree offset;
5766   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
5767 			      &unsignedp, &reversep, &volatilep);
5768   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
5769     return NULL_TREE;
5770 
5771   /* Restrict the alignment to that guaranteed by the offsets.  */
5772   unsigned int bytepos_alignment = known_alignment (bytepos);
5773   if (bytepos_alignment != 0)
5774     alignment = MIN (alignment, bytepos_alignment);
5775   if (offset)
5776     {
5777       unsigned int offset_alignment = highest_pow2_factor (offset);
5778       alignment = MIN (alignment, offset_alignment);
5779     }
5780 
5781   *alignment_out = alignment;
5782   return base;
5783 }
5784 
5785 /* Return the object whose alignment would need to be changed in order
5786    to increase the alignment of ADDR.  Store the maximum achievable
5787    alignment in *MAX_ALIGNMENT.  */
5788 
5789 tree
get_base_for_alignment(tree addr,unsigned int * max_alignment)5790 get_base_for_alignment (tree addr, unsigned int *max_alignment)
5791 {
5792   tree base = get_base_for_alignment_1 (addr, max_alignment);
5793   if (base)
5794     return base;
5795 
5796   if (TREE_CODE (addr) == ADDR_EXPR)
5797     addr = TREE_OPERAND (addr, 0);
5798   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5799   return addr;
5800 }
5801 
5802 /* Recursive helper function.  */
5803 
5804 static bool
find_loop_nest_1(class loop * loop,vec<loop_p> * loop_nest)5805 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
5806 {
5807   /* Inner loops of the nest should not contain siblings.  Example:
5808      when there are two consecutive loops,
5809 
5810      | loop_0
5811      |   loop_1
5812      |     A[{0, +, 1}_1]
5813      |   endloop_1
5814      |   loop_2
5815      |     A[{0, +, 1}_2]
5816      |   endloop_2
5817      | endloop_0
5818 
5819      the dependence relation cannot be captured by the distance
5820      abstraction.  */
5821   if (loop->next)
5822     return false;
5823 
5824   loop_nest->safe_push (loop);
5825   if (loop->inner)
5826     return find_loop_nest_1 (loop->inner, loop_nest);
5827   return true;
5828 }
5829 
5830 /* Return false when the LOOP is not well nested.  Otherwise return
5831    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
5832    contain the loops from the outermost to the innermost, as they will
5833    appear in the classic distance vector.  */
5834 
5835 bool
find_loop_nest(class loop * loop,vec<loop_p> * loop_nest)5836 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
5837 {
5838   loop_nest->safe_push (loop);
5839   if (loop->inner)
5840     return find_loop_nest_1 (loop->inner, loop_nest);
5841   return true;
5842 }
5843 
5844 /* Returns true when the data dependences have been computed, false otherwise.
5845    Given a loop nest LOOP, the following vectors are returned:
5846    DATAREFS is initialized to all the array elements contained in this loop,
5847    DEPENDENCE_RELATIONS contains the relations between the data references.
5848    Compute read-read and self relations if
5849    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
5850 
5851 bool
compute_data_dependences_for_loop(class loop * loop,bool compute_self_and_read_read_dependences,vec<loop_p> * loop_nest,vec<data_reference_p> * datarefs,vec<ddr_p> * dependence_relations)5852 compute_data_dependences_for_loop (class loop *loop,
5853 				   bool compute_self_and_read_read_dependences,
5854 				   vec<loop_p> *loop_nest,
5855 				   vec<data_reference_p> *datarefs,
5856 				   vec<ddr_p> *dependence_relations)
5857 {
5858   bool res = true;
5859 
5860   memset (&dependence_stats, 0, sizeof (dependence_stats));
5861 
5862   /* If the loop nest is not well formed, or one of the data references
5863      is not computable, give up without spending time to compute other
5864      dependences.  */
5865   if (!loop
5866       || !find_loop_nest (loop, loop_nest)
5867       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5868       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5869 				   compute_self_and_read_read_dependences))
5870     res = false;
5871 
5872   if (dump_file && (dump_flags & TDF_STATS))
5873     {
5874       fprintf (dump_file, "Dependence tester statistics:\n");
5875 
5876       fprintf (dump_file, "Number of dependence tests: %d\n",
5877 	       dependence_stats.num_dependence_tests);
5878       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5879 	       dependence_stats.num_dependence_dependent);
5880       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5881 	       dependence_stats.num_dependence_independent);
5882       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5883 	       dependence_stats.num_dependence_undetermined);
5884 
5885       fprintf (dump_file, "Number of subscript tests: %d\n",
5886 	       dependence_stats.num_subscript_tests);
5887       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5888 	       dependence_stats.num_subscript_undetermined);
5889       fprintf (dump_file, "Number of same subscript function: %d\n",
5890 	       dependence_stats.num_same_subscript_function);
5891 
5892       fprintf (dump_file, "Number of ziv tests: %d\n",
5893 	       dependence_stats.num_ziv);
5894       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5895 	       dependence_stats.num_ziv_dependent);
5896       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5897 	       dependence_stats.num_ziv_independent);
5898       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5899 	       dependence_stats.num_ziv_unimplemented);
5900 
5901       fprintf (dump_file, "Number of siv tests: %d\n",
5902 	       dependence_stats.num_siv);
5903       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5904 	       dependence_stats.num_siv_dependent);
5905       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5906 	       dependence_stats.num_siv_independent);
5907       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5908 	       dependence_stats.num_siv_unimplemented);
5909 
5910       fprintf (dump_file, "Number of miv tests: %d\n",
5911 	       dependence_stats.num_miv);
5912       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5913 	       dependence_stats.num_miv_dependent);
5914       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5915 	       dependence_stats.num_miv_independent);
5916       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5917 	       dependence_stats.num_miv_unimplemented);
5918     }
5919 
5920   return res;
5921 }
5922 
5923 /* Free the memory used by a data dependence relation DDR.  */
5924 
5925 void
free_dependence_relation(struct data_dependence_relation * ddr)5926 free_dependence_relation (struct data_dependence_relation *ddr)
5927 {
5928   if (ddr == NULL)
5929     return;
5930 
5931   if (DDR_SUBSCRIPTS (ddr).exists ())
5932     free_subscripts (DDR_SUBSCRIPTS (ddr));
5933   DDR_DIST_VECTS (ddr).release ();
5934   DDR_DIR_VECTS (ddr).release ();
5935 
5936   free (ddr);
5937 }
5938 
5939 /* Free the memory used by the data dependence relations from
5940    DEPENDENCE_RELATIONS.  */
5941 
5942 void
free_dependence_relations(vec<ddr_p> dependence_relations)5943 free_dependence_relations (vec<ddr_p> dependence_relations)
5944 {
5945   unsigned int i;
5946   struct data_dependence_relation *ddr;
5947 
5948   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5949     if (ddr)
5950       free_dependence_relation (ddr);
5951 
5952   dependence_relations.release ();
5953 }
5954 
5955 /* Free the memory used by the data references from DATAREFS.  */
5956 
5957 void
free_data_refs(vec<data_reference_p> datarefs)5958 free_data_refs (vec<data_reference_p> datarefs)
5959 {
5960   unsigned int i;
5961   struct data_reference *dr;
5962 
5963   FOR_EACH_VEC_ELT (datarefs, i, dr)
5964     free_data_ref (dr);
5965   datarefs.release ();
5966 }
5967 
5968 /* Common routine implementing both dr_direction_indicator and
5969    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
5970    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
5971    Return the step as the indicator otherwise.  */
5972 
5973 static tree
dr_step_indicator(struct data_reference * dr,int useful_min)5974 dr_step_indicator (struct data_reference *dr, int useful_min)
5975 {
5976   tree step = DR_STEP (dr);
5977   if (!step)
5978     return NULL_TREE;
5979   STRIP_NOPS (step);
5980   /* Look for cases where the step is scaled by a positive constant
5981      integer, which will often be the access size.  If the multiplication
5982      doesn't change the sign (due to overflow effects) then we can
5983      test the unscaled value instead.  */
5984   if (TREE_CODE (step) == MULT_EXPR
5985       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
5986       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
5987     {
5988       tree factor = TREE_OPERAND (step, 1);
5989       step = TREE_OPERAND (step, 0);
5990 
5991       /* Strip widening and truncating conversions as well as nops.  */
5992       if (CONVERT_EXPR_P (step)
5993 	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
5994 	step = TREE_OPERAND (step, 0);
5995       tree type = TREE_TYPE (step);
5996 
5997       /* Get the range of step values that would not cause overflow.  */
5998       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
5999 			 / wi::to_widest (factor));
6000       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6001 			 / wi::to_widest (factor));
6002 
6003       /* Get the range of values that the unconverted step actually has.  */
6004       wide_int step_min, step_max;
6005       if (TREE_CODE (step) != SSA_NAME
6006 	  || get_range_info (step, &step_min, &step_max) != VR_RANGE)
6007 	{
6008 	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6009 	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6010 	}
6011 
6012       /* Check whether the unconverted step has an acceptable range.  */
6013       signop sgn = TYPE_SIGN (type);
6014       if (wi::les_p (minv, widest_int::from (step_min, sgn))
6015 	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6016 	{
6017 	  if (wi::ge_p (step_min, useful_min, sgn))
6018 	    return ssize_int (useful_min);
6019 	  else if (wi::lt_p (step_max, 0, sgn))
6020 	    return ssize_int (-1);
6021 	  else
6022 	    return fold_convert (ssizetype, step);
6023 	}
6024     }
6025   return DR_STEP (dr);
6026 }
6027 
6028 /* Return a value that is negative iff DR has a negative step.  */
6029 
6030 tree
dr_direction_indicator(struct data_reference * dr)6031 dr_direction_indicator (struct data_reference *dr)
6032 {
6033   return dr_step_indicator (dr, 0);
6034 }
6035 
6036 /* Return a value that is zero iff DR has a zero step.  */
6037 
6038 tree
dr_zero_step_indicator(struct data_reference * dr)6039 dr_zero_step_indicator (struct data_reference *dr)
6040 {
6041   return dr_step_indicator (dr, 1);
6042 }
6043 
6044 /* Return true if DR is known to have a nonnegative (but possibly zero)
6045    step.  */
6046 
6047 bool
dr_known_forward_stride_p(struct data_reference * dr)6048 dr_known_forward_stride_p (struct data_reference *dr)
6049 {
6050   tree indicator = dr_direction_indicator (dr);
6051   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6052 				   fold_convert (ssizetype, indicator),
6053 				   ssize_int (0));
6054   return neg_step_val && integer_zerop (neg_step_val);
6055 }
6056