1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
24
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
30
31 The goals of this analysis are:
32
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
36
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
39
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
45
46 - to define a knowledge base for storing the data dependence
47 information,
48
49 - to define an interface to access this data.
50
51
52 Definitions:
53
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
58
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
63
64 References:
65
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
69
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
72
73
74 */
75
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "gimple-pretty-print.h"
80 #include "tree-flow.h"
81 #include "cfgloop.h"
82 #include "tree-data-ref.h"
83 #include "tree-scalar-evolution.h"
84 #include "dumpfile.h"
85 #include "langhooks.h"
86 #include "tree-affine.h"
87 #include "params.h"
88
89 static struct datadep_stats
90 {
91 int num_dependence_tests;
92 int num_dependence_dependent;
93 int num_dependence_independent;
94 int num_dependence_undetermined;
95
96 int num_subscript_tests;
97 int num_subscript_undetermined;
98 int num_same_subscript_function;
99
100 int num_ziv;
101 int num_ziv_independent;
102 int num_ziv_dependent;
103 int num_ziv_unimplemented;
104
105 int num_siv;
106 int num_siv_independent;
107 int num_siv_dependent;
108 int num_siv_unimplemented;
109
110 int num_miv;
111 int num_miv_independent;
112 int num_miv_dependent;
113 int num_miv_unimplemented;
114 } dependence_stats;
115
116 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
117 struct data_reference *,
118 struct data_reference *,
119 struct loop *);
120 /* Returns true iff A divides B. */
121
122 static inline bool
tree_fold_divides_p(const_tree a,const_tree b)123 tree_fold_divides_p (const_tree a, const_tree b)
124 {
125 gcc_assert (TREE_CODE (a) == INTEGER_CST);
126 gcc_assert (TREE_CODE (b) == INTEGER_CST);
127 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
128 }
129
130 /* Returns true iff A divides B. */
131
132 static inline bool
int_divides_p(int a,int b)133 int_divides_p (int a, int b)
134 {
135 return ((b % a) == 0);
136 }
137
138
139
140 /* Dump into FILE all the data references from DATAREFS. */
141
142 static void
dump_data_references(FILE * file,vec<data_reference_p> datarefs)143 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
144 {
145 unsigned int i;
146 struct data_reference *dr;
147
148 FOR_EACH_VEC_ELT (datarefs, i, dr)
149 dump_data_reference (file, dr);
150 }
151
152 /* Dump into STDERR all the data references from DATAREFS. */
153
154 DEBUG_FUNCTION void
debug_data_references(vec<data_reference_p> datarefs)155 debug_data_references (vec<data_reference_p> datarefs)
156 {
157 dump_data_references (stderr, datarefs);
158 }
159
160 /* Print to STDERR the data_reference DR. */
161
162 DEBUG_FUNCTION void
debug_data_reference(struct data_reference * dr)163 debug_data_reference (struct data_reference *dr)
164 {
165 dump_data_reference (stderr, dr);
166 }
167
168 /* Dump function for a DATA_REFERENCE structure. */
169
170 void
dump_data_reference(FILE * outf,struct data_reference * dr)171 dump_data_reference (FILE *outf,
172 struct data_reference *dr)
173 {
174 unsigned int i;
175
176 fprintf (outf, "#(Data Ref: \n");
177 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
178 fprintf (outf, "# stmt: ");
179 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
180 fprintf (outf, "# ref: ");
181 print_generic_stmt (outf, DR_REF (dr), 0);
182 fprintf (outf, "# base_object: ");
183 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
184
185 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
186 {
187 fprintf (outf, "# Access function %d: ", i);
188 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
189 }
190 fprintf (outf, "#)\n");
191 }
192
193 /* Dumps the affine function described by FN to the file OUTF. */
194
195 static void
dump_affine_function(FILE * outf,affine_fn fn)196 dump_affine_function (FILE *outf, affine_fn fn)
197 {
198 unsigned i;
199 tree coef;
200
201 print_generic_expr (outf, fn[0], TDF_SLIM);
202 for (i = 1; fn.iterate (i, &coef); i++)
203 {
204 fprintf (outf, " + ");
205 print_generic_expr (outf, coef, TDF_SLIM);
206 fprintf (outf, " * x_%u", i);
207 }
208 }
209
210 /* Dumps the conflict function CF to the file OUTF. */
211
212 static void
dump_conflict_function(FILE * outf,conflict_function * cf)213 dump_conflict_function (FILE *outf, conflict_function *cf)
214 {
215 unsigned i;
216
217 if (cf->n == NO_DEPENDENCE)
218 fprintf (outf, "no dependence");
219 else if (cf->n == NOT_KNOWN)
220 fprintf (outf, "not known");
221 else
222 {
223 for (i = 0; i < cf->n; i++)
224 {
225 if (i != 0)
226 fprintf (outf, " ");
227 fprintf (outf, "[");
228 dump_affine_function (outf, cf->fns[i]);
229 fprintf (outf, "]");
230 }
231 }
232 }
233
234 /* Dump function for a SUBSCRIPT structure. */
235
236 static void
dump_subscript(FILE * outf,struct subscript * subscript)237 dump_subscript (FILE *outf, struct subscript *subscript)
238 {
239 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
240
241 fprintf (outf, "\n (subscript \n");
242 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
243 dump_conflict_function (outf, cf);
244 if (CF_NONTRIVIAL_P (cf))
245 {
246 tree last_iteration = SUB_LAST_CONFLICT (subscript);
247 fprintf (outf, "\n last_conflict: ");
248 print_generic_expr (outf, last_iteration, 0);
249 }
250
251 cf = SUB_CONFLICTS_IN_B (subscript);
252 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
253 dump_conflict_function (outf, cf);
254 if (CF_NONTRIVIAL_P (cf))
255 {
256 tree last_iteration = SUB_LAST_CONFLICT (subscript);
257 fprintf (outf, "\n last_conflict: ");
258 print_generic_expr (outf, last_iteration, 0);
259 }
260
261 fprintf (outf, "\n (Subscript distance: ");
262 print_generic_expr (outf, SUB_DISTANCE (subscript), 0);
263 fprintf (outf, " ))\n");
264 }
265
266 /* Print the classic direction vector DIRV to OUTF. */
267
268 static void
print_direction_vector(FILE * outf,lambda_vector dirv,int length)269 print_direction_vector (FILE *outf,
270 lambda_vector dirv,
271 int length)
272 {
273 int eq;
274
275 for (eq = 0; eq < length; eq++)
276 {
277 enum data_dependence_direction dir = ((enum data_dependence_direction)
278 dirv[eq]);
279
280 switch (dir)
281 {
282 case dir_positive:
283 fprintf (outf, " +");
284 break;
285 case dir_negative:
286 fprintf (outf, " -");
287 break;
288 case dir_equal:
289 fprintf (outf, " =");
290 break;
291 case dir_positive_or_equal:
292 fprintf (outf, " +=");
293 break;
294 case dir_positive_or_negative:
295 fprintf (outf, " +-");
296 break;
297 case dir_negative_or_equal:
298 fprintf (outf, " -=");
299 break;
300 case dir_star:
301 fprintf (outf, " *");
302 break;
303 default:
304 fprintf (outf, "indep");
305 break;
306 }
307 }
308 fprintf (outf, "\n");
309 }
310
311 /* Print a vector of direction vectors. */
312
313 static void
print_dir_vectors(FILE * outf,vec<lambda_vector> dir_vects,int length)314 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
315 int length)
316 {
317 unsigned j;
318 lambda_vector v;
319
320 FOR_EACH_VEC_ELT (dir_vects, j, v)
321 print_direction_vector (outf, v, length);
322 }
323
324 /* Print out a vector VEC of length N to OUTFILE. */
325
326 static inline void
print_lambda_vector(FILE * outfile,lambda_vector vector,int n)327 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
328 {
329 int i;
330
331 for (i = 0; i < n; i++)
332 fprintf (outfile, "%3d ", vector[i]);
333 fprintf (outfile, "\n");
334 }
335
336 /* Print a vector of distance vectors. */
337
338 static void
print_dist_vectors(FILE * outf,vec<lambda_vector> dist_vects,int length)339 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
340 int length)
341 {
342 unsigned j;
343 lambda_vector v;
344
345 FOR_EACH_VEC_ELT (dist_vects, j, v)
346 print_lambda_vector (outf, v, length);
347 }
348
349 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
350
351 static void
dump_data_dependence_relation(FILE * outf,struct data_dependence_relation * ddr)352 dump_data_dependence_relation (FILE *outf,
353 struct data_dependence_relation *ddr)
354 {
355 struct data_reference *dra, *drb;
356
357 fprintf (outf, "(Data Dep: \n");
358
359 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
360 {
361 if (ddr)
362 {
363 dra = DDR_A (ddr);
364 drb = DDR_B (ddr);
365 if (dra)
366 dump_data_reference (outf, dra);
367 else
368 fprintf (outf, " (nil)\n");
369 if (drb)
370 dump_data_reference (outf, drb);
371 else
372 fprintf (outf, " (nil)\n");
373 }
374 fprintf (outf, " (don't know)\n)\n");
375 return;
376 }
377
378 dra = DDR_A (ddr);
379 drb = DDR_B (ddr);
380 dump_data_reference (outf, dra);
381 dump_data_reference (outf, drb);
382
383 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
384 fprintf (outf, " (no dependence)\n");
385
386 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
387 {
388 unsigned int i;
389 struct loop *loopi;
390
391 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
392 {
393 fprintf (outf, " access_fn_A: ");
394 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
395 fprintf (outf, " access_fn_B: ");
396 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
397 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
398 }
399
400 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
401 fprintf (outf, " loop nest: (");
402 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
403 fprintf (outf, "%d ", loopi->num);
404 fprintf (outf, ")\n");
405
406 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
407 {
408 fprintf (outf, " distance_vector: ");
409 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
410 DDR_NB_LOOPS (ddr));
411 }
412
413 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
414 {
415 fprintf (outf, " direction_vector: ");
416 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
417 DDR_NB_LOOPS (ddr));
418 }
419 }
420
421 fprintf (outf, ")\n");
422 }
423
424 /* Debug version. */
425
426 DEBUG_FUNCTION void
debug_data_dependence_relation(struct data_dependence_relation * ddr)427 debug_data_dependence_relation (struct data_dependence_relation *ddr)
428 {
429 dump_data_dependence_relation (stderr, ddr);
430 }
431
432 /* Dump into FILE all the dependence relations from DDRS. */
433
434 void
dump_data_dependence_relations(FILE * file,vec<ddr_p> ddrs)435 dump_data_dependence_relations (FILE *file,
436 vec<ddr_p> ddrs)
437 {
438 unsigned int i;
439 struct data_dependence_relation *ddr;
440
441 FOR_EACH_VEC_ELT (ddrs, i, ddr)
442 dump_data_dependence_relation (file, ddr);
443 }
444
445 /* Dump to STDERR all the dependence relations from DDRS. */
446
447 DEBUG_FUNCTION void
debug_data_dependence_relations(vec<ddr_p> ddrs)448 debug_data_dependence_relations (vec<ddr_p> ddrs)
449 {
450 dump_data_dependence_relations (stderr, ddrs);
451 }
452
453 /* Dumps the distance and direction vectors in FILE. DDRS contains
454 the dependence relations, and VECT_SIZE is the size of the
455 dependence vectors, or in other words the number of loops in the
456 considered nest. */
457
458 static void
dump_dist_dir_vectors(FILE * file,vec<ddr_p> ddrs)459 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
460 {
461 unsigned int i, j;
462 struct data_dependence_relation *ddr;
463 lambda_vector v;
464
465 FOR_EACH_VEC_ELT (ddrs, i, ddr)
466 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
467 {
468 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
469 {
470 fprintf (file, "DISTANCE_V (");
471 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
472 fprintf (file, ")\n");
473 }
474
475 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
476 {
477 fprintf (file, "DIRECTION_V (");
478 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
479 fprintf (file, ")\n");
480 }
481 }
482
483 fprintf (file, "\n\n");
484 }
485
486 /* Dumps the data dependence relations DDRS in FILE. */
487
488 static void
dump_ddrs(FILE * file,vec<ddr_p> ddrs)489 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
490 {
491 unsigned int i;
492 struct data_dependence_relation *ddr;
493
494 FOR_EACH_VEC_ELT (ddrs, i, ddr)
495 dump_data_dependence_relation (file, ddr);
496
497 fprintf (file, "\n\n");
498 }
499
500 DEBUG_FUNCTION void
debug_ddrs(vec<ddr_p> ddrs)501 debug_ddrs (vec<ddr_p> ddrs)
502 {
503 dump_ddrs (stderr, ddrs);
504 }
505
506 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
507 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
508 constant of type ssizetype, and returns true. If we cannot do this
509 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
510 is returned. */
511
512 static bool
split_constant_offset_1(tree type,tree op0,enum tree_code code,tree op1,tree * var,tree * off)513 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
514 tree *var, tree *off)
515 {
516 tree var0, var1;
517 tree off0, off1;
518 enum tree_code ocode = code;
519
520 *var = NULL_TREE;
521 *off = NULL_TREE;
522
523 switch (code)
524 {
525 case INTEGER_CST:
526 *var = build_int_cst (type, 0);
527 *off = fold_convert (ssizetype, op0);
528 return true;
529
530 case POINTER_PLUS_EXPR:
531 ocode = PLUS_EXPR;
532 /* FALLTHROUGH */
533 case PLUS_EXPR:
534 case MINUS_EXPR:
535 split_constant_offset (op0, &var0, &off0);
536 split_constant_offset (op1, &var1, &off1);
537 *var = fold_build2 (code, type, var0, var1);
538 *off = size_binop (ocode, off0, off1);
539 return true;
540
541 case MULT_EXPR:
542 if (TREE_CODE (op1) != INTEGER_CST)
543 return false;
544
545 split_constant_offset (op0, &var0, &off0);
546 *var = fold_build2 (MULT_EXPR, type, var0, op1);
547 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
548 return true;
549
550 case ADDR_EXPR:
551 {
552 tree base, poffset;
553 HOST_WIDE_INT pbitsize, pbitpos;
554 enum machine_mode pmode;
555 int punsignedp, pvolatilep;
556
557 op0 = TREE_OPERAND (op0, 0);
558 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
559 &pmode, &punsignedp, &pvolatilep, false);
560
561 if (pbitpos % BITS_PER_UNIT != 0)
562 return false;
563 base = build_fold_addr_expr (base);
564 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
565
566 if (poffset)
567 {
568 split_constant_offset (poffset, &poffset, &off1);
569 off0 = size_binop (PLUS_EXPR, off0, off1);
570 if (POINTER_TYPE_P (TREE_TYPE (base)))
571 base = fold_build_pointer_plus (base, poffset);
572 else
573 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
574 fold_convert (TREE_TYPE (base), poffset));
575 }
576
577 var0 = fold_convert (type, base);
578
579 /* If variable length types are involved, punt, otherwise casts
580 might be converted into ARRAY_REFs in gimplify_conversion.
581 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
582 possibly no longer appears in current GIMPLE, might resurface.
583 This perhaps could run
584 if (CONVERT_EXPR_P (var0))
585 {
586 gimplify_conversion (&var0);
587 // Attempt to fill in any within var0 found ARRAY_REF's
588 // element size from corresponding op embedded ARRAY_REF,
589 // if unsuccessful, just punt.
590 } */
591 while (POINTER_TYPE_P (type))
592 type = TREE_TYPE (type);
593 if (int_size_in_bytes (type) < 0)
594 return false;
595
596 *var = var0;
597 *off = off0;
598 return true;
599 }
600
601 case SSA_NAME:
602 {
603 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
604 enum tree_code subcode;
605
606 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
607 return false;
608
609 var0 = gimple_assign_rhs1 (def_stmt);
610 subcode = gimple_assign_rhs_code (def_stmt);
611 var1 = gimple_assign_rhs2 (def_stmt);
612
613 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
614 }
615 CASE_CONVERT:
616 {
617 /* We must not introduce undefined overflow, and we must not change the value.
618 Hence we're okay if the inner type doesn't overflow to start with
619 (pointer or signed), the outer type also is an integer or pointer
620 and the outer precision is at least as large as the inner. */
621 tree itype = TREE_TYPE (op0);
622 if ((POINTER_TYPE_P (itype)
623 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
624 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
625 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
626 {
627 split_constant_offset (op0, &var0, off);
628 *var = fold_convert (type, var0);
629 return true;
630 }
631 return false;
632 }
633
634 default:
635 return false;
636 }
637 }
638
639 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
640 will be ssizetype. */
641
642 void
split_constant_offset(tree exp,tree * var,tree * off)643 split_constant_offset (tree exp, tree *var, tree *off)
644 {
645 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
646 enum tree_code code;
647
648 *var = exp;
649 *off = ssize_int (0);
650 STRIP_NOPS (exp);
651
652 if (tree_is_chrec (exp)
653 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
654 return;
655
656 otype = TREE_TYPE (exp);
657 code = TREE_CODE (exp);
658 extract_ops_from_tree (exp, &code, &op0, &op1);
659 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
660 {
661 *var = fold_convert (type, e);
662 *off = o;
663 }
664 }
665
666 /* Returns the address ADDR of an object in a canonical shape (without nop
667 casts, and with type of pointer to the object). */
668
669 static tree
canonicalize_base_object_address(tree addr)670 canonicalize_base_object_address (tree addr)
671 {
672 tree orig = addr;
673
674 STRIP_NOPS (addr);
675
676 /* The base address may be obtained by casting from integer, in that case
677 keep the cast. */
678 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
679 return orig;
680
681 if (TREE_CODE (addr) != ADDR_EXPR)
682 return addr;
683
684 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
685 }
686
687 /* Analyzes the behavior of the memory reference DR in the innermost loop or
688 basic block that contains it. Returns true if analysis succeed or false
689 otherwise. */
690
691 bool
dr_analyze_innermost(struct data_reference * dr,struct loop * nest)692 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
693 {
694 gimple stmt = DR_STMT (dr);
695 struct loop *loop = loop_containing_stmt (stmt);
696 tree ref = DR_REF (dr);
697 HOST_WIDE_INT pbitsize, pbitpos;
698 tree base, poffset;
699 enum machine_mode pmode;
700 int punsignedp, pvolatilep;
701 affine_iv base_iv, offset_iv;
702 tree init, dinit, step;
703 bool in_loop = (loop && loop->num);
704
705 if (dump_file && (dump_flags & TDF_DETAILS))
706 fprintf (dump_file, "analyze_innermost: ");
707
708 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
709 &pmode, &punsignedp, &pvolatilep, false);
710 gcc_assert (base != NULL_TREE);
711
712 if (pbitpos % BITS_PER_UNIT != 0)
713 {
714 if (dump_file && (dump_flags & TDF_DETAILS))
715 fprintf (dump_file, "failed: bit offset alignment.\n");
716 return false;
717 }
718
719 if (TREE_CODE (base) == MEM_REF)
720 {
721 if (!integer_zerop (TREE_OPERAND (base, 1)))
722 {
723 double_int moff = mem_ref_offset (base);
724 tree mofft = double_int_to_tree (sizetype, moff);
725 if (!poffset)
726 poffset = mofft;
727 else
728 poffset = size_binop (PLUS_EXPR, poffset, mofft);
729 }
730 base = TREE_OPERAND (base, 0);
731 }
732 else
733 base = build_fold_addr_expr (base);
734
735 if (in_loop)
736 {
737 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
738 nest ? true : false))
739 {
740 if (nest)
741 {
742 if (dump_file && (dump_flags & TDF_DETAILS))
743 fprintf (dump_file, "failed: evolution of base is not"
744 " affine.\n");
745 return false;
746 }
747 else
748 {
749 base_iv.base = base;
750 base_iv.step = ssize_int (0);
751 base_iv.no_overflow = true;
752 }
753 }
754 }
755 else
756 {
757 base_iv.base = base;
758 base_iv.step = ssize_int (0);
759 base_iv.no_overflow = true;
760 }
761
762 if (!poffset)
763 {
764 offset_iv.base = ssize_int (0);
765 offset_iv.step = ssize_int (0);
766 }
767 else
768 {
769 if (!in_loop)
770 {
771 offset_iv.base = poffset;
772 offset_iv.step = ssize_int (0);
773 }
774 else if (!simple_iv (loop, loop_containing_stmt (stmt),
775 poffset, &offset_iv,
776 nest ? true : false))
777 {
778 if (nest)
779 {
780 if (dump_file && (dump_flags & TDF_DETAILS))
781 fprintf (dump_file, "failed: evolution of offset is not"
782 " affine.\n");
783 return false;
784 }
785 else
786 {
787 offset_iv.base = poffset;
788 offset_iv.step = ssize_int (0);
789 }
790 }
791 }
792
793 init = ssize_int (pbitpos / BITS_PER_UNIT);
794 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
795 init = size_binop (PLUS_EXPR, init, dinit);
796 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
797 init = size_binop (PLUS_EXPR, init, dinit);
798
799 step = size_binop (PLUS_EXPR,
800 fold_convert (ssizetype, base_iv.step),
801 fold_convert (ssizetype, offset_iv.step));
802
803 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
804
805 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
806 DR_INIT (dr) = init;
807 DR_STEP (dr) = step;
808
809 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
810
811 if (dump_file && (dump_flags & TDF_DETAILS))
812 fprintf (dump_file, "success.\n");
813
814 return true;
815 }
816
817 /* Determines the base object and the list of indices of memory reference
818 DR, analyzed in LOOP and instantiated in loop nest NEST. */
819
820 static void
dr_analyze_indices(struct data_reference * dr,loop_p nest,loop_p loop)821 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
822 {
823 vec<tree> access_fns = vNULL;
824 tree ref, op;
825 tree base, off, access_fn;
826 basic_block before_loop;
827
828 /* If analyzing a basic-block there are no indices to analyze
829 and thus no access functions. */
830 if (!nest)
831 {
832 DR_BASE_OBJECT (dr) = DR_REF (dr);
833 DR_ACCESS_FNS (dr).create (0);
834 return;
835 }
836
837 ref = DR_REF (dr);
838 before_loop = block_before_loop (nest);
839
840 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
841 into a two element array with a constant index. The base is
842 then just the immediate underlying object. */
843 if (TREE_CODE (ref) == REALPART_EXPR)
844 {
845 ref = TREE_OPERAND (ref, 0);
846 access_fns.safe_push (integer_zero_node);
847 }
848 else if (TREE_CODE (ref) == IMAGPART_EXPR)
849 {
850 ref = TREE_OPERAND (ref, 0);
851 access_fns.safe_push (integer_one_node);
852 }
853
854 /* Analyze access functions of dimensions we know to be independent. */
855 while (handled_component_p (ref))
856 {
857 if (TREE_CODE (ref) == ARRAY_REF)
858 {
859 op = TREE_OPERAND (ref, 1);
860 access_fn = analyze_scalar_evolution (loop, op);
861 access_fn = instantiate_scev (before_loop, loop, access_fn);
862 access_fns.safe_push (access_fn);
863 }
864 else if (TREE_CODE (ref) == COMPONENT_REF
865 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
866 {
867 /* For COMPONENT_REFs of records (but not unions!) use the
868 FIELD_DECL offset as constant access function so we can
869 disambiguate a[i].f1 and a[i].f2. */
870 tree off = component_ref_field_offset (ref);
871 off = size_binop (PLUS_EXPR,
872 size_binop (MULT_EXPR,
873 fold_convert (bitsizetype, off),
874 bitsize_int (BITS_PER_UNIT)),
875 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
876 access_fns.safe_push (off);
877 }
878 else
879 /* If we have an unhandled component we could not translate
880 to an access function stop analyzing. We have determined
881 our base object in this case. */
882 break;
883
884 ref = TREE_OPERAND (ref, 0);
885 }
886
887 /* If the address operand of a MEM_REF base has an evolution in the
888 analyzed nest, add it as an additional independent access-function. */
889 if (TREE_CODE (ref) == MEM_REF)
890 {
891 op = TREE_OPERAND (ref, 0);
892 access_fn = analyze_scalar_evolution (loop, op);
893 access_fn = instantiate_scev (before_loop, loop, access_fn);
894 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
895 {
896 tree orig_type;
897 tree memoff = TREE_OPERAND (ref, 1);
898 base = initial_condition (access_fn);
899 orig_type = TREE_TYPE (base);
900 STRIP_USELESS_TYPE_CONVERSION (base);
901 split_constant_offset (base, &base, &off);
902 /* Fold the MEM_REF offset into the evolutions initial
903 value to make more bases comparable. */
904 if (!integer_zerop (memoff))
905 {
906 off = size_binop (PLUS_EXPR, off,
907 fold_convert (ssizetype, memoff));
908 memoff = build_int_cst (TREE_TYPE (memoff), 0);
909 }
910 access_fn = chrec_replace_initial_condition
911 (access_fn, fold_convert (orig_type, off));
912 /* ??? This is still not a suitable base object for
913 dr_may_alias_p - the base object needs to be an
914 access that covers the object as whole. With
915 an evolution in the pointer this cannot be
916 guaranteed.
917 As a band-aid, mark the access so we can special-case
918 it in dr_may_alias_p. */
919 ref = fold_build2_loc (EXPR_LOCATION (ref),
920 MEM_REF, TREE_TYPE (ref),
921 base, memoff);
922 DR_UNCONSTRAINED_BASE (dr) = true;
923 access_fns.safe_push (access_fn);
924 }
925 }
926 else if (DECL_P (ref))
927 {
928 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
929 ref = build2 (MEM_REF, TREE_TYPE (ref),
930 build_fold_addr_expr (ref),
931 build_int_cst (reference_alias_ptr_type (ref), 0));
932 }
933
934 DR_BASE_OBJECT (dr) = ref;
935 DR_ACCESS_FNS (dr) = access_fns;
936 }
937
938 /* Extracts the alias analysis information from the memory reference DR. */
939
940 static void
dr_analyze_alias(struct data_reference * dr)941 dr_analyze_alias (struct data_reference *dr)
942 {
943 tree ref = DR_REF (dr);
944 tree base = get_base_address (ref), addr;
945
946 if (INDIRECT_REF_P (base)
947 || TREE_CODE (base) == MEM_REF)
948 {
949 addr = TREE_OPERAND (base, 0);
950 if (TREE_CODE (addr) == SSA_NAME)
951 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
952 }
953 }
954
955 /* Frees data reference DR. */
956
957 void
free_data_ref(data_reference_p dr)958 free_data_ref (data_reference_p dr)
959 {
960 DR_ACCESS_FNS (dr).release ();
961 free (dr);
962 }
963
964 /* Analyzes memory reference MEMREF accessed in STMT. The reference
965 is read if IS_READ is true, write otherwise. Returns the
966 data_reference description of MEMREF. NEST is the outermost loop
967 in which the reference should be instantiated, LOOP is the loop in
968 which the data reference should be analyzed. */
969
970 struct data_reference *
create_data_ref(loop_p nest,loop_p loop,tree memref,gimple stmt,bool is_read)971 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
972 bool is_read)
973 {
974 struct data_reference *dr;
975
976 if (dump_file && (dump_flags & TDF_DETAILS))
977 {
978 fprintf (dump_file, "Creating dr for ");
979 print_generic_expr (dump_file, memref, TDF_SLIM);
980 fprintf (dump_file, "\n");
981 }
982
983 dr = XCNEW (struct data_reference);
984 DR_STMT (dr) = stmt;
985 DR_REF (dr) = memref;
986 DR_IS_READ (dr) = is_read;
987
988 dr_analyze_innermost (dr, nest);
989 dr_analyze_indices (dr, nest, loop);
990 dr_analyze_alias (dr);
991
992 if (dump_file && (dump_flags & TDF_DETAILS))
993 {
994 unsigned i;
995 fprintf (dump_file, "\tbase_address: ");
996 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
997 fprintf (dump_file, "\n\toffset from base address: ");
998 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
999 fprintf (dump_file, "\n\tconstant offset from base address: ");
1000 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1001 fprintf (dump_file, "\n\tstep: ");
1002 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1003 fprintf (dump_file, "\n\taligned to: ");
1004 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1005 fprintf (dump_file, "\n\tbase_object: ");
1006 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1007 fprintf (dump_file, "\n");
1008 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1009 {
1010 fprintf (dump_file, "\tAccess function %d: ", i);
1011 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1012 }
1013 }
1014
1015 return dr;
1016 }
1017
1018 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1019 expressions. */
1020 static bool
dr_equal_offsets_p1(tree offset1,tree offset2)1021 dr_equal_offsets_p1 (tree offset1, tree offset2)
1022 {
1023 bool res;
1024
1025 STRIP_NOPS (offset1);
1026 STRIP_NOPS (offset2);
1027
1028 if (offset1 == offset2)
1029 return true;
1030
1031 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1032 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1033 return false;
1034
1035 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1036 TREE_OPERAND (offset2, 0));
1037
1038 if (!res || !BINARY_CLASS_P (offset1))
1039 return res;
1040
1041 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1042 TREE_OPERAND (offset2, 1));
1043
1044 return res;
1045 }
1046
1047 /* Check if DRA and DRB have equal offsets. */
1048 bool
dr_equal_offsets_p(struct data_reference * dra,struct data_reference * drb)1049 dr_equal_offsets_p (struct data_reference *dra,
1050 struct data_reference *drb)
1051 {
1052 tree offset1, offset2;
1053
1054 offset1 = DR_OFFSET (dra);
1055 offset2 = DR_OFFSET (drb);
1056
1057 return dr_equal_offsets_p1 (offset1, offset2);
1058 }
1059
1060 /* Returns true if FNA == FNB. */
1061
1062 static bool
affine_function_equal_p(affine_fn fna,affine_fn fnb)1063 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1064 {
1065 unsigned i, n = fna.length ();
1066
1067 if (n != fnb.length ())
1068 return false;
1069
1070 for (i = 0; i < n; i++)
1071 if (!operand_equal_p (fna[i], fnb[i], 0))
1072 return false;
1073
1074 return true;
1075 }
1076
1077 /* If all the functions in CF are the same, returns one of them,
1078 otherwise returns NULL. */
1079
1080 static affine_fn
common_affine_function(conflict_function * cf)1081 common_affine_function (conflict_function *cf)
1082 {
1083 unsigned i;
1084 affine_fn comm;
1085
1086 if (!CF_NONTRIVIAL_P (cf))
1087 return affine_fn();
1088
1089 comm = cf->fns[0];
1090
1091 for (i = 1; i < cf->n; i++)
1092 if (!affine_function_equal_p (comm, cf->fns[i]))
1093 return affine_fn();
1094
1095 return comm;
1096 }
1097
1098 /* Returns the base of the affine function FN. */
1099
1100 static tree
affine_function_base(affine_fn fn)1101 affine_function_base (affine_fn fn)
1102 {
1103 return fn[0];
1104 }
1105
1106 /* Returns true if FN is a constant. */
1107
1108 static bool
affine_function_constant_p(affine_fn fn)1109 affine_function_constant_p (affine_fn fn)
1110 {
1111 unsigned i;
1112 tree coef;
1113
1114 for (i = 1; fn.iterate (i, &coef); i++)
1115 if (!integer_zerop (coef))
1116 return false;
1117
1118 return true;
1119 }
1120
1121 /* Returns true if FN is the zero constant function. */
1122
1123 static bool
affine_function_zero_p(affine_fn fn)1124 affine_function_zero_p (affine_fn fn)
1125 {
1126 return (integer_zerop (affine_function_base (fn))
1127 && affine_function_constant_p (fn));
1128 }
1129
1130 /* Returns a signed integer type with the largest precision from TA
1131 and TB. */
1132
1133 static tree
signed_type_for_types(tree ta,tree tb)1134 signed_type_for_types (tree ta, tree tb)
1135 {
1136 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1137 return signed_type_for (ta);
1138 else
1139 return signed_type_for (tb);
1140 }
1141
1142 /* Applies operation OP on affine functions FNA and FNB, and returns the
1143 result. */
1144
1145 static affine_fn
affine_fn_op(enum tree_code op,affine_fn fna,affine_fn fnb)1146 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1147 {
1148 unsigned i, n, m;
1149 affine_fn ret;
1150 tree coef;
1151
1152 if (fnb.length () > fna.length ())
1153 {
1154 n = fna.length ();
1155 m = fnb.length ();
1156 }
1157 else
1158 {
1159 n = fnb.length ();
1160 m = fna.length ();
1161 }
1162
1163 ret.create (m);
1164 for (i = 0; i < n; i++)
1165 {
1166 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1167 TREE_TYPE (fnb[i]));
1168 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1169 }
1170
1171 for (; fna.iterate (i, &coef); i++)
1172 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1173 coef, integer_zero_node));
1174 for (; fnb.iterate (i, &coef); i++)
1175 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1176 integer_zero_node, coef));
1177
1178 return ret;
1179 }
1180
1181 /* Returns the sum of affine functions FNA and FNB. */
1182
1183 static affine_fn
affine_fn_plus(affine_fn fna,affine_fn fnb)1184 affine_fn_plus (affine_fn fna, affine_fn fnb)
1185 {
1186 return affine_fn_op (PLUS_EXPR, fna, fnb);
1187 }
1188
1189 /* Returns the difference of affine functions FNA and FNB. */
1190
1191 static affine_fn
affine_fn_minus(affine_fn fna,affine_fn fnb)1192 affine_fn_minus (affine_fn fna, affine_fn fnb)
1193 {
1194 return affine_fn_op (MINUS_EXPR, fna, fnb);
1195 }
1196
1197 /* Frees affine function FN. */
1198
1199 static void
affine_fn_free(affine_fn fn)1200 affine_fn_free (affine_fn fn)
1201 {
1202 fn.release ();
1203 }
1204
1205 /* Determine for each subscript in the data dependence relation DDR
1206 the distance. */
1207
1208 static void
compute_subscript_distance(struct data_dependence_relation * ddr)1209 compute_subscript_distance (struct data_dependence_relation *ddr)
1210 {
1211 conflict_function *cf_a, *cf_b;
1212 affine_fn fn_a, fn_b, diff;
1213
1214 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1215 {
1216 unsigned int i;
1217
1218 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1219 {
1220 struct subscript *subscript;
1221
1222 subscript = DDR_SUBSCRIPT (ddr, i);
1223 cf_a = SUB_CONFLICTS_IN_A (subscript);
1224 cf_b = SUB_CONFLICTS_IN_B (subscript);
1225
1226 fn_a = common_affine_function (cf_a);
1227 fn_b = common_affine_function (cf_b);
1228 if (!fn_a.exists () || !fn_b.exists ())
1229 {
1230 SUB_DISTANCE (subscript) = chrec_dont_know;
1231 return;
1232 }
1233 diff = affine_fn_minus (fn_a, fn_b);
1234
1235 if (affine_function_constant_p (diff))
1236 SUB_DISTANCE (subscript) = affine_function_base (diff);
1237 else
1238 SUB_DISTANCE (subscript) = chrec_dont_know;
1239
1240 affine_fn_free (diff);
1241 }
1242 }
1243 }
1244
1245 /* Returns the conflict function for "unknown". */
1246
1247 static conflict_function *
conflict_fn_not_known(void)1248 conflict_fn_not_known (void)
1249 {
1250 conflict_function *fn = XCNEW (conflict_function);
1251 fn->n = NOT_KNOWN;
1252
1253 return fn;
1254 }
1255
1256 /* Returns the conflict function for "independent". */
1257
1258 static conflict_function *
conflict_fn_no_dependence(void)1259 conflict_fn_no_dependence (void)
1260 {
1261 conflict_function *fn = XCNEW (conflict_function);
1262 fn->n = NO_DEPENDENCE;
1263
1264 return fn;
1265 }
1266
1267 /* Returns true if the address of OBJ is invariant in LOOP. */
1268
1269 static bool
object_address_invariant_in_loop_p(const struct loop * loop,const_tree obj)1270 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1271 {
1272 while (handled_component_p (obj))
1273 {
1274 if (TREE_CODE (obj) == ARRAY_REF)
1275 {
1276 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1277 need to check the stride and the lower bound of the reference. */
1278 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1279 loop->num)
1280 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1281 loop->num))
1282 return false;
1283 }
1284 else if (TREE_CODE (obj) == COMPONENT_REF)
1285 {
1286 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1287 loop->num))
1288 return false;
1289 }
1290 obj = TREE_OPERAND (obj, 0);
1291 }
1292
1293 if (!INDIRECT_REF_P (obj)
1294 && TREE_CODE (obj) != MEM_REF)
1295 return true;
1296
1297 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1298 loop->num);
1299 }
1300
1301 /* Returns false if we can prove that data references A and B do not alias,
1302 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1303 considered. */
1304
1305 bool
dr_may_alias_p(const struct data_reference * a,const struct data_reference * b,bool loop_nest)1306 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1307 bool loop_nest)
1308 {
1309 tree addr_a = DR_BASE_OBJECT (a);
1310 tree addr_b = DR_BASE_OBJECT (b);
1311
1312 /* If we are not processing a loop nest but scalar code we
1313 do not need to care about possible cross-iteration dependences
1314 and thus can process the full original reference. Do so,
1315 similar to how loop invariant motion applies extra offset-based
1316 disambiguation. */
1317 if (!loop_nest)
1318 {
1319 aff_tree off1, off2;
1320 double_int size1, size2;
1321 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1322 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1323 aff_combination_scale (&off1, double_int_minus_one);
1324 aff_combination_add (&off2, &off1);
1325 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1326 return false;
1327 }
1328
1329 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
1330 do not know the size of the base-object. So we cannot do any
1331 offset/overlap based analysis but have to rely on points-to
1332 information only. */
1333 if (TREE_CODE (addr_a) == MEM_REF
1334 && (DR_UNCONSTRAINED_BASE (a)
1335 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
1336 {
1337 /* For true dependences we can apply TBAA. */
1338 if (flag_strict_aliasing
1339 && DR_IS_WRITE (a) && DR_IS_READ (b)
1340 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1341 get_alias_set (DR_REF (b))))
1342 return false;
1343 if (TREE_CODE (addr_b) == MEM_REF)
1344 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1345 TREE_OPERAND (addr_b, 0));
1346 else
1347 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1348 build_fold_addr_expr (addr_b));
1349 }
1350 else if (TREE_CODE (addr_b) == MEM_REF
1351 && (DR_UNCONSTRAINED_BASE (b)
1352 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
1353 {
1354 /* For true dependences we can apply TBAA. */
1355 if (flag_strict_aliasing
1356 && DR_IS_WRITE (a) && DR_IS_READ (b)
1357 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1358 get_alias_set (DR_REF (b))))
1359 return false;
1360 if (TREE_CODE (addr_a) == MEM_REF)
1361 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1362 TREE_OPERAND (addr_b, 0));
1363 else
1364 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1365 TREE_OPERAND (addr_b, 0));
1366 }
1367
1368 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1369 that is being subsetted in the loop nest. */
1370 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1371 return refs_output_dependent_p (addr_a, addr_b);
1372 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1373 return refs_anti_dependent_p (addr_a, addr_b);
1374 return refs_may_alias_p (addr_a, addr_b);
1375 }
1376
1377 /* Initialize a data dependence relation between data accesses A and
1378 B. NB_LOOPS is the number of loops surrounding the references: the
1379 size of the classic distance/direction vectors. */
1380
1381 struct data_dependence_relation *
initialize_data_dependence_relation(struct data_reference * a,struct data_reference * b,vec<loop_p> loop_nest)1382 initialize_data_dependence_relation (struct data_reference *a,
1383 struct data_reference *b,
1384 vec<loop_p> loop_nest)
1385 {
1386 struct data_dependence_relation *res;
1387 unsigned int i;
1388
1389 res = XNEW (struct data_dependence_relation);
1390 DDR_A (res) = a;
1391 DDR_B (res) = b;
1392 DDR_LOOP_NEST (res).create (0);
1393 DDR_REVERSED_P (res) = false;
1394 DDR_SUBSCRIPTS (res).create (0);
1395 DDR_DIR_VECTS (res).create (0);
1396 DDR_DIST_VECTS (res).create (0);
1397
1398 if (a == NULL || b == NULL)
1399 {
1400 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1401 return res;
1402 }
1403
1404 /* If the data references do not alias, then they are independent. */
1405 if (!dr_may_alias_p (a, b, loop_nest.exists ()))
1406 {
1407 DDR_ARE_DEPENDENT (res) = chrec_known;
1408 return res;
1409 }
1410
1411 /* The case where the references are exactly the same. */
1412 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1413 {
1414 if (loop_nest.exists ()
1415 && !object_address_invariant_in_loop_p (loop_nest[0],
1416 DR_BASE_OBJECT (a)))
1417 {
1418 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1419 return res;
1420 }
1421 DDR_AFFINE_P (res) = true;
1422 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1423 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1424 DDR_LOOP_NEST (res) = loop_nest;
1425 DDR_INNER_LOOP (res) = 0;
1426 DDR_SELF_REFERENCE (res) = true;
1427 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1428 {
1429 struct subscript *subscript;
1430
1431 subscript = XNEW (struct subscript);
1432 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1433 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1434 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1435 SUB_DISTANCE (subscript) = chrec_dont_know;
1436 DDR_SUBSCRIPTS (res).safe_push (subscript);
1437 }
1438 return res;
1439 }
1440
1441 /* If the references do not access the same object, we do not know
1442 whether they alias or not. */
1443 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1444 {
1445 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1446 return res;
1447 }
1448
1449 /* If the base of the object is not invariant in the loop nest, we cannot
1450 analyze it. TODO -- in fact, it would suffice to record that there may
1451 be arbitrary dependences in the loops where the base object varies. */
1452 if (loop_nest.exists ()
1453 && !object_address_invariant_in_loop_p (loop_nest[0],
1454 DR_BASE_OBJECT (a)))
1455 {
1456 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1457 return res;
1458 }
1459
1460 /* If the number of dimensions of the access to not agree we can have
1461 a pointer access to a component of the array element type and an
1462 array access while the base-objects are still the same. Punt. */
1463 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1464 {
1465 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1466 return res;
1467 }
1468
1469 DDR_AFFINE_P (res) = true;
1470 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1471 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1472 DDR_LOOP_NEST (res) = loop_nest;
1473 DDR_INNER_LOOP (res) = 0;
1474 DDR_SELF_REFERENCE (res) = false;
1475
1476 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1477 {
1478 struct subscript *subscript;
1479
1480 subscript = XNEW (struct subscript);
1481 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1482 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1483 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1484 SUB_DISTANCE (subscript) = chrec_dont_know;
1485 DDR_SUBSCRIPTS (res).safe_push (subscript);
1486 }
1487
1488 return res;
1489 }
1490
1491 /* Frees memory used by the conflict function F. */
1492
1493 static void
free_conflict_function(conflict_function * f)1494 free_conflict_function (conflict_function *f)
1495 {
1496 unsigned i;
1497
1498 if (CF_NONTRIVIAL_P (f))
1499 {
1500 for (i = 0; i < f->n; i++)
1501 affine_fn_free (f->fns[i]);
1502 }
1503 free (f);
1504 }
1505
1506 /* Frees memory used by SUBSCRIPTS. */
1507
1508 static void
free_subscripts(vec<subscript_p> subscripts)1509 free_subscripts (vec<subscript_p> subscripts)
1510 {
1511 unsigned i;
1512 subscript_p s;
1513
1514 FOR_EACH_VEC_ELT (subscripts, i, s)
1515 {
1516 free_conflict_function (s->conflicting_iterations_in_a);
1517 free_conflict_function (s->conflicting_iterations_in_b);
1518 free (s);
1519 }
1520 subscripts.release ();
1521 }
1522
1523 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1524 description. */
1525
1526 static inline void
finalize_ddr_dependent(struct data_dependence_relation * ddr,tree chrec)1527 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1528 tree chrec)
1529 {
1530 DDR_ARE_DEPENDENT (ddr) = chrec;
1531 free_subscripts (DDR_SUBSCRIPTS (ddr));
1532 DDR_SUBSCRIPTS (ddr).create (0);
1533 }
1534
1535 /* The dependence relation DDR cannot be represented by a distance
1536 vector. */
1537
1538 static inline void
non_affine_dependence_relation(struct data_dependence_relation * ddr)1539 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1540 {
1541 if (dump_file && (dump_flags & TDF_DETAILS))
1542 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1543
1544 DDR_AFFINE_P (ddr) = false;
1545 }
1546
1547
1548
1549 /* This section contains the classic Banerjee tests. */
1550
1551 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1552 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1553
1554 static inline bool
ziv_subscript_p(const_tree chrec_a,const_tree chrec_b)1555 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1556 {
1557 return (evolution_function_is_constant_p (chrec_a)
1558 && evolution_function_is_constant_p (chrec_b));
1559 }
1560
1561 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1562 variable, i.e., if the SIV (Single Index Variable) test is true. */
1563
1564 static bool
siv_subscript_p(const_tree chrec_a,const_tree chrec_b)1565 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1566 {
1567 if ((evolution_function_is_constant_p (chrec_a)
1568 && evolution_function_is_univariate_p (chrec_b))
1569 || (evolution_function_is_constant_p (chrec_b)
1570 && evolution_function_is_univariate_p (chrec_a)))
1571 return true;
1572
1573 if (evolution_function_is_univariate_p (chrec_a)
1574 && evolution_function_is_univariate_p (chrec_b))
1575 {
1576 switch (TREE_CODE (chrec_a))
1577 {
1578 case POLYNOMIAL_CHREC:
1579 switch (TREE_CODE (chrec_b))
1580 {
1581 case POLYNOMIAL_CHREC:
1582 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1583 return false;
1584
1585 default:
1586 return true;
1587 }
1588
1589 default:
1590 return true;
1591 }
1592 }
1593
1594 return false;
1595 }
1596
1597 /* Creates a conflict function with N dimensions. The affine functions
1598 in each dimension follow. */
1599
1600 static conflict_function *
conflict_fn(unsigned n,...)1601 conflict_fn (unsigned n, ...)
1602 {
1603 unsigned i;
1604 conflict_function *ret = XCNEW (conflict_function);
1605 va_list ap;
1606
1607 gcc_assert (0 < n && n <= MAX_DIM);
1608 va_start(ap, n);
1609
1610 ret->n = n;
1611 for (i = 0; i < n; i++)
1612 ret->fns[i] = va_arg (ap, affine_fn);
1613 va_end(ap);
1614
1615 return ret;
1616 }
1617
1618 /* Returns constant affine function with value CST. */
1619
1620 static affine_fn
affine_fn_cst(tree cst)1621 affine_fn_cst (tree cst)
1622 {
1623 affine_fn fn;
1624 fn.create (1);
1625 fn.quick_push (cst);
1626 return fn;
1627 }
1628
1629 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1630
1631 static affine_fn
affine_fn_univar(tree cst,unsigned dim,tree coef)1632 affine_fn_univar (tree cst, unsigned dim, tree coef)
1633 {
1634 affine_fn fn;
1635 fn.create (dim + 1);
1636 unsigned i;
1637
1638 gcc_assert (dim > 0);
1639 fn.quick_push (cst);
1640 for (i = 1; i < dim; i++)
1641 fn.quick_push (integer_zero_node);
1642 fn.quick_push (coef);
1643 return fn;
1644 }
1645
1646 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1647 *OVERLAPS_B are initialized to the functions that describe the
1648 relation between the elements accessed twice by CHREC_A and
1649 CHREC_B. For k >= 0, the following property is verified:
1650
1651 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1652
1653 static void
analyze_ziv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)1654 analyze_ziv_subscript (tree chrec_a,
1655 tree chrec_b,
1656 conflict_function **overlaps_a,
1657 conflict_function **overlaps_b,
1658 tree *last_conflicts)
1659 {
1660 tree type, difference;
1661 dependence_stats.num_ziv++;
1662
1663 if (dump_file && (dump_flags & TDF_DETAILS))
1664 fprintf (dump_file, "(analyze_ziv_subscript \n");
1665
1666 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1667 chrec_a = chrec_convert (type, chrec_a, NULL);
1668 chrec_b = chrec_convert (type, chrec_b, NULL);
1669 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1670
1671 switch (TREE_CODE (difference))
1672 {
1673 case INTEGER_CST:
1674 if (integer_zerop (difference))
1675 {
1676 /* The difference is equal to zero: the accessed index
1677 overlaps for each iteration in the loop. */
1678 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1679 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1680 *last_conflicts = chrec_dont_know;
1681 dependence_stats.num_ziv_dependent++;
1682 }
1683 else
1684 {
1685 /* The accesses do not overlap. */
1686 *overlaps_a = conflict_fn_no_dependence ();
1687 *overlaps_b = conflict_fn_no_dependence ();
1688 *last_conflicts = integer_zero_node;
1689 dependence_stats.num_ziv_independent++;
1690 }
1691 break;
1692
1693 default:
1694 /* We're not sure whether the indexes overlap. For the moment,
1695 conservatively answer "don't know". */
1696 if (dump_file && (dump_flags & TDF_DETAILS))
1697 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1698
1699 *overlaps_a = conflict_fn_not_known ();
1700 *overlaps_b = conflict_fn_not_known ();
1701 *last_conflicts = chrec_dont_know;
1702 dependence_stats.num_ziv_unimplemented++;
1703 break;
1704 }
1705
1706 if (dump_file && (dump_flags & TDF_DETAILS))
1707 fprintf (dump_file, ")\n");
1708 }
1709
1710 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1711 and only if it fits to the int type. If this is not the case, or the
1712 bound on the number of iterations of LOOP could not be derived, returns
1713 chrec_dont_know. */
1714
1715 static tree
max_stmt_executions_tree(struct loop * loop)1716 max_stmt_executions_tree (struct loop *loop)
1717 {
1718 double_int nit;
1719
1720 if (!max_stmt_executions (loop, &nit))
1721 return chrec_dont_know;
1722
1723 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1724 return chrec_dont_know;
1725
1726 return double_int_to_tree (unsigned_type_node, nit);
1727 }
1728
1729 /* Determine whether the CHREC is always positive/negative. If the expression
1730 cannot be statically analyzed, return false, otherwise set the answer into
1731 VALUE. */
1732
1733 static bool
chrec_is_positive(tree chrec,bool * value)1734 chrec_is_positive (tree chrec, bool *value)
1735 {
1736 bool value0, value1, value2;
1737 tree end_value, nb_iter;
1738
1739 switch (TREE_CODE (chrec))
1740 {
1741 case POLYNOMIAL_CHREC:
1742 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1743 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1744 return false;
1745
1746 /* FIXME -- overflows. */
1747 if (value0 == value1)
1748 {
1749 *value = value0;
1750 return true;
1751 }
1752
1753 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1754 and the proof consists in showing that the sign never
1755 changes during the execution of the loop, from 0 to
1756 loop->nb_iterations. */
1757 if (!evolution_function_is_affine_p (chrec))
1758 return false;
1759
1760 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1761 if (chrec_contains_undetermined (nb_iter))
1762 return false;
1763
1764 #if 0
1765 /* TODO -- If the test is after the exit, we may decrease the number of
1766 iterations by one. */
1767 if (after_exit)
1768 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1769 #endif
1770
1771 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1772
1773 if (!chrec_is_positive (end_value, &value2))
1774 return false;
1775
1776 *value = value0;
1777 return value0 == value1;
1778
1779 case INTEGER_CST:
1780 switch (tree_int_cst_sgn (chrec))
1781 {
1782 case -1:
1783 *value = false;
1784 break;
1785 case 1:
1786 *value = true;
1787 break;
1788 default:
1789 return false;
1790 }
1791 return true;
1792
1793 default:
1794 return false;
1795 }
1796 }
1797
1798
1799 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1800 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1801 *OVERLAPS_B are initialized to the functions that describe the
1802 relation between the elements accessed twice by CHREC_A and
1803 CHREC_B. For k >= 0, the following property is verified:
1804
1805 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1806
1807 static void
analyze_siv_subscript_cst_affine(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)1808 analyze_siv_subscript_cst_affine (tree chrec_a,
1809 tree chrec_b,
1810 conflict_function **overlaps_a,
1811 conflict_function **overlaps_b,
1812 tree *last_conflicts)
1813 {
1814 bool value0, value1, value2;
1815 tree type, difference, tmp;
1816
1817 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1818 chrec_a = chrec_convert (type, chrec_a, NULL);
1819 chrec_b = chrec_convert (type, chrec_b, NULL);
1820 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1821
1822 /* Special case overlap in the first iteration. */
1823 if (integer_zerop (difference))
1824 {
1825 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1826 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1827 *last_conflicts = integer_one_node;
1828 return;
1829 }
1830
1831 if (!chrec_is_positive (initial_condition (difference), &value0))
1832 {
1833 if (dump_file && (dump_flags & TDF_DETAILS))
1834 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1835
1836 dependence_stats.num_siv_unimplemented++;
1837 *overlaps_a = conflict_fn_not_known ();
1838 *overlaps_b = conflict_fn_not_known ();
1839 *last_conflicts = chrec_dont_know;
1840 return;
1841 }
1842 else
1843 {
1844 if (value0 == false)
1845 {
1846 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1847 {
1848 if (dump_file && (dump_flags & TDF_DETAILS))
1849 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1850
1851 *overlaps_a = conflict_fn_not_known ();
1852 *overlaps_b = conflict_fn_not_known ();
1853 *last_conflicts = chrec_dont_know;
1854 dependence_stats.num_siv_unimplemented++;
1855 return;
1856 }
1857 else
1858 {
1859 if (value1 == true)
1860 {
1861 /* Example:
1862 chrec_a = 12
1863 chrec_b = {10, +, 1}
1864 */
1865
1866 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1867 {
1868 HOST_WIDE_INT numiter;
1869 struct loop *loop = get_chrec_loop (chrec_b);
1870
1871 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1872 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1873 fold_build1 (ABS_EXPR, type, difference),
1874 CHREC_RIGHT (chrec_b));
1875 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1876 *last_conflicts = integer_one_node;
1877
1878
1879 /* Perform weak-zero siv test to see if overlap is
1880 outside the loop bounds. */
1881 numiter = max_stmt_executions_int (loop);
1882
1883 if (numiter >= 0
1884 && compare_tree_int (tmp, numiter) > 0)
1885 {
1886 free_conflict_function (*overlaps_a);
1887 free_conflict_function (*overlaps_b);
1888 *overlaps_a = conflict_fn_no_dependence ();
1889 *overlaps_b = conflict_fn_no_dependence ();
1890 *last_conflicts = integer_zero_node;
1891 dependence_stats.num_siv_independent++;
1892 return;
1893 }
1894 dependence_stats.num_siv_dependent++;
1895 return;
1896 }
1897
1898 /* When the step does not divide the difference, there are
1899 no overlaps. */
1900 else
1901 {
1902 *overlaps_a = conflict_fn_no_dependence ();
1903 *overlaps_b = conflict_fn_no_dependence ();
1904 *last_conflicts = integer_zero_node;
1905 dependence_stats.num_siv_independent++;
1906 return;
1907 }
1908 }
1909
1910 else
1911 {
1912 /* Example:
1913 chrec_a = 12
1914 chrec_b = {10, +, -1}
1915
1916 In this case, chrec_a will not overlap with chrec_b. */
1917 *overlaps_a = conflict_fn_no_dependence ();
1918 *overlaps_b = conflict_fn_no_dependence ();
1919 *last_conflicts = integer_zero_node;
1920 dependence_stats.num_siv_independent++;
1921 return;
1922 }
1923 }
1924 }
1925 else
1926 {
1927 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1928 {
1929 if (dump_file && (dump_flags & TDF_DETAILS))
1930 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1931
1932 *overlaps_a = conflict_fn_not_known ();
1933 *overlaps_b = conflict_fn_not_known ();
1934 *last_conflicts = chrec_dont_know;
1935 dependence_stats.num_siv_unimplemented++;
1936 return;
1937 }
1938 else
1939 {
1940 if (value2 == false)
1941 {
1942 /* Example:
1943 chrec_a = 3
1944 chrec_b = {10, +, -1}
1945 */
1946 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1947 {
1948 HOST_WIDE_INT numiter;
1949 struct loop *loop = get_chrec_loop (chrec_b);
1950
1951 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1952 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1953 CHREC_RIGHT (chrec_b));
1954 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1955 *last_conflicts = integer_one_node;
1956
1957 /* Perform weak-zero siv test to see if overlap is
1958 outside the loop bounds. */
1959 numiter = max_stmt_executions_int (loop);
1960
1961 if (numiter >= 0
1962 && compare_tree_int (tmp, numiter) > 0)
1963 {
1964 free_conflict_function (*overlaps_a);
1965 free_conflict_function (*overlaps_b);
1966 *overlaps_a = conflict_fn_no_dependence ();
1967 *overlaps_b = conflict_fn_no_dependence ();
1968 *last_conflicts = integer_zero_node;
1969 dependence_stats.num_siv_independent++;
1970 return;
1971 }
1972 dependence_stats.num_siv_dependent++;
1973 return;
1974 }
1975
1976 /* When the step does not divide the difference, there
1977 are no overlaps. */
1978 else
1979 {
1980 *overlaps_a = conflict_fn_no_dependence ();
1981 *overlaps_b = conflict_fn_no_dependence ();
1982 *last_conflicts = integer_zero_node;
1983 dependence_stats.num_siv_independent++;
1984 return;
1985 }
1986 }
1987 else
1988 {
1989 /* Example:
1990 chrec_a = 3
1991 chrec_b = {4, +, 1}
1992
1993 In this case, chrec_a will not overlap with chrec_b. */
1994 *overlaps_a = conflict_fn_no_dependence ();
1995 *overlaps_b = conflict_fn_no_dependence ();
1996 *last_conflicts = integer_zero_node;
1997 dependence_stats.num_siv_independent++;
1998 return;
1999 }
2000 }
2001 }
2002 }
2003 }
2004
2005 /* Helper recursive function for initializing the matrix A. Returns
2006 the initial value of CHREC. */
2007
2008 static tree
initialize_matrix_A(lambda_matrix A,tree chrec,unsigned index,int mult)2009 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2010 {
2011 gcc_assert (chrec);
2012
2013 switch (TREE_CODE (chrec))
2014 {
2015 case POLYNOMIAL_CHREC:
2016 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2017
2018 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2019 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2020
2021 case PLUS_EXPR:
2022 case MULT_EXPR:
2023 case MINUS_EXPR:
2024 {
2025 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2026 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2027
2028 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2029 }
2030
2031 case NOP_EXPR:
2032 {
2033 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2034 return chrec_convert (chrec_type (chrec), op, NULL);
2035 }
2036
2037 case BIT_NOT_EXPR:
2038 {
2039 /* Handle ~X as -1 - X. */
2040 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2041 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2042 build_int_cst (TREE_TYPE (chrec), -1), op);
2043 }
2044
2045 case INTEGER_CST:
2046 return chrec;
2047
2048 default:
2049 gcc_unreachable ();
2050 return NULL_TREE;
2051 }
2052 }
2053
2054 #define FLOOR_DIV(x,y) ((x) / (y))
2055
2056 /* Solves the special case of the Diophantine equation:
2057 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2058
2059 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2060 number of iterations that loops X and Y run. The overlaps will be
2061 constructed as evolutions in dimension DIM. */
2062
2063 static void
compute_overlap_steps_for_affine_univar(int niter,int step_a,int step_b,affine_fn * overlaps_a,affine_fn * overlaps_b,tree * last_conflicts,int dim)2064 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2065 affine_fn *overlaps_a,
2066 affine_fn *overlaps_b,
2067 tree *last_conflicts, int dim)
2068 {
2069 if (((step_a > 0 && step_b > 0)
2070 || (step_a < 0 && step_b < 0)))
2071 {
2072 int step_overlaps_a, step_overlaps_b;
2073 int gcd_steps_a_b, last_conflict, tau2;
2074
2075 gcd_steps_a_b = gcd (step_a, step_b);
2076 step_overlaps_a = step_b / gcd_steps_a_b;
2077 step_overlaps_b = step_a / gcd_steps_a_b;
2078
2079 if (niter > 0)
2080 {
2081 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2082 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2083 last_conflict = tau2;
2084 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2085 }
2086 else
2087 *last_conflicts = chrec_dont_know;
2088
2089 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2090 build_int_cst (NULL_TREE,
2091 step_overlaps_a));
2092 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2093 build_int_cst (NULL_TREE,
2094 step_overlaps_b));
2095 }
2096
2097 else
2098 {
2099 *overlaps_a = affine_fn_cst (integer_zero_node);
2100 *overlaps_b = affine_fn_cst (integer_zero_node);
2101 *last_conflicts = integer_zero_node;
2102 }
2103 }
2104
2105 /* Solves the special case of a Diophantine equation where CHREC_A is
2106 an affine bivariate function, and CHREC_B is an affine univariate
2107 function. For example,
2108
2109 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2110
2111 has the following overlapping functions:
2112
2113 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2114 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2115 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2116
2117 FORNOW: This is a specialized implementation for a case occurring in
2118 a common benchmark. Implement the general algorithm. */
2119
2120 static void
compute_overlap_steps_for_affine_1_2(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)2121 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2122 conflict_function **overlaps_a,
2123 conflict_function **overlaps_b,
2124 tree *last_conflicts)
2125 {
2126 bool xz_p, yz_p, xyz_p;
2127 int step_x, step_y, step_z;
2128 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2129 affine_fn overlaps_a_xz, overlaps_b_xz;
2130 affine_fn overlaps_a_yz, overlaps_b_yz;
2131 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2132 affine_fn ova1, ova2, ovb;
2133 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2134
2135 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2136 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2137 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2138
2139 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2140 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2141 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2142
2143 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2144 {
2145 if (dump_file && (dump_flags & TDF_DETAILS))
2146 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2147
2148 *overlaps_a = conflict_fn_not_known ();
2149 *overlaps_b = conflict_fn_not_known ();
2150 *last_conflicts = chrec_dont_know;
2151 return;
2152 }
2153
2154 niter = MIN (niter_x, niter_z);
2155 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2156 &overlaps_a_xz,
2157 &overlaps_b_xz,
2158 &last_conflicts_xz, 1);
2159 niter = MIN (niter_y, niter_z);
2160 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2161 &overlaps_a_yz,
2162 &overlaps_b_yz,
2163 &last_conflicts_yz, 2);
2164 niter = MIN (niter_x, niter_z);
2165 niter = MIN (niter_y, niter);
2166 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2167 &overlaps_a_xyz,
2168 &overlaps_b_xyz,
2169 &last_conflicts_xyz, 3);
2170
2171 xz_p = !integer_zerop (last_conflicts_xz);
2172 yz_p = !integer_zerop (last_conflicts_yz);
2173 xyz_p = !integer_zerop (last_conflicts_xyz);
2174
2175 if (xz_p || yz_p || xyz_p)
2176 {
2177 ova1 = affine_fn_cst (integer_zero_node);
2178 ova2 = affine_fn_cst (integer_zero_node);
2179 ovb = affine_fn_cst (integer_zero_node);
2180 if (xz_p)
2181 {
2182 affine_fn t0 = ova1;
2183 affine_fn t2 = ovb;
2184
2185 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2186 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2187 affine_fn_free (t0);
2188 affine_fn_free (t2);
2189 *last_conflicts = last_conflicts_xz;
2190 }
2191 if (yz_p)
2192 {
2193 affine_fn t0 = ova2;
2194 affine_fn t2 = ovb;
2195
2196 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2197 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2198 affine_fn_free (t0);
2199 affine_fn_free (t2);
2200 *last_conflicts = last_conflicts_yz;
2201 }
2202 if (xyz_p)
2203 {
2204 affine_fn t0 = ova1;
2205 affine_fn t2 = ova2;
2206 affine_fn t4 = ovb;
2207
2208 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2209 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2210 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2211 affine_fn_free (t0);
2212 affine_fn_free (t2);
2213 affine_fn_free (t4);
2214 *last_conflicts = last_conflicts_xyz;
2215 }
2216 *overlaps_a = conflict_fn (2, ova1, ova2);
2217 *overlaps_b = conflict_fn (1, ovb);
2218 }
2219 else
2220 {
2221 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2222 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2223 *last_conflicts = integer_zero_node;
2224 }
2225
2226 affine_fn_free (overlaps_a_xz);
2227 affine_fn_free (overlaps_b_xz);
2228 affine_fn_free (overlaps_a_yz);
2229 affine_fn_free (overlaps_b_yz);
2230 affine_fn_free (overlaps_a_xyz);
2231 affine_fn_free (overlaps_b_xyz);
2232 }
2233
2234 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2235
2236 static void
lambda_vector_copy(lambda_vector vec1,lambda_vector vec2,int size)2237 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2238 int size)
2239 {
2240 memcpy (vec2, vec1, size * sizeof (*vec1));
2241 }
2242
2243 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2244
2245 static void
lambda_matrix_copy(lambda_matrix mat1,lambda_matrix mat2,int m,int n)2246 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2247 int m, int n)
2248 {
2249 int i;
2250
2251 for (i = 0; i < m; i++)
2252 lambda_vector_copy (mat1[i], mat2[i], n);
2253 }
2254
2255 /* Store the N x N identity matrix in MAT. */
2256
2257 static void
lambda_matrix_id(lambda_matrix mat,int size)2258 lambda_matrix_id (lambda_matrix mat, int size)
2259 {
2260 int i, j;
2261
2262 for (i = 0; i < size; i++)
2263 for (j = 0; j < size; j++)
2264 mat[i][j] = (i == j) ? 1 : 0;
2265 }
2266
2267 /* Return the first nonzero element of vector VEC1 between START and N.
2268 We must have START <= N. Returns N if VEC1 is the zero vector. */
2269
2270 static int
lambda_vector_first_nz(lambda_vector vec1,int n,int start)2271 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2272 {
2273 int j = start;
2274 while (j < n && vec1[j] == 0)
2275 j++;
2276 return j;
2277 }
2278
2279 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2280 R2 = R2 + CONST1 * R1. */
2281
2282 static void
lambda_matrix_row_add(lambda_matrix mat,int n,int r1,int r2,int const1)2283 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2284 {
2285 int i;
2286
2287 if (const1 == 0)
2288 return;
2289
2290 for (i = 0; i < n; i++)
2291 mat[r2][i] += const1 * mat[r1][i];
2292 }
2293
2294 /* Swap rows R1 and R2 in matrix MAT. */
2295
2296 static void
lambda_matrix_row_exchange(lambda_matrix mat,int r1,int r2)2297 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2298 {
2299 lambda_vector row;
2300
2301 row = mat[r1];
2302 mat[r1] = mat[r2];
2303 mat[r2] = row;
2304 }
2305
2306 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2307 and store the result in VEC2. */
2308
2309 static void
lambda_vector_mult_const(lambda_vector vec1,lambda_vector vec2,int size,int const1)2310 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2311 int size, int const1)
2312 {
2313 int i;
2314
2315 if (const1 == 0)
2316 lambda_vector_clear (vec2, size);
2317 else
2318 for (i = 0; i < size; i++)
2319 vec2[i] = const1 * vec1[i];
2320 }
2321
2322 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2323
2324 static void
lambda_vector_negate(lambda_vector vec1,lambda_vector vec2,int size)2325 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2326 int size)
2327 {
2328 lambda_vector_mult_const (vec1, vec2, size, -1);
2329 }
2330
2331 /* Negate row R1 of matrix MAT which has N columns. */
2332
2333 static void
lambda_matrix_row_negate(lambda_matrix mat,int n,int r1)2334 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2335 {
2336 lambda_vector_negate (mat[r1], mat[r1], n);
2337 }
2338
2339 /* Return true if two vectors are equal. */
2340
2341 static bool
lambda_vector_equal(lambda_vector vec1,lambda_vector vec2,int size)2342 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2343 {
2344 int i;
2345 for (i = 0; i < size; i++)
2346 if (vec1[i] != vec2[i])
2347 return false;
2348 return true;
2349 }
2350
2351 /* Given an M x N integer matrix A, this function determines an M x
2352 M unimodular matrix U, and an M x N echelon matrix S such that
2353 "U.A = S". This decomposition is also known as "right Hermite".
2354
2355 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2356 Restructuring Compilers" Utpal Banerjee. */
2357
2358 static void
lambda_matrix_right_hermite(lambda_matrix A,int m,int n,lambda_matrix S,lambda_matrix U)2359 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2360 lambda_matrix S, lambda_matrix U)
2361 {
2362 int i, j, i0 = 0;
2363
2364 lambda_matrix_copy (A, S, m, n);
2365 lambda_matrix_id (U, m);
2366
2367 for (j = 0; j < n; j++)
2368 {
2369 if (lambda_vector_first_nz (S[j], m, i0) < m)
2370 {
2371 ++i0;
2372 for (i = m - 1; i >= i0; i--)
2373 {
2374 while (S[i][j] != 0)
2375 {
2376 int sigma, factor, a, b;
2377
2378 a = S[i-1][j];
2379 b = S[i][j];
2380 sigma = (a * b < 0) ? -1: 1;
2381 a = abs (a);
2382 b = abs (b);
2383 factor = sigma * (a / b);
2384
2385 lambda_matrix_row_add (S, n, i, i-1, -factor);
2386 lambda_matrix_row_exchange (S, i, i-1);
2387
2388 lambda_matrix_row_add (U, m, i, i-1, -factor);
2389 lambda_matrix_row_exchange (U, i, i-1);
2390 }
2391 }
2392 }
2393 }
2394 }
2395
2396 /* Determines the overlapping elements due to accesses CHREC_A and
2397 CHREC_B, that are affine functions. This function cannot handle
2398 symbolic evolution functions, ie. when initial conditions are
2399 parameters, because it uses lambda matrices of integers. */
2400
2401 static void
analyze_subscript_affine_affine(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)2402 analyze_subscript_affine_affine (tree chrec_a,
2403 tree chrec_b,
2404 conflict_function **overlaps_a,
2405 conflict_function **overlaps_b,
2406 tree *last_conflicts)
2407 {
2408 unsigned nb_vars_a, nb_vars_b, dim;
2409 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2410 lambda_matrix A, U, S;
2411 struct obstack scratch_obstack;
2412
2413 if (eq_evolutions_p (chrec_a, chrec_b))
2414 {
2415 /* The accessed index overlaps for each iteration in the
2416 loop. */
2417 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2418 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2419 *last_conflicts = chrec_dont_know;
2420 return;
2421 }
2422 if (dump_file && (dump_flags & TDF_DETAILS))
2423 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2424
2425 /* For determining the initial intersection, we have to solve a
2426 Diophantine equation. This is the most time consuming part.
2427
2428 For answering to the question: "Is there a dependence?" we have
2429 to prove that there exists a solution to the Diophantine
2430 equation, and that the solution is in the iteration domain,
2431 i.e. the solution is positive or zero, and that the solution
2432 happens before the upper bound loop.nb_iterations. Otherwise
2433 there is no dependence. This function outputs a description of
2434 the iterations that hold the intersections. */
2435
2436 nb_vars_a = nb_vars_in_chrec (chrec_a);
2437 nb_vars_b = nb_vars_in_chrec (chrec_b);
2438
2439 gcc_obstack_init (&scratch_obstack);
2440
2441 dim = nb_vars_a + nb_vars_b;
2442 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2443 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2444 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2445
2446 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2447 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2448 gamma = init_b - init_a;
2449
2450 /* Don't do all the hard work of solving the Diophantine equation
2451 when we already know the solution: for example,
2452 | {3, +, 1}_1
2453 | {3, +, 4}_2
2454 | gamma = 3 - 3 = 0.
2455 Then the first overlap occurs during the first iterations:
2456 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2457 */
2458 if (gamma == 0)
2459 {
2460 if (nb_vars_a == 1 && nb_vars_b == 1)
2461 {
2462 HOST_WIDE_INT step_a, step_b;
2463 HOST_WIDE_INT niter, niter_a, niter_b;
2464 affine_fn ova, ovb;
2465
2466 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2467 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2468 niter = MIN (niter_a, niter_b);
2469 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2470 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2471
2472 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2473 &ova, &ovb,
2474 last_conflicts, 1);
2475 *overlaps_a = conflict_fn (1, ova);
2476 *overlaps_b = conflict_fn (1, ovb);
2477 }
2478
2479 else if (nb_vars_a == 2 && nb_vars_b == 1)
2480 compute_overlap_steps_for_affine_1_2
2481 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2482
2483 else if (nb_vars_a == 1 && nb_vars_b == 2)
2484 compute_overlap_steps_for_affine_1_2
2485 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2486
2487 else
2488 {
2489 if (dump_file && (dump_flags & TDF_DETAILS))
2490 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2491 *overlaps_a = conflict_fn_not_known ();
2492 *overlaps_b = conflict_fn_not_known ();
2493 *last_conflicts = chrec_dont_know;
2494 }
2495 goto end_analyze_subs_aa;
2496 }
2497
2498 /* U.A = S */
2499 lambda_matrix_right_hermite (A, dim, 1, S, U);
2500
2501 if (S[0][0] < 0)
2502 {
2503 S[0][0] *= -1;
2504 lambda_matrix_row_negate (U, dim, 0);
2505 }
2506 gcd_alpha_beta = S[0][0];
2507
2508 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2509 but that is a quite strange case. Instead of ICEing, answer
2510 don't know. */
2511 if (gcd_alpha_beta == 0)
2512 {
2513 *overlaps_a = conflict_fn_not_known ();
2514 *overlaps_b = conflict_fn_not_known ();
2515 *last_conflicts = chrec_dont_know;
2516 goto end_analyze_subs_aa;
2517 }
2518
2519 /* The classic "gcd-test". */
2520 if (!int_divides_p (gcd_alpha_beta, gamma))
2521 {
2522 /* The "gcd-test" has determined that there is no integer
2523 solution, i.e. there is no dependence. */
2524 *overlaps_a = conflict_fn_no_dependence ();
2525 *overlaps_b = conflict_fn_no_dependence ();
2526 *last_conflicts = integer_zero_node;
2527 }
2528
2529 /* Both access functions are univariate. This includes SIV and MIV cases. */
2530 else if (nb_vars_a == 1 && nb_vars_b == 1)
2531 {
2532 /* Both functions should have the same evolution sign. */
2533 if (((A[0][0] > 0 && -A[1][0] > 0)
2534 || (A[0][0] < 0 && -A[1][0] < 0)))
2535 {
2536 /* The solutions are given by:
2537 |
2538 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2539 | [u21 u22] [y0]
2540
2541 For a given integer t. Using the following variables,
2542
2543 | i0 = u11 * gamma / gcd_alpha_beta
2544 | j0 = u12 * gamma / gcd_alpha_beta
2545 | i1 = u21
2546 | j1 = u22
2547
2548 the solutions are:
2549
2550 | x0 = i0 + i1 * t,
2551 | y0 = j0 + j1 * t. */
2552 HOST_WIDE_INT i0, j0, i1, j1;
2553
2554 i0 = U[0][0] * gamma / gcd_alpha_beta;
2555 j0 = U[0][1] * gamma / gcd_alpha_beta;
2556 i1 = U[1][0];
2557 j1 = U[1][1];
2558
2559 if ((i1 == 0 && i0 < 0)
2560 || (j1 == 0 && j0 < 0))
2561 {
2562 /* There is no solution.
2563 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2564 falls in here, but for the moment we don't look at the
2565 upper bound of the iteration domain. */
2566 *overlaps_a = conflict_fn_no_dependence ();
2567 *overlaps_b = conflict_fn_no_dependence ();
2568 *last_conflicts = integer_zero_node;
2569 goto end_analyze_subs_aa;
2570 }
2571
2572 if (i1 > 0 && j1 > 0)
2573 {
2574 HOST_WIDE_INT niter_a
2575 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2576 HOST_WIDE_INT niter_b
2577 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2578 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2579
2580 /* (X0, Y0) is a solution of the Diophantine equation:
2581 "chrec_a (X0) = chrec_b (Y0)". */
2582 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2583 CEIL (-j0, j1));
2584 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2585 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2586
2587 /* (X1, Y1) is the smallest positive solution of the eq
2588 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2589 first conflict occurs. */
2590 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2591 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2592 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2593
2594 if (niter > 0)
2595 {
2596 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2597 FLOOR_DIV (niter - j0, j1));
2598 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2599
2600 /* If the overlap occurs outside of the bounds of the
2601 loop, there is no dependence. */
2602 if (x1 >= niter || y1 >= niter)
2603 {
2604 *overlaps_a = conflict_fn_no_dependence ();
2605 *overlaps_b = conflict_fn_no_dependence ();
2606 *last_conflicts = integer_zero_node;
2607 goto end_analyze_subs_aa;
2608 }
2609 else
2610 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2611 }
2612 else
2613 *last_conflicts = chrec_dont_know;
2614
2615 *overlaps_a
2616 = conflict_fn (1,
2617 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2618 1,
2619 build_int_cst (NULL_TREE, i1)));
2620 *overlaps_b
2621 = conflict_fn (1,
2622 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2623 1,
2624 build_int_cst (NULL_TREE, j1)));
2625 }
2626 else
2627 {
2628 /* FIXME: For the moment, the upper bound of the
2629 iteration domain for i and j is not checked. */
2630 if (dump_file && (dump_flags & TDF_DETAILS))
2631 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2632 *overlaps_a = conflict_fn_not_known ();
2633 *overlaps_b = conflict_fn_not_known ();
2634 *last_conflicts = chrec_dont_know;
2635 }
2636 }
2637 else
2638 {
2639 if (dump_file && (dump_flags & TDF_DETAILS))
2640 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2641 *overlaps_a = conflict_fn_not_known ();
2642 *overlaps_b = conflict_fn_not_known ();
2643 *last_conflicts = chrec_dont_know;
2644 }
2645 }
2646 else
2647 {
2648 if (dump_file && (dump_flags & TDF_DETAILS))
2649 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2650 *overlaps_a = conflict_fn_not_known ();
2651 *overlaps_b = conflict_fn_not_known ();
2652 *last_conflicts = chrec_dont_know;
2653 }
2654
2655 end_analyze_subs_aa:
2656 obstack_free (&scratch_obstack, NULL);
2657 if (dump_file && (dump_flags & TDF_DETAILS))
2658 {
2659 fprintf (dump_file, " (overlaps_a = ");
2660 dump_conflict_function (dump_file, *overlaps_a);
2661 fprintf (dump_file, ")\n (overlaps_b = ");
2662 dump_conflict_function (dump_file, *overlaps_b);
2663 fprintf (dump_file, "))\n");
2664 }
2665 }
2666
2667 /* Returns true when analyze_subscript_affine_affine can be used for
2668 determining the dependence relation between chrec_a and chrec_b,
2669 that contain symbols. This function modifies chrec_a and chrec_b
2670 such that the analysis result is the same, and such that they don't
2671 contain symbols, and then can safely be passed to the analyzer.
2672
2673 Example: The analysis of the following tuples of evolutions produce
2674 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2675 vs. {0, +, 1}_1
2676
2677 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2678 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2679 */
2680
2681 static bool
can_use_analyze_subscript_affine_affine(tree * chrec_a,tree * chrec_b)2682 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2683 {
2684 tree diff, type, left_a, left_b, right_b;
2685
2686 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2687 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2688 /* FIXME: For the moment not handled. Might be refined later. */
2689 return false;
2690
2691 type = chrec_type (*chrec_a);
2692 left_a = CHREC_LEFT (*chrec_a);
2693 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2694 diff = chrec_fold_minus (type, left_a, left_b);
2695
2696 if (!evolution_function_is_constant_p (diff))
2697 return false;
2698
2699 if (dump_file && (dump_flags & TDF_DETAILS))
2700 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2701
2702 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2703 diff, CHREC_RIGHT (*chrec_a));
2704 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2705 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2706 build_int_cst (type, 0),
2707 right_b);
2708 return true;
2709 }
2710
2711 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2712 *OVERLAPS_B are initialized to the functions that describe the
2713 relation between the elements accessed twice by CHREC_A and
2714 CHREC_B. For k >= 0, the following property is verified:
2715
2716 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2717
2718 static void
analyze_siv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts,int loop_nest_num)2719 analyze_siv_subscript (tree chrec_a,
2720 tree chrec_b,
2721 conflict_function **overlaps_a,
2722 conflict_function **overlaps_b,
2723 tree *last_conflicts,
2724 int loop_nest_num)
2725 {
2726 dependence_stats.num_siv++;
2727
2728 if (dump_file && (dump_flags & TDF_DETAILS))
2729 fprintf (dump_file, "(analyze_siv_subscript \n");
2730
2731 if (evolution_function_is_constant_p (chrec_a)
2732 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2733 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2734 overlaps_a, overlaps_b, last_conflicts);
2735
2736 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2737 && evolution_function_is_constant_p (chrec_b))
2738 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2739 overlaps_b, overlaps_a, last_conflicts);
2740
2741 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2742 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2743 {
2744 if (!chrec_contains_symbols (chrec_a)
2745 && !chrec_contains_symbols (chrec_b))
2746 {
2747 analyze_subscript_affine_affine (chrec_a, chrec_b,
2748 overlaps_a, overlaps_b,
2749 last_conflicts);
2750
2751 if (CF_NOT_KNOWN_P (*overlaps_a)
2752 || CF_NOT_KNOWN_P (*overlaps_b))
2753 dependence_stats.num_siv_unimplemented++;
2754 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2755 || CF_NO_DEPENDENCE_P (*overlaps_b))
2756 dependence_stats.num_siv_independent++;
2757 else
2758 dependence_stats.num_siv_dependent++;
2759 }
2760 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2761 &chrec_b))
2762 {
2763 analyze_subscript_affine_affine (chrec_a, chrec_b,
2764 overlaps_a, overlaps_b,
2765 last_conflicts);
2766
2767 if (CF_NOT_KNOWN_P (*overlaps_a)
2768 || CF_NOT_KNOWN_P (*overlaps_b))
2769 dependence_stats.num_siv_unimplemented++;
2770 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2771 || CF_NO_DEPENDENCE_P (*overlaps_b))
2772 dependence_stats.num_siv_independent++;
2773 else
2774 dependence_stats.num_siv_dependent++;
2775 }
2776 else
2777 goto siv_subscript_dontknow;
2778 }
2779
2780 else
2781 {
2782 siv_subscript_dontknow:;
2783 if (dump_file && (dump_flags & TDF_DETAILS))
2784 fprintf (dump_file, " siv test failed: unimplemented");
2785 *overlaps_a = conflict_fn_not_known ();
2786 *overlaps_b = conflict_fn_not_known ();
2787 *last_conflicts = chrec_dont_know;
2788 dependence_stats.num_siv_unimplemented++;
2789 }
2790
2791 if (dump_file && (dump_flags & TDF_DETAILS))
2792 fprintf (dump_file, ")\n");
2793 }
2794
2795 /* Returns false if we can prove that the greatest common divisor of the steps
2796 of CHREC does not divide CST, false otherwise. */
2797
2798 static bool
gcd_of_steps_may_divide_p(const_tree chrec,const_tree cst)2799 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2800 {
2801 HOST_WIDE_INT cd = 0, val;
2802 tree step;
2803
2804 if (!host_integerp (cst, 0))
2805 return true;
2806 val = tree_low_cst (cst, 0);
2807
2808 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2809 {
2810 step = CHREC_RIGHT (chrec);
2811 if (!host_integerp (step, 0))
2812 return true;
2813 cd = gcd (cd, tree_low_cst (step, 0));
2814 chrec = CHREC_LEFT (chrec);
2815 }
2816
2817 return val % cd == 0;
2818 }
2819
2820 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2821 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2822 functions that describe the relation between the elements accessed
2823 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2824 is verified:
2825
2826 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2827
2828 static void
analyze_miv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts,struct loop * loop_nest)2829 analyze_miv_subscript (tree chrec_a,
2830 tree chrec_b,
2831 conflict_function **overlaps_a,
2832 conflict_function **overlaps_b,
2833 tree *last_conflicts,
2834 struct loop *loop_nest)
2835 {
2836 tree type, difference;
2837
2838 dependence_stats.num_miv++;
2839 if (dump_file && (dump_flags & TDF_DETAILS))
2840 fprintf (dump_file, "(analyze_miv_subscript \n");
2841
2842 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2843 chrec_a = chrec_convert (type, chrec_a, NULL);
2844 chrec_b = chrec_convert (type, chrec_b, NULL);
2845 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2846
2847 if (eq_evolutions_p (chrec_a, chrec_b))
2848 {
2849 /* Access functions are the same: all the elements are accessed
2850 in the same order. */
2851 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2852 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2853 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2854 dependence_stats.num_miv_dependent++;
2855 }
2856
2857 else if (evolution_function_is_constant_p (difference)
2858 /* For the moment, the following is verified:
2859 evolution_function_is_affine_multivariate_p (chrec_a,
2860 loop_nest->num) */
2861 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2862 {
2863 /* testsuite/.../ssa-chrec-33.c
2864 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2865
2866 The difference is 1, and all the evolution steps are multiples
2867 of 2, consequently there are no overlapping elements. */
2868 *overlaps_a = conflict_fn_no_dependence ();
2869 *overlaps_b = conflict_fn_no_dependence ();
2870 *last_conflicts = integer_zero_node;
2871 dependence_stats.num_miv_independent++;
2872 }
2873
2874 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2875 && !chrec_contains_symbols (chrec_a)
2876 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2877 && !chrec_contains_symbols (chrec_b))
2878 {
2879 /* testsuite/.../ssa-chrec-35.c
2880 {0, +, 1}_2 vs. {0, +, 1}_3
2881 the overlapping elements are respectively located at iterations:
2882 {0, +, 1}_x and {0, +, 1}_x,
2883 in other words, we have the equality:
2884 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2885
2886 Other examples:
2887 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2888 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2889
2890 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2891 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2892 */
2893 analyze_subscript_affine_affine (chrec_a, chrec_b,
2894 overlaps_a, overlaps_b, last_conflicts);
2895
2896 if (CF_NOT_KNOWN_P (*overlaps_a)
2897 || CF_NOT_KNOWN_P (*overlaps_b))
2898 dependence_stats.num_miv_unimplemented++;
2899 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2900 || CF_NO_DEPENDENCE_P (*overlaps_b))
2901 dependence_stats.num_miv_independent++;
2902 else
2903 dependence_stats.num_miv_dependent++;
2904 }
2905
2906 else
2907 {
2908 /* When the analysis is too difficult, answer "don't know". */
2909 if (dump_file && (dump_flags & TDF_DETAILS))
2910 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2911
2912 *overlaps_a = conflict_fn_not_known ();
2913 *overlaps_b = conflict_fn_not_known ();
2914 *last_conflicts = chrec_dont_know;
2915 dependence_stats.num_miv_unimplemented++;
2916 }
2917
2918 if (dump_file && (dump_flags & TDF_DETAILS))
2919 fprintf (dump_file, ")\n");
2920 }
2921
2922 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2923 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2924 OVERLAP_ITERATIONS_B are initialized with two functions that
2925 describe the iterations that contain conflicting elements.
2926
2927 Remark: For an integer k >= 0, the following equality is true:
2928
2929 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2930 */
2931
2932 static void
analyze_overlapping_iterations(tree chrec_a,tree chrec_b,conflict_function ** overlap_iterations_a,conflict_function ** overlap_iterations_b,tree * last_conflicts,struct loop * loop_nest)2933 analyze_overlapping_iterations (tree chrec_a,
2934 tree chrec_b,
2935 conflict_function **overlap_iterations_a,
2936 conflict_function **overlap_iterations_b,
2937 tree *last_conflicts, struct loop *loop_nest)
2938 {
2939 unsigned int lnn = loop_nest->num;
2940
2941 dependence_stats.num_subscript_tests++;
2942
2943 if (dump_file && (dump_flags & TDF_DETAILS))
2944 {
2945 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2946 fprintf (dump_file, " (chrec_a = ");
2947 print_generic_expr (dump_file, chrec_a, 0);
2948 fprintf (dump_file, ")\n (chrec_b = ");
2949 print_generic_expr (dump_file, chrec_b, 0);
2950 fprintf (dump_file, ")\n");
2951 }
2952
2953 if (chrec_a == NULL_TREE
2954 || chrec_b == NULL_TREE
2955 || chrec_contains_undetermined (chrec_a)
2956 || chrec_contains_undetermined (chrec_b))
2957 {
2958 dependence_stats.num_subscript_undetermined++;
2959
2960 *overlap_iterations_a = conflict_fn_not_known ();
2961 *overlap_iterations_b = conflict_fn_not_known ();
2962 }
2963
2964 /* If they are the same chrec, and are affine, they overlap
2965 on every iteration. */
2966 else if (eq_evolutions_p (chrec_a, chrec_b)
2967 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2968 || operand_equal_p (chrec_a, chrec_b, 0)))
2969 {
2970 dependence_stats.num_same_subscript_function++;
2971 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2972 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2973 *last_conflicts = chrec_dont_know;
2974 }
2975
2976 /* If they aren't the same, and aren't affine, we can't do anything
2977 yet. */
2978 else if ((chrec_contains_symbols (chrec_a)
2979 || chrec_contains_symbols (chrec_b))
2980 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2981 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2982 {
2983 dependence_stats.num_subscript_undetermined++;
2984 *overlap_iterations_a = conflict_fn_not_known ();
2985 *overlap_iterations_b = conflict_fn_not_known ();
2986 }
2987
2988 else if (ziv_subscript_p (chrec_a, chrec_b))
2989 analyze_ziv_subscript (chrec_a, chrec_b,
2990 overlap_iterations_a, overlap_iterations_b,
2991 last_conflicts);
2992
2993 else if (siv_subscript_p (chrec_a, chrec_b))
2994 analyze_siv_subscript (chrec_a, chrec_b,
2995 overlap_iterations_a, overlap_iterations_b,
2996 last_conflicts, lnn);
2997
2998 else
2999 analyze_miv_subscript (chrec_a, chrec_b,
3000 overlap_iterations_a, overlap_iterations_b,
3001 last_conflicts, loop_nest);
3002
3003 if (dump_file && (dump_flags & TDF_DETAILS))
3004 {
3005 fprintf (dump_file, " (overlap_iterations_a = ");
3006 dump_conflict_function (dump_file, *overlap_iterations_a);
3007 fprintf (dump_file, ")\n (overlap_iterations_b = ");
3008 dump_conflict_function (dump_file, *overlap_iterations_b);
3009 fprintf (dump_file, "))\n");
3010 }
3011 }
3012
3013 /* Helper function for uniquely inserting distance vectors. */
3014
3015 static void
save_dist_v(struct data_dependence_relation * ddr,lambda_vector dist_v)3016 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3017 {
3018 unsigned i;
3019 lambda_vector v;
3020
3021 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3022 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3023 return;
3024
3025 DDR_DIST_VECTS (ddr).safe_push (dist_v);
3026 }
3027
3028 /* Helper function for uniquely inserting direction vectors. */
3029
3030 static void
save_dir_v(struct data_dependence_relation * ddr,lambda_vector dir_v)3031 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3032 {
3033 unsigned i;
3034 lambda_vector v;
3035
3036 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3037 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3038 return;
3039
3040 DDR_DIR_VECTS (ddr).safe_push (dir_v);
3041 }
3042
3043 /* Add a distance of 1 on all the loops outer than INDEX. If we
3044 haven't yet determined a distance for this outer loop, push a new
3045 distance vector composed of the previous distance, and a distance
3046 of 1 for this outer loop. Example:
3047
3048 | loop_1
3049 | loop_2
3050 | A[10]
3051 | endloop_2
3052 | endloop_1
3053
3054 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3055 save (0, 1), then we have to save (1, 0). */
3056
3057 static void
add_outer_distances(struct data_dependence_relation * ddr,lambda_vector dist_v,int index)3058 add_outer_distances (struct data_dependence_relation *ddr,
3059 lambda_vector dist_v, int index)
3060 {
3061 /* For each outer loop where init_v is not set, the accesses are
3062 in dependence of distance 1 in the loop. */
3063 while (--index >= 0)
3064 {
3065 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3066 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3067 save_v[index] = 1;
3068 save_dist_v (ddr, save_v);
3069 }
3070 }
3071
3072 /* Return false when fail to represent the data dependence as a
3073 distance vector. INIT_B is set to true when a component has been
3074 added to the distance vector DIST_V. INDEX_CARRY is then set to
3075 the index in DIST_V that carries the dependence. */
3076
3077 static bool
build_classic_dist_vector_1(struct data_dependence_relation * ddr,struct data_reference * ddr_a,struct data_reference * ddr_b,lambda_vector dist_v,bool * init_b,int * index_carry)3078 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3079 struct data_reference *ddr_a,
3080 struct data_reference *ddr_b,
3081 lambda_vector dist_v, bool *init_b,
3082 int *index_carry)
3083 {
3084 unsigned i;
3085 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3086
3087 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3088 {
3089 tree access_fn_a, access_fn_b;
3090 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3091
3092 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3093 {
3094 non_affine_dependence_relation (ddr);
3095 return false;
3096 }
3097
3098 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3099 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3100
3101 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3102 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3103 {
3104 int dist, index;
3105 int var_a = CHREC_VARIABLE (access_fn_a);
3106 int var_b = CHREC_VARIABLE (access_fn_b);
3107
3108 if (var_a != var_b
3109 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3110 {
3111 non_affine_dependence_relation (ddr);
3112 return false;
3113 }
3114
3115 dist = int_cst_value (SUB_DISTANCE (subscript));
3116 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3117 *index_carry = MIN (index, *index_carry);
3118
3119 /* This is the subscript coupling test. If we have already
3120 recorded a distance for this loop (a distance coming from
3121 another subscript), it should be the same. For example,
3122 in the following code, there is no dependence:
3123
3124 | loop i = 0, N, 1
3125 | T[i+1][i] = ...
3126 | ... = T[i][i]
3127 | endloop
3128 */
3129 if (init_v[index] != 0 && dist_v[index] != dist)
3130 {
3131 finalize_ddr_dependent (ddr, chrec_known);
3132 return false;
3133 }
3134
3135 dist_v[index] = dist;
3136 init_v[index] = 1;
3137 *init_b = true;
3138 }
3139 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3140 {
3141 /* This can be for example an affine vs. constant dependence
3142 (T[i] vs. T[3]) that is not an affine dependence and is
3143 not representable as a distance vector. */
3144 non_affine_dependence_relation (ddr);
3145 return false;
3146 }
3147 }
3148
3149 return true;
3150 }
3151
3152 /* Return true when the DDR contains only constant access functions. */
3153
3154 static bool
constant_access_functions(const struct data_dependence_relation * ddr)3155 constant_access_functions (const struct data_dependence_relation *ddr)
3156 {
3157 unsigned i;
3158
3159 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3160 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3161 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3162 return false;
3163
3164 return true;
3165 }
3166
3167 /* Helper function for the case where DDR_A and DDR_B are the same
3168 multivariate access function with a constant step. For an example
3169 see pr34635-1.c. */
3170
3171 static void
add_multivariate_self_dist(struct data_dependence_relation * ddr,tree c_2)3172 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3173 {
3174 int x_1, x_2;
3175 tree c_1 = CHREC_LEFT (c_2);
3176 tree c_0 = CHREC_LEFT (c_1);
3177 lambda_vector dist_v;
3178 int v1, v2, cd;
3179
3180 /* Polynomials with more than 2 variables are not handled yet. When
3181 the evolution steps are parameters, it is not possible to
3182 represent the dependence using classical distance vectors. */
3183 if (TREE_CODE (c_0) != INTEGER_CST
3184 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3185 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3186 {
3187 DDR_AFFINE_P (ddr) = false;
3188 return;
3189 }
3190
3191 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3192 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3193
3194 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3195 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3196 v1 = int_cst_value (CHREC_RIGHT (c_1));
3197 v2 = int_cst_value (CHREC_RIGHT (c_2));
3198 cd = gcd (v1, v2);
3199 v1 /= cd;
3200 v2 /= cd;
3201
3202 if (v2 < 0)
3203 {
3204 v2 = -v2;
3205 v1 = -v1;
3206 }
3207
3208 dist_v[x_1] = v2;
3209 dist_v[x_2] = -v1;
3210 save_dist_v (ddr, dist_v);
3211
3212 add_outer_distances (ddr, dist_v, x_1);
3213 }
3214
3215 /* Helper function for the case where DDR_A and DDR_B are the same
3216 access functions. */
3217
3218 static void
add_other_self_distances(struct data_dependence_relation * ddr)3219 add_other_self_distances (struct data_dependence_relation *ddr)
3220 {
3221 lambda_vector dist_v;
3222 unsigned i;
3223 int index_carry = DDR_NB_LOOPS (ddr);
3224
3225 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3226 {
3227 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3228
3229 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3230 {
3231 if (!evolution_function_is_univariate_p (access_fun))
3232 {
3233 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3234 {
3235 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3236 return;
3237 }
3238
3239 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3240
3241 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3242 add_multivariate_self_dist (ddr, access_fun);
3243 else
3244 /* The evolution step is not constant: it varies in
3245 the outer loop, so this cannot be represented by a
3246 distance vector. For example in pr34635.c the
3247 evolution is {0, +, {0, +, 4}_1}_2. */
3248 DDR_AFFINE_P (ddr) = false;
3249
3250 return;
3251 }
3252
3253 index_carry = MIN (index_carry,
3254 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3255 DDR_LOOP_NEST (ddr)));
3256 }
3257 }
3258
3259 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3260 add_outer_distances (ddr, dist_v, index_carry);
3261 }
3262
3263 static void
insert_innermost_unit_dist_vector(struct data_dependence_relation * ddr)3264 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3265 {
3266 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3267
3268 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3269 save_dist_v (ddr, dist_v);
3270 }
3271
3272 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3273 is the case for example when access functions are the same and
3274 equal to a constant, as in:
3275
3276 | loop_1
3277 | A[3] = ...
3278 | ... = A[3]
3279 | endloop_1
3280
3281 in which case the distance vectors are (0) and (1). */
3282
3283 static void
add_distance_for_zero_overlaps(struct data_dependence_relation * ddr)3284 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3285 {
3286 unsigned i, j;
3287
3288 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3289 {
3290 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3291 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3292 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3293
3294 for (j = 0; j < ca->n; j++)
3295 if (affine_function_zero_p (ca->fns[j]))
3296 {
3297 insert_innermost_unit_dist_vector (ddr);
3298 return;
3299 }
3300
3301 for (j = 0; j < cb->n; j++)
3302 if (affine_function_zero_p (cb->fns[j]))
3303 {
3304 insert_innermost_unit_dist_vector (ddr);
3305 return;
3306 }
3307 }
3308 }
3309
3310 /* Compute the classic per loop distance vector. DDR is the data
3311 dependence relation to build a vector from. Return false when fail
3312 to represent the data dependence as a distance vector. */
3313
3314 static bool
build_classic_dist_vector(struct data_dependence_relation * ddr,struct loop * loop_nest)3315 build_classic_dist_vector (struct data_dependence_relation *ddr,
3316 struct loop *loop_nest)
3317 {
3318 bool init_b = false;
3319 int index_carry = DDR_NB_LOOPS (ddr);
3320 lambda_vector dist_v;
3321
3322 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3323 return false;
3324
3325 if (same_access_functions (ddr))
3326 {
3327 /* Save the 0 vector. */
3328 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3329 save_dist_v (ddr, dist_v);
3330
3331 if (constant_access_functions (ddr))
3332 add_distance_for_zero_overlaps (ddr);
3333
3334 if (DDR_NB_LOOPS (ddr) > 1)
3335 add_other_self_distances (ddr);
3336
3337 return true;
3338 }
3339
3340 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3341 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3342 dist_v, &init_b, &index_carry))
3343 return false;
3344
3345 /* Save the distance vector if we initialized one. */
3346 if (init_b)
3347 {
3348 /* Verify a basic constraint: classic distance vectors should
3349 always be lexicographically positive.
3350
3351 Data references are collected in the order of execution of
3352 the program, thus for the following loop
3353
3354 | for (i = 1; i < 100; i++)
3355 | for (j = 1; j < 100; j++)
3356 | {
3357 | t = T[j+1][i-1]; // A
3358 | T[j][i] = t + 2; // B
3359 | }
3360
3361 references are collected following the direction of the wind:
3362 A then B. The data dependence tests are performed also
3363 following this order, such that we're looking at the distance
3364 separating the elements accessed by A from the elements later
3365 accessed by B. But in this example, the distance returned by
3366 test_dep (A, B) is lexicographically negative (-1, 1), that
3367 means that the access A occurs later than B with respect to
3368 the outer loop, ie. we're actually looking upwind. In this
3369 case we solve test_dep (B, A) looking downwind to the
3370 lexicographically positive solution, that returns the
3371 distance vector (1, -1). */
3372 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3373 {
3374 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3375 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3376 loop_nest))
3377 return false;
3378 compute_subscript_distance (ddr);
3379 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3380 save_v, &init_b, &index_carry))
3381 return false;
3382 save_dist_v (ddr, save_v);
3383 DDR_REVERSED_P (ddr) = true;
3384
3385 /* In this case there is a dependence forward for all the
3386 outer loops:
3387
3388 | for (k = 1; k < 100; k++)
3389 | for (i = 1; i < 100; i++)
3390 | for (j = 1; j < 100; j++)
3391 | {
3392 | t = T[j+1][i-1]; // A
3393 | T[j][i] = t + 2; // B
3394 | }
3395
3396 the vectors are:
3397 (0, 1, -1)
3398 (1, 1, -1)
3399 (1, -1, 1)
3400 */
3401 if (DDR_NB_LOOPS (ddr) > 1)
3402 {
3403 add_outer_distances (ddr, save_v, index_carry);
3404 add_outer_distances (ddr, dist_v, index_carry);
3405 }
3406 }
3407 else
3408 {
3409 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3410 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3411
3412 if (DDR_NB_LOOPS (ddr) > 1)
3413 {
3414 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3415
3416 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3417 DDR_A (ddr), loop_nest))
3418 return false;
3419 compute_subscript_distance (ddr);
3420 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3421 opposite_v, &init_b,
3422 &index_carry))
3423 return false;
3424
3425 save_dist_v (ddr, save_v);
3426 add_outer_distances (ddr, dist_v, index_carry);
3427 add_outer_distances (ddr, opposite_v, index_carry);
3428 }
3429 else
3430 save_dist_v (ddr, save_v);
3431 }
3432 }
3433 else
3434 {
3435 /* There is a distance of 1 on all the outer loops: Example:
3436 there is a dependence of distance 1 on loop_1 for the array A.
3437
3438 | loop_1
3439 | A[5] = ...
3440 | endloop
3441 */
3442 add_outer_distances (ddr, dist_v,
3443 lambda_vector_first_nz (dist_v,
3444 DDR_NB_LOOPS (ddr), 0));
3445 }
3446
3447 if (dump_file && (dump_flags & TDF_DETAILS))
3448 {
3449 unsigned i;
3450
3451 fprintf (dump_file, "(build_classic_dist_vector\n");
3452 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3453 {
3454 fprintf (dump_file, " dist_vector = (");
3455 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3456 DDR_NB_LOOPS (ddr));
3457 fprintf (dump_file, " )\n");
3458 }
3459 fprintf (dump_file, ")\n");
3460 }
3461
3462 return true;
3463 }
3464
3465 /* Return the direction for a given distance.
3466 FIXME: Computing dir this way is suboptimal, since dir can catch
3467 cases that dist is unable to represent. */
3468
3469 static inline enum data_dependence_direction
dir_from_dist(int dist)3470 dir_from_dist (int dist)
3471 {
3472 if (dist > 0)
3473 return dir_positive;
3474 else if (dist < 0)
3475 return dir_negative;
3476 else
3477 return dir_equal;
3478 }
3479
3480 /* Compute the classic per loop direction vector. DDR is the data
3481 dependence relation to build a vector from. */
3482
3483 static void
build_classic_dir_vector(struct data_dependence_relation * ddr)3484 build_classic_dir_vector (struct data_dependence_relation *ddr)
3485 {
3486 unsigned i, j;
3487 lambda_vector dist_v;
3488
3489 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3490 {
3491 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3492
3493 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3494 dir_v[j] = dir_from_dist (dist_v[j]);
3495
3496 save_dir_v (ddr, dir_v);
3497 }
3498 }
3499
3500 /* Helper function. Returns true when there is a dependence between
3501 data references DRA and DRB. */
3502
3503 static bool
subscript_dependence_tester_1(struct data_dependence_relation * ddr,struct data_reference * dra,struct data_reference * drb,struct loop * loop_nest)3504 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3505 struct data_reference *dra,
3506 struct data_reference *drb,
3507 struct loop *loop_nest)
3508 {
3509 unsigned int i;
3510 tree last_conflicts;
3511 struct subscript *subscript;
3512 tree res = NULL_TREE;
3513
3514 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
3515 {
3516 conflict_function *overlaps_a, *overlaps_b;
3517
3518 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3519 DR_ACCESS_FN (drb, i),
3520 &overlaps_a, &overlaps_b,
3521 &last_conflicts, loop_nest);
3522
3523 if (SUB_CONFLICTS_IN_A (subscript))
3524 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3525 if (SUB_CONFLICTS_IN_B (subscript))
3526 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3527
3528 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3529 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3530 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3531
3532 /* If there is any undetermined conflict function we have to
3533 give a conservative answer in case we cannot prove that
3534 no dependence exists when analyzing another subscript. */
3535 if (CF_NOT_KNOWN_P (overlaps_a)
3536 || CF_NOT_KNOWN_P (overlaps_b))
3537 {
3538 res = chrec_dont_know;
3539 continue;
3540 }
3541
3542 /* When there is a subscript with no dependence we can stop. */
3543 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3544 || CF_NO_DEPENDENCE_P (overlaps_b))
3545 {
3546 res = chrec_known;
3547 break;
3548 }
3549 }
3550
3551 if (res == NULL_TREE)
3552 return true;
3553
3554 if (res == chrec_known)
3555 dependence_stats.num_dependence_independent++;
3556 else
3557 dependence_stats.num_dependence_undetermined++;
3558 finalize_ddr_dependent (ddr, res);
3559 return false;
3560 }
3561
3562 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3563
3564 static void
subscript_dependence_tester(struct data_dependence_relation * ddr,struct loop * loop_nest)3565 subscript_dependence_tester (struct data_dependence_relation *ddr,
3566 struct loop *loop_nest)
3567 {
3568 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3569 dependence_stats.num_dependence_dependent++;
3570
3571 compute_subscript_distance (ddr);
3572 if (build_classic_dist_vector (ddr, loop_nest))
3573 build_classic_dir_vector (ddr);
3574 }
3575
3576 /* Returns true when all the access functions of A are affine or
3577 constant with respect to LOOP_NEST. */
3578
3579 static bool
access_functions_are_affine_or_constant_p(const struct data_reference * a,const struct loop * loop_nest)3580 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3581 const struct loop *loop_nest)
3582 {
3583 unsigned int i;
3584 vec<tree> fns = DR_ACCESS_FNS (a);
3585 tree t;
3586
3587 FOR_EACH_VEC_ELT (fns, i, t)
3588 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3589 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3590 return false;
3591
3592 return true;
3593 }
3594
3595 /* Initializes an equation for an OMEGA problem using the information
3596 contained in the ACCESS_FUN. Returns true when the operation
3597 succeeded.
3598
3599 PB is the omega constraint system.
3600 EQ is the number of the equation to be initialized.
3601 OFFSET is used for shifting the variables names in the constraints:
3602 a constrain is composed of 2 * the number of variables surrounding
3603 dependence accesses. OFFSET is set either to 0 for the first n variables,
3604 then it is set to n.
3605 ACCESS_FUN is expected to be an affine chrec. */
3606
3607 static bool
init_omega_eq_with_af(omega_pb pb,unsigned eq,unsigned int offset,tree access_fun,struct data_dependence_relation * ddr)3608 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3609 unsigned int offset, tree access_fun,
3610 struct data_dependence_relation *ddr)
3611 {
3612 switch (TREE_CODE (access_fun))
3613 {
3614 case POLYNOMIAL_CHREC:
3615 {
3616 tree left = CHREC_LEFT (access_fun);
3617 tree right = CHREC_RIGHT (access_fun);
3618 int var = CHREC_VARIABLE (access_fun);
3619 unsigned var_idx;
3620
3621 if (TREE_CODE (right) != INTEGER_CST)
3622 return false;
3623
3624 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3625 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3626
3627 /* Compute the innermost loop index. */
3628 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3629
3630 if (offset == 0)
3631 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3632 += int_cst_value (right);
3633
3634 switch (TREE_CODE (left))
3635 {
3636 case POLYNOMIAL_CHREC:
3637 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3638
3639 case INTEGER_CST:
3640 pb->eqs[eq].coef[0] += int_cst_value (left);
3641 return true;
3642
3643 default:
3644 return false;
3645 }
3646 }
3647
3648 case INTEGER_CST:
3649 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3650 return true;
3651
3652 default:
3653 return false;
3654 }
3655 }
3656
3657 /* As explained in the comments preceding init_omega_for_ddr, we have
3658 to set up a system for each loop level, setting outer loops
3659 variation to zero, and current loop variation to positive or zero.
3660 Save each lexico positive distance vector. */
3661
3662 static void
omega_extract_distance_vectors(omega_pb pb,struct data_dependence_relation * ddr)3663 omega_extract_distance_vectors (omega_pb pb,
3664 struct data_dependence_relation *ddr)
3665 {
3666 int eq, geq;
3667 unsigned i, j;
3668 struct loop *loopi, *loopj;
3669 enum omega_result res;
3670
3671 /* Set a new problem for each loop in the nest. The basis is the
3672 problem that we have initialized until now. On top of this we
3673 add new constraints. */
3674 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3675 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3676 {
3677 int dist = 0;
3678 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3679 DDR_NB_LOOPS (ddr));
3680
3681 omega_copy_problem (copy, pb);
3682
3683 /* For all the outer loops "loop_j", add "dj = 0". */
3684 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3685 {
3686 eq = omega_add_zero_eq (copy, omega_black);
3687 copy->eqs[eq].coef[j + 1] = 1;
3688 }
3689
3690 /* For "loop_i", add "0 <= di". */
3691 geq = omega_add_zero_geq (copy, omega_black);
3692 copy->geqs[geq].coef[i + 1] = 1;
3693
3694 /* Reduce the constraint system, and test that the current
3695 problem is feasible. */
3696 res = omega_simplify_problem (copy);
3697 if (res == omega_false
3698 || res == omega_unknown
3699 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3700 goto next_problem;
3701
3702 for (eq = 0; eq < copy->num_subs; eq++)
3703 if (copy->subs[eq].key == (int) i + 1)
3704 {
3705 dist = copy->subs[eq].coef[0];
3706 goto found_dist;
3707 }
3708
3709 if (dist == 0)
3710 {
3711 /* Reinitialize problem... */
3712 omega_copy_problem (copy, pb);
3713 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3714 {
3715 eq = omega_add_zero_eq (copy, omega_black);
3716 copy->eqs[eq].coef[j + 1] = 1;
3717 }
3718
3719 /* ..., but this time "di = 1". */
3720 eq = omega_add_zero_eq (copy, omega_black);
3721 copy->eqs[eq].coef[i + 1] = 1;
3722 copy->eqs[eq].coef[0] = -1;
3723
3724 res = omega_simplify_problem (copy);
3725 if (res == omega_false
3726 || res == omega_unknown
3727 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3728 goto next_problem;
3729
3730 for (eq = 0; eq < copy->num_subs; eq++)
3731 if (copy->subs[eq].key == (int) i + 1)
3732 {
3733 dist = copy->subs[eq].coef[0];
3734 goto found_dist;
3735 }
3736 }
3737
3738 found_dist:;
3739 /* Save the lexicographically positive distance vector. */
3740 if (dist >= 0)
3741 {
3742 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3743 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3744
3745 dist_v[i] = dist;
3746
3747 for (eq = 0; eq < copy->num_subs; eq++)
3748 if (copy->subs[eq].key > 0)
3749 {
3750 dist = copy->subs[eq].coef[0];
3751 dist_v[copy->subs[eq].key - 1] = dist;
3752 }
3753
3754 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3755 dir_v[j] = dir_from_dist (dist_v[j]);
3756
3757 save_dist_v (ddr, dist_v);
3758 save_dir_v (ddr, dir_v);
3759 }
3760
3761 next_problem:;
3762 omega_free_problem (copy);
3763 }
3764 }
3765
3766 /* This is called for each subscript of a tuple of data references:
3767 insert an equality for representing the conflicts. */
3768
3769 static bool
omega_setup_subscript(tree access_fun_a,tree access_fun_b,struct data_dependence_relation * ddr,omega_pb pb,bool * maybe_dependent)3770 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3771 struct data_dependence_relation *ddr,
3772 omega_pb pb, bool *maybe_dependent)
3773 {
3774 int eq;
3775 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3776 TREE_TYPE (access_fun_b));
3777 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3778 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3779 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3780 tree minus_one;
3781
3782 /* When the fun_a - fun_b is not constant, the dependence is not
3783 captured by the classic distance vector representation. */
3784 if (TREE_CODE (difference) != INTEGER_CST)
3785 return false;
3786
3787 /* ZIV test. */
3788 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3789 {
3790 /* There is no dependence. */
3791 *maybe_dependent = false;
3792 return true;
3793 }
3794
3795 minus_one = build_int_cst (type, -1);
3796 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3797
3798 eq = omega_add_zero_eq (pb, omega_black);
3799 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3800 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3801 /* There is probably a dependence, but the system of
3802 constraints cannot be built: answer "don't know". */
3803 return false;
3804
3805 /* GCD test. */
3806 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3807 && !int_divides_p (lambda_vector_gcd
3808 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3809 2 * DDR_NB_LOOPS (ddr)),
3810 pb->eqs[eq].coef[0]))
3811 {
3812 /* There is no dependence. */
3813 *maybe_dependent = false;
3814 return true;
3815 }
3816
3817 return true;
3818 }
3819
3820 /* Helper function, same as init_omega_for_ddr but specialized for
3821 data references A and B. */
3822
3823 static bool
init_omega_for_ddr_1(struct data_reference * dra,struct data_reference * drb,struct data_dependence_relation * ddr,omega_pb pb,bool * maybe_dependent)3824 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3825 struct data_dependence_relation *ddr,
3826 omega_pb pb, bool *maybe_dependent)
3827 {
3828 unsigned i;
3829 int ineq;
3830 struct loop *loopi;
3831 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3832
3833 /* Insert an equality per subscript. */
3834 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3835 {
3836 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3837 ddr, pb, maybe_dependent))
3838 return false;
3839 else if (*maybe_dependent == false)
3840 {
3841 /* There is no dependence. */
3842 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3843 return true;
3844 }
3845 }
3846
3847 /* Insert inequalities: constraints corresponding to the iteration
3848 domain, i.e. the loops surrounding the references "loop_x" and
3849 the distance variables "dx". The layout of the OMEGA
3850 representation is as follows:
3851 - coef[0] is the constant
3852 - coef[1..nb_loops] are the protected variables that will not be
3853 removed by the solver: the "dx"
3854 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3855 */
3856 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3857 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3858 {
3859 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3860
3861 /* 0 <= loop_x */
3862 ineq = omega_add_zero_geq (pb, omega_black);
3863 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3864
3865 /* 0 <= loop_x + dx */
3866 ineq = omega_add_zero_geq (pb, omega_black);
3867 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3868 pb->geqs[ineq].coef[i + 1] = 1;
3869
3870 if (nbi != -1)
3871 {
3872 /* loop_x <= nb_iters */
3873 ineq = omega_add_zero_geq (pb, omega_black);
3874 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3875 pb->geqs[ineq].coef[0] = nbi;
3876
3877 /* loop_x + dx <= nb_iters */
3878 ineq = omega_add_zero_geq (pb, omega_black);
3879 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3880 pb->geqs[ineq].coef[i + 1] = -1;
3881 pb->geqs[ineq].coef[0] = nbi;
3882
3883 /* A step "dx" bigger than nb_iters is not feasible, so
3884 add "0 <= nb_iters + dx", */
3885 ineq = omega_add_zero_geq (pb, omega_black);
3886 pb->geqs[ineq].coef[i + 1] = 1;
3887 pb->geqs[ineq].coef[0] = nbi;
3888 /* and "dx <= nb_iters". */
3889 ineq = omega_add_zero_geq (pb, omega_black);
3890 pb->geqs[ineq].coef[i + 1] = -1;
3891 pb->geqs[ineq].coef[0] = nbi;
3892 }
3893 }
3894
3895 omega_extract_distance_vectors (pb, ddr);
3896
3897 return true;
3898 }
3899
3900 /* Sets up the Omega dependence problem for the data dependence
3901 relation DDR. Returns false when the constraint system cannot be
3902 built, ie. when the test answers "don't know". Returns true
3903 otherwise, and when independence has been proved (using one of the
3904 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3905 set MAYBE_DEPENDENT to true.
3906
3907 Example: for setting up the dependence system corresponding to the
3908 conflicting accesses
3909
3910 | loop_i
3911 | loop_j
3912 | A[i, i+1] = ...
3913 | ... A[2*j, 2*(i + j)]
3914 | endloop_j
3915 | endloop_i
3916
3917 the following constraints come from the iteration domain:
3918
3919 0 <= i <= Ni
3920 0 <= i + di <= Ni
3921 0 <= j <= Nj
3922 0 <= j + dj <= Nj
3923
3924 where di, dj are the distance variables. The constraints
3925 representing the conflicting elements are:
3926
3927 i = 2 * (j + dj)
3928 i + 1 = 2 * (i + di + j + dj)
3929
3930 For asking that the resulting distance vector (di, dj) be
3931 lexicographically positive, we insert the constraint "di >= 0". If
3932 "di = 0" in the solution, we fix that component to zero, and we
3933 look at the inner loops: we set a new problem where all the outer
3934 loop distances are zero, and fix this inner component to be
3935 positive. When one of the components is positive, we save that
3936 distance, and set a new problem where the distance on this loop is
3937 zero, searching for other distances in the inner loops. Here is
3938 the classic example that illustrates that we have to set for each
3939 inner loop a new problem:
3940
3941 | loop_1
3942 | loop_2
3943 | A[10]
3944 | endloop_2
3945 | endloop_1
3946
3947 we have to save two distances (1, 0) and (0, 1).
3948
3949 Given two array references, refA and refB, we have to set the
3950 dependence problem twice, refA vs. refB and refB vs. refA, and we
3951 cannot do a single test, as refB might occur before refA in the
3952 inner loops, and the contrary when considering outer loops: ex.
3953
3954 | loop_0
3955 | loop_1
3956 | loop_2
3957 | T[{1,+,1}_2][{1,+,1}_1] // refA
3958 | T[{2,+,1}_2][{0,+,1}_1] // refB
3959 | endloop_2
3960 | endloop_1
3961 | endloop_0
3962
3963 refB touches the elements in T before refA, and thus for the same
3964 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3965 but for successive loop_0 iterations, we have (1, -1, 1)
3966
3967 The Omega solver expects the distance variables ("di" in the
3968 previous example) to come first in the constraint system (as
3969 variables to be protected, or "safe" variables), the constraint
3970 system is built using the following layout:
3971
3972 "cst | distance vars | index vars".
3973 */
3974
3975 static bool
init_omega_for_ddr(struct data_dependence_relation * ddr,bool * maybe_dependent)3976 init_omega_for_ddr (struct data_dependence_relation *ddr,
3977 bool *maybe_dependent)
3978 {
3979 omega_pb pb;
3980 bool res = false;
3981
3982 *maybe_dependent = true;
3983
3984 if (same_access_functions (ddr))
3985 {
3986 unsigned j;
3987 lambda_vector dir_v;
3988
3989 /* Save the 0 vector. */
3990 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3991 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3992 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3993 dir_v[j] = dir_equal;
3994 save_dir_v (ddr, dir_v);
3995
3996 /* Save the dependences carried by outer loops. */
3997 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3998 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3999 maybe_dependent);
4000 omega_free_problem (pb);
4001 return res;
4002 }
4003
4004 /* Omega expects the protected variables (those that have to be kept
4005 after elimination) to appear first in the constraint system.
4006 These variables are the distance variables. In the following
4007 initialization we declare NB_LOOPS safe variables, and the total
4008 number of variables for the constraint system is 2*NB_LOOPS. */
4009 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4010 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4011 maybe_dependent);
4012 omega_free_problem (pb);
4013
4014 /* Stop computation if not decidable, or no dependence. */
4015 if (res == false || *maybe_dependent == false)
4016 return res;
4017
4018 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4019 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4020 maybe_dependent);
4021 omega_free_problem (pb);
4022
4023 return res;
4024 }
4025
4026 /* Return true when DDR contains the same information as that stored
4027 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4028
4029 static bool
ddr_consistent_p(FILE * file,struct data_dependence_relation * ddr,vec<lambda_vector> dist_vects,vec<lambda_vector> dir_vects)4030 ddr_consistent_p (FILE *file,
4031 struct data_dependence_relation *ddr,
4032 vec<lambda_vector> dist_vects,
4033 vec<lambda_vector> dir_vects)
4034 {
4035 unsigned int i, j;
4036
4037 /* If dump_file is set, output there. */
4038 if (dump_file && (dump_flags & TDF_DETAILS))
4039 file = dump_file;
4040
4041 if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr))
4042 {
4043 lambda_vector b_dist_v;
4044 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4045 dist_vects.length (),
4046 DDR_NUM_DIST_VECTS (ddr));
4047
4048 fprintf (file, "Banerjee dist vectors:\n");
4049 FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v)
4050 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4051
4052 fprintf (file, "Omega dist vectors:\n");
4053 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4054 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4055
4056 fprintf (file, "data dependence relation:\n");
4057 dump_data_dependence_relation (file, ddr);
4058
4059 fprintf (file, ")\n");
4060 return false;
4061 }
4062
4063 if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr))
4064 {
4065 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4066 dir_vects.length (),
4067 DDR_NUM_DIR_VECTS (ddr));
4068 return false;
4069 }
4070
4071 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4072 {
4073 lambda_vector a_dist_v;
4074 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4075
4076 /* Distance vectors are not ordered in the same way in the DDR
4077 and in the DIST_VECTS: search for a matching vector. */
4078 FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v)
4079 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4080 break;
4081
4082 if (j == dist_vects.length ())
4083 {
4084 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4085 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4086 fprintf (file, "not found in Omega dist vectors:\n");
4087 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4088 fprintf (file, "data dependence relation:\n");
4089 dump_data_dependence_relation (file, ddr);
4090 fprintf (file, ")\n");
4091 }
4092 }
4093
4094 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4095 {
4096 lambda_vector a_dir_v;
4097 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4098
4099 /* Direction vectors are not ordered in the same way in the DDR
4100 and in the DIR_VECTS: search for a matching vector. */
4101 FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v)
4102 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4103 break;
4104
4105 if (j == dist_vects.length ())
4106 {
4107 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4108 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4109 fprintf (file, "not found in Omega dir vectors:\n");
4110 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4111 fprintf (file, "data dependence relation:\n");
4112 dump_data_dependence_relation (file, ddr);
4113 fprintf (file, ")\n");
4114 }
4115 }
4116
4117 return true;
4118 }
4119
4120 /* This computes the affine dependence relation between A and B with
4121 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4122 independence between two accesses, while CHREC_DONT_KNOW is used
4123 for representing the unknown relation.
4124
4125 Note that it is possible to stop the computation of the dependence
4126 relation the first time we detect a CHREC_KNOWN element for a given
4127 subscript. */
4128
4129 void
compute_affine_dependence(struct data_dependence_relation * ddr,struct loop * loop_nest)4130 compute_affine_dependence (struct data_dependence_relation *ddr,
4131 struct loop *loop_nest)
4132 {
4133 struct data_reference *dra = DDR_A (ddr);
4134 struct data_reference *drb = DDR_B (ddr);
4135
4136 if (dump_file && (dump_flags & TDF_DETAILS))
4137 {
4138 fprintf (dump_file, "(compute_affine_dependence\n");
4139 fprintf (dump_file, " stmt_a: ");
4140 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4141 fprintf (dump_file, " stmt_b: ");
4142 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4143 }
4144
4145 /* Analyze only when the dependence relation is not yet known. */
4146 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4147 {
4148 dependence_stats.num_dependence_tests++;
4149
4150 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4151 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4152 {
4153 subscript_dependence_tester (ddr, loop_nest);
4154
4155 if (flag_check_data_deps)
4156 {
4157 /* Dump the dependences from the first algorithm. */
4158 if (dump_file && (dump_flags & TDF_DETAILS))
4159 {
4160 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4161 dump_data_dependence_relation (dump_file, ddr);
4162 }
4163
4164 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4165 {
4166 bool maybe_dependent;
4167 vec<lambda_vector> dir_vects, dist_vects;
4168
4169 /* Save the result of the first DD analyzer. */
4170 dist_vects = DDR_DIST_VECTS (ddr);
4171 dir_vects = DDR_DIR_VECTS (ddr);
4172
4173 /* Reset the information. */
4174 DDR_DIST_VECTS (ddr).create (0);
4175 DDR_DIR_VECTS (ddr).create (0);
4176
4177 /* Compute the same information using Omega. */
4178 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4179 goto csys_dont_know;
4180
4181 if (dump_file && (dump_flags & TDF_DETAILS))
4182 {
4183 fprintf (dump_file, "Omega Analyzer\n");
4184 dump_data_dependence_relation (dump_file, ddr);
4185 }
4186
4187 /* Check that we get the same information. */
4188 if (maybe_dependent)
4189 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4190 dir_vects));
4191 }
4192 }
4193 }
4194
4195 /* As a last case, if the dependence cannot be determined, or if
4196 the dependence is considered too difficult to determine, answer
4197 "don't know". */
4198 else
4199 {
4200 csys_dont_know:;
4201 dependence_stats.num_dependence_undetermined++;
4202
4203 if (dump_file && (dump_flags & TDF_DETAILS))
4204 {
4205 fprintf (dump_file, "Data ref a:\n");
4206 dump_data_reference (dump_file, dra);
4207 fprintf (dump_file, "Data ref b:\n");
4208 dump_data_reference (dump_file, drb);
4209 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4210 }
4211 finalize_ddr_dependent (ddr, chrec_dont_know);
4212 }
4213 }
4214
4215 if (dump_file && (dump_flags & TDF_DETAILS))
4216 {
4217 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4218 fprintf (dump_file, ") -> no dependence\n");
4219 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4220 fprintf (dump_file, ") -> dependence analysis failed\n");
4221 else
4222 fprintf (dump_file, ")\n");
4223 }
4224 }
4225
4226 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4227 the data references in DATAREFS, in the LOOP_NEST. When
4228 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4229 relations. Return true when successful, i.e. data references number
4230 is small enough to be handled. */
4231
4232 bool
compute_all_dependences(vec<data_reference_p> datarefs,vec<ddr_p> * dependence_relations,vec<loop_p> loop_nest,bool compute_self_and_rr)4233 compute_all_dependences (vec<data_reference_p> datarefs,
4234 vec<ddr_p> *dependence_relations,
4235 vec<loop_p> loop_nest,
4236 bool compute_self_and_rr)
4237 {
4238 struct data_dependence_relation *ddr;
4239 struct data_reference *a, *b;
4240 unsigned int i, j;
4241
4242 if ((int) datarefs.length ()
4243 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4244 {
4245 struct data_dependence_relation *ddr;
4246
4247 /* Insert a single relation into dependence_relations:
4248 chrec_dont_know. */
4249 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4250 dependence_relations->safe_push (ddr);
4251 return false;
4252 }
4253
4254 FOR_EACH_VEC_ELT (datarefs, i, a)
4255 for (j = i + 1; datarefs.iterate (j, &b); j++)
4256 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4257 {
4258 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4259 dependence_relations->safe_push (ddr);
4260 if (loop_nest.exists ())
4261 compute_affine_dependence (ddr, loop_nest[0]);
4262 }
4263
4264 if (compute_self_and_rr)
4265 FOR_EACH_VEC_ELT (datarefs, i, a)
4266 {
4267 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4268 dependence_relations->safe_push (ddr);
4269 if (loop_nest.exists ())
4270 compute_affine_dependence (ddr, loop_nest[0]);
4271 }
4272
4273 return true;
4274 }
4275
4276 /* Describes a location of a memory reference. */
4277
4278 typedef struct data_ref_loc_d
4279 {
4280 /* Position of the memory reference. */
4281 tree *pos;
4282
4283 /* True if the memory reference is read. */
4284 bool is_read;
4285 } data_ref_loc;
4286
4287
4288 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4289 true if STMT clobbers memory, false otherwise. */
4290
4291 static bool
get_references_in_stmt(gimple stmt,vec<data_ref_loc> * references)4292 get_references_in_stmt (gimple stmt, vec<data_ref_loc> *references)
4293 {
4294 bool clobbers_memory = false;
4295 data_ref_loc ref;
4296 tree *op0, *op1;
4297 enum gimple_code stmt_code = gimple_code (stmt);
4298
4299 references->create (0);
4300
4301 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4302 As we cannot model data-references to not spelled out
4303 accesses give up if they may occur. */
4304 if ((stmt_code == GIMPLE_CALL
4305 && !(gimple_call_flags (stmt) & ECF_CONST))
4306 || (stmt_code == GIMPLE_ASM
4307 && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt))))
4308 clobbers_memory = true;
4309
4310 if (!gimple_vuse (stmt))
4311 return clobbers_memory;
4312
4313 if (stmt_code == GIMPLE_ASSIGN)
4314 {
4315 tree base;
4316 op0 = gimple_assign_lhs_ptr (stmt);
4317 op1 = gimple_assign_rhs1_ptr (stmt);
4318
4319 if (DECL_P (*op1)
4320 || (REFERENCE_CLASS_P (*op1)
4321 && (base = get_base_address (*op1))
4322 && TREE_CODE (base) != SSA_NAME))
4323 {
4324 ref.pos = op1;
4325 ref.is_read = true;
4326 references->safe_push (ref);
4327 }
4328 }
4329 else if (stmt_code == GIMPLE_CALL)
4330 {
4331 unsigned i, n;
4332
4333 op0 = gimple_call_lhs_ptr (stmt);
4334 n = gimple_call_num_args (stmt);
4335 for (i = 0; i < n; i++)
4336 {
4337 op1 = gimple_call_arg_ptr (stmt, i);
4338
4339 if (DECL_P (*op1)
4340 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4341 {
4342 ref.pos = op1;
4343 ref.is_read = true;
4344 references->safe_push (ref);
4345 }
4346 }
4347 }
4348 else
4349 return clobbers_memory;
4350
4351 if (*op0
4352 && (DECL_P (*op0)
4353 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4354 {
4355 ref.pos = op0;
4356 ref.is_read = false;
4357 references->safe_push (ref);
4358 }
4359 return clobbers_memory;
4360 }
4361
4362 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4363 reference, returns false, otherwise returns true. NEST is the outermost
4364 loop of the loop nest in which the references should be analyzed. */
4365
4366 bool
find_data_references_in_stmt(struct loop * nest,gimple stmt,vec<data_reference_p> * datarefs)4367 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4368 vec<data_reference_p> *datarefs)
4369 {
4370 unsigned i;
4371 vec<data_ref_loc> references;
4372 data_ref_loc *ref;
4373 bool ret = true;
4374 data_reference_p dr;
4375
4376 if (get_references_in_stmt (stmt, &references))
4377 {
4378 references.release ();
4379 return false;
4380 }
4381
4382 FOR_EACH_VEC_ELT (references, i, ref)
4383 {
4384 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4385 *ref->pos, stmt, ref->is_read);
4386 gcc_assert (dr != NULL);
4387 datarefs->safe_push (dr);
4388 }
4389 references.release ();
4390 return ret;
4391 }
4392
4393 /* Stores the data references in STMT to DATAREFS. If there is an
4394 unanalyzable reference, returns false, otherwise returns true.
4395 NEST is the outermost loop of the loop nest in which the references
4396 should be instantiated, LOOP is the loop in which the references
4397 should be analyzed. */
4398
4399 bool
graphite_find_data_references_in_stmt(loop_p nest,loop_p loop,gimple stmt,vec<data_reference_p> * datarefs)4400 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4401 vec<data_reference_p> *datarefs)
4402 {
4403 unsigned i;
4404 vec<data_ref_loc> references;
4405 data_ref_loc *ref;
4406 bool ret = true;
4407 data_reference_p dr;
4408
4409 if (get_references_in_stmt (stmt, &references))
4410 {
4411 references.release ();
4412 return false;
4413 }
4414
4415 FOR_EACH_VEC_ELT (references, i, ref)
4416 {
4417 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4418 gcc_assert (dr != NULL);
4419 datarefs->safe_push (dr);
4420 }
4421
4422 references.release ();
4423 return ret;
4424 }
4425
4426 /* Search the data references in LOOP, and record the information into
4427 DATAREFS. Returns chrec_dont_know when failing to analyze a
4428 difficult case, returns NULL_TREE otherwise. */
4429
4430 tree
find_data_references_in_bb(struct loop * loop,basic_block bb,vec<data_reference_p> * datarefs)4431 find_data_references_in_bb (struct loop *loop, basic_block bb,
4432 vec<data_reference_p> *datarefs)
4433 {
4434 gimple_stmt_iterator bsi;
4435
4436 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4437 {
4438 gimple stmt = gsi_stmt (bsi);
4439
4440 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4441 {
4442 struct data_reference *res;
4443 res = XCNEW (struct data_reference);
4444 datarefs->safe_push (res);
4445
4446 return chrec_dont_know;
4447 }
4448 }
4449
4450 return NULL_TREE;
4451 }
4452
4453 /* Search the data references in LOOP, and record the information into
4454 DATAREFS. Returns chrec_dont_know when failing to analyze a
4455 difficult case, returns NULL_TREE otherwise.
4456
4457 TODO: This function should be made smarter so that it can handle address
4458 arithmetic as if they were array accesses, etc. */
4459
4460 static tree
find_data_references_in_loop(struct loop * loop,vec<data_reference_p> * datarefs)4461 find_data_references_in_loop (struct loop *loop,
4462 vec<data_reference_p> *datarefs)
4463 {
4464 basic_block bb, *bbs;
4465 unsigned int i;
4466
4467 bbs = get_loop_body_in_dom_order (loop);
4468
4469 for (i = 0; i < loop->num_nodes; i++)
4470 {
4471 bb = bbs[i];
4472
4473 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4474 {
4475 free (bbs);
4476 return chrec_dont_know;
4477 }
4478 }
4479 free (bbs);
4480
4481 return NULL_TREE;
4482 }
4483
4484 /* Recursive helper function. */
4485
4486 static bool
find_loop_nest_1(struct loop * loop,vec<loop_p> * loop_nest)4487 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4488 {
4489 /* Inner loops of the nest should not contain siblings. Example:
4490 when there are two consecutive loops,
4491
4492 | loop_0
4493 | loop_1
4494 | A[{0, +, 1}_1]
4495 | endloop_1
4496 | loop_2
4497 | A[{0, +, 1}_2]
4498 | endloop_2
4499 | endloop_0
4500
4501 the dependence relation cannot be captured by the distance
4502 abstraction. */
4503 if (loop->next)
4504 return false;
4505
4506 loop_nest->safe_push (loop);
4507 if (loop->inner)
4508 return find_loop_nest_1 (loop->inner, loop_nest);
4509 return true;
4510 }
4511
4512 /* Return false when the LOOP is not well nested. Otherwise return
4513 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4514 contain the loops from the outermost to the innermost, as they will
4515 appear in the classic distance vector. */
4516
4517 bool
find_loop_nest(struct loop * loop,vec<loop_p> * loop_nest)4518 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4519 {
4520 loop_nest->safe_push (loop);
4521 if (loop->inner)
4522 return find_loop_nest_1 (loop->inner, loop_nest);
4523 return true;
4524 }
4525
4526 /* Returns true when the data dependences have been computed, false otherwise.
4527 Given a loop nest LOOP, the following vectors are returned:
4528 DATAREFS is initialized to all the array elements contained in this loop,
4529 DEPENDENCE_RELATIONS contains the relations between the data references.
4530 Compute read-read and self relations if
4531 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4532
4533 bool
compute_data_dependences_for_loop(struct loop * loop,bool compute_self_and_read_read_dependences,vec<loop_p> * loop_nest,vec<data_reference_p> * datarefs,vec<ddr_p> * dependence_relations)4534 compute_data_dependences_for_loop (struct loop *loop,
4535 bool compute_self_and_read_read_dependences,
4536 vec<loop_p> *loop_nest,
4537 vec<data_reference_p> *datarefs,
4538 vec<ddr_p> *dependence_relations)
4539 {
4540 bool res = true;
4541
4542 memset (&dependence_stats, 0, sizeof (dependence_stats));
4543
4544 /* If the loop nest is not well formed, or one of the data references
4545 is not computable, give up without spending time to compute other
4546 dependences. */
4547 if (!loop
4548 || !find_loop_nest (loop, loop_nest)
4549 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4550 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4551 compute_self_and_read_read_dependences))
4552 res = false;
4553
4554 if (dump_file && (dump_flags & TDF_STATS))
4555 {
4556 fprintf (dump_file, "Dependence tester statistics:\n");
4557
4558 fprintf (dump_file, "Number of dependence tests: %d\n",
4559 dependence_stats.num_dependence_tests);
4560 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4561 dependence_stats.num_dependence_dependent);
4562 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4563 dependence_stats.num_dependence_independent);
4564 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4565 dependence_stats.num_dependence_undetermined);
4566
4567 fprintf (dump_file, "Number of subscript tests: %d\n",
4568 dependence_stats.num_subscript_tests);
4569 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4570 dependence_stats.num_subscript_undetermined);
4571 fprintf (dump_file, "Number of same subscript function: %d\n",
4572 dependence_stats.num_same_subscript_function);
4573
4574 fprintf (dump_file, "Number of ziv tests: %d\n",
4575 dependence_stats.num_ziv);
4576 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4577 dependence_stats.num_ziv_dependent);
4578 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4579 dependence_stats.num_ziv_independent);
4580 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4581 dependence_stats.num_ziv_unimplemented);
4582
4583 fprintf (dump_file, "Number of siv tests: %d\n",
4584 dependence_stats.num_siv);
4585 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4586 dependence_stats.num_siv_dependent);
4587 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4588 dependence_stats.num_siv_independent);
4589 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4590 dependence_stats.num_siv_unimplemented);
4591
4592 fprintf (dump_file, "Number of miv tests: %d\n",
4593 dependence_stats.num_miv);
4594 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4595 dependence_stats.num_miv_dependent);
4596 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4597 dependence_stats.num_miv_independent);
4598 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4599 dependence_stats.num_miv_unimplemented);
4600 }
4601
4602 return res;
4603 }
4604
4605 /* Returns true when the data dependences for the basic block BB have been
4606 computed, false otherwise.
4607 DATAREFS is initialized to all the array elements contained in this basic
4608 block, DEPENDENCE_RELATIONS contains the relations between the data
4609 references. Compute read-read and self relations if
4610 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4611 bool
compute_data_dependences_for_bb(basic_block bb,bool compute_self_and_read_read_dependences,vec<data_reference_p> * datarefs,vec<ddr_p> * dependence_relations)4612 compute_data_dependences_for_bb (basic_block bb,
4613 bool compute_self_and_read_read_dependences,
4614 vec<data_reference_p> *datarefs,
4615 vec<ddr_p> *dependence_relations)
4616 {
4617 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4618 return false;
4619
4620 return compute_all_dependences (*datarefs, dependence_relations, vNULL,
4621 compute_self_and_read_read_dependences);
4622 }
4623
4624 /* Entry point (for testing only). Analyze all the data references
4625 and the dependence relations in LOOP.
4626
4627 The data references are computed first.
4628
4629 A relation on these nodes is represented by a complete graph. Some
4630 of the relations could be of no interest, thus the relations can be
4631 computed on demand.
4632
4633 In the following function we compute all the relations. This is
4634 just a first implementation that is here for:
4635 - for showing how to ask for the dependence relations,
4636 - for the debugging the whole dependence graph,
4637 - for the dejagnu testcases and maintenance.
4638
4639 It is possible to ask only for a part of the graph, avoiding to
4640 compute the whole dependence graph. The computed dependences are
4641 stored in a knowledge base (KB) such that later queries don't
4642 recompute the same information. The implementation of this KB is
4643 transparent to the optimizer, and thus the KB can be changed with a
4644 more efficient implementation, or the KB could be disabled. */
4645 static void
analyze_all_data_dependences(struct loop * loop)4646 analyze_all_data_dependences (struct loop *loop)
4647 {
4648 unsigned int i;
4649 int nb_data_refs = 10;
4650 vec<data_reference_p> datarefs;
4651 datarefs.create (nb_data_refs);
4652 vec<ddr_p> dependence_relations;
4653 dependence_relations.create (nb_data_refs * nb_data_refs);
4654 vec<loop_p> loop_nest;
4655 loop_nest.create (3);
4656
4657 /* Compute DDs on the whole function. */
4658 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4659 &dependence_relations);
4660
4661 if (dump_file)
4662 {
4663 dump_data_dependence_relations (dump_file, dependence_relations);
4664 fprintf (dump_file, "\n\n");
4665
4666 if (dump_flags & TDF_DETAILS)
4667 dump_dist_dir_vectors (dump_file, dependence_relations);
4668
4669 if (dump_flags & TDF_STATS)
4670 {
4671 unsigned nb_top_relations = 0;
4672 unsigned nb_bot_relations = 0;
4673 unsigned nb_chrec_relations = 0;
4674 struct data_dependence_relation *ddr;
4675
4676 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4677 {
4678 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4679 nb_top_relations++;
4680
4681 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4682 nb_bot_relations++;
4683
4684 else
4685 nb_chrec_relations++;
4686 }
4687
4688 gather_stats_on_scev_database ();
4689 }
4690 }
4691
4692 loop_nest.release ();
4693 free_dependence_relations (dependence_relations);
4694 free_data_refs (datarefs);
4695 }
4696
4697 /* Computes all the data dependences and check that the results of
4698 several analyzers are the same. */
4699
4700 void
tree_check_data_deps(void)4701 tree_check_data_deps (void)
4702 {
4703 loop_iterator li;
4704 struct loop *loop_nest;
4705
4706 FOR_EACH_LOOP (li, loop_nest, 0)
4707 analyze_all_data_dependences (loop_nest);
4708 }
4709
4710 /* Free the memory used by a data dependence relation DDR. */
4711
4712 void
free_dependence_relation(struct data_dependence_relation * ddr)4713 free_dependence_relation (struct data_dependence_relation *ddr)
4714 {
4715 if (ddr == NULL)
4716 return;
4717
4718 if (DDR_SUBSCRIPTS (ddr).exists ())
4719 free_subscripts (DDR_SUBSCRIPTS (ddr));
4720 DDR_DIST_VECTS (ddr).release ();
4721 DDR_DIR_VECTS (ddr).release ();
4722
4723 free (ddr);
4724 }
4725
4726 /* Free the memory used by the data dependence relations from
4727 DEPENDENCE_RELATIONS. */
4728
4729 void
free_dependence_relations(vec<ddr_p> dependence_relations)4730 free_dependence_relations (vec<ddr_p> dependence_relations)
4731 {
4732 unsigned int i;
4733 struct data_dependence_relation *ddr;
4734
4735 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4736 if (ddr)
4737 free_dependence_relation (ddr);
4738
4739 dependence_relations.release ();
4740 }
4741
4742 /* Free the memory used by the data references from DATAREFS. */
4743
4744 void
free_data_refs(vec<data_reference_p> datarefs)4745 free_data_refs (vec<data_reference_p> datarefs)
4746 {
4747 unsigned int i;
4748 struct data_reference *dr;
4749
4750 FOR_EACH_VEC_ELT (datarefs, i, dr)
4751 free_data_ref (dr);
4752 datarefs.release ();
4753 }
4754
4755
4756
4757 /* Dump vertex I in RDG to FILE. */
4758
4759 static void
dump_rdg_vertex(FILE * file,struct graph * rdg,int i)4760 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4761 {
4762 struct vertex *v = &(rdg->vertices[i]);
4763 struct graph_edge *e;
4764
4765 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4766 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4767 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4768
4769 if (v->pred)
4770 for (e = v->pred; e; e = e->pred_next)
4771 fprintf (file, " %d", e->src);
4772
4773 fprintf (file, ") (out:");
4774
4775 if (v->succ)
4776 for (e = v->succ; e; e = e->succ_next)
4777 fprintf (file, " %d", e->dest);
4778
4779 fprintf (file, ")\n");
4780 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4781 fprintf (file, ")\n");
4782 }
4783
4784 /* Call dump_rdg_vertex on stderr. */
4785
4786 DEBUG_FUNCTION void
debug_rdg_vertex(struct graph * rdg,int i)4787 debug_rdg_vertex (struct graph *rdg, int i)
4788 {
4789 dump_rdg_vertex (stderr, rdg, i);
4790 }
4791
4792 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4793 dumped vertices to that bitmap. */
4794
4795 static void
dump_rdg_component(FILE * file,struct graph * rdg,int c,bitmap dumped)4796 dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4797 {
4798 int i;
4799
4800 fprintf (file, "(%d\n", c);
4801
4802 for (i = 0; i < rdg->n_vertices; i++)
4803 if (rdg->vertices[i].component == c)
4804 {
4805 if (dumped)
4806 bitmap_set_bit (dumped, i);
4807
4808 dump_rdg_vertex (file, rdg, i);
4809 }
4810
4811 fprintf (file, ")\n");
4812 }
4813
4814 /* Call dump_rdg_vertex on stderr. */
4815
4816 DEBUG_FUNCTION void
debug_rdg_component(struct graph * rdg,int c)4817 debug_rdg_component (struct graph *rdg, int c)
4818 {
4819 dump_rdg_component (stderr, rdg, c, NULL);
4820 }
4821
4822 /* Dump the reduced dependence graph RDG to FILE. */
4823
4824 void
dump_rdg(FILE * file,struct graph * rdg)4825 dump_rdg (FILE *file, struct graph *rdg)
4826 {
4827 int i;
4828 bitmap dumped = BITMAP_ALLOC (NULL);
4829
4830 fprintf (file, "(rdg\n");
4831
4832 for (i = 0; i < rdg->n_vertices; i++)
4833 if (!bitmap_bit_p (dumped, i))
4834 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4835
4836 fprintf (file, ")\n");
4837 BITMAP_FREE (dumped);
4838 }
4839
4840 /* Call dump_rdg on stderr. */
4841
4842 DEBUG_FUNCTION void
debug_rdg(struct graph * rdg)4843 debug_rdg (struct graph *rdg)
4844 {
4845 dump_rdg (stderr, rdg);
4846 }
4847
4848 static void
dot_rdg_1(FILE * file,struct graph * rdg)4849 dot_rdg_1 (FILE *file, struct graph *rdg)
4850 {
4851 int i;
4852
4853 fprintf (file, "digraph RDG {\n");
4854
4855 for (i = 0; i < rdg->n_vertices; i++)
4856 {
4857 struct vertex *v = &(rdg->vertices[i]);
4858 struct graph_edge *e;
4859
4860 /* Highlight reads from memory. */
4861 if (RDG_MEM_READS_STMT (rdg, i))
4862 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4863
4864 /* Highlight stores to memory. */
4865 if (RDG_MEM_WRITE_STMT (rdg, i))
4866 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4867
4868 if (v->succ)
4869 for (e = v->succ; e; e = e->succ_next)
4870 switch (RDGE_TYPE (e))
4871 {
4872 case input_dd:
4873 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4874 break;
4875
4876 case output_dd:
4877 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4878 break;
4879
4880 case flow_dd:
4881 /* These are the most common dependences: don't print these. */
4882 fprintf (file, "%d -> %d \n", i, e->dest);
4883 break;
4884
4885 case anti_dd:
4886 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4887 break;
4888
4889 default:
4890 gcc_unreachable ();
4891 }
4892 }
4893
4894 fprintf (file, "}\n\n");
4895 }
4896
4897 /* Display the Reduced Dependence Graph using dotty. */
4898 extern void dot_rdg (struct graph *);
4899
4900 DEBUG_FUNCTION void
dot_rdg(struct graph * rdg)4901 dot_rdg (struct graph *rdg)
4902 {
4903 /* When debugging, enable the following code. This cannot be used
4904 in production compilers because it calls "system". */
4905 #if 0
4906 FILE *file = fopen ("/tmp/rdg.dot", "w");
4907 gcc_assert (file != NULL);
4908
4909 dot_rdg_1 (file, rdg);
4910 fclose (file);
4911
4912 system ("dotty /tmp/rdg.dot &");
4913 #else
4914 dot_rdg_1 (stderr, rdg);
4915 #endif
4916 }
4917
4918 /* Returns the index of STMT in RDG. */
4919
4920 int
rdg_vertex_for_stmt(struct graph * rdg ATTRIBUTE_UNUSED,gimple stmt)4921 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
4922 {
4923 int index = gimple_uid (stmt);
4924 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
4925 return index;
4926 }
4927
4928 /* Creates an edge in RDG for each distance vector from DDR. The
4929 order that we keep track of in the RDG is the order in which
4930 statements have to be executed. */
4931
4932 static void
create_rdg_edge_for_ddr(struct graph * rdg,ddr_p ddr)4933 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4934 {
4935 struct graph_edge *e;
4936 int va, vb;
4937 data_reference_p dra = DDR_A (ddr);
4938 data_reference_p drb = DDR_B (ddr);
4939 unsigned level = ddr_dependence_level (ddr);
4940
4941 /* For non scalar dependences, when the dependence is REVERSED,
4942 statement B has to be executed before statement A. */
4943 if (level > 0
4944 && !DDR_REVERSED_P (ddr))
4945 {
4946 data_reference_p tmp = dra;
4947 dra = drb;
4948 drb = tmp;
4949 }
4950
4951 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4952 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4953
4954 if (va < 0 || vb < 0)
4955 return;
4956
4957 e = add_edge (rdg, va, vb);
4958 e->data = XNEW (struct rdg_edge);
4959
4960 RDGE_LEVEL (e) = level;
4961 RDGE_RELATION (e) = ddr;
4962
4963 /* Determines the type of the data dependence. */
4964 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4965 RDGE_TYPE (e) = input_dd;
4966 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4967 RDGE_TYPE (e) = output_dd;
4968 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4969 RDGE_TYPE (e) = flow_dd;
4970 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4971 RDGE_TYPE (e) = anti_dd;
4972 }
4973
4974 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4975 the index of DEF in RDG. */
4976
4977 static void
create_rdg_edges_for_scalar(struct graph * rdg,tree def,int idef)4978 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4979 {
4980 use_operand_p imm_use_p;
4981 imm_use_iterator iterator;
4982
4983 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4984 {
4985 struct graph_edge *e;
4986 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4987
4988 if (use < 0)
4989 continue;
4990
4991 e = add_edge (rdg, idef, use);
4992 e->data = XNEW (struct rdg_edge);
4993 RDGE_TYPE (e) = flow_dd;
4994 RDGE_RELATION (e) = NULL;
4995 }
4996 }
4997
4998 /* Creates the edges of the reduced dependence graph RDG. */
4999
5000 static void
create_rdg_edges(struct graph * rdg,vec<ddr_p> ddrs)5001 create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs)
5002 {
5003 int i;
5004 struct data_dependence_relation *ddr;
5005 def_operand_p def_p;
5006 ssa_op_iter iter;
5007
5008 FOR_EACH_VEC_ELT (ddrs, i, ddr)
5009 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5010 create_rdg_edge_for_ddr (rdg, ddr);
5011
5012 for (i = 0; i < rdg->n_vertices; i++)
5013 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
5014 iter, SSA_OP_DEF)
5015 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
5016 }
5017
5018 /* Build the vertices of the reduced dependence graph RDG. */
5019
5020 static void
create_rdg_vertices(struct graph * rdg,vec<gimple> stmts,loop_p loop)5021 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop)
5022 {
5023 int i, j;
5024 gimple stmt;
5025
5026 FOR_EACH_VEC_ELT (stmts, i, stmt)
5027 {
5028 vec<data_ref_loc> references;
5029 data_ref_loc *ref;
5030 struct vertex *v = &(rdg->vertices[i]);
5031
5032 /* Record statement to vertex mapping. */
5033 gimple_set_uid (stmt, i);
5034
5035 v->data = XNEW (struct rdg_vertex);
5036 RDGV_STMT (v) = stmt;
5037 RDGV_DATAREFS (v).create (0);
5038 RDGV_HAS_MEM_WRITE (v) = false;
5039 RDGV_HAS_MEM_READS (v) = false;
5040 if (gimple_code (stmt) == GIMPLE_PHI)
5041 continue;
5042
5043 get_references_in_stmt (stmt, &references);
5044 FOR_EACH_VEC_ELT (references, j, ref)
5045 {
5046 data_reference_p dr;
5047 if (!ref->is_read)
5048 RDGV_HAS_MEM_WRITE (v) = true;
5049 else
5050 RDGV_HAS_MEM_READS (v) = true;
5051 dr = create_data_ref (loop, loop_containing_stmt (stmt),
5052 *ref->pos, stmt, ref->is_read);
5053 if (dr)
5054 RDGV_DATAREFS (v).safe_push (dr);
5055 }
5056 references.release ();
5057 }
5058 }
5059
5060 /* Initialize STMTS with all the statements of LOOP. When
5061 INCLUDE_PHIS is true, include also the PHI nodes. The order in
5062 which we discover statements is important as
5063 generate_loops_for_partition is using the same traversal for
5064 identifying statements. */
5065
5066 static void
stmts_from_loop(struct loop * loop,vec<gimple> * stmts)5067 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
5068 {
5069 unsigned int i;
5070 basic_block *bbs = get_loop_body_in_dom_order (loop);
5071
5072 for (i = 0; i < loop->num_nodes; i++)
5073 {
5074 basic_block bb = bbs[i];
5075 gimple_stmt_iterator bsi;
5076 gimple stmt;
5077
5078 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5079 stmts->safe_push (gsi_stmt (bsi));
5080
5081 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5082 {
5083 stmt = gsi_stmt (bsi);
5084 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
5085 stmts->safe_push (stmt);
5086 }
5087 }
5088
5089 free (bbs);
5090 }
5091
5092 /* Returns true when all the dependences are computable. */
5093
5094 static bool
known_dependences_p(vec<ddr_p> dependence_relations)5095 known_dependences_p (vec<ddr_p> dependence_relations)
5096 {
5097 ddr_p ddr;
5098 unsigned int i;
5099
5100 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5101 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5102 return false;
5103
5104 return true;
5105 }
5106
5107 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5108 statement of the loop nest, and one edge per data dependence or
5109 scalar dependence. */
5110
5111 struct graph *
build_empty_rdg(int n_stmts)5112 build_empty_rdg (int n_stmts)
5113 {
5114 struct graph *rdg = new_graph (n_stmts);
5115 return rdg;
5116 }
5117
5118 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5119 statement of the loop nest, and one edge per data dependence or
5120 scalar dependence. */
5121
5122 struct graph *
build_rdg(struct loop * loop,vec<loop_p> * loop_nest,vec<ddr_p> * dependence_relations,vec<data_reference_p> * datarefs)5123 build_rdg (struct loop *loop,
5124 vec<loop_p> *loop_nest,
5125 vec<ddr_p> *dependence_relations,
5126 vec<data_reference_p> *datarefs)
5127 {
5128 struct graph *rdg = NULL;
5129
5130 if (compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5131 dependence_relations)
5132 && known_dependences_p (*dependence_relations))
5133 {
5134 vec<gimple> stmts;
5135 stmts.create (10);
5136 stmts_from_loop (loop, &stmts);
5137 rdg = build_empty_rdg (stmts.length ());
5138 create_rdg_vertices (rdg, stmts, loop);
5139 create_rdg_edges (rdg, *dependence_relations);
5140 stmts.release ();
5141 }
5142
5143 return rdg;
5144 }
5145
5146 /* Free the reduced dependence graph RDG. */
5147
5148 void
free_rdg(struct graph * rdg)5149 free_rdg (struct graph *rdg)
5150 {
5151 int i;
5152
5153 for (i = 0; i < rdg->n_vertices; i++)
5154 {
5155 struct vertex *v = &(rdg->vertices[i]);
5156 struct graph_edge *e;
5157
5158 for (e = v->succ; e; e = e->succ_next)
5159 free (e->data);
5160
5161 gimple_set_uid (RDGV_STMT (v), -1);
5162 free_data_refs (RDGV_DATAREFS (v));
5163 free (v->data);
5164 }
5165
5166 free_graph (rdg);
5167 }
5168
5169 /* Determines whether the statement from vertex V of the RDG has a
5170 definition used outside the loop that contains this statement. */
5171
5172 bool
rdg_defs_used_in_other_loops_p(struct graph * rdg,int v)5173 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5174 {
5175 gimple stmt = RDG_STMT (rdg, v);
5176 struct loop *loop = loop_containing_stmt (stmt);
5177 use_operand_p imm_use_p;
5178 imm_use_iterator iterator;
5179 ssa_op_iter it;
5180 def_operand_p def_p;
5181
5182 if (!loop)
5183 return true;
5184
5185 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5186 {
5187 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5188 {
5189 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5190 return true;
5191 }
5192 }
5193
5194 return false;
5195 }
5196