/*
 * Copyright 2011      INRIA Saclay
 * Copyright 2012-2014 Ecole Normale Superieure
 * Copyright 2015-2016 Sven Verdoolaege
 * Copyright 2016      INRIA Paris
 * Copyright 2017      Sven Verdoolaege
 *
 * Use of this software is governed by the MIT license
 *
 * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
 * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
 * 91893 Orsay, France
 * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
 * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12,
 * CS 42112, 75589 Paris Cedex 12, France
 */

#include <isl_ctx_private.h>
#include <isl_map_private.h>
#include <isl_space_private.h>
#include <isl_aff_private.h>
#include <isl/hash.h>
#include <isl/id.h>
#include <isl/constraint.h>
#include <isl/schedule.h>
#include <isl_schedule_constraints.h>
#include <isl/schedule_node.h>
#include <isl_mat_private.h>
#include <isl_vec_private.h>
#include <isl/set.h>
#include <isl_union_set_private.h>
#include <isl_seq.h>
#include <isl_tab.h>
#include <isl_dim_map.h>
#include <isl/map_to_basic_set.h>
#include <isl_sort.h>
#include <isl_options_private.h>
#include <isl_tarjan.h>
#include <isl_morph.h>
#include <isl/ilp.h>
#include <isl_val_private.h>

/*
 * The scheduling algorithm implemented in this file was inspired by
 * Bondhugula et al., "Automatic Transformations for Communication-Minimized
 * Parallelization and Locality Optimization in the Polyhedral Model".
 *
 * For a detailed description of the variant implemented in isl,
 * see Verdoolaege and Janssens, "Scheduling for PPCG" (2017).
 */


/* Internal information about a node that is used during the construction
 * of a schedule.
 * space represents the original space in which the domain lives;
 *	that is, the space is not affected by compression
 * sched is a matrix representation of the schedule being constructed
 *	for this node; if compressed is set, then this schedule is
 *	defined over the compressed domain space
 * sched_map is an isl_map representation of the same (partial) schedule
 *	sched_map may be NULL; if compressed is set, then this map
 *	is defined over the uncompressed domain space
 * rank is the number of linearly independent rows in the linear part
 *	of sched
 * the rows of "vmap" represent a change of basis for the node
 *	variables; the first rank rows span the linear part of
 *	the schedule rows; the remaining rows are linearly independent
 * the rows of "indep" represent linear combinations of the schedule
 * coefficients that are non-zero when the schedule coefficients are
 * linearly independent of previously computed schedule rows.
 * start is the first variable in the LP problem in the sequence that
 *	represents the schedule coefficients of this node
 * nvar is the dimension of the (compressed) domain
 * nparam is the number of parameters or 0 if we are not constructing
 *	a parametric schedule
 *
 * If compressed is set, then hull represents the constraints
 * that were used to derive the compression, while compress and
 * decompress map the original space to the compressed space and
 * vice versa.
 *
 * scc is the index of the SCC (or WCC) this node belongs to
 *
 * "cluster" is only used inside extract_clusters and identifies
 * the cluster of SCCs that the node belongs to.
 *
 * coincident contains a boolean for each of the rows of the schedule,
 * indicating whether the corresponding scheduling dimension satisfies
 * the coincidence constraints in the sense that the corresponding
 * dependence distances are zero.
 *
 * If the schedule_treat_coalescing option is set, then
 * "sizes" contains the sizes of the (compressed) instance set
 * in each direction.  If there is no fixed size in a given direction,
 * then the corresponding size value is set to infinity.
 * If the schedule_treat_coalescing option or the schedule_max_coefficient
 * option is set, then "max" contains the maximal values for
 * schedule coefficients of the (compressed) variables.  If no bound
 * needs to be imposed on a particular variable, then the corresponding
 * value is negative.
 * If not NULL, then "bounds" contains a non-parametric set
 * in the compressed space that is bounded by the size in each direction.
 */
struct isl_sched_node {
	isl_space *space;
	int	compressed;
	isl_set	*hull;
	isl_multi_aff *compress;
	isl_pw_multi_aff *decompress;
	isl_mat *sched;
	isl_map *sched_map;
	int	 rank;
	isl_mat *indep;
	isl_mat *vmap;
	int	 start;
	int	 nvar;
	int	 nparam;

	int	 scc;
	int	 cluster;

	int	*coincident;

	isl_multi_val *sizes;
	isl_basic_set *bounds;
	isl_vec *max;
};

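/* Does the node "entry" have the same space tuples as the space "val"?
 * This function is used as a hash table comparison function.
 */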
static isl_bool node_has_tuples(const void *entry, const void *val)
{
	struct isl_sched_node *node = (struct isl_sched_node *)entry;
	isl_space *space = (isl_space *) val;

	return isl_space_has_equal_tuples(node->space, space);
}

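/* Does "node" belong to the SCC (or WCC) with index "scc"?
 */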
static int node_scc_exactly(struct isl_sched_node *node, int scc)
{
	return node->scc == scc;
}

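/* Does "node" belong to an SCC (or WCC) with index at most "scc"?
 */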
static int node_scc_at_most(struct isl_sched_node *node, int scc)
{
	return node->scc <= scc;
}

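/* Does "node" belong to an SCC (or WCC) with index at least "scc"?
 */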
static int node_scc_at_least(struct isl_sched_node *node, int scc)
{
	return node->scc >= scc;
}

/* An edge in the dependence graph.  An edge may be used to
 * ensure validity of the generated schedule, to minimize the dependence
 * distance or both
 *
 * map is the dependence relation, with i -> j in the map if j depends on i
 * tagged_condition and tagged_validity contain the union of all tagged
 *	condition or conditional validity dependence relations that
 *	specialize the dependence relation "map"; that is,
 *	if (i -> a) -> (j -> b) is an element of "tagged_condition"
 *	or "tagged_validity", then i -> j is an element of "map".
 *	If these fields are NULL, then they represent the empty relation.
 * src is the source node
 * dst is the sink node
 *
 * types is a bit vector containing the types of this edge.
 * validity is set if the edge is used to ensure correctness
 * coincidence is used to enforce zero dependence distances
 * proximity is set if the edge is used to minimize dependence distances
 * condition is set if the edge represents a condition
 *	for a conditional validity schedule constraint
 * local can only be set for condition edges and indicates that
 *	the dependence distance over the edge should be zero
 * conditional_validity is set if the edge is used to conditionally
 *	ensure correctness
 *
 * For validity edges, start and end mark the sequence of inequality
 * constraints in the LP problem that encode the validity constraint
 * corresponding to this edge.
 *
 * During clustering, an edge may be marked "no_merge" if it should
 * not be used to merge clusters.
 * The weight is also only used during clustering and it is
 * an indication of how many schedule dimensions on either side
 * of the schedule constraints can be aligned.
 * If the weight is negative, then this means that this edge was postponed
 * by has_bounded_distances or any_no_merge.  The original weight can
 * be retrieved by adding 1 + graph->max_weight, with "graph"
 * the graph containing this edge.
 */
struct isl_sched_edge {
	isl_map *map;
	isl_union_map *tagged_condition;
	isl_union_map *tagged_validity;

	struct isl_sched_node *src;
	struct isl_sched_node *dst;

	unsigned types;

	int start;
	int end;

	int no_merge;
	int weight;
};

/* Is "edge" marked as being of type "type"?
 */
static int is_type(struct isl_sched_edge *edge, enum isl_edge_type type)
{
	return ISL_FL_ISSET(edge->types, 1 << type);
}

/* Mark "edge" as being of type "type".
 */
static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type)
{
	ISL_FL_SET(edge->types, 1 << type);
}

/* No longer mark "edge" as being of type "type".
 */
static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type)
{
	ISL_FL_CLR(edge->types, 1 << type);
}

/* Is "edge" marked as a validity edge?
 */
static int is_validity(struct isl_sched_edge *edge)
{
	return is_type(edge, isl_edge_validity);
}

/* Mark "edge" as a validity edge.
 */
static void set_validity(struct isl_sched_edge *edge)
{
	set_type(edge, isl_edge_validity);
}

/* Is "edge" marked as a proximity edge?
 */
static int is_proximity(struct isl_sched_edge *edge)
{
	return is_type(edge, isl_edge_proximity);
}

/* Is "edge" marked as a local edge?
 */
static int is_local(struct isl_sched_edge *edge)
{
	return is_type(edge, isl_edge_local);
}

/* Mark "edge" as a local edge.
 */
static void set_local(struct isl_sched_edge *edge)
{
	set_type(edge, isl_edge_local);
}

/* No longer mark "edge" as a local edge.
 */
static void clear_local(struct isl_sched_edge *edge)
{
	clear_type(edge, isl_edge_local);
}

/* Is "edge" marked as a coincidence edge?
 */
static int is_coincidence(struct isl_sched_edge *edge)
{
	return is_type(edge, isl_edge_coincidence);
}

/* Is "edge" marked as a condition edge?
 */
static int is_condition(struct isl_sched_edge *edge)
{
	return is_type(edge, isl_edge_condition);
}

/* Is "edge" marked as a conditional validity edge?
 */
static int is_conditional_validity(struct isl_sched_edge *edge)
{
	return is_type(edge, isl_edge_conditional_validity);
}

/* Is "edge" of a type that can appear multiple times between
 * the same pair of nodes?
 *
 * Condition edges and conditional validity edges may have tagged
 * dependence relations, in which case an edge is added for each
 * pair of tags.
 */
static int is_multi_edge_type(struct isl_sched_edge *edge)
{
	return is_condition(edge) || is_conditional_validity(edge);
}

/* Internal information about the dependence graph used during
 * the construction of the schedule.
 *
 * intra_hmap is a cache, mapping dependence relations to their dual,
 *	for dependences from a node to itself, possibly without
 *	coefficients for the parameters
 * intra_hmap_param is a cache, mapping dependence relations to their dual,
 *	for dependences from a node to itself, including coefficients
 *	for the parameters
 * inter_hmap is a cache, mapping dependence relations to their dual,
 *	for dependences between distinct nodes
 * if compression is involved then the key for these maps
 * is the original, uncompressed dependence relation, while
 * the value is the dual of the compressed dependence relation.
 *
 * n is the number of nodes
 * node is the list of nodes
 * maxvar is the maximal number of variables over all nodes
 * max_row is the allocated number of rows in the schedule
 * n_row is the current (maximal) number of linearly independent
 *	rows in the node schedules
 * n_total_row is the current number of rows in the node schedules
 * band_start is the starting row in the node schedules of the current band
 * root is set to the original dependence graph from which this graph
 *	is derived through splitting.  If this graph is not the result of
 *	splitting, then the root field points to the graph itself.
 *
 * sorted contains a list of node indices sorted according to the
 *	SCC to which a node belongs
 *
 * n_edge is the number of edges
 * edge is the list of edges
 * max_edge contains the maximal number of edges of each type;
 *	in particular, it contains the number of edges in the initial graph.
 * edge_table contains pointers into the edge array, hashed on the source
 *	and sink spaces; there is one such table for each type;
 *	a given edge may be referenced from more than one table
 *	if the corresponding relation appears in more than one of the
 *	sets of dependences; however, for each type there is only
 *	a single edge between a given pair of source and sink space
 *	in the entire graph
 *
 * node_table contains pointers into the node array, hashed on the space tuples
 *
 * region contains a list of variable sequences that should be non-trivial
 *
 * lp contains the (I)LP problem used to obtain new schedule rows
 *
 * src_scc and dst_scc are the source and sink SCCs of an edge with
 *	conflicting constraints
 *
 * scc represents the number of components
 * weak is set if the components are weakly connected
 *
 * max_weight is used during clustering and represents the maximal
 * weight of the relevant proximity edges.
 */
struct isl_sched_graph {
	isl_map_to_basic_set *intra_hmap;
	isl_map_to_basic_set *intra_hmap_param;
	isl_map_to_basic_set *inter_hmap;

	struct isl_sched_node *node;
	int n;
	int maxvar;
	int max_row;
	int n_row;

	int *sorted;

	int n_total_row;
	int band_start;

	struct isl_sched_graph *root;

	struct isl_sched_edge *edge;
	int n_edge;
	int max_edge[isl_edge_last + 1];
	struct isl_hash_table *edge_table[isl_edge_last + 1];

	struct isl_hash_table *node_table;
	struct isl_trivial_region *region;

	isl_basic_set *lp;

	int src_scc;
	int dst_scc;

	int scc;
	int weak;

	int max_weight;
};

/* Initialize node_table based on the list of nodes.
 */
static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
{
	int i;

	graph->node_table = isl_hash_table_alloc(ctx, graph->n);
	if (!graph->node_table)
		return -1;

	for (i = 0; i < graph->n; ++i) {
		struct isl_hash_table_entry *entry;
		uint32_t hash;

		hash = isl_space_get_tuple_hash(graph->node[i].space);
		entry = isl_hash_table_find(ctx, graph->node_table, hash,
					    &node_has_tuples,
					    graph->node[i].space, 1);
		if (!entry)
			return -1;
		entry->data = &graph->node[i];
	}

	return 0;
}

/* Return a pointer to the node that lives within the given space,
 * an invalid node if there is no such node, or NULL in case of error.
 */
static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
	struct isl_sched_graph *graph, __isl_keep isl_space *space)
{
	struct isl_hash_table_entry *entry;
	uint32_t hash;

	if (!space)
		return NULL;

	hash = isl_space_get_tuple_hash(space);
	entry = isl_hash_table_find(ctx, graph->node_table, hash,
				    &node_has_tuples, space, 0);
	if (!entry)
		return NULL;
	if (entry == isl_hash_table_entry_none)
		return graph->node + graph->n;

	return entry->data;
}

/* Is "node" a node in "graph"?
 */
static int is_node(struct isl_sched_graph *graph,
	struct isl_sched_node *node)
{
	return node && node >= &graph->node[0] && node < &graph->node[graph->n];
}

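/* Do the edges "entry" and "val" have the same source and destination nodes?
 * This function is used as a hash table comparison function.
 */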
static isl_bool edge_has_src_and_dst(const void *entry, const void *val)
{
	const struct isl_sched_edge *edge = entry;
	const struct isl_sched_edge *temp = val;

	return isl_bool_ok(edge->src == temp->src && edge->dst == temp->dst);
}

/* Add the given edge to graph->edge_table[type].
 */
static isl_stat graph_edge_table_add(isl_ctx *ctx,
	struct isl_sched_graph *graph, enum isl_edge_type type,
	struct isl_sched_edge *edge)
{
	struct isl_hash_table_entry *entry;
	uint32_t hash;

	hash = isl_hash_init();
	hash = isl_hash_builtin(hash, edge->src);
	hash = isl_hash_builtin(hash, edge->dst);
	entry = isl_hash_table_find(ctx, graph->edge_table[type], hash,
				    &edge_has_src_and_dst, edge, 1);
	if (!entry)
		return isl_stat_error;
	entry->data = edge;

	return isl_stat_ok;
}

/* Add "edge" to all relevant edge tables.
 * That is, for every type of the edge, add it to the corresponding table.
 */
static isl_stat graph_edge_tables_add(isl_ctx *ctx,
	struct isl_sched_graph *graph, struct isl_sched_edge *edge)
{
	enum isl_edge_type t;

	for (t = isl_edge_first; t <= isl_edge_last; ++t) {
		if (!is_type(edge, t))
			continue;
		if (graph_edge_table_add(ctx, graph, t, edge) < 0)
			return isl_stat_error;
	}

	return isl_stat_ok;
}

/* Allocate the edge_tables based on the maximal number of edges of
 * each type.
 */
static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph)
{
	int i;

	for (i = 0; i <= isl_edge_last; ++i) {
		graph->edge_table[i] = isl_hash_table_alloc(ctx,
							    graph->max_edge[i]);
		if (!graph->edge_table[i])
			return -1;
	}

	return 0;
}

/* If graph->edge_table[type] contains an edge from the given source
 * to the given destination, then return the hash table entry of this edge.
 * Otherwise, return NULL.
 */
static struct isl_hash_table_entry *graph_find_edge_entry(
	struct isl_sched_graph *graph,
	enum isl_edge_type type,
	struct isl_sched_node *src, struct isl_sched_node *dst)
{
	isl_ctx *ctx = isl_space_get_ctx(src->space);
	uint32_t hash;
	struct isl_sched_edge temp = { .src = src, .dst = dst };

	hash = isl_hash_init();
	hash = isl_hash_builtin(hash, temp.src);
	hash = isl_hash_builtin(hash, temp.dst);
	return isl_hash_table_find(ctx, graph->edge_table[type], hash,
				    &edge_has_src_and_dst, &temp, 0);
}


/* If graph->edge_table[type] contains an edge from the given source
 * to the given destination, then return this edge.
 * Return "none" if no such edge can be found.
 * Return NULL on error.
 */
static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph,
	enum isl_edge_type type,
	struct isl_sched_node *src, struct isl_sched_node *dst,
	struct isl_sched_edge *none)
{
	struct isl_hash_table_entry *entry;

	entry = graph_find_edge_entry(graph, type, src, dst);
	if (!entry)
		return NULL;
	if (entry == isl_hash_table_entry_none)
		return none;

	return entry->data;
}

/* Check whether the dependence graph has an edge of the given type
 * between the given two nodes.
 */
static isl_bool graph_has_edge(struct isl_sched_graph *graph,
	enum isl_edge_type type,
	struct isl_sched_node *src, struct isl_sched_node *dst)
{
	struct isl_sched_edge dummy;
	struct isl_sched_edge *edge;
	isl_bool empty;

	edge = graph_find_edge(graph, type, src, dst, &dummy);
	if (!edge)
		return isl_bool_error;
	if (edge == &dummy)
		return isl_bool_false;

	empty = isl_map_plain_is_empty(edge->map);

	return isl_bool_not(empty);
}

/* Look for any edge with the same src, dst and map fields as "model".
 *
 * Return the matching edge if one can be found.
 * Return "model" if no matching edge is found.
 * Return NULL on error.
 */
static struct isl_sched_edge *graph_find_matching_edge(
	struct isl_sched_graph *graph, struct isl_sched_edge *model)
{
	enum isl_edge_type i;
	struct isl_sched_edge *edge;

	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
		int is_equal;

		edge = graph_find_edge(graph, i, model->src, model->dst, model);
		if (!edge)
			return NULL;
		if (edge == model)
			continue;
		is_equal = isl_map_plain_is_equal(model->map, edge->map);
		if (is_equal < 0)
			return NULL;
		if (is_equal)
			return edge;
	}

	return model;
}

/* Remove the given edge from all the edge_tables that refer to it.
 */
static isl_stat graph_remove_edge(struct isl_sched_graph *graph,
	struct isl_sched_edge *edge)
{
	isl_ctx *ctx = isl_map_get_ctx(edge->map);
	enum isl_edge_type i;

	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
		struct isl_hash_table_entry *entry;

		entry = graph_find_edge_entry(graph, i, edge->src, edge->dst);
		if (!entry)
			return isl_stat_error;
		if (entry == isl_hash_table_entry_none)
			continue;
		if (entry->data != edge)
			continue;
		isl_hash_table_remove(ctx, graph->edge_table[i], entry);
	}

	return isl_stat_ok;
}

/* Check whether the dependence graph has any edge
 * between the given two nodes.
 */
static isl_bool graph_has_any_edge(struct isl_sched_graph *graph,
	struct isl_sched_node *src, struct isl_sched_node *dst)
{
	enum isl_edge_type i;
	isl_bool r;

	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
		r = graph_has_edge(graph, i, src, dst);
		if (r < 0 || r)
			return r;
	}

	return r;
}

/* Check whether the dependence graph has a validity edge
 * between the given two nodes.
 *
 * Conditional validity edges are essentially validity edges that
 * can be ignored if the corresponding condition edges are iteration private.
 * Here, we are only checking for the presence of validity
 * edges, so we need to consider the conditional validity edges too.
 * In particular, this function is used during the detection
 * of strongly connected components and we cannot ignore
 * conditional validity edges during this detection.
 */
static isl_bool graph_has_validity_edge(struct isl_sched_graph *graph,
	struct isl_sched_node *src, struct isl_sched_node *dst)
{
	isl_bool r;

	r = graph_has_edge(graph, isl_edge_validity, src, dst);
	if (r < 0 || r)
		return r;

	return graph_has_edge(graph, isl_edge_conditional_validity, src, dst);
}

/* Perform all the required memory allocations for a schedule graph "graph"
 * with "n_node" nodes and "n_edge" edges and initialize the corresponding
 * fields.
 */
static isl_stat graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
	int n_node, int n_edge)
{
	int i;

	graph->n = n_node;
	graph->n_edge = n_edge;
	graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
	graph->sorted = isl_calloc_array(ctx, int, graph->n);
	graph->region = isl_alloc_array(ctx,
					struct isl_trivial_region, graph->n);
	graph->edge = isl_calloc_array(ctx,
					struct isl_sched_edge, graph->n_edge);

	graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
	graph->intra_hmap_param = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
	graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);

	if (!graph->node || !graph->region || (graph->n_edge && !graph->edge) ||
	    !graph->sorted)
		return isl_stat_error;

	for (i = 0; i < graph->n; ++i)
		graph->sorted[i] = i;

	return isl_stat_ok;
}

/* Free the memory associated to node "node" in "graph".
 * The "coincident" field is shared by nodes in a graph and its subgraph.
 * It therefore only needs to be freed for the original dependence graph,
 * i.e., one that is not the result of splitting.
 */
static void clear_node(struct isl_sched_graph *graph,
	struct isl_sched_node *node)
{
	isl_space_free(node->space);
	isl_set_free(node->hull);
	isl_multi_aff_free(node->compress);
	isl_pw_multi_aff_free(node->decompress);
	isl_mat_free(node->sched);
	isl_map_free(node->sched_map);
	isl_mat_free(node->indep);
	isl_mat_free(node->vmap);
	if (graph->root == graph)
		free(node->coincident);
	isl_multi_val_free(node->sizes);
	isl_basic_set_free(node->bounds);
	isl_vec_free(node->max);
}

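/* Free all memory associated to "graph", including the nodes, the edges
 * and the various tables and caches.
 */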
static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
{
	int i;

	isl_map_to_basic_set_free(graph->intra_hmap);
	isl_map_to_basic_set_free(graph->intra_hmap_param);
	isl_map_to_basic_set_free(graph->inter_hmap);

	if (graph->node)
		for (i = 0; i < graph->n; ++i)
			clear_node(graph, &graph->node[i]);
	free(graph->node);
	free(graph->sorted);
	if (graph->edge)
		for (i = 0; i < graph->n_edge; ++i) {
			isl_map_free(graph->edge[i].map);
			isl_union_map_free(graph->edge[i].tagged_condition);
			isl_union_map_free(graph->edge[i].tagged_validity);
		}
	free(graph->edge);
	free(graph->region);
	for (i = 0; i <= isl_edge_last; ++i)
		isl_hash_table_free(ctx, graph->edge_table[i]);
	isl_hash_table_free(ctx, graph->node_table);
	isl_basic_set_free(graph->lp);
}

/* For each "set" on which this function is called, increment
 * graph->n by one and update graph->maxvar.
 */
static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user)
{
	struct isl_sched_graph *graph = user;
	isl_size nvar = isl_set_dim(set, isl_dim_set);

	graph->n++;
	if (nvar > graph->maxvar)
		graph->maxvar = nvar;

	isl_set_free(set);

	if (nvar < 0)
		return isl_stat_error;
	return isl_stat_ok;
}

/* Compute the number of rows that should be allocated for the schedule.
 * In particular, we need one row for each variable or one row
 * for each basic map in the dependences.
 * Note that it is practically impossible to exhaust both
 * the number of dependences and the number of variables.
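 *
 * For example, for schedule constraints with 5 dependence basic maps
 * over instance sets with at most 2 variables, 5 + 2 = 7 rows
 * are allocated.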
 */
static isl_stat compute_max_row(struct isl_sched_graph *graph,
	__isl_keep isl_schedule_constraints *sc)
{
	int n_edge;
	isl_stat r;
	isl_union_set *domain;

	graph->n = 0;
	graph->maxvar = 0;
	domain = isl_schedule_constraints_get_domain(sc);
	r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph);
	isl_union_set_free(domain);
	if (r < 0)
		return isl_stat_error;
	n_edge = isl_schedule_constraints_n_basic_map(sc);
	if (n_edge < 0)
		return isl_stat_error;
	graph->max_row = n_edge + graph->maxvar;

	return isl_stat_ok;
}

/* Does "bset" have any defining equalities for its set variables?
 */
static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset)
{
	int i;
	isl_size n;

	n = isl_basic_set_dim(bset, isl_dim_set);
	if (n < 0)
		return isl_bool_error;

	for (i = 0; i < n; ++i) {
		isl_bool has;

		has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i,
							NULL);
		if (has < 0 || has)
			return has;
	}

	return isl_bool_false;
}

/* Set the entries of node->max to the value of the schedule_max_coefficient
 * option, if set.
 */
static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node)
{
	int max;

	max = isl_options_get_schedule_max_coefficient(ctx);
	if (max == -1)
		return isl_stat_ok;

	node->max = isl_vec_alloc(ctx, node->nvar);
	node->max = isl_vec_set_si(node->max, max);
	if (!node->max)
		return isl_stat_error;

	return isl_stat_ok;
}

/* Set the entries of node->max to the minimum of the schedule_max_coefficient
 * option (if set) and half of the minimum of the sizes in the other
 * dimensions.  Round up when computing the half such that
 * if the minimum of the sizes is one, half of the size is taken to be one
 * rather than zero.
 * If the global minimum is unbounded (i.e., if both
 * the schedule_max_coefficient is not set and the sizes in the other
 * dimensions are unbounded), then store a negative value.
 * If the schedule coefficient is close to the size of the instance set
 * in another dimension, then the schedule may represent a loop
 * coalescing transformation (especially if the coefficient
 * in that other dimension is one).  Forcing the coefficient to be
 * smaller than or equal to half the minimal size should avoid this
 * situation.
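 *
 * For example, if the schedule_max_coefficient option is not set and
 * the sizes are (10, 4, infinity), then the computed bounds are
 * (2, 5, 2), i.e., half of 4, half of 10 and half of 4, respectively.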
 */
static isl_stat compute_max_coefficient(isl_ctx *ctx,
	struct isl_sched_node *node)
{
	int max;
	int i, j;
	isl_vec *v;

	max = isl_options_get_schedule_max_coefficient(ctx);
	v = isl_vec_alloc(ctx, node->nvar);
	if (!v)
		return isl_stat_error;

	for (i = 0; i < node->nvar; ++i) {
		isl_int_set_si(v->el[i], max);
		isl_int_mul_si(v->el[i], v->el[i], 2);
	}

	for (i = 0; i < node->nvar; ++i) {
		isl_val *size;

		size = isl_multi_val_get_val(node->sizes, i);
		if (!size)
			goto error;
		if (!isl_val_is_int(size)) {
			isl_val_free(size);
			continue;
		}
		for (j = 0; j < node->nvar; ++j) {
			if (j == i)
				continue;
			if (isl_int_is_neg(v->el[j]) ||
			    isl_int_gt(v->el[j], size->n))
				isl_int_set(v->el[j], size->n);
		}
		isl_val_free(size);
	}

	for (i = 0; i < node->nvar; ++i)
		isl_int_cdiv_q_ui(v->el[i], v->el[i], 2);

	node->max = v;
	return isl_stat_ok;
error:
	isl_vec_free(v);
	return isl_stat_error;
}

/* Construct an identifier for node "node", which will represent "set".
 * The name of the identifier is either "compressed" or
 * "compressed_<name>", with <name> the name of the space of "set".
 * The user pointer of the identifier points to "node".
 */
static __isl_give isl_id *construct_compressed_id(__isl_keep isl_set *set,
	struct isl_sched_node *node)
{
	isl_bool has_name;
	isl_ctx *ctx;
	isl_id *id;
	isl_printer *p;
	const char *name;
	char *id_name;

	has_name = isl_set_has_tuple_name(set);
	if (has_name < 0)
		return NULL;

	ctx = isl_set_get_ctx(set);
	if (!has_name)
		return isl_id_alloc(ctx, "compressed", node);

	p = isl_printer_to_str(ctx);
	name = isl_set_get_tuple_name(set);
	p = isl_printer_print_str(p, "compressed_");
	p = isl_printer_print_str(p, name);
	id_name = isl_printer_get_str(p);
	isl_printer_free(p);

	id = isl_id_alloc(ctx, id_name, node);
	free(id_name);

	return id;
}

/* Construct a map that isolates the variable in position "pos" in "set".
 *
 * That is, construct
 *
 *	[i_0, ..., i_pos-1, i_pos+1, ...] -> [i_pos]
 */
static __isl_give isl_map *isolate(__isl_take isl_set *set, int pos)
{
	isl_map *map;

	map = isl_set_project_onto_map(set, isl_dim_set, pos, 1);
	map = isl_map_project_out(map, isl_dim_in, pos, 1);
	return map;
}

/* Compute and return the size of "set" in dimension "dim".
 * The size is taken to be the difference in values for that variable
 * for fixed values of the other variables.
 * This assumes that "set" is convex.
 * In particular, the variable is first isolated from the other variables
 * in the range of a map
 *
 *	[i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim]
 *
 * and then duplicated
 *
 *	[i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']]
 *
 * The shared variables are then projected out and the maximal value
 * of i_dim' - i_dim is computed.
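 *
 * For example, for the convex set { [i, j] : 0 <= i < 4 and 0 <= j < 10 },
 * the size is 3 in dimension 0 and 9 in dimension 1.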
 */
static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim)
{
	isl_map *map;
	isl_local_space *ls;
	isl_aff *obj;
	isl_val *v;

	map = isolate(set, dim);
	map = isl_map_range_product(map, isl_map_copy(map));
	map = isl_set_unwrap(isl_map_range(map));
	set = isl_map_deltas(map);
	ls = isl_local_space_from_space(isl_set_get_space(set));
	obj = isl_aff_var_on_domain(ls, isl_dim_set, 0);
	v = isl_set_max_val(set, obj);
	isl_aff_free(obj);
	isl_set_free(set);

	return v;
}

/* Perform a compression on "node" where "hull" represents the constraints
 * that were used to derive the compression, while "compress" and
 * "decompress" map the original space to the compressed space and
 * vice versa.
 *
 * If "node" was not compressed already, then simply store
 * the compression information.
 * Otherwise the "original" space is actually the result
 * of a previous compression, which is then combined
 * with the present compression.
 *
 * The dimensionality of the compressed domain is also adjusted.
 * Other information, such as the sizes and the maximal coefficient values,
 * has not been computed yet and therefore does not need to be adjusted.
 */
static isl_stat compress_node(struct isl_sched_node *node,
	__isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
	__isl_take isl_pw_multi_aff *decompress)
{
	node->nvar = isl_multi_aff_dim(compress, isl_dim_out);
	if (!node->compressed) {
		node->compressed = 1;
		node->hull = hull;
		node->compress = compress;
		node->decompress = decompress;
	} else {
		hull = isl_set_preimage_multi_aff(hull,
					    isl_multi_aff_copy(node->compress));
		node->hull = isl_set_intersect(node->hull, hull);
		node->compress = isl_multi_aff_pullback_multi_aff(
						compress, node->compress);
		node->decompress = isl_pw_multi_aff_pullback_pw_multi_aff(
						node->decompress, decompress);
	}

	if (!node->hull || !node->compress || !node->decompress)
		return isl_stat_error;

	return isl_stat_ok;
}

/* Given that dimension "pos" in "set" has a fixed value
 * in terms of the other dimensions, (further) compress "node"
 * by projecting out this dimension.
 * "set" may be the result of a previous compression.
 * "uncompressed" is the original domain (without compression).
 *
 * The compression function simply projects out the dimension.
 * The decompression function adds back the dimension
 * in the right position as an expression of the other dimensions
 * derived from "set".
 * As in extract_node, the compressed space has an identifier
 * that references "node" such that each compressed space is unique and
 * such that the node can be recovered from the compressed space.
 *
 * The constraint removed through the compression is added to the "hull"
 * such that only edges that relate to the original domains
 * are taken into account.
 * In particular, it is obtained by composing compression and decompression and
 * taking the relation among the variables in the range.
 */
static isl_stat project_out_fixed(struct isl_sched_node *node,
	__isl_keep isl_set *uncompressed, __isl_take isl_set *set, int pos)
{
	isl_id *id;
	isl_space *space;
	isl_set *domain;
	isl_map *map;
	isl_multi_aff *compress;
	isl_pw_multi_aff *decompress, *pma;
	isl_multi_pw_aff *mpa;
	isl_set *hull;

	map = isolate(isl_set_copy(set), pos);
	pma = isl_pw_multi_aff_from_map(map);
	domain = isl_pw_multi_aff_domain(isl_pw_multi_aff_copy(pma));
	pma = isl_pw_multi_aff_gist(pma, domain);
	space = isl_pw_multi_aff_get_domain_space(pma);
	mpa = isl_multi_pw_aff_identity(isl_space_map_from_set(space));
	mpa = isl_multi_pw_aff_range_splice(mpa, pos,
				    isl_multi_pw_aff_from_pw_multi_aff(pma));
	decompress = isl_pw_multi_aff_from_multi_pw_aff(mpa);
	space = isl_set_get_space(set);
	compress = isl_multi_aff_project_out_map(space, isl_dim_set, pos, 1);
	id = construct_compressed_id(uncompressed, node);
	compress = isl_multi_aff_set_tuple_id(compress, isl_dim_out, id);
	space = isl_space_reverse(isl_multi_aff_get_space(compress));
	decompress = isl_pw_multi_aff_reset_space(decompress, space);
	pma = isl_pw_multi_aff_pullback_multi_aff(
	    isl_pw_multi_aff_copy(decompress), isl_multi_aff_copy(compress));
	hull = isl_map_range(isl_map_from_pw_multi_aff(pma));

	isl_set_free(set);

	return compress_node(node, hull, compress, decompress);
}

/* Compute the size of the compressed domain in each dimension and
 * store the results in node->sizes.
 * "uncompressed" is the original domain (without compression).
 *
 * First compress the domain if needed and then compute the size
 * in each direction.
 * If the domain is not convex, then the sizes are computed
 * on a convex superset in order to avoid picking up sizes
 * that are valid for the individual disjuncts, but not for
 * the domain as a whole.
 *
 * If any of the sizes turns out to be zero, then this means
 * that this dimension has a fixed value in terms of
 * the other dimensions.  Perform an (extra) compression
 * to remove this dimension.
 */
static isl_stat compute_sizes(struct isl_sched_node *node,
	__isl_keep isl_set *uncompressed)
{
	int j;
	isl_size n;
	isl_multi_val *mv;
	isl_set *set = isl_set_copy(uncompressed);

	if (node->compressed)
		set = isl_set_preimage_pw_multi_aff(set,
				    isl_pw_multi_aff_copy(node->decompress));
	set = isl_set_from_basic_set(isl_set_simple_hull(set));
	mv = isl_multi_val_zero(isl_set_get_space(set));
	n = isl_set_dim(set, isl_dim_set);
	if (n < 0)
		mv = isl_multi_val_free(mv);
	for (j = 0; j < n; ++j) {
		isl_bool is_zero;
		isl_val *v;

		v = compute_size(isl_set_copy(set), j);
		is_zero = isl_val_is_zero(v);
		mv = isl_multi_val_set_val(mv, j, v);
		if (is_zero >= 0 && is_zero) {
			isl_multi_val_free(mv);
			if (project_out_fixed(node, uncompressed, set, j) < 0)
				return isl_stat_error;
			return compute_sizes(node, uncompressed);
		}
	}
	node->sizes = mv;
	isl_set_free(set);
	if (!node->sizes)
		return isl_stat_error;
	return isl_stat_ok;
}

/* Compute the size of the instance set "set" of "node", after compression,
 * as well as bounds on the corresponding coefficients, if needed.
 *
 * The sizes are needed when the schedule_treat_coalescing option is set.
 * The bounds are needed when the schedule_treat_coalescing option or
 * the schedule_max_coefficient option is set.
 *
 * If the schedule_treat_coalescing option is not set, then at most
 * the bounds need to be set and this is done in set_max_coefficient.
 * Otherwise, compute the size of the compressed domain
 * in each direction and store the results in node->sizes.
 * Finally, set the bounds on the coefficients based on the sizes
 * and the schedule_max_coefficient option in compute_max_coefficient.
 */
static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node,
	__isl_take isl_set *set)
{
	isl_stat r;

	if (!isl_options_get_schedule_treat_coalescing(ctx)) {
		isl_set_free(set);
		return set_max_coefficient(ctx, node);
	}

	r = compute_sizes(node, set);
	isl_set_free(set);
	if (r < 0)
		return isl_stat_error;
	return compute_max_coefficient(ctx, node);
}

/* Add a new node to the graph representing the given instance set.
 * "nvar" is the (possibly compressed) number of variables and
 * may be smaller than the number of set variables in "set"
 * if "compressed" is set.
 * If "compressed" is set, then "hull" represents the constraints
 * that were used to derive the compression, while "compress" and
 * "decompress" map the original space to the compressed space and
 * vice versa.
 * If "compressed" is not set, then "hull", "compress" and "decompress"
 * should be NULL.
 *
 * Compute the size of the instance set and bounds on the coefficients,
 * if needed.
 */
static isl_stat add_node(struct isl_sched_graph *graph,
	__isl_take isl_set *set, int nvar, int compressed,
	__isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
	__isl_take isl_pw_multi_aff *decompress)
{
	isl_size nparam;
	isl_ctx *ctx;
	isl_mat *sched;
	isl_space *space;
	int *coincident;
	struct isl_sched_node *node;

	nparam = isl_set_dim(set, isl_dim_param);
	if (nparam < 0)
		goto error;

	ctx = isl_set_get_ctx(set);
	if (!ctx->opt->schedule_parametric)
		nparam = 0;
	sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
	node = &graph->node[graph->n];
	graph->n++;
	space = isl_set_get_space(set);
	node->space = space;
	node->nvar = nvar;
	node->nparam = nparam;
	node->sched = sched;
	node->sched_map = NULL;
	coincident = isl_calloc_array(ctx, int, graph->max_row);
	node->coincident = coincident;
	node->compressed = compressed;
	node->hull = hull;
	node->compress = compress;
	node->decompress = decompress;
	if (compute_sizes_and_max(ctx, node, set) < 0)
		return isl_stat_error;

	if (!space || !sched || (graph->max_row && !coincident))
		return isl_stat_error;
	if (compressed && (!hull || !compress || !decompress))
		return isl_stat_error;

	return isl_stat_ok;
error:
	isl_set_free(set);
	isl_set_free(hull);
	isl_multi_aff_free(compress);
	isl_pw_multi_aff_free(decompress);
	return isl_stat_error;
}

/* Add a new node to the graph representing the given set.
 *
 * If any of the set variables is defined by an equality, then
 * we perform variable compression such that we can perform
 * the scheduling on the compressed domain.
 * In this case, an identifier is used that references the new node
 * such that each compressed space is unique and
 * such that the node can be recovered from the compressed space.
 */
static isl_stat extract_node(__isl_take isl_set *set, void *user)
{
	isl_size nvar;
	isl_bool has_equality;
	isl_id *id;
	isl_basic_set *hull;
	isl_set *hull_set;
	isl_morph *morph;
	isl_multi_aff *compress, *decompress_ma;
	isl_pw_multi_aff *decompress;
	struct isl_sched_graph *graph = user;

	hull = isl_set_affine_hull(isl_set_copy(set));
	hull = isl_basic_set_remove_divs(hull);
	nvar = isl_set_dim(set, isl_dim_set);
	has_equality = has_any_defining_equality(hull);

	if (nvar < 0 || has_equality < 0)
		goto error;
	if (!has_equality) {
		isl_basic_set_free(hull);
		return add_node(graph, set, nvar, 0, NULL, NULL, NULL);
	}

	id = construct_compressed_id(set, &graph->node[graph->n]);
	morph = isl_basic_set_variable_compression_with_id(hull, id);
	isl_id_free(id);
	nvar = isl_morph_ran_dim(morph, isl_dim_set);
	if (nvar < 0)
		set = isl_set_free(set);
	compress = isl_morph_get_var_multi_aff(morph);
	morph = isl_morph_inverse(morph);
	decompress_ma = isl_morph_get_var_multi_aff(morph);
	decompress = isl_pw_multi_aff_from_multi_aff(decompress_ma);
	isl_morph_free(morph);

	hull_set = isl_set_from_basic_set(hull);
	return add_node(graph, set, nvar, 1, hull_set, compress, decompress);
error:
	isl_basic_set_free(hull);
	isl_set_free(set);
	return isl_stat_error;
}

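/* Data structure used by extract_edge:
 * "type" is the type of the edges being extracted and
 * "graph" is the dependence graph to which the edges are added.
 */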
struct isl_extract_edge_data {
	enum isl_edge_type type;
	struct isl_sched_graph *graph;
};

/* Merge edge2 into edge1, freeing the contents of edge2.
 * Return 0 on success and -1 on failure.
 *
 * edge1 and edge2 are assumed to have the same value for the map field.
 */
static int merge_edge(struct isl_sched_edge *edge1,
	struct isl_sched_edge *edge2)
{
	edge1->types |= edge2->types;
	isl_map_free(edge2->map);

	if (is_condition(edge2)) {
		if (!edge1->tagged_condition)
			edge1->tagged_condition = edge2->tagged_condition;
		else
			edge1->tagged_condition =
				isl_union_map_union(edge1->tagged_condition,
						    edge2->tagged_condition);
	}

	if (is_conditional_validity(edge2)) {
		if (!edge1->tagged_validity)
			edge1->tagged_validity = edge2->tagged_validity;
		else
			edge1->tagged_validity =
				isl_union_map_union(edge1->tagged_validity,
						    edge2->tagged_validity);
	}

	if (is_condition(edge2) && !edge1->tagged_condition)
		return -1;
	if (is_conditional_validity(edge2) && !edge1->tagged_validity)
		return -1;

	return 0;
}

/* Insert dummy tags in domain and range of "map".
 *
 * In particular, if "map" is of the form
 *
 *	A -> B
 *
 * then return
 *
 *	[A -> dummy_tag] -> [B -> dummy_tag]
 *
 * where the dummy_tags are identical and equal to any dummy tags
 * introduced by any other call to this function.
 */
static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map)
{
	static char dummy;
	isl_ctx *ctx;
	isl_id *id;
	isl_space *space;
	isl_set *domain, *range;

	ctx = isl_map_get_ctx(map);

	id = isl_id_alloc(ctx, NULL, &dummy);
	space = isl_space_params(isl_map_get_space(map));
	space = isl_space_set_from_params(space);
	space = isl_space_set_tuple_id(space, isl_dim_set, id);
	space = isl_space_map_from_set(space);

	domain = isl_map_wrap(map);
	range = isl_map_wrap(isl_map_universe(space));
	map = isl_map_from_domain_and_range(domain, range);
	map = isl_map_zip(map);

	return map;
}

/* Given that at least one of "src" or "dst" is compressed, return
 * a map between the spaces of these nodes restricted to the affine
 * hull that was used in the compression.
 */
static __isl_give isl_map *extract_hull(struct isl_sched_node *src,
	struct isl_sched_node *dst)
{
	isl_set *dom, *ran;

	if (src->compressed)
		dom = isl_set_copy(src->hull);
	else
		dom = isl_set_universe(isl_space_copy(src->space));
	if (dst->compressed)
		ran = isl_set_copy(dst->hull);
	else
		ran = isl_set_universe(isl_space_copy(dst->space));

	return isl_map_from_domain_and_range(dom, ran);
}

/* Intersect the domains of the nested relations in domain and range
 * of "tagged" with "map".
 */
static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged,
	__isl_keep isl_map *map)
{
	isl_set *set;

	tagged = isl_map_zip(tagged);
	set = isl_map_wrap(isl_map_copy(map));
	tagged = isl_map_intersect_domain(tagged, set);
	tagged = isl_map_zip(tagged);
	return tagged;
}

/* Return a pointer to the node that lives in the domain space of "map",
 * an invalid node if there is no such node, or NULL in case of error.
 */
static struct isl_sched_node *find_domain_node(isl_ctx *ctx,
	struct isl_sched_graph *graph, __isl_keep isl_map *map)
{
	struct isl_sched_node *node;
	isl_space *space;

	space = isl_space_domain(isl_map_get_space(map));
	node = graph_find_node(ctx, graph, space);
	isl_space_free(space);

	return node;
}

/* Return a pointer to the node that lives in the range space of "map",
 * an invalid node if there is no such node, or NULL in case of error.
 */
static struct isl_sched_node *find_range_node(isl_ctx *ctx,
	struct isl_sched_graph *graph, __isl_keep isl_map *map)
{
	struct isl_sched_node *node;
	isl_space *space;

	space = isl_space_range(isl_map_get_space(map));
	node = graph_find_node(ctx, graph, space);
	isl_space_free(space);

	return node;
}

/* Refrain from adding a new edge based on "map".
 * Instead, just free the map.
 * "tagged" is either a copy of "map" with additional tags or NULL.
 */
static isl_stat skip_edge(__isl_take isl_map *map, __isl_take isl_map *tagged)
{
	isl_map_free(map);
	isl_map_free(tagged);

	return isl_stat_ok;
}

/* Add a new edge to the graph based on the given map
 * and add it to data->graph->edge_table[data->type].
 * If a dependence relation of a given type happens to be identical
 * to one of the dependence relations of a type that was added before,
 * then we don't create a new edge, but instead mark the original edge
 * as also representing a dependence of the current type.
 *
 * Edges of type isl_edge_condition or isl_edge_conditional_validity
 * may be specified as "tagged" dependence relations.  That is, "map"
 * may contain elements (i -> a) -> (j -> b), where i -> j denotes
 * the dependence on iterations and a and b are tags.
 * edge->map is set to the relation containing the elements i -> j,
 * while edge->tagged_condition and edge->tagged_validity contain
 * the union of all the "map" relations
 * for which extract_edge is called that result in the same edge->map.
 *
 * If the source or the destination node is compressed, then
 * intersect both "map" and "tagged" with the constraints that
 * were used to construct the compression.
 * This ensures that there are no schedule constraints defined
 * outside of these domains, while the scheduler no longer has
 * any control over those outside parts.
 */
static isl_stat extract_edge(__isl_take isl_map *map, void *user)
{
	isl_bool empty;
	isl_ctx *ctx = isl_map_get_ctx(map);
	struct isl_extract_edge_data *data = user;
	struct isl_sched_graph *graph = data->graph;
	struct isl_sched_node *src, *dst;
	struct isl_sched_edge *edge;
	isl_map *tagged = NULL;

	if (data->type == isl_edge_condition ||
	    data->type == isl_edge_conditional_validity) {
		if (isl_map_can_zip(map)) {
			tagged = isl_map_copy(map);
			map = isl_set_unwrap(isl_map_domain(isl_map_zip(map)));
		} else {
			tagged = insert_dummy_tags(isl_map_copy(map));
		}
	}

	src = find_domain_node(ctx, graph, map);
	dst = find_range_node(ctx, graph, map);

	if (!src || !dst)
		goto error;
	if (!is_node(graph, src) || !is_node(graph, dst))
		return skip_edge(map, tagged);

	if (src->compressed || dst->compressed) {
		isl_map *hull;
		hull = extract_hull(src, dst);
		if (tagged)
			tagged = map_intersect_domains(tagged, hull);
		map = isl_map_intersect(map, hull);
	}

	empty = isl_map_plain_is_empty(map);
	if (empty < 0)
		goto error;
	if (empty)
		return skip_edge(map, tagged);

	graph->edge[graph->n_edge].src = src;
	graph->edge[graph->n_edge].dst = dst;
	graph->edge[graph->n_edge].map = map;
	graph->edge[graph->n_edge].types = 0;
	graph->edge[graph->n_edge].tagged_condition = NULL;
	graph->edge[graph->n_edge].tagged_validity = NULL;
	set_type(&graph->edge[graph->n_edge], data->type);
	if (data->type == isl_edge_condition)
		graph->edge[graph->n_edge].tagged_condition =
					isl_union_map_from_map(tagged);
	if (data->type == isl_edge_conditional_validity)
		graph->edge[graph->n_edge].tagged_validity =
					isl_union_map_from_map(tagged);

	edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]);
	if (!edge) {
		graph->n_edge++;
		return isl_stat_error;
	}
	if (edge == &graph->edge[graph->n_edge])
		return graph_edge_table_add(ctx, graph, data->type,
				    &graph->edge[graph->n_edge++]);

	if (merge_edge(edge, &graph->edge[graph->n_edge]) < 0)
		return isl_stat_error;

	return graph_edge_table_add(ctx, graph, data->type, edge);
error:
	isl_map_free(map);
	isl_map_free(tagged);
	return isl_stat_error;
}

/* Initialize the schedule graph "graph" from the schedule constraints "sc".
 *
 * The context is included in the domain before the nodes of
1557  * the graph are extracted in order to be able to exploit
1558  * any possible additional equalities.
1559  * Note that this intersection is only performed locally here.
1560  */
1561 static isl_stat graph_init(struct isl_sched_graph *graph,
1562 	__isl_keep isl_schedule_constraints *sc)
1563 {
1564 	isl_ctx *ctx;
1565 	isl_union_set *domain;
1566 	isl_union_map *c;
1567 	struct isl_extract_edge_data data;
1568 	enum isl_edge_type i;
1569 	isl_stat r;
1570 	isl_size n;
1571 
1572 	if (!sc)
1573 		return isl_stat_error;
1574 
1575 	ctx = isl_schedule_constraints_get_ctx(sc);
1576 
1577 	domain = isl_schedule_constraints_get_domain(sc);
1578 	n = isl_union_set_n_set(domain);
1579 	graph->n = n;
1580 	isl_union_set_free(domain);
1581 	if (n < 0)
1582 		return isl_stat_error;
1583 
1584 	n = isl_schedule_constraints_n_map(sc);
1585 	if (n < 0 || graph_alloc(ctx, graph, graph->n, n) < 0)
1586 		return isl_stat_error;
1587 
1588 	if (compute_max_row(graph, sc) < 0)
1589 		return isl_stat_error;
1590 	graph->root = graph;
1591 	graph->n = 0;
1592 	domain = isl_schedule_constraints_get_domain(sc);
1593 	domain = isl_union_set_intersect_params(domain,
1594 				    isl_schedule_constraints_get_context(sc));
1595 	r = isl_union_set_foreach_set(domain, &extract_node, graph);
1596 	isl_union_set_free(domain);
1597 	if (r < 0)
1598 		return isl_stat_error;
1599 	if (graph_init_table(ctx, graph) < 0)
1600 		return isl_stat_error;
1601 	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
1602 		isl_size n;
1603 
1604 		c = isl_schedule_constraints_get(sc, i);
1605 		n = isl_union_map_n_map(c);
1606 		graph->max_edge[i] = n;
1607 		isl_union_map_free(c);
1608 		if (n < 0)
1609 			return isl_stat_error;
1610 	}
1611 	if (graph_init_edge_tables(ctx, graph) < 0)
1612 		return isl_stat_error;
1613 	graph->n_edge = 0;
1614 	data.graph = graph;
1615 	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
1616 		isl_stat r;
1617 
1618 		data.type = i;
1619 		c = isl_schedule_constraints_get(sc, i);
1620 		r = isl_union_map_foreach_map(c, &extract_edge, &data);
1621 		isl_union_map_free(c);
1622 		if (r < 0)
1623 			return isl_stat_error;
1624 	}
1625 
1626 	return isl_stat_ok;
1627 }
1628 
1629 /* Check whether there is any dependence from node[j] to node[i]
1630  * or from node[i] to node[j].
1631  */
1632 static isl_bool node_follows_weak(int i, int j, void *user)
1633 {
1634 	isl_bool f;
1635 	struct isl_sched_graph *graph = user;
1636 
1637 	f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]);
1638 	if (f < 0 || f)
1639 		return f;
1640 	return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]);
1641 }
1642 
1643 /* Check whether there is a (conditional) validity dependence from node[j]
1644  * to node[i], forcing node[i] to follow node[j].
1645  */
1646 static isl_bool node_follows_strong(int i, int j, void *user)
1647 {
1648 	struct isl_sched_graph *graph = user;
1649 
1650 	return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
1651 }
1652 
1653 /* Use Tarjan's algorithm for computing the strongly connected components
1654  * in the dependence graph only considering those edges defined by "follows".
1655  */
1656 static isl_stat detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph,
1657 	isl_bool (*follows)(int i, int j, void *user))
1658 {
1659 	int i, n;
1660 	struct isl_tarjan_graph *g = NULL;
1661 
1662 	g = isl_tarjan_graph_init(ctx, graph->n, follows, graph);
1663 	if (!g)
1664 		return isl_stat_error;
1665 
1666 	graph->scc = 0;
1667 	i = 0;
1668 	n = graph->n;
1669 	while (n) {
1670 		while (g->order[i] != -1) {
1671 			graph->node[g->order[i]].scc = graph->scc;
1672 			--n;
1673 			++i;
1674 		}
1675 		++i;
1676 		graph->scc++;
1677 	}
1678 
1679 	isl_tarjan_graph_free(g);
1680 
1681 	return isl_stat_ok;
1682 }
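/* Illustration (not part of the original code): the loop above relies on
 * isl_tarjan_graph_init returning the components back to back in
 * g->order, each terminated by a -1.  For instance, with the
 * hypothetical contents
 *
 *	g->order = [ 2, 0, -1, 1, -1 ]
 *
 * nodes 2 and 0 would be assigned scc = 0 and node 1 would be
 * assigned scc = 1, leaving graph->scc equal to 2.
 */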
1683 
1684 /* Apply Tarjan's algorithm to detect the strongly connected components
1685  * in the dependence graph.
1686  * Only consider the (conditional) validity dependences and clear "weak".
1687  */
1688 static isl_stat detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1689 {
1690 	graph->weak = 0;
1691 	return detect_ccs(ctx, graph, &node_follows_strong);
1692 }
1693 
1694 /* Apply Tarjan's algorithm to detect the (weakly) connected components
1695  * in the dependence graph.
1696  * Consider all dependences and set "weak".
1697  */
1698 static isl_stat detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1699 {
1700 	graph->weak = 1;
1701 	return detect_ccs(ctx, graph, &node_follows_weak);
1702 }
1703 
1704 static int cmp_scc(const void *a, const void *b, void *data)
1705 {
1706 	struct isl_sched_graph *graph = data;
1707 	const int *i1 = a;
1708 	const int *i2 = b;
1709 
1710 	return graph->node[*i1].scc - graph->node[*i2].scc;
1711 }
1712 
1713 /* Sort the elements of graph->sorted according to the corresponding SCCs.
1714  */
1715 static int sort_sccs(struct isl_sched_graph *graph)
1716 {
1717 	return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
1718 }
1719 
1720 /* Return a non-parametric set in the compressed space of "node" that is
1721  * bounded by the size in each direction
1722  *
1723  *	{ [x] : -S_i <= x_i <= S_i }
1724  *
1725  * If S_i is infinity in direction i, then there are no constraints
1726  * in that direction.
1727  *
1728  * Cache the result in node->bounds.
1729  */
1730 static __isl_give isl_basic_set *get_size_bounds(struct isl_sched_node *node)
1731 {
1732 	isl_space *space;
1733 	isl_basic_set *bounds;
1734 	int i;
1735 
1736 	if (node->bounds)
1737 		return isl_basic_set_copy(node->bounds);
1738 
1739 	if (node->compressed)
1740 		space = isl_pw_multi_aff_get_domain_space(node->decompress);
1741 	else
1742 		space = isl_space_copy(node->space);
1743 	space = isl_space_drop_all_params(space);
1744 	bounds = isl_basic_set_universe(space);
1745 
1746 	for (i = 0; i < node->nvar; ++i) {
1747 		isl_val *size;
1748 
1749 		size = isl_multi_val_get_val(node->sizes, i);
1750 		if (!size)
1751 			return isl_basic_set_free(bounds);
1752 		if (!isl_val_is_int(size)) {
1753 			isl_val_free(size);
1754 			continue;
1755 		}
1756 		bounds = isl_basic_set_upper_bound_val(bounds, isl_dim_set, i,
1757 							isl_val_copy(size));
1758 		bounds = isl_basic_set_lower_bound_val(bounds, isl_dim_set, i,
1759 							isl_val_neg(size));
1760 	}
1761 
1762 	node->bounds = isl_basic_set_copy(bounds);
1763 	return bounds;
1764 }
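/* Worked example (illustrative only): for a node with nvar = 2 and
 * sizes S_0 = 10 and S_1 = infinity (i.e., not an integer value),
 * the loop above only adds bounds in the first direction,
 * so the cached set would be
 *
 *	{ [x_0, x_1] : -10 <= x_0 <= 10 }
 */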
1765 
1766 /* Compress the dependence relation "map", if needed, i.e.,
1767  * when the source node "src" and/or the destination node "dst"
1768  * has been compressed.
1769  */
1770 static __isl_give isl_map *compress(__isl_take isl_map *map,
1771 	struct isl_sched_node *src, struct isl_sched_node *dst)
1772 {
1773 	if (src->compressed)
1774 		map = isl_map_preimage_domain_pw_multi_aff(map,
1775 					isl_pw_multi_aff_copy(src->decompress));
1776 	if (dst->compressed)
1777 		map = isl_map_preimage_range_pw_multi_aff(map,
1778 					isl_pw_multi_aff_copy(dst->decompress));
1779 	return map;
1780 }
1781 
1782 /* Drop some constraints from "delta" that could be exploited
1783  * to construct loop coalescing schedules.
1784  * In particular, drop those constraints that bound the difference
1785  * to the size of the domain.
1786  * First project out the parameters to improve the effectiveness.
1787  */
1788 static __isl_give isl_set *drop_coalescing_constraints(
1789 	__isl_take isl_set *delta, struct isl_sched_node *node)
1790 {
1791 	isl_size nparam;
1792 	isl_basic_set *bounds;
1793 
1794 	nparam = isl_set_dim(delta, isl_dim_param);
1795 	if (nparam < 0)
1796 		return isl_set_free(delta);
1797 
1798 	bounds = get_size_bounds(node);
1799 
1800 	delta = isl_set_project_out(delta, isl_dim_param, 0, nparam);
1801 	delta = isl_set_remove_divs(delta);
1802 	delta = isl_set_plain_gist_basic_set(delta, bounds);
1803 	return delta;
1804 }
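/* Sketch of the intended effect (illustrative only): if the size of
 * the (single) domain dimension is 10 and
 *
 *	delta = { [d] : 1 <= d <= 10 }
 *
 * then taking the gist with respect to the size bounds
 * { [d] : -10 <= d <= 10 } drops the upper bound, leaving
 * { [d] : 1 <= d }, so that a schedule cannot exploit the fact
 * that the distance is bounded by the domain size.
 */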
1805 
1806 /* Given a dependence relation R from "node" to itself,
1807  * construct the set of coefficients of valid constraints for elements
1808  * in that dependence relation.
1809  * In particular, the result contains tuples of coefficients
1810  * c_0, c_n, c_x such that
1811  *
1812  *	c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
1813  *
1814  * or, equivalently,
1815  *
1816  *	c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
1817  *
1818  * We choose here to compute the dual of delta R.
1819  * Alternatively, we could have computed the dual of R, resulting
1820  * in a set of tuples c_0, c_n, c_x, c_y, and then
1821  * plugged in (c_0, c_n, c_x, -c_x).
1822  *
1823  * If "need_param" is set, then the resulting coefficients effectively
1824  * include coefficients for the parameters c_n.  Otherwise, they may
1825  * have been projected out already.
1826  * Since the constraints may be different for these two cases,
1827  * they are stored in separate caches.
1828  * In particular, if no parameter coefficients are required and
1829  * the schedule_treat_coalescing option is set, then the parameters
1830  * are projected out and some constraints that could be exploited
1831  * to construct coalescing schedules are removed before the dual
1832  * is computed.
1833  *
1834  * If "node" has been compressed, then the dependence relation
1835  * is also compressed before the set of coefficients is computed.
1836  */
1837 static __isl_give isl_basic_set *intra_coefficients(
1838 	struct isl_sched_graph *graph, struct isl_sched_node *node,
1839 	__isl_take isl_map *map, int need_param)
1840 {
1841 	isl_ctx *ctx;
1842 	isl_set *delta;
1843 	isl_map *key;
1844 	isl_basic_set *coef;
1845 	isl_maybe_isl_basic_set m;
1846 	isl_map_to_basic_set **hmap = &graph->intra_hmap;
1847 	int treat;
1848 
1849 	if (!map)
1850 		return NULL;
1851 
1852 	ctx = isl_map_get_ctx(map);
1853 	treat = !need_param && isl_options_get_schedule_treat_coalescing(ctx);
1854 	if (!treat)
1855 		hmap = &graph->intra_hmap_param;
1856 	m = isl_map_to_basic_set_try_get(*hmap, map);
1857 	if (m.valid < 0 || m.valid) {
1858 		isl_map_free(map);
1859 		return m.value;
1860 	}
1861 
1862 	key = isl_map_copy(map);
1863 	map = compress(map, node, node);
1864 	delta = isl_map_deltas(map);
1865 	if (treat)
1866 		delta = drop_coalescing_constraints(delta, node);
1867 	delta = isl_set_remove_divs(delta);
1868 	coef = isl_set_coefficients(delta);
1869 	*hmap = isl_map_to_basic_set_set(*hmap, key, isl_basic_set_copy(coef));
1870 
1871 	return coef;
1872 }
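/* Small example (illustrative only, with no parameters involved):
 * for the self-dependence
 *
 *	R = { S[i] -> S[i + 1] }
 *
 * one has delta R = { [1] }, so the valid constraints
 * c_0 + c_x d >= 0 for each d in delta R reduce to the single
 * inequality c_0 + c_x >= 0, and that is what the returned
 * coefficient set expresses.
 */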
1873 
1874 /* Given a dependence relation R, construct the set of coefficients
1875  * of valid constraints for elements in that dependence relation.
1876  * In particular, the result contains tuples of coefficients
1877  * c_0, c_n, c_x, c_y such that
1878  *
1879  *	c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
1880  *
1881  * If the source or destination nodes of "edge" have been compressed,
1882  * then the dependence relation is also compressed before
1883  * the set of coefficients is computed.
1884  */
1885 static __isl_give isl_basic_set *inter_coefficients(
1886 	struct isl_sched_graph *graph, struct isl_sched_edge *edge,
1887 	__isl_take isl_map *map)
1888 {
1889 	isl_set *set;
1890 	isl_map *key;
1891 	isl_basic_set *coef;
1892 	isl_maybe_isl_basic_set m;
1893 
1894 	m = isl_map_to_basic_set_try_get(graph->inter_hmap, map);
1895 	if (m.valid < 0 || m.valid) {
1896 		isl_map_free(map);
1897 		return m.value;
1898 	}
1899 
1900 	key = isl_map_copy(map);
1901 	map = compress(map, edge->src, edge->dst);
1902 	set = isl_map_wrap(isl_map_remove_divs(map));
1903 	coef = isl_set_coefficients(set);
1904 	graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key,
1905 					isl_basic_set_copy(coef));
1906 
1907 	return coef;
1908 }
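/* Small example (illustrative only, with no parameters involved):
 * for the dependence
 *
 *	R = { A[i] -> B[i] }
 *
 * with i unbounded, the constraint c_0 + c_x i + c_y i >= 0 has to
 * hold for every integer i, which forces c_x + c_y = 0 and c_0 >= 0
 * in the returned coefficient set.
 */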
1909 
1910 /* Return the position of the coefficients of the variables in
1911  * the coefficients constraints "coef".
1912  *
1913  * The space of "coef" is of the form
1914  *
1915  *	{ coefficients[[cst, params] -> S] }
1916  *
1917  * Return the position of S.
1918  */
1919 static isl_size coef_var_offset(__isl_keep isl_basic_set *coef)
1920 {
1921 	isl_size offset;
1922 	isl_space *space;
1923 
1924 	space = isl_space_unwrap(isl_basic_set_get_space(coef));
1925 	offset = isl_space_dim(space, isl_dim_in);
1926 	isl_space_free(space);
1927 
1928 	return offset;
1929 }
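/* Note (added for clarity): since the input tuple of the unwrapped
 * space consists of the constant term and the parameters, the
 * returned offset is simply 1 plus the number of parameters.
 * For example, with two parameters the variable coefficients
 * start at position 3.
 */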
1930 
1931 /* Return the offset of the coefficient of the constant term of "node"
1932  * within the (I)LP.
1933  *
1934  * Within each node, the coefficients have the following order:
1935  *	- positive and negative parts of c_i_x
1936  *	- c_i_n (if parametric)
1937  *	- c_i_0
1938  */
1939 static int node_cst_coef_offset(struct isl_sched_node *node)
1940 {
1941 	return node->start + 2 * node->nvar + node->nparam;
1942 }
1943 
1944 /* Return the offset of the coefficients of the parameters of "node"
1945  * within the (I)LP.
1946  *
1947  * Within each node, the coefficients have the following order:
1948  *	- positive and negative parts of c_i_x
1949  *	- c_i_n (if parametric)
1950  *	- c_i_0
1951  */
1952 static int node_par_coef_offset(struct isl_sched_node *node)
1953 {
1954 	return node->start + 2 * node->nvar;
1955 }
1956 
1957 /* Return the offset of the coefficients of the variables of "node"
1958  * within the (I)LP.
1959  *
1960  * Within each node, the coefficients have the following order:
1961  *	- positive and negative parts of c_i_x
1962  *	- c_i_n (if parametric)
1963  *	- c_i_0
1964  */
1965 static int node_var_coef_offset(struct isl_sched_node *node)
1966 {
1967 	return node->start;
1968 }
1969 
1970 /* Return the position of the pair of variables encoding
1971  * coefficient "i" of "node".
1972  *
1973  * The order of these variable pairs is the opposite of
1974  * that of the coefficients, with 2 variables per coefficient.
1975  */
1976 static int node_var_coef_pos(struct isl_sched_node *node, int i)
1977 {
1978 	return node_var_coef_offset(node) + 2 * (node->nvar - 1 - i);
1979 }
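/* Worked example of the layout above (illustrative only): for a node
 * with start = 10, nvar = 2 and nparam = 1, the (I)LP variables are
 *
 *	10: c_x_1^-   11: c_x_1^+
 *	12: c_x_0^-   13: c_x_0^+
 *	14: c_n       15: c_0
 *
 * so node_var_coef_pos(node, 0) = 12, node_par_coef_offset(node) = 14
 * and node_cst_coef_offset(node) = 15.
 */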
1980 
1981 /* Construct an isl_dim_map for mapping constraints on coefficients
1982  * for "node" to the corresponding positions in graph->lp.
1983  * "offset" is the offset of the coefficients for the variables
1984  * in the input constraints.
1985  * "s" is the sign of the mapping.
1986  *
1987  * The input constraints are given in terms of the coefficients
1988  * (c_0, c_x) or (c_0, c_n, c_x).
1989  * The mapping produced by this function essentially plugs in
1990  * (0, c_i_x^+ - c_i_x^-) if s = 1 and
1991  * (0, -c_i_x^+ + c_i_x^-) if s = -1 or
1992  * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and
1993  * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1.
1994  * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1995  * Furthermore, the order of these pairs is the opposite of that
1996  * of the corresponding coefficients.
1997  *
1998  * The caller can extend the mapping to also map the other coefficients
1999  * (and therefore not plug in 0).
2000  */
2001 static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx,
2002 	struct isl_sched_graph *graph, struct isl_sched_node *node,
2003 	int offset, int s)
2004 {
2005 	int pos;
2006 	isl_size total;
2007 	isl_dim_map *dim_map;
2008 
2009 	total = isl_basic_set_dim(graph->lp, isl_dim_all);
2010 	if (!node || total < 0)
2011 		return NULL;
2012 
2013 	pos = node_var_coef_pos(node, 0);
2014 	dim_map = isl_dim_map_alloc(ctx, total);
2015 	isl_dim_map_range(dim_map, pos, -2, offset, 1, node->nvar, -s);
2016 	isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, node->nvar, s);
2017 
2018 	return dim_map;
2019 }
2020 
2021 /* Construct an isl_dim_map for mapping constraints on coefficients
2022  * for "src" (node i) and "dst" (node j) to the corresponding positions
2023  * in graph->lp.
2024  * "offset" is the offset of the coefficients for the variables of "src"
2025  * in the input constraints.
2026  * "s" is the sign of the mapping.
2027  *
2028  * The input constraints are given in terms of the coefficients
2029  * (c_0, c_n, c_x, c_y).
2030  * The mapping produced by this function essentially plugs in
2031  * (c_j_0 - c_i_0, c_j_n - c_i_n,
2032  *  -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and
2033  * (-c_j_0 + c_i_0, -c_j_n + c_i_n,
2034  *  c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1.
2035  * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
2036  * Furthermore, the order of these pairs is the opposite of that
2037  * of the corresponding coefficients.
2038  *
2039  * The caller can further extend the mapping.
2040  */
2041 static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx,
2042 	struct isl_sched_graph *graph, struct isl_sched_node *src,
2043 	struct isl_sched_node *dst, int offset, int s)
2044 {
2045 	int pos;
2046 	isl_size total;
2047 	isl_dim_map *dim_map;
2048 
2049 	total = isl_basic_set_dim(graph->lp, isl_dim_all);
2050 	if (!src || !dst || total < 0)
2051 		return NULL;
2052 
2053 	dim_map = isl_dim_map_alloc(ctx, total);
2054 
2055 	pos = node_cst_coef_offset(dst);
2056 	isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, s);
2057 	pos = node_par_coef_offset(dst);
2058 	isl_dim_map_range(dim_map, pos, 1, 1, 1, dst->nparam, s);
2059 	pos = node_var_coef_pos(dst, 0);
2060 	isl_dim_map_range(dim_map, pos, -2, offset + src->nvar, 1,
2061 			  dst->nvar, -s);
2062 	isl_dim_map_range(dim_map, pos + 1, -2, offset + src->nvar, 1,
2063 			  dst->nvar, s);
2064 
2065 	pos = node_cst_coef_offset(src);
2066 	isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, -s);
2067 	pos = node_par_coef_offset(src);
2068 	isl_dim_map_range(dim_map, pos, 1, 1, 1, src->nparam, -s);
2069 	pos = node_var_coef_pos(src, 0);
2070 	isl_dim_map_range(dim_map, pos, -2, offset, 1, src->nvar, s);
2071 	isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, src->nvar, -s);
2072 
2073 	return dim_map;
2074 }
2075 
2076 /* Add the constraints from "src" to "dst" using "dim_map",
2077  * after making sure there is enough room in "dst" for the extra constraints.
2078  */
2079 static __isl_give isl_basic_set *add_constraints_dim_map(
2080 	__isl_take isl_basic_set *dst, __isl_take isl_basic_set *src,
2081 	__isl_take isl_dim_map *dim_map)
2082 {
2083 	int n_eq, n_ineq;
2084 
2085 	n_eq = isl_basic_set_n_equality(src);
2086 	n_ineq = isl_basic_set_n_inequality(src);
2087 	dst = isl_basic_set_extend_constraints(dst, n_eq, n_ineq);
2088 	dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map);
2089 	return dst;
2090 }
2091 
2092 /* Add constraints to graph->lp that force validity for the given
2093  * dependence from a node i to itself.
2094  * That is, add constraints that enforce
2095  *
2096  *	(c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
2097  *	= c_i_x (y - x) >= 0
2098  *
2099  * for each (x,y) in R.
2100  * We obtain general constraints on coefficients (c_0, c_x)
2101  * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-),
2102  * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
2103  * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
2104  * Note that the result of intra_coefficients may also contain
2105  * parameter coefficients c_n, in which case 0 is plugged in for them as well.
2106  */
2107 static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph,
2108 	struct isl_sched_edge *edge)
2109 {
2110 	isl_size offset;
2111 	isl_map *map = isl_map_copy(edge->map);
2112 	isl_ctx *ctx = isl_map_get_ctx(map);
2113 	isl_dim_map *dim_map;
2114 	isl_basic_set *coef;
2115 	struct isl_sched_node *node = edge->src;
2116 
2117 	coef = intra_coefficients(graph, node, map, 0);
2118 
2119 	offset = coef_var_offset(coef);
2120 	if (offset < 0)
2121 		coef = isl_basic_set_free(coef);
2122 	if (!coef)
2123 		return isl_stat_error;
2124 
2125 	dim_map = intra_dim_map(ctx, graph, node, offset, 1);
2126 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2127 
2128 	return isl_stat_ok;
2129 }
2130 
2131 /* Add constraints to graph->lp that force validity for the given
2132  * dependence from node i to node j.
2133  * That is, add constraints that enforce
2134  *
2135  *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
2136  *
2137  * for each (x,y) in R.
2138  * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
2139  * of valid constraints for R and then plug in
2140  * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-),
2141  * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
2142  * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
2143  */
2144 static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph,
2145 	struct isl_sched_edge *edge)
2146 {
2147 	isl_size offset;
2148 	isl_map *map;
2149 	isl_ctx *ctx;
2150 	isl_dim_map *dim_map;
2151 	isl_basic_set *coef;
2152 	struct isl_sched_node *src = edge->src;
2153 	struct isl_sched_node *dst = edge->dst;
2154 
2155 	if (!graph->lp)
2156 		return isl_stat_error;
2157 
2158 	map = isl_map_copy(edge->map);
2159 	ctx = isl_map_get_ctx(map);
2160 	coef = inter_coefficients(graph, edge, map);
2161 
2162 	offset = coef_var_offset(coef);
2163 	if (offset < 0)
2164 		coef = isl_basic_set_free(coef);
2165 	if (!coef)
2166 		return isl_stat_error;
2167 
2168 	dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
2169 
2170 	edge->start = graph->lp->n_ineq;
2171 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2172 	if (!graph->lp)
2173 		return isl_stat_error;
2174 	edge->end = graph->lp->n_ineq;
2175 
2176 	return isl_stat_ok;
2177 }
2178 
2179 /* Add constraints to graph->lp that bound the dependence distance for the given
2180  * dependence from a node i to itself.
2181  * If s = 1, we add the constraint
2182  *
2183  *	c_i_x (y - x) <= m_0 + m_n n
2184  *
2185  * or
2186  *
2187  *	-c_i_x (y - x) + m_0 + m_n n >= 0
2188  *
2189  * for each (x,y) in R.
2190  * If s = -1, we add the constraint
2191  *
2192  *	-c_i_x (y - x) <= m_0 + m_n n
2193  *
2194  * or
2195  *
2196  *	c_i_x (y - x) + m_0 + m_n n >= 0
2197  *
2198  * for each (x,y) in R.
2199  * We obtain general constraints on coefficients (c_0, c_n, c_x)
2200  * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
2201  * with each coefficient (except m_0) represented as a pair of non-negative
2202  * coefficients.
2203  *
2204  *
2205  * If "local" is set, then we add constraints
2206  *
2207  *	c_i_x (y - x) <= 0
2208  *
2209  * or
2210  *
2211  *	-c_i_x (y - x) <= 0
2212  *
2213  * instead, forcing the dependence distance to be (less than or) equal to 0.
2214  * That is, we plug in (0, 0, -s * c_i_x),
2215  * intra_coefficients is not required to have c_n in its result when
2216  * "local" is set.  If they are missing, then (0, -s * c_i_x) is plugged in.
2217  * Note that dependences marked local are treated as validity constraints
2218  * by add_all_validity_constraints and therefore also have
2219  * their distances bounded by 0 from below.
2220  */
2221 static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph,
2222 	struct isl_sched_edge *edge, int s, int local)
2223 {
2224 	isl_size offset;
2225 	isl_size nparam;
2226 	isl_map *map = isl_map_copy(edge->map);
2227 	isl_ctx *ctx = isl_map_get_ctx(map);
2228 	isl_dim_map *dim_map;
2229 	isl_basic_set *coef;
2230 	struct isl_sched_node *node = edge->src;
2231 
2232 	coef = intra_coefficients(graph, node, map, !local);
2233 	nparam = isl_space_dim(node->space, isl_dim_param);
2234 
2235 	offset = coef_var_offset(coef);
2236 	if (nparam < 0 || offset < 0)
2237 		coef = isl_basic_set_free(coef);
2238 	if (!coef)
2239 		return isl_stat_error;
2240 
2241 	dim_map = intra_dim_map(ctx, graph, node, offset, -s);
2242 
2243 	if (!local) {
2244 		isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2245 		isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2246 		isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2247 	}
2248 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2249 
2250 	return isl_stat_ok;
2251 }
2252 
2253 /* Add constraints to graph->lp that bound the dependence distance for the given
2254  * dependence from node i to node j.
2255  * If s = 1, we add the constraint
2256  *
2257  *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
2258  *		<= m_0 + m_n n
2259  *
2260  * or
2261  *
2262  *	-(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
2263  *		m_0 + m_n n >= 0
2264  *
2265  * for each (x,y) in R.
2266  * If s = -1, we add the constraint
2267  *
2268  *	-((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
2269  *		<= m_0 + m_n n
2270  *
2271  * or
2272  *
2273  *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
2274  *		m_0 + m_n n >= 0
2275  *
2276  * for each (x,y) in R.
2277  * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
2278  * of valid constraints for R and then plug in
2279  * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
2280  *  s*c_i_x, -s*c_j_x)
2281  * with each coefficient (except m_0, c_*_0 and c_*_n)
2282  * represented as a pair of non-negative coefficients.
2283  *
2284  *
2285  * If "local" is set (and s = 1), then we add constraints
2286  *
2287  *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0
2288  *
2289  * or
2290  *
2291  *	-((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)) >= 0
2292  *
2293  * instead, forcing the dependence distance to be (less than or) equal to 0.
2294  * That is, we plug in
2295  * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x).
2296  * Note that dependences marked local are treated as validity constraints
2297  * by add_all_validity_constraints and therefore also have
2298  * their distances bounded by 0 from below.
2299  */
2300 static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph,
2301 	struct isl_sched_edge *edge, int s, int local)
2302 {
2303 	isl_size offset;
2304 	isl_size nparam;
2305 	isl_map *map = isl_map_copy(edge->map);
2306 	isl_ctx *ctx = isl_map_get_ctx(map);
2307 	isl_dim_map *dim_map;
2308 	isl_basic_set *coef;
2309 	struct isl_sched_node *src = edge->src;
2310 	struct isl_sched_node *dst = edge->dst;
2311 
2312 	coef = inter_coefficients(graph, edge, map);
2313 	nparam = isl_space_dim(src->space, isl_dim_param);
2314 
2315 	offset = coef_var_offset(coef);
2316 	if (nparam < 0 || offset < 0)
2317 		coef = isl_basic_set_free(coef);
2318 	if (!coef)
2319 		return isl_stat_error;
2320 
2321 	dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s);
2322 
2323 	if (!local) {
2324 		isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2325 		isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2326 		isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2327 	}
2328 
2329 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2330 
2331 	return isl_stat_ok;
2332 }
2333 
2334 /* Should the distance over "edge" be forced to zero?
2335  * That is, is it marked as a local edge?
2336  * If "use_coincidence" is set, then coincidence edges are treated
2337  * as local edges.
2338  */
2339 static int force_zero(struct isl_sched_edge *edge, int use_coincidence)
2340 {
2341 	return is_local(edge) || (use_coincidence && is_coincidence(edge));
2342 }
2343 
2344 /* Add all validity constraints to graph->lp.
2345  *
2346  * An edge that is forced to be local needs to have its dependence
2347  * distances equal to zero.  We take care of bounding them by 0 from below
2348  * here.  add_all_proximity_constraints takes care of bounding them by 0
2349  * from above.
2350  *
2351  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2352  * Otherwise, we ignore them.
2353  */
2354 static int add_all_validity_constraints(struct isl_sched_graph *graph,
2355 	int use_coincidence)
2356 {
2357 	int i;
2358 
2359 	for (i = 0; i < graph->n_edge; ++i) {
2360 		struct isl_sched_edge *edge = &graph->edge[i];
2361 		int zero;
2362 
2363 		zero = force_zero(edge, use_coincidence);
2364 		if (!is_validity(edge) && !zero)
2365 			continue;
2366 		if (edge->src != edge->dst)
2367 			continue;
2368 		if (add_intra_validity_constraints(graph, edge) < 0)
2369 			return -1;
2370 	}
2371 
2372 	for (i = 0; i < graph->n_edge; ++i) {
2373 		struct isl_sched_edge *edge = &graph->edge[i];
2374 		int zero;
2375 
2376 		zero = force_zero(edge, use_coincidence);
2377 		if (!is_validity(edge) && !zero)
2378 			continue;
2379 		if (edge->src == edge->dst)
2380 			continue;
2381 		if (add_inter_validity_constraints(graph, edge) < 0)
2382 			return -1;
2383 	}
2384 
2385 	return 0;
2386 }
2387 
2388 /* Add constraints to graph->lp that bound the dependence distance
2389  * for all dependence relations.
2390  * If a given proximity dependence is identical to a validity
2391  * dependence, then the dependence distance is already bounded
2392  * from below (by zero), so we only need to bound the distance
2393  * from above.  (This includes the case of "local" dependences
2394  * which are treated as validity dependences by add_all_validity_constraints.)
2395  * Otherwise, we need to bound the distance both from above and from below.
2396  *
2397  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2398  * Otherwise, we ignore them.
2399  */
2400 static int add_all_proximity_constraints(struct isl_sched_graph *graph,
2401 	int use_coincidence)
2402 {
2403 	int i;
2404 
2405 	for (i = 0; i < graph->n_edge; ++i) {
2406 		struct isl_sched_edge *edge = &graph->edge[i];
2407 		int zero;
2408 
2409 		zero = force_zero(edge, use_coincidence);
2410 		if (!is_proximity(edge) && !zero)
2411 			continue;
2412 		if (edge->src == edge->dst &&
2413 		    add_intra_proximity_constraints(graph, edge, 1, zero) < 0)
2414 			return -1;
2415 		if (edge->src != edge->dst &&
2416 		    add_inter_proximity_constraints(graph, edge, 1, zero) < 0)
2417 			return -1;
2418 		if (is_validity(edge) || zero)
2419 			continue;
2420 		if (edge->src == edge->dst &&
2421 		    add_intra_proximity_constraints(graph, edge, -1, 0) < 0)
2422 			return -1;
2423 		if (edge->src != edge->dst &&
2424 		    add_inter_proximity_constraints(graph, edge, -1, 0) < 0)
2425 			return -1;
2426 	}
2427 
2428 	return 0;
2429 }
2430 
2431 /* Normalize the rows of "indep" such that all rows are lexicographically
2432  * positive and such that each row contains as many final zeros as possible,
2433  * given the choice for the previous rows.
2434  * Do this by performing elementary row operations.
2435  */
2436 static __isl_give isl_mat *normalize_independent(__isl_take isl_mat *indep)
2437 {
2438 	indep = isl_mat_reverse_gauss(indep);
2439 	indep = isl_mat_lexnonneg_rows(indep);
2440 	return indep;
2441 }
2442 
2443 /* Extract the linear part of the current schedule for node "node".
2444  */
2445 static __isl_give isl_mat *extract_linear_schedule(struct isl_sched_node *node)
2446 {
2447 	isl_size n_row = isl_mat_rows(node->sched);
2448 
2449 	if (n_row < 0)
2450 		return NULL;
2451 	return isl_mat_sub_alloc(node->sched, 0, n_row,
2452 			      1 + node->nparam, node->nvar);
2453 }
2454 
2455 /* Compute a basis for the rows in the linear part of the schedule
2456  * and extend this basis to a full basis.  The remaining rows
2457  * can then be used to force linear independence from the rows
2458  * in the schedule.
2459  *
2460  * In particular, given the schedule rows S, we compute
2461  *
2462  *	S   = H Q
2463  *	S U = H
2464  *
2465  * with H the Hermite normal form of S.  That is, all but the
2466  * first rank columns of H are zero and so each row in S is
2467  * a linear combination of the first rank rows of Q.
2468  * The matrix Q can be used as a variable transformation
2469  * that isolates the directions of S in the first rank rows.
2470  * Transposing S U = H yields
2471  *
2472  *	U^T S^T = H^T
2473  *
2474  * with all but the first rank rows of H^T zero.
2475  * The last rows of U^T are therefore linear combinations
2476  * of schedule coefficients that are all zero on schedule
2477  * coefficients that are linearly dependent on the rows of S.
2478  * At least one of these combinations is non-zero on
2479  * linearly independent schedule coefficients.
2480  * The rows are normalized to involve as few of the last
2481  * coefficients as possible and to have a positive initial value.
2482  */
2483 static int node_update_vmap(struct isl_sched_node *node)
2484 {
2485 	isl_mat *H, *U, *Q;
2486 
2487 	H = extract_linear_schedule(node);
2488 
2489 	H = isl_mat_left_hermite(H, 0, &U, &Q);
2490 	isl_mat_free(node->indep);
2491 	isl_mat_free(node->vmap);
2492 	node->vmap = Q;
2493 	node->indep = isl_mat_transpose(U);
2494 	node->rank = isl_mat_initial_non_zero_cols(H);
2495 	node->indep = isl_mat_drop_rows(node->indep, 0, node->rank);
2496 	node->indep = normalize_independent(node->indep);
2497 	isl_mat_free(H);
2498 
2499 	if (!node->indep || !node->vmap || node->rank < 0)
2500 		return -1;
2501 	return 0;
2502 }
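/* Worked example (illustrative only): for a single schedule row
 * S = [ 1 1 ], one possible decomposition has H = [ 1 0 ],
 * U = [[ 1 -1 ], [ 0 1 ]] and Q = [[ 1 1 ], [ 0 1 ]], so rank = 1.
 * Dropping the first row of U^T = [[ 1 0 ], [ -1 1 ]] and normalizing
 * leaves indep = [ 1 -1 ]: the combination c_0 - c_1 is zero exactly
 * when a new row is a multiple of [ 1 1 ], so requiring this
 * combination to be non-zero forces linear independence.
 */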
2503 
2504 /* Is "edge" marked as a validity or a conditional validity edge?
2505  */
2506 static int is_any_validity(struct isl_sched_edge *edge)
2507 {
2508 	return is_validity(edge) || is_conditional_validity(edge);
2509 }
2510 
2511 /* How many times should we count the constraints in "edge"?
2512  *
2513  * We count as follows
2514  * validity		-> 1 (>= 0)
2515  * validity+proximity	-> 2 (>= 0 and upper bound)
2516  * proximity		-> 2 (lower and upper bound)
2517  * local(+any)		-> 2 (>= 0 and <= 0)
2518  *
2519  * If an edge is only marked conditional_validity then it counts
2520  * as zero since it is only checked afterwards.
2521  *
2522  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2523  * Otherwise, we ignore them.
2524  */
2525 static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
2526 {
2527 	if (is_proximity(edge) || force_zero(edge, use_coincidence))
2528 		return 2;
2529 	if (is_validity(edge))
2530 		return 1;
2531 	return 0;
2532 }
2533 
2534 /* How many times should the constraints in "edge" be counted
2535  * as a parametric intra-node constraint?
2536  *
2537  * Only proximity edges that are not forced zero need
2538  * coefficient constraints that include coefficients for parameters.
2539  * If the edge is also a validity edge, then only
2540  * an upper bound is introduced.  Otherwise, both lower and upper bounds
2541  * are introduced.
2542  */
2543 static int parametric_intra_edge_multiplicity(struct isl_sched_edge *edge,
2544 	int use_coincidence)
2545 {
2546 	if (edge->src != edge->dst)
2547 		return 0;
2548 	if (!is_proximity(edge))
2549 		return 0;
2550 	if (force_zero(edge, use_coincidence))
2551 		return 0;
2552 	if (is_validity(edge))
2553 		return 1;
2554 	else
2555 		return 2;
2556 }
2557 
2558 /* Add "f" times the number of equality and inequality constraints of "bset"
2559  * to "n_eq" and "n_ineq" and free "bset".
2560  */
2561 static isl_stat update_count(__isl_take isl_basic_set *bset,
2562 	int f, int *n_eq, int *n_ineq)
2563 {
2564 	if (!bset)
2565 		return isl_stat_error;
2566 
2567 	*n_eq += isl_basic_set_n_equality(bset);
2568 	*n_ineq += isl_basic_set_n_inequality(bset);
2569 	isl_basic_set_free(bset);
2570 
2571 	return isl_stat_ok;
2572 }
2573 
2574 /* Count the number of equality and inequality constraints
2575  * that will be added for the given map.
2576  *
2577  * The edges that require parameter coefficients are counted separately.
2578  *
2579  * "use_coincidence" is set if we should take into account coincidence edges.
2580  */
2581 static isl_stat count_map_constraints(struct isl_sched_graph *graph,
2582 	struct isl_sched_edge *edge, __isl_take isl_map *map,
2583 	int *n_eq, int *n_ineq, int use_coincidence)
2584 {
2585 	isl_map *copy;
2586 	isl_basic_set *coef;
2587 	int f = edge_multiplicity(edge, use_coincidence);
2588 	int fp = parametric_intra_edge_multiplicity(edge, use_coincidence);
2589 
2590 	if (f == 0) {
2591 		isl_map_free(map);
2592 		return isl_stat_ok;
2593 	}
2594 
2595 	if (edge->src != edge->dst) {
2596 		coef = inter_coefficients(graph, edge, map);
2597 		return update_count(coef, f, n_eq, n_ineq);
2598 	}
2599 
2600 	if (fp > 0) {
2601 		copy = isl_map_copy(map);
2602 		coef = intra_coefficients(graph, edge->src, copy, 1);
2603 		if (update_count(coef, fp, n_eq, n_ineq) < 0)
2604 			goto error;
2605 	}
2606 
2607 	if (f > fp) {
2608 		copy = isl_map_copy(map);
2609 		coef = intra_coefficients(graph, edge->src, copy, 0);
2610 		if (update_count(coef, f - fp, n_eq, n_ineq) < 0)
2611 			goto error;
2612 	}
2613 
2614 	isl_map_free(map);
2615 	return isl_stat_ok;
2616 error:
2617 	isl_map_free(map);
2618 	return isl_stat_error;
2619 }
2620 
2621 /* Count the number of equality and inequality constraints
2622  * that will be added to the main lp problem.
2623  * We count as follows
2624  * validity		-> 1 (>= 0)
2625  * validity+proximity	-> 2 (>= 0 and upper bound)
2626  * proximity		-> 2 (lower and upper bound)
2627  * local(+any)		-> 2 (>= 0 and <= 0)
2628  *
2629  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2630  * Otherwise, we ignore them.
2631  */
2632 static int count_constraints(struct isl_sched_graph *graph,
2633 	int *n_eq, int *n_ineq, int use_coincidence)
2634 {
2635 	int i;
2636 
2637 	*n_eq = *n_ineq = 0;
2638 	for (i = 0; i < graph->n_edge; ++i) {
2639 		struct isl_sched_edge *edge = &graph->edge[i];
2640 		isl_map *map = isl_map_copy(edge->map);
2641 
2642 		if (count_map_constraints(graph, edge, map, n_eq, n_ineq,
2643 					    use_coincidence) < 0)
2644 			return -1;
2645 	}
2646 
2647 	return 0;
2648 }
2649 
2650 /* Count the number of constraints that will be added by
2651  * add_bound_constant_constraints to bound the values of the constant terms
2652  * and increment *n_eq and *n_ineq accordingly.
2653  *
2654  * In practice, add_bound_constant_constraints only adds inequalities.
2655  */
2656 static isl_stat count_bound_constant_constraints(isl_ctx *ctx,
2657 	struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2658 {
2659 	if (isl_options_get_schedule_max_constant_term(ctx) == -1)
2660 		return isl_stat_ok;
2661 
2662 	*n_ineq += graph->n;
2663 
2664 	return isl_stat_ok;
2665 }
2666 
2667 /* Add constraints to bound the values of the constant terms in the schedule,
2668  * if requested by the user.
2669  *
2670  * The maximal value of the constant terms is defined by the option
2671  * "schedule_max_constant_term".
2672  */
2673 static isl_stat add_bound_constant_constraints(isl_ctx *ctx,
2674 	struct isl_sched_graph *graph)
2675 {
2676 	int i, k;
2677 	int max;
2678 	isl_size total;
2679 
2680 	max = isl_options_get_schedule_max_constant_term(ctx);
2681 	if (max == -1)
2682 		return isl_stat_ok;
2683 
2684 	total = isl_basic_set_dim(graph->lp, isl_dim_set);
2685 	if (total < 0)
2686 		return isl_stat_error;
2687 
2688 	for (i = 0; i < graph->n; ++i) {
2689 		struct isl_sched_node *node = &graph->node[i];
2690 		int pos;
2691 
2692 		k = isl_basic_set_alloc_inequality(graph->lp);
2693 		if (k < 0)
2694 			return isl_stat_error;
2695 		isl_seq_clr(graph->lp->ineq[k], 1 + total);
2696 		pos = node_cst_coef_offset(node);
2697 		isl_int_set_si(graph->lp->ineq[k][1 + pos], -1);
2698 		isl_int_set_si(graph->lp->ineq[k][0], max);
2699 	}
2700 
2701 	return isl_stat_ok;
2702 }
2703 
2704 /* Count the number of constraints that will be added by
2705  * add_bound_coefficient_constraints and increment *n_eq and *n_ineq
2706  * accordingly.
2707  *
2708  * In practice, add_bound_coefficient_constraints only adds inequalities.
2709  */
2710 static int count_bound_coefficient_constraints(isl_ctx *ctx,
2711 	struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2712 {
2713 	int i;
2714 
2715 	if (isl_options_get_schedule_max_coefficient(ctx) == -1 &&
2716 	    !isl_options_get_schedule_treat_coalescing(ctx))
2717 		return 0;
2718 
2719 	for (i = 0; i < graph->n; ++i)
2720 		*n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar;
2721 
2722 	return 0;
2723 }
2724 
2725 /* Add constraints to graph->lp that bound the values of
2726  * the parameter schedule coefficients of "node" to "max" and
2727  * the variable schedule coefficients to the corresponding entry
2728  * in node->max.
2729  * In either case, a negative value means that no bound needs to be imposed.
2730  *
2731  * For parameter coefficients, this amounts to adding a constraint
2732  *
2733  *	c_n <= max
2734  *
2735  * i.e.,
2736  *
2737  *	-c_n + max >= 0
2738  *
2739  * The variable coefficients are, however, not represented directly.
2740  * Instead, the variable coefficients c_x are written as differences
2741  * c_x = c_x^+ - c_x^-.
2742  * That is,
2743  *
2744  *	-max_i <= c_x_i <= max_i
2745  *
2746  * is encoded as
2747  *
2748  *	-max_i <= c_x_i^+ - c_x_i^- <= max_i
2749  *
2750  * or
2751  *
2752  *	-(c_x_i^+ - c_x_i^-) + max_i >= 0
2753  *	c_x_i^+ - c_x_i^- + max_i >= 0
2754  */
2755 static isl_stat node_add_coefficient_constraints(isl_ctx *ctx,
2756 	struct isl_sched_graph *graph, struct isl_sched_node *node, int max)
2757 {
2758 	int i, j, k;
2759 	isl_size total;
2760 	isl_vec *ineq;
2761 
2762 	total = isl_basic_set_dim(graph->lp, isl_dim_set);
2763 	if (total < 0)
2764 		return isl_stat_error;
2765 
2766 	for (j = 0; j < node->nparam; ++j) {
2767 		int dim;
2768 
2769 		if (max < 0)
2770 			continue;
2771 
2772 		k = isl_basic_set_alloc_inequality(graph->lp);
2773 		if (k < 0)
2774 			return isl_stat_error;
2775 		dim = 1 + node_par_coef_offset(node) + j;
2776 		isl_seq_clr(graph->lp->ineq[k], 1 + total);
2777 		isl_int_set_si(graph->lp->ineq[k][dim], -1);
2778 		isl_int_set_si(graph->lp->ineq[k][0], max);
2779 	}
2780 
2781 	ineq = isl_vec_alloc(ctx, 1 + total);
2782 	ineq = isl_vec_clr(ineq);
2783 	if (!ineq)
2784 		return isl_stat_error;
2785 	for (i = 0; i < node->nvar; ++i) {
2786 		int pos = 1 + node_var_coef_pos(node, i);
2787 
2788 		if (isl_int_is_neg(node->max->el[i]))
2789 			continue;
2790 
2791 		isl_int_set_si(ineq->el[pos], 1);
2792 		isl_int_set_si(ineq->el[pos + 1], -1);
2793 		isl_int_set(ineq->el[0], node->max->el[i]);
2794 
2795 		k = isl_basic_set_alloc_inequality(graph->lp);
2796 		if (k < 0)
2797 			goto error;
2798 		isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2799 
2800 		isl_seq_neg(ineq->el + pos, ineq->el + pos, 2);
2801 		k = isl_basic_set_alloc_inequality(graph->lp);
2802 		if (k < 0)
2803 			goto error;
2804 		isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2805 
2806 		isl_seq_clr(ineq->el + pos, 2);
2807 	}
2808 	isl_vec_free(ineq);
2809 
2810 	return isl_stat_ok;
2811 error:
2812 	isl_vec_free(ineq);
2813 	return isl_stat_error;
2814 }
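/* Example of the generated rows (illustrative only): if max_i = 3 for
 * variable i of some node, the pair of inequalities added above is
 *
 *	3 - (c_x_i^+ - c_x_i^-) >= 0
 *	3 + (c_x_i^+ - c_x_i^-) >= 0
 *
 * which together encode -3 <= c_x_i <= 3.
 */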
2815 
2816 /* Add constraints that bound the values of the variable and parameter
2817  * coefficients of the schedule.
2818  *
2819  * The maximal value of the coefficients is defined by the option
2820  * 'schedule_max_coefficient' and the entries in node->max.
2821  * These latter entries are only set if either the schedule_max_coefficient
2822  * option or the schedule_treat_coalescing option is set.
2823  */
2824 static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx,
2825 	struct isl_sched_graph *graph)
2826 {
2827 	int i;
2828 	int max;
2829 
2830 	max = isl_options_get_schedule_max_coefficient(ctx);
2831 
2832 	if (max == -1 && !isl_options_get_schedule_treat_coalescing(ctx))
2833 		return isl_stat_ok;
2834 
2835 	for (i = 0; i < graph->n; ++i) {
2836 		struct isl_sched_node *node = &graph->node[i];
2837 
2838 		if (node_add_coefficient_constraints(ctx, graph, node, max) < 0)
2839 			return isl_stat_error;
2840 	}
2841 
2842 	return isl_stat_ok;
2843 }
2844 
2845 /* Add a constraint to graph->lp that equates the value at position
2846  * "sum_pos" to the sum of the "n" values starting at "first".
2847  */
2848 static isl_stat add_sum_constraint(struct isl_sched_graph *graph,
2849 	int sum_pos, int first, int n)
2850 {
2851 	int i, k;
2852 	isl_size total;
2853 
2854 	total = isl_basic_set_dim(graph->lp, isl_dim_set);
2855 	if (total < 0)
2856 		return isl_stat_error;
2857 
2858 	k = isl_basic_set_alloc_equality(graph->lp);
2859 	if (k < 0)
2860 		return isl_stat_error;
2861 	isl_seq_clr(graph->lp->eq[k], 1 + total);
2862 	isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2863 	for (i = 0; i < n; ++i)
2864 		isl_int_set_si(graph->lp->eq[k][1 + first + i], 1);
2865 
2866 	return isl_stat_ok;
2867 }
2868 
2869 /* Add a constraint to graph->lp that equates the value at position
2870  * "sum_pos" to the sum of the parameter coefficients of all nodes.
2871  */
2872 static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph,
2873 	int sum_pos)
2874 {
2875 	int i, j, k;
2876 	isl_size total;
2877 
2878 	total = isl_basic_set_dim(graph->lp, isl_dim_set);
2879 	if (total < 0)
2880 		return isl_stat_error;
2881 
2882 	k = isl_basic_set_alloc_equality(graph->lp);
2883 	if (k < 0)
2884 		return isl_stat_error;
2885 	isl_seq_clr(graph->lp->eq[k], 1 + total);
2886 	isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2887 	for (i = 0; i < graph->n; ++i) {
2888 		int pos = 1 + node_par_coef_offset(&graph->node[i]);
2889 
2890 		for (j = 0; j < graph->node[i].nparam; ++j)
2891 			isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2892 	}
2893 
2894 	return isl_stat_ok;
2895 }
2896 
2897 /* Add a constraint to graph->lp that equates the value at position
2898  * "sum_pos" to the sum of the variable coefficients of all nodes.
2899  */
2900 static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph,
2901 	int sum_pos)
2902 {
2903 	int i, j, k;
2904 	isl_size total;
2905 
2906 	total = isl_basic_set_dim(graph->lp, isl_dim_set);
2907 	if (total < 0)
2908 		return isl_stat_error;
2909 
2910 	k = isl_basic_set_alloc_equality(graph->lp);
2911 	if (k < 0)
2912 		return isl_stat_error;
2913 	isl_seq_clr(graph->lp->eq[k], 1 + total);
2914 	isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2915 	for (i = 0; i < graph->n; ++i) {
2916 		struct isl_sched_node *node = &graph->node[i];
2917 		int pos = 1 + node_var_coef_offset(node);
2918 
2919 		for (j = 0; j < 2 * node->nvar; ++j)
2920 			isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2921 	}
2922 
2923 	return isl_stat_ok;
2924 }
2925 
2926 /* Construct an ILP problem for finding schedule coefficients
2927  * that result in non-negative, but small dependence distances
2928  * over all dependences.
2929  * In particular, the dependence distances over proximity edges
2930  * are bounded by m_0 + m_n n and we compute schedule coefficients
2931  * with small values (preferably zero) of m_n and m_0.
2932  *
2933  * All variables of the ILP are non-negative.  The actual coefficients
2934  * may be negative, so each coefficient is represented as the difference
2935  * of two non-negative variables.  The negative part always appears
2936  * immediately before the positive part.
2937  * Other than that, the variables have the following order
2938  *
2939  *	- sum of positive and negative parts of m_n coefficients
2940  *	- m_0
2941  *	- sum of all c_n coefficients
2942  *		(unconstrained when computing non-parametric schedules)
2943  *	- sum of positive and negative parts of all c_x coefficients
2944  *	- positive and negative parts of m_n coefficients
2945  *	- for each node
2946  *		- positive and negative parts of c_i_x, in opposite order
2947  *		- c_i_n (if parametric)
2948  *		- c_i_0
2949  *
2950  * The constraints are those from the edges plus two or three equalities
2951  * to express the sums.
2952  *
2953  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2954  * Otherwise, we ignore them.
2955  */
2956 static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
2957 	int use_coincidence)
2958 {
2959 	int i;
2960 	isl_size nparam;
2961 	unsigned total;
2962 	isl_space *space;
2963 	int parametric;
2964 	int param_pos;
2965 	int n_eq, n_ineq;
2966 
2967 	parametric = ctx->opt->schedule_parametric;
2968 	nparam = isl_space_dim(graph->node[0].space, isl_dim_param);
2969 	if (nparam < 0)
2970 		return isl_stat_error;
2971 	param_pos = 4;
2972 	total = param_pos + 2 * nparam;
2973 	for (i = 0; i < graph->n; ++i) {
2974 		struct isl_sched_node *node = &graph->node[graph->sorted[i]];
2975 		if (node_update_vmap(node) < 0)
2976 			return isl_stat_error;
2977 		node->start = total;
2978 		total += 1 + node->nparam + 2 * node->nvar;
2979 	}
2980 
2981 	if (count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0)
2982 		return isl_stat_error;
2983 	if (count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2984 		return isl_stat_error;
2985 	if (count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2986 		return isl_stat_error;
2987 
2988 	space = isl_space_set_alloc(ctx, 0, total);
2989 	isl_basic_set_free(graph->lp);
2990 	n_eq += 2 + parametric;
2991 
2992 	graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
2993 
2994 	if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0)
2995 		return isl_stat_error;
2996 	if (parametric && add_param_sum_constraint(graph, 2) < 0)
2997 		return isl_stat_error;
2998 	if (add_var_sum_constraint(graph, 3) < 0)
2999 		return isl_stat_error;
3000 	if (add_bound_constant_constraints(ctx, graph) < 0)
3001 		return isl_stat_error;
3002 	if (add_bound_coefficient_constraints(ctx, graph) < 0)
3003 		return isl_stat_error;
3004 	if (add_all_validity_constraints(graph, use_coincidence) < 0)
3005 		return isl_stat_error;
3006 	if (add_all_proximity_constraints(graph, use_coincidence) < 0)
3007 		return isl_stat_error;
3008 
3009 	return isl_stat_ok;
3010 }
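/* Worked example of the variable layout above (illustrative only):
 * with a single parameter n and a single node with nvar = 2 and
 * nparam = 1, the LP variables are
 *
 *	0: sum of m_n parts     1: m_0
 *	2: sum of c_n           3: sum of c_x parts
 *	4: m_n^-                5: m_n^+
 *	6: c_x_1^-  7: c_x_1^+  8: c_x_0^-  9: c_x_0^+
 *	10: c_n                 11: c_0
 *
 * so param_pos = 4, node->start = 6 and total = 12.
 */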
3011 
3012 /* Analyze the conflicting constraint found by
3013  * isl_tab_basic_set_non_trivial_lexmin.  If it corresponds to the validity
3014  * constraint of one of the edges between distinct nodes, living, moreover
3015  * in distinct SCCs, then record the source and sink SCC as this may
3016  * be a good place to cut between SCCs.
3017  */
3018 static int check_conflict(int con, void *user)
3019 {
3020 	int i;
3021 	struct isl_sched_graph *graph = user;
3022 
3023 	if (graph->src_scc >= 0)
3024 		return 0;
3025 
3026 	con -= graph->lp->n_eq;
3027 
3028 	if (con >= graph->lp->n_ineq)
3029 		return 0;
3030 
3031 	for (i = 0; i < graph->n_edge; ++i) {
3032 		if (!is_validity(&graph->edge[i]))
3033 			continue;
3034 		if (graph->edge[i].src == graph->edge[i].dst)
3035 			continue;
3036 		if (graph->edge[i].src->scc == graph->edge[i].dst->scc)
3037 			continue;
3038 		if (graph->edge[i].start > con)
3039 			continue;
3040 		if (graph->edge[i].end <= con)
3041 			continue;
3042 		graph->src_scc = graph->edge[i].src->scc;
3043 		graph->dst_scc = graph->edge[i].dst->scc;
3044 	}
3045 
3046 	return 0;
3047 }
3048 
3049 /* Check whether the next schedule row of the given node needs to be
3050  * non-trivial.  Lower-dimensional domains may have some trivial rows,
3051  * but as soon as the number of remaining required non-trivial rows
3052  * is as large as the number of remaining rows to be computed,
3053  * all remaining rows need to be non-trivial.
3054  */
3055 static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
3056 {
3057 	return node->nvar - node->rank >= graph->maxvar - graph->n_row;
3058 }
3059 
3060 /* Construct a non-triviality region with triviality directions
3061  * corresponding to the rows of "indep".
3062  * The rows of "indep" are expressed in terms of the schedule coefficients c_i,
3063  * while the triviality directions are expressed in terms of
3064  * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing
3065  * before c^+_i.  Furthermore,
3066  * the pairs of non-negative variables representing the coefficients
3067  * are stored in the opposite order.
3068  */
3069 static __isl_give isl_mat *construct_trivial(__isl_keep isl_mat *indep)
3070 {
3071 	isl_ctx *ctx;
3072 	isl_mat *mat;
3073 	int i, j;
3074 	isl_size n, n_var;
3075 
3076 	n = isl_mat_rows(indep);
3077 	n_var = isl_mat_cols(indep);
3078 	if (n < 0 || n_var < 0)
3079 		return NULL;
3080 
3081 	ctx = isl_mat_get_ctx(indep);
3082 	mat = isl_mat_alloc(ctx, n, 2 * n_var);
3083 	if (!mat)
3084 		return NULL;
3085 	for (i = 0; i < n; ++i) {
3086 		for (j = 0; j < n_var; ++j) {
3087 			int nj = n_var - 1 - j;
3088 			isl_int_neg(mat->row[i][2 * nj], indep->row[i][j]);
3089 			isl_int_set(mat->row[i][2 * nj + 1], indep->row[i][j]);
3090 		}
3091 	}
3092 
3093 	return mat;
3094 }
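/* Small example (illustrative only): for a single independence row
 * indep = [ 1 -1 ] over n_var = 2 coefficients, the loop above
 * produces the triviality direction
 *
 *	[ 1 -1 -1 1 ]
 *
 * expressed over the pairs (c_1^-, c_1^+, c_0^-, c_0^+), which is
 * non-zero exactly when c_0 - c_1 is non-zero.
 */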
3095 
3096 /* Solve the ILP problem constructed in setup_lp.
3097  * For each node such that all the remaining rows of its schedule
3098  * need to be non-trivial, we construct a non-triviality region.
3099  * This region imposes that the next row is independent of previous rows.
3100  * In particular, the non-triviality region enforces that at least
3101  * one of the linear combinations in the rows of node->indep is non-zero.
3102  */
3103 static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
3104 {
3105 	int i;
3106 	isl_vec *sol;
3107 	isl_basic_set *lp;
3108 
3109 	for (i = 0; i < graph->n; ++i) {
3110 		struct isl_sched_node *node = &graph->node[i];
3111 		isl_mat *trivial;
3112 
3113 		graph->region[i].pos = node_var_coef_offset(node);
3114 		if (needs_row(graph, node))
3115 			trivial = construct_trivial(node->indep);
3116 		else
3117 			trivial = isl_mat_zero(ctx, 0, 0);
3118 		graph->region[i].trivial = trivial;
3119 	}
3120 	lp = isl_basic_set_copy(graph->lp);
3121 	sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
3122 				       graph->region, &check_conflict, graph);
3123 	for (i = 0; i < graph->n; ++i)
3124 		isl_mat_free(graph->region[i].trivial);
3125 	return sol;
3126 }
3127 
3128 /* Extract the coefficients for the variables of "node" from "sol".
3129  *
3130  * Each schedule coefficient c_i_x is represented as the difference
3131  * between two non-negative variables c_i_x^+ - c_i_x^-.
3132  * The c_i_x^- appear before their c_i_x^+ counterpart.
3133  * Furthermore, the order of these pairs is the opposite of that
3134  * of the corresponding coefficients.
3135  *
3136  * Return c_i_x = c_i_x^+ - c_i_x^-
3137  */
3138 static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node,
3139 	__isl_keep isl_vec *sol)
3140 {
3141 	int i;
3142 	int pos;
3143 	isl_vec *csol;
3144 
3145 	if (!sol)
3146 		return NULL;
3147 	csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar);
3148 	if (!csol)
3149 		return NULL;
3150 
3151 	pos = 1 + node_var_coef_offset(node);
3152 	for (i = 0; i < node->nvar; ++i)
3153 		isl_int_sub(csol->el[node->nvar - 1 - i],
3154 			    sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]);
3155 
3156 	return csol;
3157 }
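
/* Decoding example (illustration only, not from the isl sources):
 * with nvar = 2 and the four values following "pos" in "sol" being
 * (m, p, m', p'), the pairs appear in reverse coefficient order, so
 * c_1 = p - m and c_0 = p' - m'.
 */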
3158 
3159 /* Update the schedules of all nodes based on the given solution
3160  * of the LP problem.
3161  * The new row is added to the current band.
3162  * All possibly negative coefficients are encoded as a difference
3163  * of two non-negative variables, so we need to perform the subtraction
3164  * here.
3165  *
3166  * If coincident is set, then the caller guarantees that the new
3167  * row satisfies the coincidence constraints.
3168  */
3169 static int update_schedule(struct isl_sched_graph *graph,
3170 	__isl_take isl_vec *sol, int coincident)
3171 {
3172 	int i, j;
3173 	isl_vec *csol = NULL;
3174 
3175 	if (!sol)
3176 		goto error;
3177 	if (sol->size == 0)
3178 		isl_die(sol->ctx, isl_error_internal,
3179 			"no solution found", goto error);
3180 	if (graph->n_total_row >= graph->max_row)
3181 		isl_die(sol->ctx, isl_error_internal,
3182 			"too many schedule rows", goto error);
3183 
3184 	for (i = 0; i < graph->n; ++i) {
3185 		struct isl_sched_node *node = &graph->node[i];
3186 		int pos;
3187 		isl_size row = isl_mat_rows(node->sched);
3188 
3189 		isl_vec_free(csol);
3190 		csol = extract_var_coef(node, sol);
3191 		if (row < 0 || !csol)
3192 			goto error;
3193 
3194 		isl_map_free(node->sched_map);
3195 		node->sched_map = NULL;
3196 		node->sched = isl_mat_add_rows(node->sched, 1);
3197 		if (!node->sched)
3198 			goto error;
3199 		pos = node_cst_coef_offset(node);
3200 		node->sched = isl_mat_set_element(node->sched,
3201 					row, 0, sol->el[1 + pos]);
3202 		pos = node_par_coef_offset(node);
3203 		for (j = 0; j < node->nparam; ++j)
3204 			node->sched = isl_mat_set_element(node->sched,
3205 					row, 1 + j, sol->el[1 + pos + j]);
3206 		for (j = 0; j < node->nvar; ++j)
3207 			node->sched = isl_mat_set_element(node->sched,
3208 					row, 1 + node->nparam + j, csol->el[j]);
3209 		node->coincident[graph->n_total_row] = coincident;
3210 	}
3211 	isl_vec_free(sol);
3212 	isl_vec_free(csol);
3213 
3214 	graph->n_row++;
3215 	graph->n_total_row++;
3216 
3217 	return 0;
3218 error:
3219 	isl_vec_free(sol);
3220 	isl_vec_free(csol);
3221 	return -1;
3222 }
3223 
3224 /* Convert row "row" of node->sched into an isl_aff living in "ls"
3225  * and return this isl_aff.
3226  */
3227 static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls,
3228 	struct isl_sched_node *node, int row)
3229 {
3230 	int j;
3231 	isl_int v;
3232 	isl_aff *aff;
3233 
3234 	isl_int_init(v);
3235 
3236 	aff = isl_aff_zero_on_domain(ls);
3237 	if (isl_mat_get_element(node->sched, row, 0, &v) < 0)
3238 		goto error;
3239 	aff = isl_aff_set_constant(aff, v);
3240 	for (j = 0; j < node->nparam; ++j) {
3241 		if (isl_mat_get_element(node->sched, row, 1 + j, &v) < 0)
3242 			goto error;
3243 		aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v);
3244 	}
3245 	for (j = 0; j < node->nvar; ++j) {
3246 		if (isl_mat_get_element(node->sched, row,
3247 					1 + node->nparam + j, &v) < 0)
3248 			goto error;
3249 		aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v);
3250 	}
3251 
3252 	isl_int_clear(v);
3253 
3254 	return aff;
3255 error:
3256 	isl_int_clear(v);
3257 	isl_aff_free(aff);
3258 	return NULL;
3259 }
3260 
3261 /* Convert the "n" rows starting at "first" of node->sched into a multi_aff
3262  * and return this multi_aff.
3263  *
3264  * The result is defined over the uncompressed node domain.
3265  */
3266 static __isl_give isl_multi_aff *node_extract_partial_schedule_multi_aff(
3267 	struct isl_sched_node *node, int first, int n)
3268 {
3269 	int i;
3270 	isl_space *space;
3271 	isl_local_space *ls;
3272 	isl_aff *aff;
3273 	isl_multi_aff *ma;
3274 	isl_size nrow;
3275 
3276 	if (!node)
3277 		return NULL;
3278 	nrow = isl_mat_rows(node->sched);
3279 	if (nrow < 0)
3280 		return NULL;
3281 	if (node->compressed)
3282 		space = isl_pw_multi_aff_get_domain_space(node->decompress);
3283 	else
3284 		space = isl_space_copy(node->space);
3285 	ls = isl_local_space_from_space(isl_space_copy(space));
3286 	space = isl_space_from_domain(space);
3287 	space = isl_space_add_dims(space, isl_dim_out, n);
3288 	ma = isl_multi_aff_zero(space);
3289 
3290 	for (i = first; i < first + n; ++i) {
3291 		aff = extract_schedule_row(isl_local_space_copy(ls), node, i);
3292 		ma = isl_multi_aff_set_aff(ma, i - first, aff);
3293 	}
3294 
3295 	isl_local_space_free(ls);
3296 
3297 	if (node->compressed)
3298 		ma = isl_multi_aff_pullback_multi_aff(ma,
3299 					isl_multi_aff_copy(node->compress));
3300 
3301 	return ma;
3302 }
3303 
3304 /* Convert node->sched into a multi_aff and return this multi_aff.
3305  *
3306  * The result is defined over the uncompressed node domain.
3307  */
3308 static __isl_give isl_multi_aff *node_extract_schedule_multi_aff(
3309 	struct isl_sched_node *node)
3310 {
3311 	isl_size nrow;
3312 
3313 	nrow = isl_mat_rows(node->sched);
3314 	if (nrow < 0)
3315 		return NULL;
3316 	return node_extract_partial_schedule_multi_aff(node, 0, nrow);
3317 }
3318 
3319 /* Convert node->sched into a map and return this map.
3320  *
3321  * The result is cached in node->sched_map, which needs to be released
3322  * whenever node->sched is updated.
3323  * It is defined over the uncompressed node domain.
3324  */
3325 static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
3326 {
3327 	if (!node->sched_map) {
3328 		isl_multi_aff *ma;
3329 
3330 		ma = node_extract_schedule_multi_aff(node);
3331 		node->sched_map = isl_map_from_multi_aff(ma);
3332 	}
3333 
3334 	return isl_map_copy(node->sched_map);
3335 }
3336 
3337 /* Construct a map that can be used to update a dependence relation
3338  * based on the current schedule.
3339  * That is, construct a map expressing that source and sink
3340  * are executed within the same iteration of the current schedule.
3341  * This map can then be intersected with the dependence relation.
3342  * This is not the most efficient way, but this shouldn't be a critical
3343  * operation.
3344  */
3345 static __isl_give isl_map *specializer(struct isl_sched_node *src,
3346 	struct isl_sched_node *dst)
3347 {
3348 	isl_map *src_sched, *dst_sched;
3349 
3350 	src_sched = node_extract_schedule(src);
3351 	dst_sched = node_extract_schedule(dst);
3352 	return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
3353 }
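
/* Illustrative example (not part of the original isl code):
 * if the current schedules are S1[i] -> [i] and S2[j] -> [j + 1],
 * then the constructed map is { S1[i] -> S2[j] : i = j + 1 },
 * i.e., exactly the pairs that are executed in the same iteration
 * of the schedule computed so far.
 */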
3354 
3355 /* Intersect the domains of the nested relations in domain and range
3356  * of "umap" with "map".
3357  */
3358 static __isl_give isl_union_map *intersect_domains(
3359 	__isl_take isl_union_map *umap, __isl_keep isl_map *map)
3360 {
3361 	isl_union_set *uset;
3362 
3363 	umap = isl_union_map_zip(umap);
3364 	uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map)));
3365 	umap = isl_union_map_intersect_domain(umap, uset);
3366 	umap = isl_union_map_zip(umap);
3367 	return umap;
3368 }
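
/* Illustrative sketch (not part of the original isl code) of the zip trick:
 * an element [A[i] -> T1[]] -> [B[j] -> T2[]] of "umap" is first zipped to
 * [A[i] -> B[j]] -> [T1[] -> T2[]], its domain is intersected with the
 * wrapped copy of "map" { A[i] -> B[j] : ... }, and the result is zipped
 * back into the original tagged form.
 */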
3369 
3370 /* Update the dependence relation of the given edge based
3371  * on the current schedule.
3372  * If the dependence is carried completely by the current schedule, then
3373  * it is removed from the edge_tables.  It is kept in the list of edges
3374  * as otherwise all edge_tables would have to be recomputed.
3375  *
3376  * If the edge is of a type that can appear multiple times
3377  * between the same pair of nodes, then it is added to
3378  * the edge table (again).  This prevents the situation
3379  * where none of these edges is referenced from the edge table
3380  * because the one that was referenced turned out to be empty and
3381  * was therefore removed from the table.
3382  */
3383 static isl_stat update_edge(isl_ctx *ctx, struct isl_sched_graph *graph,
3384 	struct isl_sched_edge *edge)
3385 {
3386 	int empty;
3387 	isl_map *id;
3388 
3389 	id = specializer(edge->src, edge->dst);
3390 	edge->map = isl_map_intersect(edge->map, isl_map_copy(id));
3391 	if (!edge->map)
3392 		goto error;
3393 
3394 	if (edge->tagged_condition) {
3395 		edge->tagged_condition =
3396 			intersect_domains(edge->tagged_condition, id);
3397 		if (!edge->tagged_condition)
3398 			goto error;
3399 	}
3400 	if (edge->tagged_validity) {
3401 		edge->tagged_validity =
3402 			intersect_domains(edge->tagged_validity, id);
3403 		if (!edge->tagged_validity)
3404 			goto error;
3405 	}
3406 
3407 	empty = isl_map_plain_is_empty(edge->map);
3408 	if (empty < 0)
3409 		goto error;
3410 	if (empty) {
3411 		if (graph_remove_edge(graph, edge) < 0)
3412 			goto error;
3413 	} else if (is_multi_edge_type(edge)) {
3414 		if (graph_edge_tables_add(ctx, graph, edge) < 0)
3415 			goto error;
3416 	}
3417 
3418 	isl_map_free(id);
3419 	return isl_stat_ok;
3420 error:
3421 	isl_map_free(id);
3422 	return isl_stat_error;
3423 }
3424 
3425 /* Does the domain of "umap" intersect "uset"?
3426  */
3427 static int domain_intersects(__isl_keep isl_union_map *umap,
3428 	__isl_keep isl_union_set *uset)
3429 {
3430 	int empty;
3431 
3432 	umap = isl_union_map_copy(umap);
3433 	umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset));
3434 	empty = isl_union_map_is_empty(umap);
3435 	isl_union_map_free(umap);
3436 
3437 	return empty < 0 ? -1 : !empty;
3438 }
3439 
3440 /* Does the range of "umap" intersect "uset"?
3441  */
3442 static int range_intersects(__isl_keep isl_union_map *umap,
3443 	__isl_keep isl_union_set *uset)
3444 {
3445 	int empty;
3446 
3447 	umap = isl_union_map_copy(umap);
3448 	umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset));
3449 	empty = isl_union_map_is_empty(umap);
3450 	isl_union_map_free(umap);
3451 
3452 	return empty < 0 ? -1 : !empty;
3453 }
3454 
3455 /* Are the condition dependences of "edge" local with respect to
3456  * the current schedule?
3457  *
3458  * That is, are domain and range of the condition dependences mapped
3459  * to the same point?
3460  *
3461  * In other words, is the condition false?
3462  */
3463 static int is_condition_false(struct isl_sched_edge *edge)
3464 {
3465 	isl_union_map *umap;
3466 	isl_map *map, *sched, *test;
3467 	int empty, local;
3468 
3469 	empty = isl_union_map_is_empty(edge->tagged_condition);
3470 	if (empty < 0 || empty)
3471 		return empty;
3472 
3473 	umap = isl_union_map_copy(edge->tagged_condition);
3474 	umap = isl_union_map_zip(umap);
3475 	umap = isl_union_set_unwrap(isl_union_map_domain(umap));
3476 	map = isl_map_from_union_map(umap);
3477 
3478 	sched = node_extract_schedule(edge->src);
3479 	map = isl_map_apply_domain(map, sched);
3480 	sched = node_extract_schedule(edge->dst);
3481 	map = isl_map_apply_range(map, sched);
3482 
3483 	test = isl_map_identity(isl_map_get_space(map));
3484 	local = isl_map_is_subset(map, test);
3485 	isl_map_free(map);
3486 	isl_map_free(test);
3487 
3488 	return local;
3489 }
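
/* Illustrative example (not part of the original isl code):
 * if the (untagged) condition contains S1[i] -> S2[i] and the current
 * schedules map S1[i] to [i] and S2[i] to [i], then the scheduled
 * condition is a subset of the identity and the condition is still false.
 * If instead S2[i] is mapped to [i + 1], the subset test fails and the
 * condition counts as satisfied.
 */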
3490 
3491 /* For each conditional validity constraint that is adjacent
3492  * to a condition with domain in condition_source or range in condition_sink,
3493  * turn it into an unconditional validity constraint.
3494  */
3495 static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph,
3496 	__isl_take isl_union_set *condition_source,
3497 	__isl_take isl_union_set *condition_sink)
3498 {
3499 	int i;
3500 
3501 	condition_source = isl_union_set_coalesce(condition_source);
3502 	condition_sink = isl_union_set_coalesce(condition_sink);
3503 
3504 	for (i = 0; i < graph->n_edge; ++i) {
3505 		int adjacent;
3506 		isl_union_map *validity;
3507 
3508 		if (!is_conditional_validity(&graph->edge[i]))
3509 			continue;
3510 		if (is_validity(&graph->edge[i]))
3511 			continue;
3512 
3513 		validity = graph->edge[i].tagged_validity;
3514 		adjacent = domain_intersects(validity, condition_sink);
3515 		if (adjacent >= 0 && !adjacent)
3516 			adjacent = range_intersects(validity, condition_source);
3517 		if (adjacent < 0)
3518 			goto error;
3519 		if (!adjacent)
3520 			continue;
3521 
3522 		set_validity(&graph->edge[i]);
3523 	}
3524 
3525 	isl_union_set_free(condition_source);
3526 	isl_union_set_free(condition_sink);
3527 	return 0;
3528 error:
3529 	isl_union_set_free(condition_source);
3530 	isl_union_set_free(condition_sink);
3531 	return -1;
3532 }
3533 
3534 /* Update the dependence relations of all edges based on the current schedule
3535  * and enforce conditional validity constraints that are adjacent
3536  * to satisfied condition constraints.
3537  *
3538  * First check if any of the condition constraints are satisfied
3539  * (i.e., not local to the outer schedule) and keep track of
3540  * their domain and range.
3541  * Then update all dependence relations (which removes the non-local
3542  * constraints).
3543  * Finally, if any condition constraints turned out to be satisfied,
3544  * then turn all adjacent conditional validity constraints into
3545  * unconditional validity constraints.
3546  */
3547 static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
3548 {
3549 	int i;
3550 	int any = 0;
3551 	isl_union_set *source, *sink;
3552 
3553 	source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3554 	sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3555 	for (i = 0; i < graph->n_edge; ++i) {
3556 		int local;
3557 		isl_union_set *uset;
3558 		isl_union_map *umap;
3559 
3560 		if (!is_condition(&graph->edge[i]))
3561 			continue;
3562 		if (is_local(&graph->edge[i]))
3563 			continue;
3564 		local = is_condition_false(&graph->edge[i]);
3565 		if (local < 0)
3566 			goto error;
3567 		if (local)
3568 			continue;
3569 
3570 		any = 1;
3571 
3572 		umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3573 		uset = isl_union_map_domain(umap);
3574 		source = isl_union_set_union(source, uset);
3575 
3576 		umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3577 		uset = isl_union_map_range(umap);
3578 		sink = isl_union_set_union(sink, uset);
3579 	}
3580 
3581 	for (i = 0; i < graph->n_edge; ++i) {
3582 		if (update_edge(ctx, graph, &graph->edge[i]) < 0)
3583 			goto error;
3584 	}
3585 
3586 	if (any)
3587 		return unconditionalize_adjacent_validity(graph, source, sink);
3588 
3589 	isl_union_set_free(source);
3590 	isl_union_set_free(sink);
3591 	return 0;
3592 error:
3593 	isl_union_set_free(source);
3594 	isl_union_set_free(sink);
3595 	return -1;
3596 }
3597 
3598 static void next_band(struct isl_sched_graph *graph)
3599 {
3600 	graph->band_start = graph->n_total_row;
3601 }
3602 
3603 /* Return the union of the universe domains of the nodes in "graph"
3604  * that satisfy "pred".
3605  */
3606 static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx,
3607 	struct isl_sched_graph *graph,
3608 	int (*pred)(struct isl_sched_node *node, int data), int data)
3609 {
3610 	int i;
3611 	isl_set *set;
3612 	isl_union_set *dom;
3613 
3614 	for (i = 0; i < graph->n; ++i)
3615 		if (pred(&graph->node[i], data))
3616 			break;
3617 
3618 	if (i >= graph->n)
3619 		isl_die(ctx, isl_error_internal,
3620 			"empty component", return NULL);
3621 
3622 	set = isl_set_universe(isl_space_copy(graph->node[i].space));
3623 	dom = isl_union_set_from_set(set);
3624 
3625 	for (i = i + 1; i < graph->n; ++i) {
3626 		if (!pred(&graph->node[i], data))
3627 			continue;
3628 		set = isl_set_universe(isl_space_copy(graph->node[i].space));
3629 		dom = isl_union_set_union(dom, isl_union_set_from_set(set));
3630 	}
3631 
3632 	return dom;
3633 }
3634 
3635 /* Return a list of unions of universe domains, where each element
3636  * in the list corresponds to an SCC (or WCC) indexed by node->scc.
3637  */
3638 static __isl_give isl_union_set_list *extract_sccs(isl_ctx *ctx,
3639 	struct isl_sched_graph *graph)
3640 {
3641 	int i;
3642 	isl_union_set_list *filters;
3643 
3644 	filters = isl_union_set_list_alloc(ctx, graph->scc);
3645 	for (i = 0; i < graph->scc; ++i) {
3646 		isl_union_set *dom;
3647 
3648 		dom = isl_sched_graph_domain(ctx, graph, &node_scc_exactly, i);
3649 		filters = isl_union_set_list_add(filters, dom);
3650 	}
3651 
3652 	return filters;
3653 }
3654 
3655 /* Return a list of two unions of universe domains, one for the SCCs up
3656  * to and including graph->src_scc and another for the other SCCs.
3657  */
3658 static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx,
3659 	struct isl_sched_graph *graph)
3660 {
3661 	isl_union_set *dom;
3662 	isl_union_set_list *filters;
3663 
3664 	filters = isl_union_set_list_alloc(ctx, 2);
3665 	dom = isl_sched_graph_domain(ctx, graph,
3666 					&node_scc_at_most, graph->src_scc);
3667 	filters = isl_union_set_list_add(filters, dom);
3668 	dom = isl_sched_graph_domain(ctx, graph,
3669 					&node_scc_at_least, graph->src_scc + 1);
3670 	filters = isl_union_set_list_add(filters, dom);
3671 
3672 	return filters;
3673 }
3674 
3675 /* Copy nodes that satisfy node_pred from the src dependence graph
3676  * to the dst dependence graph.
3677  */
3678 static isl_stat copy_nodes(struct isl_sched_graph *dst,
3679 	struct isl_sched_graph *src,
3680 	int (*node_pred)(struct isl_sched_node *node, int data), int data)
3681 {
3682 	int i;
3683 
3684 	dst->n = 0;
3685 	for (i = 0; i < src->n; ++i) {
3686 		int j;
3687 
3688 		if (!node_pred(&src->node[i], data))
3689 			continue;
3690 
3691 		j = dst->n;
3692 		dst->node[j].space = isl_space_copy(src->node[i].space);
3693 		dst->node[j].compressed = src->node[i].compressed;
3694 		dst->node[j].hull = isl_set_copy(src->node[i].hull);
3695 		dst->node[j].compress =
3696 			isl_multi_aff_copy(src->node[i].compress);
3697 		dst->node[j].decompress =
3698 			isl_pw_multi_aff_copy(src->node[i].decompress);
3699 		dst->node[j].nvar = src->node[i].nvar;
3700 		dst->node[j].nparam = src->node[i].nparam;
3701 		dst->node[j].sched = isl_mat_copy(src->node[i].sched);
3702 		dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map);
3703 		dst->node[j].coincident = src->node[i].coincident;
3704 		dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes);
3705 		dst->node[j].bounds = isl_basic_set_copy(src->node[i].bounds);
3706 		dst->node[j].max = isl_vec_copy(src->node[i].max);
3707 		dst->n++;
3708 
3709 		if (!dst->node[j].space || !dst->node[j].sched)
3710 			return isl_stat_error;
3711 		if (dst->node[j].compressed &&
3712 		    (!dst->node[j].hull || !dst->node[j].compress ||
3713 		     !dst->node[j].decompress))
3714 			return isl_stat_error;
3715 	}
3716 
3717 	return isl_stat_ok;
3718 }
3719 
3720 /* Copy non-empty edges that satisfy edge_pred from the src dependence graph
3721  * to the dst dependence graph.
3722  * If the source or destination node of the edge is not in the destination
3723  * graph, then it must be a backward proximity edge and it should simply
3724  * be ignored.
3725  */
3726 static isl_stat copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
3727 	struct isl_sched_graph *src,
3728 	int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
3729 {
3730 	int i;
3731 
3732 	dst->n_edge = 0;
3733 	for (i = 0; i < src->n_edge; ++i) {
3734 		struct isl_sched_edge *edge = &src->edge[i];
3735 		isl_map *map;
3736 		isl_union_map *tagged_condition;
3737 		isl_union_map *tagged_validity;
3738 		struct isl_sched_node *dst_src, *dst_dst;
3739 
3740 		if (!edge_pred(edge, data))
3741 			continue;
3742 
3743 		if (isl_map_plain_is_empty(edge->map))
3744 			continue;
3745 
3746 		dst_src = graph_find_node(ctx, dst, edge->src->space);
3747 		dst_dst = graph_find_node(ctx, dst, edge->dst->space);
3748 		if (!dst_src || !dst_dst)
3749 			return isl_stat_error;
3750 		if (!is_node(dst, dst_src) || !is_node(dst, dst_dst)) {
3751 			if (is_validity(edge) || is_conditional_validity(edge))
3752 				isl_die(ctx, isl_error_internal,
3753 					"backward (conditional) validity edge",
3754 					return isl_stat_error);
3755 			continue;
3756 		}
3757 
3758 		map = isl_map_copy(edge->map);
3759 		tagged_condition = isl_union_map_copy(edge->tagged_condition);
3760 		tagged_validity = isl_union_map_copy(edge->tagged_validity);
3761 
3762 		dst->edge[dst->n_edge].src = dst_src;
3763 		dst->edge[dst->n_edge].dst = dst_dst;
3764 		dst->edge[dst->n_edge].map = map;
3765 		dst->edge[dst->n_edge].tagged_condition = tagged_condition;
3766 		dst->edge[dst->n_edge].tagged_validity = tagged_validity;
3767 		dst->edge[dst->n_edge].types = edge->types;
3768 		dst->n_edge++;
3769 
3770 		if (edge->tagged_condition && !tagged_condition)
3771 			return isl_stat_error;
3772 		if (edge->tagged_validity && !tagged_validity)
3773 			return isl_stat_error;
3774 
3775 		if (graph_edge_tables_add(ctx, dst,
3776 					    &dst->edge[dst->n_edge - 1]) < 0)
3777 			return isl_stat_error;
3778 	}
3779 
3780 	return isl_stat_ok;
3781 }
3782 
3783 /* Compute the maximal number of variables over all nodes.
3784  * This is the maximal number of linearly independent schedule
3785  * rows that we need to compute.
3786  * Just in case we end up in a part of the dependence graph
3787  * with only lower-dimensional domains, we make sure we will
3788  * compute the required amount of extra linearly independent rows.
3789  */
3790 static int compute_maxvar(struct isl_sched_graph *graph)
3791 {
3792 	int i;
3793 
3794 	graph->maxvar = 0;
3795 	for (i = 0; i < graph->n; ++i) {
3796 		struct isl_sched_node *node = &graph->node[i];
3797 		int nvar;
3798 
3799 		if (node_update_vmap(node) < 0)
3800 			return -1;
3801 		nvar = node->nvar + graph->n_row - node->rank;
3802 		if (nvar > graph->maxvar)
3803 			graph->maxvar = nvar;
3804 	}
3805 
3806 	return 0;
3807 }
3808 
3809 /* Extract the subgraph of "graph" that consists of the nodes satisfying
3810  * "node_pred" and the edges satisfying "edge_pred" and store
3811  * the result in "sub".
3812  */
3813 static isl_stat extract_sub_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
3814 	int (*node_pred)(struct isl_sched_node *node, int data),
3815 	int (*edge_pred)(struct isl_sched_edge *edge, int data),
3816 	int data, struct isl_sched_graph *sub)
3817 {
3818 	int i, n = 0, n_edge = 0;
3819 	int t;
3820 
3821 	for (i = 0; i < graph->n; ++i)
3822 		if (node_pred(&graph->node[i], data))
3823 			++n;
3824 	for (i = 0; i < graph->n_edge; ++i)
3825 		if (edge_pred(&graph->edge[i], data))
3826 			++n_edge;
3827 	if (graph_alloc(ctx, sub, n, n_edge) < 0)
3828 		return isl_stat_error;
3829 	sub->root = graph->root;
3830 	if (copy_nodes(sub, graph, node_pred, data) < 0)
3831 		return isl_stat_error;
3832 	if (graph_init_table(ctx, sub) < 0)
3833 		return isl_stat_error;
3834 	for (t = 0; t <= isl_edge_last; ++t)
3835 		sub->max_edge[t] = graph->max_edge[t];
3836 	if (graph_init_edge_tables(ctx, sub) < 0)
3837 		return isl_stat_error;
3838 	if (copy_edges(ctx, sub, graph, edge_pred, data) < 0)
3839 		return isl_stat_error;
3840 	sub->n_row = graph->n_row;
3841 	sub->max_row = graph->max_row;
3842 	sub->n_total_row = graph->n_total_row;
3843 	sub->band_start = graph->band_start;
3844 
3845 	return isl_stat_ok;
3846 }
3847 
3848 static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
3849 	struct isl_sched_graph *graph);
3850 static __isl_give isl_schedule_node *compute_schedule_wcc(
3851 	isl_schedule_node *node, struct isl_sched_graph *graph);
3852 
3853 /* Compute a schedule for a subgraph of "graph".  In particular, for
3854  * the graph composed of nodes that satisfy node_pred and edges that
3855  * satisfy edge_pred.
3856  * If the subgraph is known to consist of a single component, then wcc should
3857  * be set and then we call compute_schedule_wcc on the constructed subgraph.
3858  * Otherwise, we call compute_schedule, which will check whether the subgraph
3859  * is connected.
3860  *
3861  * The schedule is inserted at "node" and the updated schedule node
3862  * is returned.
3863  */
3864 static __isl_give isl_schedule_node *compute_sub_schedule(
3865 	__isl_take isl_schedule_node *node, isl_ctx *ctx,
3866 	struct isl_sched_graph *graph,
3867 	int (*node_pred)(struct isl_sched_node *node, int data),
3868 	int (*edge_pred)(struct isl_sched_edge *edge, int data),
3869 	int data, int wcc)
3870 {
3871 	struct isl_sched_graph split = { 0 };
3872 
3873 	if (extract_sub_graph(ctx, graph, node_pred, edge_pred, data,
3874 				&split) < 0)
3875 		goto error;
3876 
3877 	if (wcc)
3878 		node = compute_schedule_wcc(node, &split);
3879 	else
3880 		node = compute_schedule(node, &split);
3881 
3882 	graph_free(ctx, &split);
3883 	return node;
3884 error:
3885 	graph_free(ctx, &split);
3886 	return isl_schedule_node_free(node);
3887 }
3888 
3889 static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
3890 {
3891 	return edge->src->scc == scc && edge->dst->scc == scc;
3892 }
3893 
3894 static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
3895 {
3896 	return edge->dst->scc <= scc;
3897 }
3898 
3899 static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
3900 {
3901 	return edge->src->scc >= scc;
3902 }
3903 
3904 /* Reset the current band by dropping all its schedule rows.
3905  */
3906 static isl_stat reset_band(struct isl_sched_graph *graph)
3907 {
3908 	int i;
3909 	int drop;
3910 
3911 	drop = graph->n_total_row - graph->band_start;
3912 	graph->n_total_row -= drop;
3913 	graph->n_row -= drop;
3914 
3915 	for (i = 0; i < graph->n; ++i) {
3916 		struct isl_sched_node *node = &graph->node[i];
3917 
3918 		isl_map_free(node->sched_map);
3919 		node->sched_map = NULL;
3920 
3921 		node->sched = isl_mat_drop_rows(node->sched,
3922 						graph->band_start, drop);
3923 
3924 		if (!node->sched)
3925 			return isl_stat_error;
3926 	}
3927 
3928 	return isl_stat_ok;
3929 }
3930 
3931 /* Split the current graph into two parts and compute a schedule for each
3932  * part individually.  In particular, one part consists of all SCCs up
3933  * to and including graph->src_scc, while the other part contains the other
3934  * SCCs.  The split is enforced by a sequence node inserted at position "node"
3935  * in the schedule tree.  Return the updated schedule node.
3936  * If either of these two parts consists of a sequence, then it is spliced
3937  * into the sequence containing the two parts.
3938  *
3939  * The current band is reset. It would be possible to reuse
3940  * the previously computed rows as the first rows in the next
3941  * band, but recomputing them may result in better rows as we are looking
3942  * at a smaller part of the dependence graph.
3943  */
3944 static __isl_give isl_schedule_node *compute_split_schedule(
3945 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
3946 {
3947 	int is_seq;
3948 	isl_ctx *ctx;
3949 	isl_union_set_list *filters;
3950 
3951 	if (!node)
3952 		return NULL;
3953 
3954 	if (reset_band(graph) < 0)
3955 		return isl_schedule_node_free(node);
3956 
3957 	next_band(graph);
3958 
3959 	ctx = isl_schedule_node_get_ctx(node);
3960 	filters = extract_split(ctx, graph);
3961 	node = isl_schedule_node_insert_sequence(node, filters);
3962 	node = isl_schedule_node_child(node, 1);
3963 	node = isl_schedule_node_child(node, 0);
3964 
3965 	node = compute_sub_schedule(node, ctx, graph,
3966 				&node_scc_at_least, &edge_src_scc_at_least,
3967 				graph->src_scc + 1, 0);
3968 	is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3969 	node = isl_schedule_node_parent(node);
3970 	node = isl_schedule_node_parent(node);
3971 	if (is_seq)
3972 		node = isl_schedule_node_sequence_splice_child(node, 1);
3973 	node = isl_schedule_node_child(node, 0);
3974 	node = isl_schedule_node_child(node, 0);
3975 	node = compute_sub_schedule(node, ctx, graph,
3976 				&node_scc_at_most, &edge_dst_scc_at_most,
3977 				graph->src_scc, 0);
3978 	is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
3979 	node = isl_schedule_node_parent(node);
3980 	node = isl_schedule_node_parent(node);
3981 	if (is_seq)
3982 		node = isl_schedule_node_sequence_splice_child(node, 0);
3983 
3984 	return node;
3985 }
3986 
3987 /* Insert a band node at position "node" in the schedule tree corresponding
3988  * to the current band in "graph".  Mark the band node permutable
3989  * if "permutable" is set.
3990  * The partial schedules and the coincidence property are extracted
3991  * from the graph nodes.
3992  * Return the updated schedule node.
3993  */
3994 static __isl_give isl_schedule_node *insert_current_band(
3995 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
3996 	int permutable)
3997 {
3998 	int i;
3999 	int start, end, n;
4000 	isl_multi_aff *ma;
4001 	isl_multi_pw_aff *mpa;
4002 	isl_multi_union_pw_aff *mupa;
4003 
4004 	if (!node)
4005 		return NULL;
4006 
4007 	if (graph->n < 1)
4008 		isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
4009 			"graph should have at least one node",
4010 			return isl_schedule_node_free(node));
4011 
4012 	start = graph->band_start;
4013 	end = graph->n_total_row;
4014 	n = end - start;
4015 
4016 	ma = node_extract_partial_schedule_multi_aff(&graph->node[0], start, n);
4017 	mpa = isl_multi_pw_aff_from_multi_aff(ma);
4018 	mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
4019 
4020 	for (i = 1; i < graph->n; ++i) {
4021 		isl_multi_union_pw_aff *mupa_i;
4022 
4023 		ma = node_extract_partial_schedule_multi_aff(&graph->node[i],
4024 								start, n);
4025 		mpa = isl_multi_pw_aff_from_multi_aff(ma);
4026 		mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
4027 		mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i);
4028 	}
4029 	node = isl_schedule_node_insert_partial_schedule(node, mupa);
4030 
4031 	for (i = 0; i < n; ++i)
4032 		node = isl_schedule_node_band_member_set_coincident(node, i,
4033 					graph->node[0].coincident[start + i]);
4034 	node = isl_schedule_node_band_set_permutable(node, permutable);
4035 
4036 	return node;
4037 }
4038 
4039 /* Update the dependence relations based on the current schedule,
4040  * add the current band to "node" and then continue with the computation
4041  * of the next band.
4042  * Return the updated schedule node.
4043  */
4044 static __isl_give isl_schedule_node *compute_next_band(
4045 	__isl_take isl_schedule_node *node,
4046 	struct isl_sched_graph *graph, int permutable)
4047 {
4048 	isl_ctx *ctx;
4049 
4050 	if (!node)
4051 		return NULL;
4052 
4053 	ctx = isl_schedule_node_get_ctx(node);
4054 	if (update_edges(ctx, graph) < 0)
4055 		return isl_schedule_node_free(node);
4056 	node = insert_current_band(node, graph, permutable);
4057 	next_band(graph);
4058 
4059 	node = isl_schedule_node_child(node, 0);
4060 	node = compute_schedule(node, graph);
4061 	node = isl_schedule_node_parent(node);
4062 
4063 	return node;
4064 }
4065 
4066 /* Add the constraints "coef" derived from an edge from "node" to itself
4067  * to graph->lp in order to respect the dependences and to try and carry them.
4068  * "pos" is the sequence number of the edge that needs to be carried.
4069  * "coef" represents general constraints on coefficients (c_0, c_x)
4070  * of valid constraints for (y - x) with x and y instances of the node.
4071  *
4072  * The constraints added to graph->lp need to enforce
4073  *
4074  *	(c_j_0 + c_j_x y) - (c_j_0 + c_j_x x)
4075  *	= c_j_x (y - x) >= e_i
4076  *
4077  * for each (x,y) in the dependence relation of the edge.
4078  * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x),
4079  * taking into account that each coefficient in c_j_x is represented
4080  * as a pair of non-negative coefficients.
4081  */
4082 static isl_stat add_intra_constraints(struct isl_sched_graph *graph,
4083 	struct isl_sched_node *node, __isl_take isl_basic_set *coef, int pos)
4084 {
4085 	isl_size offset;
4086 	isl_ctx *ctx;
4087 	isl_dim_map *dim_map;
4088 
4089 	offset = coef_var_offset(coef);
4090 	if (offset < 0)
4091 		coef = isl_basic_set_free(coef);
4092 	if (!coef)
4093 		return isl_stat_error;
4094 
4095 	ctx = isl_basic_set_get_ctx(coef);
4096 	dim_map = intra_dim_map(ctx, graph, node, offset, 1);
4097 	isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
4098 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
4099 
4100 	return isl_stat_ok;
4101 }
4102 
4103 /* Add the constraints "coef" derived from an edge from "src" to "dst"
4104  * to graph->lp in order to respect the dependences and to try and carry them.
4105  * "pos" is the sequence number of the edge that needs to be carried or
4106  * -1 if no attempt should be made to carry the dependences.
4107  * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y)
4108  * of valid constraints for (x, y) with x and y instances of "src" and "dst".
4109  *
4110  * The constraints added to graph->lp need to enforce
4111  *
4112  *	(c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
4113  *
4114  * for each (x,y) in the dependence relation of the edge or
4115  *
4116  *	(c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0
4117  *
4118  * if pos is -1.
4119  * That is,
4120  * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
4121  * or
4122  * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
4123  * needs to be plugged in for (c_0, c_n, c_x, c_y),
4124  * taking into account that each coefficient in c_j_x and c_k_x is represented
4125  * as a pair of non-negative coefficients.
4126  */
4127 static isl_stat add_inter_constraints(struct isl_sched_graph *graph,
4128 	struct isl_sched_node *src, struct isl_sched_node *dst,
4129 	__isl_take isl_basic_set *coef, int pos)
4130 {
4131 	isl_size offset;
4132 	isl_ctx *ctx;
4133 	isl_dim_map *dim_map;
4134 
4135 	offset = coef_var_offset(coef);
4136 	if (offset < 0)
4137 		coef = isl_basic_set_free(coef);
4138 	if (!coef)
4139 		return isl_stat_error;
4140 
4141 	ctx = isl_basic_set_get_ctx(coef);
4142 	dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
4143 	if (pos >= 0)
4144 		isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
4145 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
4146 
4147 	return isl_stat_ok;
4148 }
4149 
4150 /* Data structure for keeping track of the data needed
4151  * to exploit non-trivial lineality spaces.
4152  *
4153  * "any_non_trivial" is true if there are any non-trivial lineality spaces.
4154  * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL.
4155  * "equivalent" connects instances to other instances on the same line(s).
4156  * "mask" contains the domain spaces of "equivalent".
4157  * Any instance set not in "mask" does not have a non-trivial lineality space.
4158  */
4159 struct isl_exploit_lineality_data {
4160 	isl_bool any_non_trivial;
4161 	isl_union_map *equivalent;
4162 	isl_union_set *mask;
4163 };
4164 
4165 /* Data structure collecting information used during the construction
4166  * of an LP for carrying dependences.
4167  *
4168  * "intra" is a sequence of coefficient constraints for intra-node edges.
4169  * "inter" is a sequence of coefficient constraints for inter-node edges.
4170  * "lineality" contains data used to exploit non-trivial lineality spaces.
4171  */
4172 struct isl_carry {
4173 	isl_basic_set_list *intra;
4174 	isl_basic_set_list *inter;
4175 	struct isl_exploit_lineality_data lineality;
4176 };
4177 
4178 /* Free all the data stored in "carry".
4179  */
4180 static void isl_carry_clear(struct isl_carry *carry)
4181 {
4182 	isl_basic_set_list_free(carry->intra);
4183 	isl_basic_set_list_free(carry->inter);
4184 	isl_union_map_free(carry->lineality.equivalent);
4185 	isl_union_set_free(carry->lineality.mask);
4186 }
4187 
4188 /* Return a pointer to the node in "graph" that lives in "space".
4189  * If the requested node has been compressed, then "space"
4190  * corresponds to the compressed space.
4191  * The graph is assumed to have such a node.
4192  * Return NULL in case of error.
4193  *
4194  * First try and see if "space" is the space of an uncompressed node.
4195  * If so, return that node.
4196  * Otherwise, "space" was constructed by construct_compressed_id and
4197  * contains a user pointer pointing to the node in the tuple id.
4198  * However, this node belongs to the original dependence graph.
4199  * If "graph" is a subgraph of this original dependence graph,
4200  * then the node with the same space still needs to be looked up
4201  * in the current graph.
4202  */
4203 static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx,
4204 	struct isl_sched_graph *graph, __isl_keep isl_space *space)
4205 {
4206 	isl_id *id;
4207 	struct isl_sched_node *node;
4208 
4209 	if (!space)
4210 		return NULL;
4211 
4212 	node = graph_find_node(ctx, graph, space);
4213 	if (!node)
4214 		return NULL;
4215 	if (is_node(graph, node))
4216 		return node;
4217 
4218 	id = isl_space_get_tuple_id(space, isl_dim_set);
4219 	node = isl_id_get_user(id);
4220 	isl_id_free(id);
4221 
4222 	if (!node)
4223 		return NULL;
4224 
4225 	if (!is_node(graph->root, node))
4226 		isl_die(ctx, isl_error_internal,
4227 			"space points to invalid node", return NULL);
4228 	if (graph != graph->root)
4229 		node = graph_find_node(ctx, graph, node->space);
4230 	if (!is_node(graph, node))
4231 		isl_die(ctx, isl_error_internal,
4232 			"unable to find node", return NULL);
4233 
4234 	return node;
4235 }
4236 
4237 /* Internal data structure for add_all_constraints.
4238  *
4239  * "graph" is the schedule constraint graph for which an LP problem
4240  * is being constructed.
4241  * "carry_inter" indicates whether inter-node edges should be carried.
4242  * "pos" is the position of the next edge that needs to be carried.
4243  */
4244 struct isl_add_all_constraints_data {
4245 	isl_ctx *ctx;
4246 	struct isl_sched_graph *graph;
4247 	int carry_inter;
4248 	int pos;
4249 };
4250 
4251 /* Add the constraints "coef" derived from an edge from a node to itself
4252  * to data->graph->lp in order to respect the dependences and
4253  * to try and carry them.
4254  *
4255  * The space of "coef" is of the form
4256  *
4257  *	coefficients[[c_cst] -> S[c_x]]
4258  *
4259  * with S[c_x] the (compressed) space of the node.
4260  * Extract the node from the space and call add_intra_constraints.
4261  */
4262 static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user)
4263 {
4264 	struct isl_add_all_constraints_data *data = user;
4265 	isl_space *space;
4266 	struct isl_sched_node *node;
4267 
4268 	space = isl_basic_set_get_space(coef);
4269 	space = isl_space_range(isl_space_unwrap(space));
4270 	node = graph_find_compressed_node(data->ctx, data->graph, space);
4271 	isl_space_free(space);
4272 	return add_intra_constraints(data->graph, node, coef, data->pos++);
4273 }
4274 
4275 /* Add the constraints "coef" derived from an edge from a node j
4276  * to a node k to data->graph->lp in order to respect the dependences and
4277  * to try and carry them (provided data->carry_inter is set).
4278  *
4279  * The space of "coef" is of the form
4280  *
4281  *	coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]]
4282  *
4283  * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes.
4284  * Extract the nodes from the space and call add_inter_constraints.
4285  */
4286 static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user)
4287 {
4288 	struct isl_add_all_constraints_data *data = user;
4289 	isl_space *space, *dom;
4290 	struct isl_sched_node *src, *dst;
4291 	int pos;
4292 
4293 	space = isl_basic_set_get_space(coef);
4294 	space = isl_space_unwrap(isl_space_range(isl_space_unwrap(space)));
4295 	dom = isl_space_domain(isl_space_copy(space));
4296 	src = graph_find_compressed_node(data->ctx, data->graph, dom);
4297 	isl_space_free(dom);
4298 	space = isl_space_range(space);
4299 	dst = graph_find_compressed_node(data->ctx, data->graph, space);
4300 	isl_space_free(space);
4301 
4302 	pos = data->carry_inter ? data->pos++ : -1;
4303 	return add_inter_constraints(data->graph, src, dst, coef, pos);
4304 }
4305 
4306 /* Add constraints to graph->lp that force all (conditional) validity
4307  * dependences to be respected and attempt to carry them.
4308  * "intra" is the sequence of coefficient constraints for intra-node edges.
4309  * "inter" is the sequence of coefficient constraints for inter-node edges.
4310  * "carry_inter" indicates whether inter-node edges should be carried or
4311  * only respected.
4312  */
4313 static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph,
4314 	__isl_keep isl_basic_set_list *intra,
4315 	__isl_keep isl_basic_set_list *inter, int carry_inter)
4316 {
4317 	struct isl_add_all_constraints_data data = { ctx, graph, carry_inter };
4318 
4319 	data.pos = 0;
4320 	if (isl_basic_set_list_foreach(intra, &lp_add_intra, &data) < 0)
4321 		return isl_stat_error;
4322 	if (isl_basic_set_list_foreach(inter, &lp_add_inter, &data) < 0)
4323 		return isl_stat_error;
4324 	return isl_stat_ok;
4325 }
4326 
4327 /* Internal data structure for count_all_constraints
4328  * for keeping track of the number of equality and inequality constraints.
4329  */
4330 struct isl_sched_count {
4331 	int n_eq;
4332 	int n_ineq;
4333 };
4334 
4335 /* Add the number of equality and inequality constraints of "bset"
4336  * to data->n_eq and data->n_ineq.
4337  */
4338 static isl_stat bset_update_count(__isl_take isl_basic_set *bset, void *user)
4339 {
4340 	struct isl_sched_count *data = user;
4341 
4342 	return update_count(bset, 1, &data->n_eq, &data->n_ineq);
4343 }
4344 
4345 /* Count the number of equality and inequality constraints
4346  * that will be added to the carry_lp problem.
4347  * We count each edge exactly once.
4348  * "intra" is the sequence of coefficient constraints for intra-node edges.
4349  * "inter" is the sequence of coefficient constraints for inter-node edges.
4350  */
4351 static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra,
4352 	__isl_keep isl_basic_set_list *inter, int *n_eq, int *n_ineq)
4353 {
4354 	struct isl_sched_count data;
4355 
4356 	data.n_eq = data.n_ineq = 0;
4357 	if (isl_basic_set_list_foreach(inter, &bset_update_count, &data) < 0)
4358 		return isl_stat_error;
4359 	if (isl_basic_set_list_foreach(intra, &bset_update_count, &data) < 0)
4360 		return isl_stat_error;
4361 
4362 	*n_eq = data.n_eq;
4363 	*n_ineq = data.n_ineq;
4364 
4365 	return isl_stat_ok;
4366 }
4367 
4368 /* Construct an LP problem for finding schedule coefficients
4369  * such that the schedule carries as many validity dependences as possible.
4370  * In particular, for each dependence i, we bound the dependence distance
4371  * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
4372  * of all e_i's.  Dependences with e_i = 0 in the solution are simply
4373  * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
4374  * "intra" is the sequence of coefficient constraints for intra-node edges.
4375  * "inter" is the sequence of coefficient constraints for inter-node edges.
4376  * "n_edge" is the total number of edges.
4377  * "carry_inter" indicates whether inter-node edges should be carried or
4378  * only respected.  That is, if "carry_inter" is not set, then
4379  * no e_i variables are introduced for the inter-node edges.
4380  *
4381  * All variables of the LP are non-negative.  The actual coefficients
4382  * may be negative, so each coefficient is represented as the difference
4383  * of two non-negative variables.  The negative part always appears
4384  * immediately before the positive part.
4385  * Other than that, the variables have the following order
4386  *
4387  *	- sum of (1 - e_i) over all edges
4388  *	- sum of all c_n coefficients
4389  *		(unconstrained when computing non-parametric schedules)
4390  *	- sum of positive and negative parts of all c_x coefficients
4391  *	- for each edge
4392  *		- e_i
4393  *	- for each node
4394  *		- positive and negative parts of c_i_x, in opposite order
4395  *		- c_i_n (if parametric)
4396  *		- c_i_0
4397  *
4398  * The constraints are those from the (validity) edges plus three equalities
4399  * to express the sums and n_edge inequalities to express e_i <= 1.
4400  */
4401 static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
4402 	int n_edge, __isl_keep isl_basic_set_list *intra,
4403 	__isl_keep isl_basic_set_list *inter, int carry_inter)
4404 {
4405 	int i;
4406 	int k;
4407 	isl_space *dim;
4408 	unsigned total;
4409 	int n_eq, n_ineq;
4410 
4411 	total = 3 + n_edge;
4412 	for (i = 0; i < graph->n; ++i) {
4413 		struct isl_sched_node *node = &graph->node[graph->sorted[i]];
4414 		node->start = total;
4415 		total += 1 + node->nparam + 2 * node->nvar;
4416 	}
4417 
4418 	if (count_all_constraints(intra, inter, &n_eq, &n_ineq) < 0)
4419 		return isl_stat_error;
4420 
4421 	dim = isl_space_set_alloc(ctx, 0, total);
4422 	isl_basic_set_free(graph->lp);
4423 	n_eq += 3;
4424 	n_ineq += n_edge;
4425 	graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
4426 	graph->lp = isl_basic_set_set_rational(graph->lp);
4427 
4428 	k = isl_basic_set_alloc_equality(graph->lp);
4429 	if (k < 0)
4430 		return isl_stat_error;
4431 	isl_seq_clr(graph->lp->eq[k], 1 + total);
4432 	isl_int_set_si(graph->lp->eq[k][0], -n_edge);
4433 	isl_int_set_si(graph->lp->eq[k][1], 1);
4434 	for (i = 0; i < n_edge; ++i)
4435 		isl_int_set_si(graph->lp->eq[k][4 + i], 1);
4436 
4437 	if (add_param_sum_constraint(graph, 1) < 0)
4438 		return isl_stat_error;
4439 	if (add_var_sum_constraint(graph, 2) < 0)
4440 		return isl_stat_error;
4441 
4442 	for (i = 0; i < n_edge; ++i) {
4443 		k = isl_basic_set_alloc_inequality(graph->lp);
4444 		if (k < 0)
4445 			return isl_stat_error;
4446 		isl_seq_clr(graph->lp->ineq[k], 1 + total);
4447 		isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
4448 		isl_int_set_si(graph->lp->ineq[k][0], 1);
4449 	}
4450 
4451 	if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0)
4452 		return isl_stat_error;
4453 
4454 	return isl_stat_ok;
4455 }
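
/* Layout example (illustration only, not from the isl sources):
 * for a graph with n_edge = 2 and a single node with nparam = 1 and
 * nvar = 2, the LP variables are ordered as
 *
 *	[ sum(1 - e_i), sum c_n, sum c_x^+/c_x^-, e_0, e_1,
 *	  c_x1^-, c_x1^+, c_x0^-, c_x0^+, c_n0, c_0 ]
 *
 * so node->start = 3 + n_edge = 5 and the node occupies
 * 1 + nparam + 2 * nvar = 6 positions.  In a constraint row of graph->lp,
 * column 0 holds the constant term, which is why e_i appears in
 * column 4 + i above.
 */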
4456 
4457 static __isl_give isl_schedule_node *compute_component_schedule(
4458 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
4459 	int wcc);
4460 
4461 /* If the schedule_split_scaled option is set and if the linear
4462  * parts of the scheduling rows for all nodes in the graph have
4463  * a non-trivial common divisor, then remove this
4464  * common divisor from the linear part.
4465  * Otherwise, insert a band node directly and continue with
4466  * the construction of the schedule.
4467  *
4468  * If a non-trivial common divisor is found, then
4469  * the linear part is reduced and the remainder is ignored.
4470  * The pieces of the graph that are assigned different remainders
4471  * form (groups of) strongly connected components within
4472  * the scaled down band.  If needed, they can therefore
4473  * be ordered along this remainder in a sequence node.
4474  * However, this ordering is not enforced here in order to allow
4475  * the scheduler to combine some of the strongly connected components.
4476  */
4477 static __isl_give isl_schedule_node *split_scaled(
4478 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
4479 {
4480 	int i;
4481 	int row;
4482 	isl_ctx *ctx;
4483 	isl_int gcd, gcd_i;
4484 	isl_size n_row;
4485 
4486 	if (!node)
4487 		return NULL;
4488 
4489 	ctx = isl_schedule_node_get_ctx(node);
4490 	if (!ctx->opt->schedule_split_scaled)
4491 		return compute_next_band(node, graph, 0);
4492 	if (graph->n <= 1)
4493 		return compute_next_band(node, graph, 0);
4494 	n_row = isl_mat_rows(graph->node[0].sched);
4495 	if (n_row < 0)
4496 		return isl_schedule_node_free(node);
4497 
4498 	isl_int_init(gcd);
4499 	isl_int_init(gcd_i);
4500 
4501 	isl_int_set_si(gcd, 0);
4502 
4503 	row = n_row - 1;
4504 
4505 	for (i = 0; i < graph->n; ++i) {
4506 		struct isl_sched_node *node = &graph->node[i];
4507 		isl_size cols = isl_mat_cols(node->sched);
4508 
4509 		if (cols < 0)
4510 			break;
4511 		isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
4512 		isl_int_gcd(gcd, gcd, gcd_i);
4513 	}
4514 
4515 	isl_int_clear(gcd_i);
4516 	if (i < graph->n)
4517 		goto error;
4518 
4519 	if (isl_int_cmp_si(gcd, 1) <= 0) {
4520 		isl_int_clear(gcd);
4521 		return compute_next_band(node, graph, 0);
4522 	}
4523 
4524 	for (i = 0; i < graph->n; ++i) {
4525 		struct isl_sched_node *node = &graph->node[i];
4526 
4527 		isl_int_fdiv_q(node->sched->row[row][0],
4528 			       node->sched->row[row][0], gcd);
4529 		isl_int_mul(node->sched->row[row][0],
4530 			    node->sched->row[row][0], gcd);
4531 		node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
4532 		if (!node->sched)
4533 			goto error;
4534 	}
4535 
4536 	isl_int_clear(gcd);
4537 
4538 	return compute_next_band(node, graph, 0);
4539 error:
4540 	isl_int_clear(gcd);
4541 	return isl_schedule_node_free(node);
4542 }
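
/* Worked example (illustration only, not from the isl sources):
 * if the last rows of two nodes are [5, 4, 6] and [0, 2] (constant term
 * followed by the linear coefficients), then gcd = gcd(4, 6, 2) = 2.
 * The constants are first rounded down to multiples of 2 (5 -> 4) and
 * the rows are then scaled down to [2, 2, 3] and [0, 1].
 */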
4543 
4544 /* Is the schedule row "sol" trivial on node "node"?
4545  * That is, is the solution zero on the dimensions linearly independent of
4546  * the previously found solutions?
4547  * Return 1 if the solution is trivial, 0 if it is not and -1 on error.
4548  *
4549  * Each coefficient is represented as the difference between
4550  * two non-negative values in "sol".
4551  * We construct the schedule row s and check if it is linearly
4552  * independent of previously computed schedule rows
4553  * by computing T s, with T the linear combinations that are zero
4554  * on linearly dependent schedule rows.
4555  * If the result consists of all zeros, then the solution is trivial.
4556  */
4557 static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol)
4558 {
4559 	int trivial;
4560 	isl_vec *node_sol;
4561 
4562 	if (!sol)
4563 		return -1;
4564 	if (node->nvar == node->rank)
4565 		return 0;
4566 
4567 	node_sol = extract_var_coef(node, sol);
4568 	node_sol = isl_mat_vec_product(isl_mat_copy(node->indep), node_sol);
4569 	if (!node_sol)
4570 		return -1;
4571 
4572 	trivial = isl_seq_first_non_zero(node_sol->el,
4573 					node->nvar - node->rank) == -1;
4574 
4575 	isl_vec_free(node_sol);
4576 
4577 	return trivial;
4578 }
4579 
4580 /* Is the schedule row "sol" trivial on any node where it should
4581  * not be trivial?
4582  * Return 1 if any solution is trivial, 0 if they are not and -1 on error.
4583  */
4584 static int is_any_trivial(struct isl_sched_graph *graph,
4585 	__isl_keep isl_vec *sol)
4586 {
4587 	int i;
4588 
4589 	for (i = 0; i < graph->n; ++i) {
4590 		struct isl_sched_node *node = &graph->node[i];
4591 		int trivial;
4592 
4593 		if (!needs_row(graph, node))
4594 			continue;
4595 		trivial = is_trivial(node, sol);
4596 		if (trivial < 0 || trivial)
4597 			return trivial;
4598 	}
4599 
4600 	return 0;
4601 }
4602 
4603 /* Does the schedule represented by "sol" perform loop coalescing on "node"?
4604  * If so, return the position of the coalesced dimension.
4605  * Otherwise, return node->nvar or -1 on error.
4606  *
4607  * In particular, look for pairs of coefficients c_i and c_j such that
4608  * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|.
4609  * If any such pair is found, then return i.
4610  * If size_i is infinity, then no check on c_i needs to be performed.
4611  */
4612 static int find_node_coalescing(struct isl_sched_node *node,
4613 	__isl_keep isl_vec *sol)
4614 {
4615 	int i, j;
4616 	isl_int max;
4617 	isl_vec *csol;
4618 
4619 	if (node->nvar <= 1)
4620 		return node->nvar;
4621 
4622 	csol = extract_var_coef(node, sol);
4623 	if (!csol)
4624 		return -1;
4625 	isl_int_init(max);
4626 	for (i = 0; i < node->nvar; ++i) {
4627 		isl_val *v;
4628 
4629 		if (isl_int_is_zero(csol->el[i]))
4630 			continue;
4631 		v = isl_multi_val_get_val(node->sizes, i);
4632 		if (!v)
4633 			goto error;
4634 		if (!isl_val_is_int(v)) {
4635 			isl_val_free(v);
4636 			continue;
4637 		}
4638 		v = isl_val_div_ui(v, 2);
4639 		v = isl_val_ceil(v);
4640 		if (!v)
4641 			goto error;
4642 		isl_int_mul(max, v->n, csol->el[i]);
4643 		isl_val_free(v);
4644 
4645 		for (j = 0; j < node->nvar; ++j) {
4646 			if (j == i)
4647 				continue;
4648 			if (isl_int_abs_gt(csol->el[j], max))
4649 				break;
4650 		}
4651 		if (j < node->nvar)
4652 			break;
4653 	}
4654 
4655 	isl_int_clear(max);
4656 	isl_vec_free(csol);
4657 	return i;
4658 error:
4659 	isl_int_clear(max);
4660 	isl_vec_free(csol);
4661 	return -1;
4662 }
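
/* A hypothetical instance of the check above: for a dimension i with
 * size_i = 10, ceil(size_i/2) = 5.  With coefficients c_i = 1 and
 * c_j = 7, the test |c_j| > |c_i * 5| holds (7 > 5), so the schedule
 * is considered to coalesce dimension i and position i is returned.
 * With c_j = 4 instead, this pair does not trigger the test.
 */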
4663 
4664 /* Force the schedule coefficient at position "pos" of "node" to be zero
4665  * in "tl".
4666  * The coefficient is encoded as the difference between two non-negative
4667  * variables.  Force these two variables to have the same value.
4668  */
4669 static __isl_give isl_tab_lexmin *zero_out_node_coef(
4670 	__isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos)
4671 {
4672 	int dim;
4673 	isl_ctx *ctx;
4674 	isl_vec *eq;
4675 
4676 	ctx = isl_space_get_ctx(node->space);
4677 	dim = isl_tab_lexmin_dim(tl);
4678 	if (dim < 0)
4679 		return isl_tab_lexmin_free(tl);
4680 	eq = isl_vec_alloc(ctx, 1 + dim);
4681 	eq = isl_vec_clr(eq);
4682 	if (!eq)
4683 		return isl_tab_lexmin_free(tl);
4684 
4685 	pos = 1 + node_var_coef_pos(node, pos);
4686 	isl_int_set_si(eq->el[pos], 1);
4687 	isl_int_set_si(eq->el[pos + 1], -1);
4688 	tl = isl_tab_lexmin_add_eq(tl, eq->el);
4689 	isl_vec_free(eq);
4690 
4691 	return tl;
4692 }
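
/* For example (purely illustrative): if the coefficient at position "pos"
 * is encoded as the difference of the LP variables at positions p and
 * p + 1, then the equality added above is the row
 *
 *	(0, ..., 0, 1, -1, 0, ..., 0)
 *
 * with the 1 and -1 at positions p and p + 1 (after the constant term),
 * forcing the two variables to be equal and hence the encoded
 * coefficient to be zero.
 */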
4693 
4694 /* Return the lexicographically smallest rational point in the basic set
4695  * from which "tl" was constructed, double checking that this input set
4696  * was not empty.
4697  */
4698 static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl)
4699 {
4700 	isl_vec *sol;
4701 
4702 	sol = isl_tab_lexmin_get_solution(tl);
4703 	if (!sol)
4704 		return NULL;
4705 	if (sol->size == 0)
4706 		isl_die(isl_vec_get_ctx(sol), isl_error_internal,
4707 			"error in schedule construction",
4708 			return isl_vec_free(sol));
4709 	return sol;
4710 }
4711 
4712 /* Does the solution "sol" of the LP problem constructed by setup_carry_lp
4713  * carry any of the "n_edge" groups of dependences?
4714  * The value in the first position is the sum of (1 - e_i) over all "n_edge"
4715  * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented
4716  * by the edge are carried by the solution.
4717  * If the sum of the (1 - e_i) is smaller than "n_edge" then at least
4718  * one of those is carried.
4719  *
4720  * Note that despite the fact that the problem is solved using a rational
4721  * solver, the solution is guaranteed to be integral.
4722  * Specifically, the dependence distance lower bounds e_i (and therefore
4723  * also their sum) are integers.  See Lemma 5 of [1].
4724  *
4725  * Any potential denominator of the sum is cleared by this function.
4726  * The denominator is not relevant for any of the other elements
4727  * in the solution.
4728  *
4729  * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
4730  *     Problem, Part II: Multi-Dimensional Time.
4731  *     In Intl. Journal of Parallel Programming, 1992.
4732  */
4733 static int carries_dependences(__isl_keep isl_vec *sol, int n_edge)
4734 {
4735 	isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]);
4736 	isl_int_set_si(sol->el[0], 1);
4737 	return isl_int_cmp_si(sol->el[1], n_edge) < 0;
4738 }
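
/* Hypothetical example: if sol = (2, 6, ...), then the denominator is 2
 * and the sum of the (1 - e_i) equals 6/2 = 3.  With n_edge = 5, the test
 * 3 < 5 succeeds, meaning that the sum of the e_i equals 2 and, since the
 * e_i are integral and at most 1, that exactly two of the edge groups
 * are carried.
 */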
4739 
4740 /* Return the lexicographically smallest rational point in "lp",
4741  * assuming that all variables are non-negative and performing some
4742  * additional sanity checks.
4743  * If "want_integral" is set, then compute the lexicographically smallest
4744  * integer point instead.
4745  * In particular, "lp" should not be empty by construction.
4746  * Double check that this is the case.
4747  * If dependences are not carried for any of the "n_edge" edges,
4748  * then return an empty vector.
4749  *
4750  * If the schedule_treat_coalescing option is set and
4751  * if the computed schedule performs loop coalescing on a given node,
4752  * i.e., if it is of the form
4753  *
4754  *	c_i i + c_j j + ...
4755  *
4756  * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero
4757  * to cut out this solution.  Repeat this process until no more loop
4758  * coalescing occurs or until no more dependences can be carried.
4759  * In the latter case, revert to the previously computed solution.
4760  *
4761  * If the caller requests an integral solution and if coalescing should
4762  * be treated, then perform the coalescing treatment first as
4763  * an integral solution computed before coalescing treatment
4764  * would carry the same number of edges and would therefore probably
4765  * also be coalescing.
4766  *
4767  * To allow the coalescing treatment to be performed first,
4768  * the initial solution is allowed to be rational and it is only
4769  * cut out (if needed) in the next iteration, if no coalescing measures
4770  * were taken.
4771  */
4772 static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph,
4773 	__isl_take isl_basic_set *lp, int n_edge, int want_integral)
4774 {
4775 	int i, pos, cut;
4776 	isl_ctx *ctx;
4777 	isl_tab_lexmin *tl;
4778 	isl_vec *sol = NULL, *prev;
4779 	int treat_coalescing;
4780 	int try_again;
4781 
4782 	if (!lp)
4783 		return NULL;
4784 	ctx = isl_basic_set_get_ctx(lp);
4785 	treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx);
4786 	tl = isl_tab_lexmin_from_basic_set(lp);
4787 
4788 	cut = 0;
4789 	do {
4790 		int integral;
4791 
4792 		try_again = 0;
4793 		if (cut)
4794 			tl = isl_tab_lexmin_cut_to_integer(tl);
4795 		prev = sol;
4796 		sol = non_empty_solution(tl);
4797 		if (!sol)
4798 			goto error;
4799 
4800 		integral = isl_int_is_one(sol->el[0]);
4801 		if (!carries_dependences(sol, n_edge)) {
4802 			if (!prev)
4803 				prev = isl_vec_alloc(ctx, 0);
4804 			isl_vec_free(sol);
4805 			sol = prev;
4806 			break;
4807 		}
4808 		prev = isl_vec_free(prev);
4809 		cut = want_integral && !integral;
4810 		if (cut)
4811 			try_again = 1;
4812 		if (!treat_coalescing)
4813 			continue;
4814 		for (i = 0; i < graph->n; ++i) {
4815 			struct isl_sched_node *node = &graph->node[i];
4816 
4817 			pos = find_node_coalescing(node, sol);
4818 			if (pos < 0)
4819 				goto error;
4820 			if (pos < node->nvar)
4821 				break;
4822 		}
4823 		if (i < graph->n) {
4824 			try_again = 1;
4825 			tl = zero_out_node_coef(tl, &graph->node[i], pos);
4826 			cut = 0;
4827 		}
4828 	} while (try_again);
4829 
4830 	isl_tab_lexmin_free(tl);
4831 
4832 	return sol;
4833 error:
4834 	isl_tab_lexmin_free(tl);
4835 	isl_vec_free(prev);
4836 	isl_vec_free(sol);
4837 	return NULL;
4838 }
4839 
4840 /* If "edge" is an edge from a node to itself, then add the corresponding
4841  * dependence relation to "umap".
4842  * If "node" has been compressed, then the dependence relation
4843  * is also compressed first.
4844  */
4845 static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap,
4846 	struct isl_sched_edge *edge)
4847 {
4848 	isl_map *map;
4849 	struct isl_sched_node *node = edge->src;
4850 
4851 	if (edge->src != edge->dst)
4852 		return umap;
4853 
4854 	map = isl_map_copy(edge->map);
4855 	map = compress(map, node, node);
4856 	umap = isl_union_map_add_map(umap, map);
4857 	return umap;
4858 }
4859 
4860 /* If "edge" is an edge from a node to another node, then add the corresponding
4861  * dependence relation to "umap".
4862  * If the source or destination nodes of "edge" have been compressed,
4863  * then the dependence relation is also compressed first.
4864  */
4865 static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap,
4866 	struct isl_sched_edge *edge)
4867 {
4868 	isl_map *map;
4869 
4870 	if (edge->src == edge->dst)
4871 		return umap;
4872 
4873 	map = isl_map_copy(edge->map);
4874 	map = compress(map, edge->src, edge->dst);
4875 	umap = isl_union_map_add_map(umap, map);
4876 	return umap;
4877 }
4878 
4879 /* Internal data structure used by union_drop_coalescing_constraints
4880  * to collect bounds on all relevant statements.
4881  *
4882  * "graph" is the schedule constraint graph for which an LP problem
4883  * is being constructed.
4884  * "bounds" collects the bounds.
4885  */
4886 struct isl_collect_bounds_data {
4887 	isl_ctx *ctx;
4888 	struct isl_sched_graph *graph;
4889 	isl_union_set *bounds;
4890 };
4891 
4892 /* Add the size bounds for the node with instance deltas in "set"
4893  * to data->bounds.
4894  */
4895 static isl_stat collect_bounds(__isl_take isl_set *set, void *user)
4896 {
4897 	struct isl_collect_bounds_data *data = user;
4898 	struct isl_sched_node *node;
4899 	isl_space *space;
4900 	isl_set *bounds;
4901 
4902 	space = isl_set_get_space(set);
4903 	isl_set_free(set);
4904 
4905 	node = graph_find_compressed_node(data->ctx, data->graph, space);
4906 	isl_space_free(space);
4907 
4908 	bounds = isl_set_from_basic_set(get_size_bounds(node));
4909 	data->bounds = isl_union_set_add_set(data->bounds, bounds);
4910 
4911 	return isl_stat_ok;
4912 }
4913 
4914 /* Drop some constraints from "delta" that could be exploited
4915  * to construct loop coalescing schedules.
4916  * In particular, drop those constraint that bound the difference
4917  * to the size of the domain.
4918  * Do this for each set/node in "delta" separately.
4919  * The parameters are assumed to have been projected out by the caller.
4920  */
4921 static __isl_give isl_union_set *union_drop_coalescing_constraints(isl_ctx *ctx,
4922 	struct isl_sched_graph *graph, __isl_take isl_union_set *delta)
4923 {
4924 	struct isl_collect_bounds_data data = { ctx, graph };
4925 
4926 	data.bounds = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
4927 	if (isl_union_set_foreach_set(delta, &collect_bounds, &data) < 0)
4928 		data.bounds = isl_union_set_free(data.bounds);
4929 	delta = isl_union_set_plain_gist(delta, data.bounds);
4930 
4931 	return delta;
4932 }
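
/* As a rough illustration (hypothetical bounds): if the size bounds of
 * a node restrict the difference d to -9 <= d <= 9 and "delta" contains
 * { [d] : 1 <= d <= 9 }, then the gist computation above can drop the
 * upper bound d <= 9, which merely restates the domain size,
 * leaving { [d] : 1 <= d }.
 */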
4933 
4934 /* Given a non-trivial lineality space "lineality", add the corresponding
4935  * universe set to data->mask and add a map from elements to
4936  * other elements along the lines in "lineality" to data->equivalent.
4937  * If this is the first time this function gets called
4938  * (data->any_non_trivial is still false), then set data->any_non_trivial and
4939  * initialize data->mask and data->equivalent.
4940  *
4941  * In particular, if the lineality space is defined by equality constraints
4942  *
4943  *	E x = 0
4944  *
4945  * then construct an affine mapping
4946  *
4947  *	f : x -> E x
4948  *
4949  * and compute the equivalence relation of having the same image under f:
4950  *
4951  *	{ x -> x' : E x = E x' }
4952  */
4953 static isl_stat add_non_trivial_lineality(__isl_take isl_basic_set *lineality,
4954 	struct isl_exploit_lineality_data *data)
4955 {
4956 	isl_mat *eq;
4957 	isl_space *space;
4958 	isl_set *univ;
4959 	isl_multi_aff *ma;
4960 	isl_multi_pw_aff *mpa;
4961 	isl_map *map;
4962 	isl_size n;
4963 
4964 	if (isl_basic_set_check_no_locals(lineality) < 0)
4965 		goto error;
4966 
4967 	space = isl_basic_set_get_space(lineality);
4968 	if (!data->any_non_trivial) {
4969 		data->equivalent = isl_union_map_empty(isl_space_copy(space));
4970 		data->mask = isl_union_set_empty(isl_space_copy(space));
4971 	}
4972 	data->any_non_trivial = isl_bool_true;
4973 
4974 	univ = isl_set_universe(isl_space_copy(space));
4975 	data->mask = isl_union_set_add_set(data->mask, univ);
4976 
4977 	eq = isl_basic_set_extract_equalities(lineality);
4978 	n = isl_mat_rows(eq);
4979 	if (n < 0)
4980 		space = isl_space_free(space);
4981 	eq = isl_mat_insert_zero_rows(eq, 0, 1);
4982 	eq = isl_mat_set_element_si(eq, 0, 0, 1);
4983 	space = isl_space_from_domain(space);
4984 	space = isl_space_add_dims(space, isl_dim_out, n);
4985 	ma = isl_multi_aff_from_aff_mat(space, eq);
4986 	mpa = isl_multi_pw_aff_from_multi_aff(ma);
4987 	map = isl_multi_pw_aff_eq_map(mpa, isl_multi_pw_aff_copy(mpa));
4988 	data->equivalent = isl_union_map_add_map(data->equivalent, map);
4989 
4990 	isl_basic_set_free(lineality);
4991 	return isl_stat_ok;
4992 error:
4993 	isl_basic_set_free(lineality);
4994 	return isl_stat_error;
4995 }
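
/* Concretely (hypothetical 2D example): if the lineality space is the
 * line { [a, b] : a = b }, i.e., E = (1, -1), then f maps (a, b) to a - b
 * and the equivalence relation constructed above is
 *
 *	{ [a, b] -> [a', b'] : a - b = a' - b' }
 *
 * relating all elements that lie on the same line parallel to the
 * lineality space.
 */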
4996 
4997 /* Check if the lineality space "set" is non-trivial (i.e., is not just
4998  * the origin or, in other words, satisfies a number of equality constraints
4999  * that is smaller than the dimension of the set).
5000  * If so, extend data->mask and data->equivalent accordingly.
5001  *
5002  * The input should not have any local variables already, but
5003  * isl_set_remove_divs is called to make sure it does not.
5004  */
5005 static isl_stat add_lineality(__isl_take isl_set *set, void *user)
5006 {
5007 	struct isl_exploit_lineality_data *data = user;
5008 	isl_basic_set *hull;
5009 	isl_size dim;
5010 	int n_eq;
5011 
5012 	set = isl_set_remove_divs(set);
5013 	hull = isl_set_unshifted_simple_hull(set);
5014 	dim = isl_basic_set_dim(hull, isl_dim_set);
5015 	n_eq = isl_basic_set_n_equality(hull);
5016 	if (dim < 0)
5017 		goto error;
5018 	if (dim != n_eq)
5019 		return add_non_trivial_lineality(hull, data);
5020 	isl_basic_set_free(hull);
5021 	return isl_stat_ok;
5022 error:
5023 	isl_basic_set_free(hull);
5024 	return isl_stat_error;
5025 }
5026 
5027 /* Check if the difference set on intra-node schedule constraints "intra"
5028  * has any non-trivial lineality space.
5029  * If so, then extend the difference set to a difference set
5030  * on equivalent elements.  That is, if "intra" is
5031  *
5032  *	{ y - x : (x,y) \in V }
5033  *
5034  * and elements are equivalent if they have the same image under f,
5035  * then return
5036  *
5037  *	{ y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
5038  *
5039  * or, since f is linear,
5040  *
5041  *	{ y' - x' : (x,y) \in V and f(y - x) = f(y' - x') }
5042  *
5043  * The results of the search for non-trivial lineality spaces are stored
5044  * in "data".
5045  */
5046 static __isl_give isl_union_set *exploit_intra_lineality(
5047 	__isl_take isl_union_set *intra,
5048 	struct isl_exploit_lineality_data *data)
5049 {
5050 	isl_union_set *lineality;
5051 	isl_union_set *uset;
5052 
5053 	data->any_non_trivial = isl_bool_false;
5054 	lineality = isl_union_set_copy(intra);
5055 	lineality = isl_union_set_combined_lineality_space(lineality);
5056 	if (isl_union_set_foreach_set(lineality, &add_lineality, data) < 0)
5057 		data->any_non_trivial = isl_bool_error;
5058 	isl_union_set_free(lineality);
5059 
5060 	if (data->any_non_trivial < 0)
5061 		return isl_union_set_free(intra);
5062 	if (!data->any_non_trivial)
5063 		return intra;
5064 
5065 	uset = isl_union_set_copy(intra);
5066 	intra = isl_union_set_subtract(intra, isl_union_set_copy(data->mask));
5067 	uset = isl_union_set_apply(uset, isl_union_map_copy(data->equivalent));
5068 	intra = isl_union_set_union(intra, uset);
5069 
5070 	intra = isl_union_set_remove_divs(intra);
5071 
5072 	return intra;
5073 }
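
/* Continuing the hypothetical 2D example with f(d) = d_1 - d_2:
 * if "intra" contains the single difference [1, 0], then the result
 * computed above contains every difference [a, b] with a - b = 1,
 * since all of these are equivalent to [1, 0] under f.
 */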
5074 
5075 /* If the difference set on intra-node schedule constraints was found to have
5076  * any non-trivial lineality space by exploit_intra_lineality,
5077  * as recorded in "data", then extend the inter-node
5078  * schedule constraints "inter" to schedule constraints on equivalent elements.
5079  * That is, if "inter" is V and
5080  * elements are equivalent if they have the same image under f, then return
5081  *
5082  *	{ (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
5083  */
5084 static __isl_give isl_union_map *exploit_inter_lineality(
5085 	__isl_take isl_union_map *inter,
5086 	struct isl_exploit_lineality_data *data)
5087 {
5088 	isl_union_map *umap;
5089 
5090 	if (data->any_non_trivial < 0)
5091 		return isl_union_map_free(inter);
5092 	if (!data->any_non_trivial)
5093 		return inter;
5094 
5095 	umap = isl_union_map_copy(inter);
5096 	inter = isl_union_map_subtract_range(inter,
5097 				isl_union_set_copy(data->mask));
5098 	umap = isl_union_map_apply_range(umap,
5099 				isl_union_map_copy(data->equivalent));
5100 	inter = isl_union_map_union(inter, umap);
5101 	umap = isl_union_map_copy(inter);
5102 	inter = isl_union_map_subtract_domain(inter,
5103 				isl_union_set_copy(data->mask));
5104 	umap = isl_union_map_apply_range(isl_union_map_copy(data->equivalent),
5105 				umap);
5106 	inter = isl_union_map_union(inter, umap);
5107 
5108 	inter = isl_union_map_remove_divs(inter);
5109 
5110 	return inter;
5111 }
5112 
5113 /* For each (conditional) validity edge in "graph",
5114  * add the corresponding dependence relation using "add"
5115  * to a collection of dependence relations and return the result.
5116  * If "coincidence" is set, then coincidence edges are considered as well.
5117  */
5118 static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph,
5119 	__isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap,
5120 		struct isl_sched_edge *edge), int coincidence)
5121 {
5122 	int i;
5123 	isl_space *space;
5124 	isl_union_map *umap;
5125 
5126 	space = isl_space_copy(graph->node[0].space);
5127 	umap = isl_union_map_empty(space);
5128 
5129 	for (i = 0; i < graph->n_edge; ++i) {
5130 		struct isl_sched_edge *edge = &graph->edge[i];
5131 
5132 		if (!is_any_validity(edge) &&
5133 		    (!coincidence || !is_coincidence(edge)))
5134 			continue;
5135 
5136 		umap = add(umap, edge);
5137 	}
5138 
5139 	return umap;
5140 }
5141 
5142 /* For each dependence relation on a (conditional) validity edge
5143  * from a node to itself,
5144  * construct the set of coefficients of valid constraints for elements
5145  * in that dependence relation and collect the results.
5146  * If "coincidence" is set, then coincidence edges are considered as well.
5147  *
5148  * In particular, for each dependence relation R, constraints
5149  * on coefficients (c_0, c_x) are constructed such that
5150  *
5151  *	c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
5152  *
5153  * If the schedule_treat_coalescing option is set, then some constraints
5154  * that could be exploited to construct coalescing schedules
5155  * are removed before the dual is computed, but after the parameters
5156  * have been projected out.
5157  * The entire computation is essentially the same as that performed
5158  * by intra_coefficients, except that it operates on multiple
5159  * edges together and that the parameters are always projected out.
5160  *
5161  * Additionally, exploit any non-trivial lineality space
5162  * in the difference set after removing coalescing constraints and
5163  * store the results of the non-trivial lineality space detection in "data".
5164  * The procedure is currently run unconditionally, but it is unlikely
5165  * to find any non-trivial lineality spaces if no coalescing constraints
5166  * have been removed.
5167  *
5168  * Note that if a dependence relation is a union of basic maps,
5169  * then each basic map needs to be treated individually as it may only
5170  * be possible to carry the dependences expressed by some of those
5171  * basic maps and not all of them.
5172  * The collected validity constraints are therefore not coalesced and
5173  * it is assumed that they are not coalesced automatically.
5174  * Duplicate basic maps can be removed, however.
5175  * In particular, if the same basic map appears as a disjunct
5176  * in multiple edges, then it only needs to be carried once.
5177  */
5178 static __isl_give isl_basic_set_list *collect_intra_validity(isl_ctx *ctx,
5179 	struct isl_sched_graph *graph, int coincidence,
5180 	struct isl_exploit_lineality_data *data)
5181 {
5182 	isl_union_map *intra;
5183 	isl_union_set *delta;
5184 	isl_basic_set_list *list;
5185 
5186 	intra = collect_validity(graph, &add_intra, coincidence);
5187 	delta = isl_union_map_deltas(intra);
5188 	delta = isl_union_set_project_out_all_params(delta);
5189 	delta = isl_union_set_remove_divs(delta);
5190 	if (isl_options_get_schedule_treat_coalescing(ctx))
5191 		delta = union_drop_coalescing_constraints(ctx, graph, delta);
5192 	delta = exploit_intra_lineality(delta, data);
5193 	list = isl_union_set_get_basic_set_list(delta);
5194 	isl_union_set_free(delta);
5195 
5196 	return isl_basic_set_list_coefficients(list);
5197 }
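
/* As a hypothetical example of the constraints described above: for a
 * dependence relation R = { A[i] -> A[i + 1] }, the difference set is
 * delta R = { [1] }, so the corresponding set of coefficients consists
 * of all (c_0, c_x) with c_0 + c_x >= 0.
 */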
5198 
5199 /* For each dependence relation on a (conditional) validity edge
5200  * from a node to some other node,
5201  * construct the set of coefficients of valid constraints for elements
5202  * in that dependence relation and collect the results.
5203  * If "coincidence" is set, then coincidence edges are considered as well.
5204  *
5205  * In particular, for each dependence relation R, constraints
5206  * on coefficients (c_0, c_n, c_x, c_y) are constructed such that
5207  *
5208  *	c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
5209  *
5210  * This computation is essentially the same as that performed
5211  * by inter_coefficients, except that it operates on multiple
5212  * edges together.
5213  *
5214  * Additionally, exploit any non-trivial lineality space
5215  * that may have been discovered by collect_intra_validity
5216  * (as stored in "data").
5217  *
5218  * Note that if a dependence relation is a union of basic maps,
5219  * then each basic map needs to be treated individually as it may only
5220  * be possible to carry the dependences expressed by some of those
5221  * basic maps and not all of them.
5222  * The collected validity constraints are therefore not coalesced and
5223  * it is assumed that they are not coalesced automatically.
5224  * Duplicate basic maps can be removed, however.
5225  * In particular, if the same basic map appears as a disjunct
5226  * in multiple edges, then it only needs to be carried once.
5227  */
5228 static __isl_give isl_basic_set_list *collect_inter_validity(
5229 	struct isl_sched_graph *graph, int coincidence,
5230 	struct isl_exploit_lineality_data *data)
5231 {
5232 	isl_union_map *inter;
5233 	isl_union_set *wrap;
5234 	isl_basic_set_list *list;
5235 
5236 	inter = collect_validity(graph, &add_inter, coincidence);
5237 	inter = exploit_inter_lineality(inter, data);
5238 	inter = isl_union_map_remove_divs(inter);
5239 	wrap = isl_union_map_wrap(inter);
5240 	list = isl_union_set_get_basic_set_list(wrap);
5241 	isl_union_set_free(wrap);
5242 	return isl_basic_set_list_coefficients(list);
5243 }
5244 
5245 /* Construct an LP problem for finding schedule coefficients
5246  * such that the schedule carries as many of the "n_edge" groups of
5247  * dependences as possible based on the corresponding coefficient
5248  * constraints and return the lexicographically smallest non-trivial solution.
5249  * "intra" is the sequence of coefficient constraints for intra-node edges.
5250  * "inter" is the sequence of coefficient constraints for inter-node edges.
5251  * If "want_integral" is set, then compute an integral solution
5252  * for the coefficients rather than using the numerators
5253  * of a rational solution.
5254  * "carry_inter" indicates whether inter-node edges should be carried or
5255  * only respected.
5256  *
5257  * If none of the "n_edge" groups can be carried
5258  * then return an empty vector.
5259  */
5260 static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx,
5261 	struct isl_sched_graph *graph, int n_edge,
5262 	__isl_keep isl_basic_set_list *intra,
5263 	__isl_keep isl_basic_set_list *inter, int want_integral,
5264 	int carry_inter)
5265 {
5266 	isl_basic_set *lp;
5267 
5268 	if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0)
5269 		return NULL;
5270 
5271 	lp = isl_basic_set_copy(graph->lp);
5272 	return non_neg_lexmin(graph, lp, n_edge, want_integral);
5273 }
5274 
5275 /* Construct an LP problem for finding schedule coefficients
5276  * such that the schedule carries as many of the validity dependences
5277  * as possible and
5278  * return the lexicographically smallest non-trivial solution.
5279  * If "fallback" is set, then the carrying is performed as a fallback
5280  * for the Pluto-like scheduler.
5281  * If "coincidence" is set, then try and carry coincidence edges as well.
5282  *
5283  * The variable "n_edge" stores the number of groups that should be carried.
5284  * If none of the "n_edge" groups can be carried
5285  * then return an empty vector.
5286  * If, moreover, "n_edge" is zero, then the LP problem does not even
5287  * need to be constructed.
5288  *
5289  * If a fallback solution is being computed, then compute an integral solution
5290  * for the coefficients rather than using the numerators
5291  * of a rational solution.
5292  *
5293  * If a fallback solution is being computed, if there are any intra-node
5294  * dependences, and if requested by the user, then first try
5295  * to only carry those intra-node dependences.
5296  * If this fails to carry any dependences, then try again
5297  * with the inter-node dependences included.
5298  */
5299 static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx,
5300 	struct isl_sched_graph *graph, int fallback, int coincidence)
5301 {
5302 	isl_size n_intra, n_inter;
5303 	int n_edge;
5304 	struct isl_carry carry = { 0 };
5305 	isl_vec *sol;
5306 
5307 	carry.intra = collect_intra_validity(ctx, graph, coincidence,
5308 						&carry.lineality);
5309 	carry.inter = collect_inter_validity(graph, coincidence,
5310 						&carry.lineality);
5311 	n_intra = isl_basic_set_list_n_basic_set(carry.intra);
5312 	n_inter = isl_basic_set_list_n_basic_set(carry.inter);
5313 	if (n_intra < 0 || n_inter < 0)
5314 		goto error;
5315 
5316 	if (fallback && n_intra > 0 &&
5317 	    isl_options_get_schedule_carry_self_first(ctx)) {
5318 		sol = compute_carrying_sol_coef(ctx, graph, n_intra,
5319 				carry.intra, carry.inter, fallback, 0);
5320 		if (!sol || sol->size != 0 || n_inter == 0) {
5321 			isl_carry_clear(&carry);
5322 			return sol;
5323 		}
5324 		isl_vec_free(sol);
5325 	}
5326 
5327 	n_edge = n_intra + n_inter;
5328 	if (n_edge == 0) {
5329 		isl_carry_clear(&carry);
5330 		return isl_vec_alloc(ctx, 0);
5331 	}
5332 
5333 	sol = compute_carrying_sol_coef(ctx, graph, n_edge,
5334 				carry.intra, carry.inter, fallback, 1);
5335 	isl_carry_clear(&carry);
5336 	return sol;
5337 error:
5338 	isl_carry_clear(&carry);
5339 	return NULL;
5340 }
5341 
5342 /* Construct a schedule row for each node such that as many validity dependences
5343  * as possible are carried and then continue with the next band.
5344  * If "fallback" is set, then the carrying is performed as a fallback
5345  * for the Pluto-like scheduler.
5346  * If "coincidence" is set, then try and carry coincidence edges as well.
5347  *
5348  * If there are no validity dependences, then no dependence can be carried and
5349  * the procedure is guaranteed to fail.  If there is more than one component,
5350  * then try computing a schedule on each component separately
5351  * to prevent or at least postpone this failure.
5352  *
5353  * If a schedule row is computed, then check that dependences are carried
5354  * for at least one of the edges.
5355  *
5356  * If the computed schedule row turns out to be trivial on one or
5357  * more nodes where it should not be trivial, then we throw it away
5358  * and try again on each component separately.
5359  *
5360  * If there is only one component, then we accept the schedule row anyway,
5361  * but we do not consider it as a complete row and therefore do not
5362  * increment graph->n_row.  Note that the ranks of the nodes that
5363  * do get a non-trivial schedule part will get updated regardless and
5364  * graph->maxvar is computed based on these ranks.  The test for
5365  * whether more schedule rows are required in compute_schedule_wcc
5366  * is therefore not affected.
5367  *
5368  * Insert a band corresponding to the schedule row at position "node"
5369  * of the schedule tree and continue with the construction of the schedule.
5370  * This insertion and the continued construction is performed by split_scaled
5371  * after optionally checking for non-trivial common divisors.
5372  */
5373 static __isl_give isl_schedule_node *carry(__isl_take isl_schedule_node *node,
5374 	struct isl_sched_graph *graph, int fallback, int coincidence)
5375 {
5376 	int trivial;
5377 	isl_ctx *ctx;
5378 	isl_vec *sol;
5379 
5380 	if (!node)
5381 		return NULL;
5382 
5383 	ctx = isl_schedule_node_get_ctx(node);
5384 	sol = compute_carrying_sol(ctx, graph, fallback, coincidence);
5385 	if (!sol)
5386 		return isl_schedule_node_free(node);
5387 	if (sol->size == 0) {
5388 		isl_vec_free(sol);
5389 		if (graph->scc > 1)
5390 			return compute_component_schedule(node, graph, 1);
5391 		isl_die(ctx, isl_error_unknown, "unable to carry dependences",
5392 			return isl_schedule_node_free(node));
5393 	}
5394 
5395 	trivial = is_any_trivial(graph, sol);
5396 	if (trivial < 0) {
5397 		sol = isl_vec_free(sol);
5398 	} else if (trivial && graph->scc > 1) {
5399 		isl_vec_free(sol);
5400 		return compute_component_schedule(node, graph, 1);
5401 	}
5402 
5403 	if (update_schedule(graph, sol, 0) < 0)
5404 		return isl_schedule_node_free(node);
5405 	if (trivial)
5406 		graph->n_row--;
5407 
5408 	return split_scaled(node, graph);
5409 }
5410 
5411 /* Construct a schedule row for each node such that as many validity dependences
5412  * as possible are carried and then continue with the next band.
5413  * Do so as a fallback for the Pluto-like scheduler.
5414  * If "coincidence" is set, then try and carry coincidence edges as well.
5415  */
5416 static __isl_give isl_schedule_node *carry_fallback(
5417 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5418 	int coincidence)
5419 {
5420 	return carry(node, graph, 1, coincidence);
5421 }
5422 
5423 /* Construct a schedule row for each node such that as many validity dependences
5424  * as possible are carried and then continue with the next band.
5425  * Do so for the case where the Feautrier scheduler was selected
5426  * by the user.
5427  */
5428 static __isl_give isl_schedule_node *carry_feautrier(
5429 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5430 {
5431 	return carry(node, graph, 0, 0);
5432 }
5433 
5434 /* Construct a schedule row for each node such that as many validity dependences
5435  * as possible are carried and then continue with the next band.
5436  * Do so as a fallback for the Pluto-like scheduler.
5437  */
5438 static __isl_give isl_schedule_node *carry_dependences(
5439 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5440 {
5441 	return carry_fallback(node, graph, 0);
5442 }
5443 
5444 /* Construct a schedule row for each node such that as many validity or
5445  * coincidence dependences as possible are carried and
5446  * then continue with the next band.
5447  * Do so as a fallback for the Pluto-like scheduler.
5448  */
5449 static __isl_give isl_schedule_node *carry_coincidence(
5450 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5451 {
5452 	return carry_fallback(node, graph, 1);
5453 }
5454 
5455 /* Topologically sort statements mapped to the same schedule iteration
5456  * and insert a sequence node in front of "node"
5457  * corresponding to this order.
5458  * If "initialized" is set, then it may be assumed that compute_maxvar
5459  * has been called on the current band.  Otherwise, call
5460  * compute_maxvar if and before carry_dependences gets called.
5461  *
5462  * If it turns out to be impossible to sort the statements apart,
5463  * because different dependences impose different orderings
5464  * on the statements, then we extend the schedule such that
5465  * it carries at least one more dependence.
5466  */
5467 static __isl_give isl_schedule_node *sort_statements(
5468 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5469 	int initialized)
5470 {
5471 	isl_ctx *ctx;
5472 	isl_union_set_list *filters;
5473 
5474 	if (!node)
5475 		return NULL;
5476 
5477 	ctx = isl_schedule_node_get_ctx(node);
5478 	if (graph->n < 1)
5479 		isl_die(ctx, isl_error_internal,
5480 			"graph should have at least one node",
5481 			return isl_schedule_node_free(node));
5482 
5483 	if (graph->n == 1)
5484 		return node;
5485 
5486 	if (update_edges(ctx, graph) < 0)
5487 		return isl_schedule_node_free(node);
5488 
5489 	if (graph->n_edge == 0)
5490 		return node;
5491 
5492 	if (detect_sccs(ctx, graph) < 0)
5493 		return isl_schedule_node_free(node);
5494 
5495 	next_band(graph);
5496 	if (graph->scc < graph->n) {
5497 		if (!initialized && compute_maxvar(graph) < 0)
5498 			return isl_schedule_node_free(node);
5499 		return carry_dependences(node, graph);
5500 	}
5501 
5502 	filters = extract_sccs(ctx, graph);
5503 	node = isl_schedule_node_insert_sequence(node, filters);
5504 
5505 	return node;
5506 }
5507 
5508 /* Are there any (non-empty) (conditional) validity edges in the graph?
5509  */
5510 static int has_validity_edges(struct isl_sched_graph *graph)
5511 {
5512 	int i;
5513 
5514 	for (i = 0; i < graph->n_edge; ++i) {
5515 		int empty;
5516 
5517 		empty = isl_map_plain_is_empty(graph->edge[i].map);
5518 		if (empty < 0)
5519 			return -1;
5520 		if (empty)
5521 			continue;
5522 		if (is_any_validity(&graph->edge[i]))
5523 			return 1;
5524 	}
5525 
5526 	return 0;
5527 }
5528 
5529 /* Should we apply a Feautrier step?
5530  * That is, did the user request the Feautrier algorithm and are
5531  * there any validity dependences (left)?
5532  */
5533 static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
5534 {
5535 	if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER)
5536 		return 0;
5537 
5538 	return has_validity_edges(graph);
5539 }
5540 
5541 /* Compute a schedule for a connected dependence graph using Feautrier's
5542  * multi-dimensional scheduling algorithm and return the updated schedule node.
5543  *
5544  * The original algorithm is described in [1].
5545  * The main idea is to minimize the number of scheduling dimensions, by
5546  * trying to satisfy as many dependences as possible per scheduling dimension.
5547  *
5548  * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
5549  *     Problem, Part II: Multi-Dimensional Time.
5550  *     In Intl. Journal of Parallel Programming, 1992.
5551  */
5552 static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier(
5553 	isl_schedule_node *node, struct isl_sched_graph *graph)
5554 {
5555 	return carry_feautrier(node, graph);
5556 }
5557 
5558 /* Turn off the "local" bit on all (condition) edges.
5559  */
5560 static void clear_local_edges(struct isl_sched_graph *graph)
5561 {
5562 	int i;
5563 
5564 	for (i = 0; i < graph->n_edge; ++i)
5565 		if (is_condition(&graph->edge[i]))
5566 			clear_local(&graph->edge[i]);
5567 }
5568 
5569 /* Does "graph" have both condition and conditional validity edges?
5570  */
5571 static int need_condition_check(struct isl_sched_graph *graph)
5572 {
5573 	int i;
5574 	int any_condition = 0;
5575 	int any_conditional_validity = 0;
5576 
5577 	for (i = 0; i < graph->n_edge; ++i) {
5578 		if (is_condition(&graph->edge[i]))
5579 			any_condition = 1;
5580 		if (is_conditional_validity(&graph->edge[i]))
5581 			any_conditional_validity = 1;
5582 	}
5583 
5584 	return any_condition && any_conditional_validity;
5585 }
5586 
5587 /* Does "graph" contain any coincidence edge?
5588  */
5589 static int has_any_coincidence(struct isl_sched_graph *graph)
5590 {
5591 	int i;
5592 
5593 	for (i = 0; i < graph->n_edge; ++i)
5594 		if (is_coincidence(&graph->edge[i]))
5595 			return 1;
5596 
5597 	return 0;
5598 }
5599 
5600 /* Extract the final schedule row as a map with the iteration domain
5601  * of "node" as domain.
5602  */
5603 static __isl_give isl_map *final_row(struct isl_sched_node *node)
5604 {
5605 	isl_multi_aff *ma;
5606 	isl_size n_row;
5607 
5608 	n_row = isl_mat_rows(node->sched);
5609 	if (n_row < 0)
5610 		return NULL;
5611 	ma = node_extract_partial_schedule_multi_aff(node, n_row - 1, 1);
5612 	return isl_map_from_multi_aff(ma);
5613 }
5614 
5615 /* Is the conditional validity dependence in the edge with index "edge_index"
5616  * violated by the latest (i.e., final) row of the schedule?
5617  * That is, is i scheduled after j
5618  * for any conditional validity dependence i -> j?
5619  */
5620 static int is_violated(struct isl_sched_graph *graph, int edge_index)
5621 {
5622 	isl_map *src_sched, *dst_sched, *map;
5623 	struct isl_sched_edge *edge = &graph->edge[edge_index];
5624 	int empty;
5625 
5626 	src_sched = final_row(edge->src);
5627 	dst_sched = final_row(edge->dst);
5628 	map = isl_map_copy(edge->map);
5629 	map = isl_map_apply_domain(map, src_sched);
5630 	map = isl_map_apply_range(map, dst_sched);
5631 	map = isl_map_order_gt(map, isl_dim_in, 0, isl_dim_out, 0);
5632 	empty = isl_map_is_empty(map);
5633 	isl_map_free(map);
5634 
5635 	if (empty < 0)
5636 		return -1;
5637 
5638 	return !empty;
5639 }
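
/* For instance (hypothetical schedule values): if some instance of the
 * source is mapped to schedule value 3 by the final row while the
 * corresponding sink instance is mapped to 2, then the order constraint
 * above keeps this pair (3 > 2), the resulting map is non-empty and
 * the dependence is reported as violated.
 */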
5640 
5641 /* Does "graph" have any satisfied condition edges that
5642  * are adjacent to the conditional validity constraint with
5643  * domain "conditional_source" and range "conditional_sink"?
5644  *
5645  * A satisfied condition is one that is not local.
5646  * If a condition was forced to be local already (i.e., marked as local)
5647  * then there is no need to check if it is in fact local.
5648  *
5649  * Additionally, mark all adjacent condition edges found as local.
5650  */
5651 static int has_adjacent_true_conditions(struct isl_sched_graph *graph,
5652 	__isl_keep isl_union_set *conditional_source,
5653 	__isl_keep isl_union_set *conditional_sink)
5654 {
5655 	int i;
5656 	int any = 0;
5657 
5658 	for (i = 0; i < graph->n_edge; ++i) {
5659 		int adjacent, local;
5660 		isl_union_map *condition;
5661 
5662 		if (!is_condition(&graph->edge[i]))
5663 			continue;
5664 		if (is_local(&graph->edge[i]))
5665 			continue;
5666 
5667 		condition = graph->edge[i].tagged_condition;
5668 		adjacent = domain_intersects(condition, conditional_sink);
5669 		if (adjacent >= 0 && !adjacent)
5670 			adjacent = range_intersects(condition,
5671 							conditional_source);
5672 		if (adjacent < 0)
5673 			return -1;
5674 		if (!adjacent)
5675 			continue;
5676 
5677 		set_local(&graph->edge[i]);
5678 
5679 		local = is_condition_false(&graph->edge[i]);
5680 		if (local < 0)
5681 			return -1;
5682 		if (!local)
5683 			any = 1;
5684 	}
5685 
5686 	return any;
5687 }
5688 
5689 /* Are there any violated conditional validity dependences with
5690  * adjacent condition dependences that are not local with respect
5691  * to the current schedule?
5692  * That is, is the conditional validity constraint violated?
5693  *
5694  * Additionally, mark all those adjacent condition dependences as local.
5695  * We also mark those adjacent condition dependences that were not marked
5696  * as local before, but just happened to be local already.  This ensures
5697  * that they remain local if the schedule is recomputed.
5698  *
5699  * We first collect domain and range of all violated conditional validity
5700  * dependences and then check if there are any adjacent non-local
5701  * condition dependences.
5702  */
5703 static int has_violated_conditional_constraint(isl_ctx *ctx,
5704 	struct isl_sched_graph *graph)
5705 {
5706 	int i;
5707 	int any = 0;
5708 	isl_union_set *source, *sink;
5709 
5710 	source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
5711 	sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
5712 	for (i = 0; i < graph->n_edge; ++i) {
5713 		isl_union_set *uset;
5714 		isl_union_map *umap;
5715 		int violated;
5716 
5717 		if (!is_conditional_validity(&graph->edge[i]))
5718 			continue;
5719 
5720 		violated = is_violated(graph, i);
5721 		if (violated < 0)
5722 			goto error;
5723 		if (!violated)
5724 			continue;
5725 
5726 		any = 1;
5727 
5728 		umap = isl_union_map_copy(graph->edge[i].tagged_validity);
5729 		uset = isl_union_map_domain(umap);
5730 		source = isl_union_set_union(source, uset);
5731 		source = isl_union_set_coalesce(source);
5732 
5733 		umap = isl_union_map_copy(graph->edge[i].tagged_validity);
5734 		uset = isl_union_map_range(umap);
5735 		sink = isl_union_set_union(sink, uset);
5736 		sink = isl_union_set_coalesce(sink);
5737 	}
5738 
5739 	if (any)
5740 		any = has_adjacent_true_conditions(graph, source, sink);
5741 
5742 	isl_union_set_free(source);
5743 	isl_union_set_free(sink);
5744 	return any;
5745 error:
5746 	isl_union_set_free(source);
5747 	isl_union_set_free(sink);
5748 	return -1;
5749 }
5750 
5751 /* Examine the current band (the rows between graph->band_start and
5752  * graph->n_total_row), deciding whether to drop it or add it to "node"
5753  * and then continue with the computation of the next band, if any.
5754  * If "initialized" is set, then it may be assumed that compute_maxvar
5755  * has been called on the current band.  Otherwise, call
5756  * compute_maxvar if and before carry_dependences gets called.
5757  *
5758  * The caller keeps looking for a new row as long as
5759  * graph->n_row < graph->maxvar.  If the latest attempt to find
5760  * such a row failed (i.e., we still have graph->n_row < graph->maxvar),
5761  * then we either
5762  * - split between SCCs and start over (assuming we found an interesting
5763  *	pair of SCCs between which to split)
5764  * - continue with the next band (assuming the current band has at least
5765  *	one row)
5766  * - if there is more than one SCC left, then split along all SCCs
5767  * - if outer coincidence needs to be enforced, then try to carry as many
5768  *	validity or coincidence dependences as possible and
5769  *	continue with the next band
5770  * - try to carry as many validity dependences as possible and
5771  *	continue with the next band
5772  * In each case, we first insert a band node in the schedule tree
5773  * if any rows have been computed.
5774  *
5775  * If the caller managed to complete the schedule and the current band
5776  * is empty, then finish off by topologically
5777  * sorting the statements based on the remaining dependences.
5778  * If, on the other hand, the current band has at least one row,
5779  * then continue with the next band.  Note that this next band
5780  * will necessarily be empty, but the graph may still be split up
5781  * into weakly connected components before arriving back here.
5782  */
5783 static __isl_give isl_schedule_node *compute_schedule_finish_band(
5784 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5785 	int initialized)
5786 {
5787 	int empty;
5788 
5789 	if (!node)
5790 		return NULL;
5791 
5792 	empty = graph->n_total_row == graph->band_start;
5793 	if (graph->n_row < graph->maxvar) {
5794 		isl_ctx *ctx;
5795 
5796 		ctx = isl_schedule_node_get_ctx(node);
5797 		if (!ctx->opt->schedule_maximize_band_depth && !empty)
5798 			return compute_next_band(node, graph, 1);
5799 		if (graph->src_scc >= 0)
5800 			return compute_split_schedule(node, graph);
5801 		if (!empty)
5802 			return compute_next_band(node, graph, 1);
5803 		if (graph->scc > 1)
5804 			return compute_component_schedule(node, graph, 1);
5805 		if (!initialized && compute_maxvar(graph) < 0)
5806 			return isl_schedule_node_free(node);
5807 		if (isl_options_get_schedule_outer_coincidence(ctx))
5808 			return carry_coincidence(node, graph);
5809 		return carry_dependences(node, graph);
5810 	}
5811 
5812 	if (!empty)
5813 		return compute_next_band(node, graph, 1);
5814 	return sort_statements(node, graph, initialized);
5815 }
5816 
5817 /* Construct a band of schedule rows for a connected dependence graph.
5818  * The caller is responsible for determining the strongly connected
5819  * components and calling compute_maxvar first.
5820  *
5821  * We try to find a sequence of as many schedule rows as possible that result
5822  * in non-negative dependence distances (independent of the previous rows
5823  * in the sequence, i.e., such that the sequence is tilable), with as
5824  * many of the initial rows as possible satisfying the coincidence constraints.
5825  * The computation stops if we can't find any more rows or if we have found
5826  * all the rows we wanted to find.
5827  *
5828  * If ctx->opt->schedule_outer_coincidence is set, then we force the
5829  * outermost dimension to satisfy the coincidence constraints.  If this
5830  * turns out to be impossible, we fall back on the general scheme above
5831  * and try to carry as many dependences as possible.
5832  *
5833  * If "graph" contains both condition and conditional validity dependences,
5834  * then we need to check that the conditional schedule constraint
5835  * is satisfied, i.e., there are no violated conditional validity dependences
5836  * that are adjacent to any non-local condition dependences.
5837  * If there are, then we mark all those adjacent condition dependences
5838  * as local and recompute the current band.  Those dependences that
5839  * are marked local will then be forced to be local.
5840  * The initial computation is performed with no dependences marked as local.
5841  * If we are lucky, then there will be no violated conditional validity
5842  * dependences adjacent to any non-local condition dependences.
5843  * Otherwise, we mark some additional condition dependences as local and
5844  * recompute.  We continue this process until there are no violations left or
5845  * until we are no longer able to compute a schedule.
5846  * Since there are only a finite number of dependences,
5847  * there will only be a finite number of iterations.
5848  */
5849 static isl_stat compute_schedule_wcc_band(isl_ctx *ctx,
5850 	struct isl_sched_graph *graph)
5851 {
5852 	int has_coincidence;
5853 	int use_coincidence;
5854 	int force_coincidence = 0;
5855 	int check_conditional;
5856 
5857 	if (sort_sccs(graph) < 0)
5858 		return isl_stat_error;
5859 
5860 	clear_local_edges(graph);
5861 	check_conditional = need_condition_check(graph);
5862 	has_coincidence = has_any_coincidence(graph);
5863 
5864 	if (ctx->opt->schedule_outer_coincidence)
5865 		force_coincidence = 1;
5866 
5867 	use_coincidence = has_coincidence;
5868 	while (graph->n_row < graph->maxvar) {
5869 		isl_vec *sol;
5870 		int violated;
5871 		int coincident;
5872 
5873 		graph->src_scc = -1;
5874 		graph->dst_scc = -1;
5875 
5876 		if (setup_lp(ctx, graph, use_coincidence) < 0)
5877 			return isl_stat_error;
5878 		sol = solve_lp(ctx, graph);
5879 		if (!sol)
5880 			return isl_stat_error;
5881 		if (sol->size == 0) {
5882 			int empty = graph->n_total_row == graph->band_start;
5883 
5884 			isl_vec_free(sol);
5885 			if (use_coincidence && (!force_coincidence || !empty)) {
5886 				use_coincidence = 0;
5887 				continue;
5888 			}
5889 			return isl_stat_ok;
5890 		}
5891 		coincident = !has_coincidence || use_coincidence;
5892 		if (update_schedule(graph, sol, coincident) < 0)
5893 			return isl_stat_error;
5894 
5895 		if (!check_conditional)
5896 			continue;
5897 		violated = has_violated_conditional_constraint(ctx, graph);
5898 		if (violated < 0)
5899 			return isl_stat_error;
5900 		if (!violated)
5901 			continue;
5902 		if (reset_band(graph) < 0)
5903 			return isl_stat_error;
5904 		use_coincidence = has_coincidence;
5905 	}
5906 
5907 	return isl_stat_ok;
5908 }
5909 
5910 /* Compute a schedule for a connected dependence graph by considering
5911  * the graph as a whole and return the updated schedule node.
5912  *
5913  * The actual schedule rows of the current band are computed by
5914  * compute_schedule_wcc_band.  compute_schedule_finish_band takes
5915  * care of integrating the band into "node" and continuing
5916  * the computation.
5917  */
5918 static __isl_give isl_schedule_node *compute_schedule_wcc_whole(
5919 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5920 {
5921 	isl_ctx *ctx;
5922 
5923 	if (!node)
5924 		return NULL;
5925 
5926 	ctx = isl_schedule_node_get_ctx(node);
5927 	if (compute_schedule_wcc_band(ctx, graph) < 0)
5928 		return isl_schedule_node_free(node);
5929 
5930 	return compute_schedule_finish_band(node, graph, 1);
5931 }
5932 
5933 /* Clustering information used by compute_schedule_wcc_clustering.
5934  *
5935  * "n" is the number of SCCs in the original dependence graph
5936  * "scc" is an array of "n" elements, each representing an SCC
5937  * of the original dependence graph.  All entries in the same cluster
5938  * have the same number of schedule rows.
5939  * "scc_cluster" maps each SCC index to the cluster to which it belongs,
5940  * where each cluster is represented by the index of the first SCC
5941  * in the cluster.  Initially, each SCC belongs to a cluster containing
5942  * only that SCC.
5943  *
5944  * "scc_in_merge" is used by merge_clusters_along_edge to keep
5945  * track of which SCCs need to be merged.
5946  *
5947  * "cluster" contains the merged clusters of SCCs after the clustering
5948  * has completed.
5949  *
5950  * "scc_node" is a temporary data structure used inside copy_partial.
5951  * For each SCC, it keeps track of the number of nodes in the SCC
5952  * that have already been copied.
5953  */
5954 struct isl_clustering {
5955 	int n;
5956 	struct isl_sched_graph *scc;
5957 	struct isl_sched_graph *cluster;
5958 	int *scc_cluster;
5959 	int *scc_node;
5960 	int *scc_in_merge;
5961 };
5962 
5963 /* Initialize the clustering data structure "c" from "graph".
5964  *
5965  * In particular, allocate memory, extract the SCCs from "graph"
5966  * into c->scc, initialize scc_cluster and construct
5967  * a band of schedule rows for each SCC.
5968  * Within each SCC, there is only one SCC by definition.
5969  * Each SCC initially belongs to a cluster containing only that SCC.
5970  */
5971 static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c,
5972 	struct isl_sched_graph *graph)
5973 {
5974 	int i;
5975 
5976 	c->n = graph->scc;
5977 	c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
5978 	c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
5979 	c->scc_cluster = isl_calloc_array(ctx, int, c->n);
5980 	c->scc_node = isl_calloc_array(ctx, int, c->n);
5981 	c->scc_in_merge = isl_calloc_array(ctx, int, c->n);
5982 	if (!c->scc || !c->cluster ||
5983 	    !c->scc_cluster || !c->scc_node || !c->scc_in_merge)
5984 		return isl_stat_error;
5985 
5986 	for (i = 0; i < c->n; ++i) {
5987 		if (extract_sub_graph(ctx, graph, &node_scc_exactly,
5988 					&edge_scc_exactly, i, &c->scc[i]) < 0)
5989 			return isl_stat_error;
5990 		c->scc[i].scc = 1;
5991 		if (compute_maxvar(&c->scc[i]) < 0)
5992 			return isl_stat_error;
5993 		if (compute_schedule_wcc_band(ctx, &c->scc[i]) < 0)
5994 			return isl_stat_error;
5995 		c->scc_cluster[i] = i;
5996 	}
5997 
5998 	return isl_stat_ok;
5999 }
6000 
6001 /* Free all memory allocated for "c".
6002  */
6003 static void clustering_free(isl_ctx *ctx, struct isl_clustering *c)
6004 {
6005 	int i;
6006 
6007 	if (c->scc)
6008 		for (i = 0; i < c->n; ++i)
6009 			graph_free(ctx, &c->scc[i]);
6010 	free(c->scc);
6011 	if (c->cluster)
6012 		for (i = 0; i < c->n; ++i)
6013 			graph_free(ctx, &c->cluster[i]);
6014 	free(c->cluster);
6015 	free(c->scc_cluster);
6016 	free(c->scc_node);
6017 	free(c->scc_in_merge);
6018 }
6019 
6020 /* Should we refrain from merging the cluster in "graph" with
6021  * any other cluster?
6022  * In particular, is its current schedule band empty and incomplete?
6023  */
6024 static int bad_cluster(struct isl_sched_graph *graph)
6025 {
6026 	return graph->n_row < graph->maxvar &&
6027 		graph->n_total_row == graph->band_start;
6028 }
6029 
6030 /* Is "edge" a proximity edge with a non-empty dependence relation?
6031  */
6032 static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge)
6033 {
6034 	if (!is_proximity(edge))
6035 		return isl_bool_false;
6036 	return isl_bool_not(isl_map_plain_is_empty(edge->map));
6037 }
6038 
6039 /* Return the index of an edge in "graph" that can be used to merge
6040  * two clusters in "c".
6041  * Return graph->n_edge if no such edge can be found.
6042  * Return -1 on error.
6043  *
6044  * In particular, return a proximity edge between two clusters
6045  * that is not marked "no_merge" and such that neither of the
6046  * two clusters has an incomplete, empty band.
6047  *
6048  * If there are multiple such edges, then try and find the most
6049  * appropriate edge to use for merging.  In particular, pick the edge
6050  * with the greatest weight.  If there are multiple of those,
6051  * then pick one with the shortest distance between
6052  * the two cluster representatives.
6053  */
6054 static int find_proximity(struct isl_sched_graph *graph,
6055 	struct isl_clustering *c)
6056 {
6057 	int i, best = graph->n_edge, best_dist, best_weight;
6058 
6059 	for (i = 0; i < graph->n_edge; ++i) {
6060 		struct isl_sched_edge *edge = &graph->edge[i];
6061 		int dist, weight;
6062 		isl_bool prox;
6063 
6064 		prox = is_non_empty_proximity(edge);
6065 		if (prox < 0)
6066 			return -1;
6067 		if (!prox)
6068 			continue;
6069 		if (edge->no_merge)
6070 			continue;
6071 		if (bad_cluster(&c->scc[edge->src->scc]) ||
6072 		    bad_cluster(&c->scc[edge->dst->scc]))
6073 			continue;
6074 		dist = c->scc_cluster[edge->dst->scc] -
6075 			c->scc_cluster[edge->src->scc];
6076 		if (dist == 0)
6077 			continue;
6078 		weight = edge->weight;
6079 		if (best < graph->n_edge) {
6080 			if (best_weight > weight)
6081 				continue;
6082 			if (best_weight == weight && best_dist <= dist)
6083 				continue;
6084 		}
6085 		best = i;
6086 		best_dist = dist;
6087 		best_weight = weight;
6088 	}
6089 
6090 	return best;
6091 }
6092 
6093 /* Internal data structure used in mark_merge_sccs.
6094  *
6095  * "graph" is the dependence graph in which a strongly connected
6096  * component is constructed.
6097  * "scc_cluster" maps each SCC index to the cluster to which it belongs.
6098  * "src" and "dst" are the indices of the nodes that are being merged.
6099  */
6100 struct isl_mark_merge_sccs_data {
6101 	struct isl_sched_graph *graph;
6102 	int *scc_cluster;
6103 	int src;
6104 	int dst;
6105 };
6106 
6107 /* Check whether the cluster containing node "i" depends on the cluster
6108  * containing node "j".  If "i" and "j" belong to the same cluster,
6109  * then they are taken to depend on each other to ensure that
6110  * the resulting strongly connected component consists of complete
6111  * clusters.  Furthermore, if "i" and "j" are the two nodes that
6112  * are being merged, then they are taken to depend on each other as well.
6113  * Otherwise, check if there is a (conditional) validity dependence
6114  * from node[j] to node[i], forcing node[i] to follow node[j].
6115  */
6116 static isl_bool cluster_follows(int i, int j, void *user)
6117 {
6118 	struct isl_mark_merge_sccs_data *data = user;
6119 	struct isl_sched_graph *graph = data->graph;
6120 	int *scc_cluster = data->scc_cluster;
6121 
6122 	if (data->src == i && data->dst == j)
6123 		return isl_bool_true;
6124 	if (data->src == j && data->dst == i)
6125 		return isl_bool_true;
6126 	if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc])
6127 		return isl_bool_true;
6128 
6129 	return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
6130 }
6131 
6132 /* Mark all SCCs that belong to either of the two clusters in "c"
6133  * connected by the edge in "graph" with index "edge", or to any
6134  * of the intermediate clusters.
6135  * The marking is recorded in c->scc_in_merge.
6136  *
6137  * The given edge has been selected for merging two clusters,
6138  * meaning that there is at least a proximity edge between the two nodes.
6139  * However, there may also be (indirect) validity dependences
6140  * between the two nodes.  When merging the two clusters, all clusters
6141  * containing one or more of the intermediate nodes along the
6142  * indirect validity dependences need to be merged in as well.
6143  *
6144  * First collect all such nodes by computing the strongly connected
6145  * component (SCC) containing the two nodes connected by the edge, where
6146  * the two nodes are considered to depend on each other to make
6147  * sure they end up in the same SCC.  Similarly, each node is considered
6148  * to depend on every other node in the same cluster to ensure
6149  * that the SCC consists of complete clusters.
6150  *
6151  * Then the original SCCs that contain any of these nodes are marked
6152  * in c->scc_in_merge.
6153  */
6154 static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph,
6155 	int edge, struct isl_clustering *c)
6156 {
6157 	struct isl_mark_merge_sccs_data data;
6158 	struct isl_tarjan_graph *g;
6159 	int i;
6160 
6161 	for (i = 0; i < c->n; ++i)
6162 		c->scc_in_merge[i] = 0;
6163 
6164 	data.graph = graph;
6165 	data.scc_cluster = c->scc_cluster;
6166 	data.src = graph->edge[edge].src - graph->node;
6167 	data.dst = graph->edge[edge].dst - graph->node;
6168 
6169 	g = isl_tarjan_graph_component(ctx, graph->n, data.dst,
6170 					&cluster_follows, &data);
6171 	if (!g)
6172 		goto error;
6173 
6174 	i = g->op;
6175 	if (i < 3)
6176 		isl_die(ctx, isl_error_internal,
6177 			"expecting at least two nodes in component",
6178 			goto error);
6179 	if (g->order[--i] != -1)
6180 		isl_die(ctx, isl_error_internal,
6181 			"expecting end of component marker", goto error);
6182 
6183 	for (--i; i >= 0 && g->order[i] != -1; --i) {
6184 		int scc = graph->node[g->order[i]].scc;
6185 		c->scc_in_merge[scc] = 1;
6186 	}
6187 
6188 	isl_tarjan_graph_free(g);
6189 	return isl_stat_ok;
6190 error:
6191 	isl_tarjan_graph_free(g);
6192 	return isl_stat_error;
6193 }
6194 
6195 /* Construct the identifier "cluster_i".
6196  */
6197 static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i)
6198 {
6199 	char name[40];
6200 
6201 	snprintf(name, sizeof(name), "cluster_%d", i);
6202 	return isl_id_alloc(ctx, name, NULL);
6203 }
6204 
6205 /* Construct the space of the cluster with index "i" containing
6206  * the strongly connected component "scc".
6207  *
6208  * In particular, construct a space called cluster_i with dimension equal
6209  * to the number of schedule rows in the current band of "scc".
6210  */
6211 static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i)
6212 {
6213 	int nvar;
6214 	isl_space *space;
6215 	isl_id *id;
6216 
6217 	nvar = scc->n_total_row - scc->band_start;
6218 	space = isl_space_copy(scc->node[0].space);
6219 	space = isl_space_params(space);
6220 	space = isl_space_set_from_params(space);
6221 	space = isl_space_add_dims(space, isl_dim_set, nvar);
6222 	id = cluster_id(isl_space_get_ctx(space), i);
6223 	space = isl_space_set_tuple_id(space, isl_dim_set, id);
6224 
6225 	return space;
6226 }
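
/* Editorial sketch (not part of isl): the space built by cluster_space()
 * above can also be constructed directly with the public isl API.
 * The example below builds "cluster_3[i0, i1]" for a hypothetical cluster
 * with index 3 whose current band has two schedule rows; the helper name
 * example_cluster_space is made up and not used by the scheduler.
 */
static __isl_give isl_space *example_cluster_space(isl_ctx *ctx)
{
	isl_space *space;
	isl_id *id;

	space = isl_space_params_alloc(ctx, 0);
	space = isl_space_set_from_params(space);
	space = isl_space_add_dims(space, isl_dim_set, 2);
	id = isl_id_alloc(ctx, "cluster_3", NULL);
	space = isl_space_set_tuple_id(space, isl_dim_set, id);

	return space;
}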
6227 
6228 /* Collect the domain of the graph for merging clusters.
6229  *
6230  * In particular, for each cluster with first SCC "i", construct
6231  * a set in the space called cluster_i with dimension equal
6232  * to the number of schedule rows in the current band of the cluster.
6233  */
6234 static __isl_give isl_union_set *collect_domain(isl_ctx *ctx,
6235 	struct isl_sched_graph *graph, struct isl_clustering *c)
6236 {
6237 	int i;
6238 	isl_space *space;
6239 	isl_union_set *domain;
6240 
6241 	space = isl_space_params_alloc(ctx, 0);
6242 	domain = isl_union_set_empty(space);
6243 
6244 	for (i = 0; i < graph->scc; ++i) {
6245 		isl_space *space;
6246 
6247 		if (!c->scc_in_merge[i])
6248 			continue;
6249 		if (c->scc_cluster[i] != i)
6250 			continue;
6251 		space = cluster_space(&c->scc[i], i);
6252 		domain = isl_union_set_add_set(domain, isl_set_universe(space));
6253 	}
6254 
6255 	return domain;
6256 }
6257 
6258 /* Construct a map from the original instances to the corresponding
6259  * cluster instance in the current bands of the clusters in "c".
6260  */
6261 static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx,
6262 	struct isl_sched_graph *graph, struct isl_clustering *c)
6263 {
6264 	int i, j;
6265 	isl_space *space;
6266 	isl_union_map *cluster_map;
6267 
6268 	space = isl_space_params_alloc(ctx, 0);
6269 	cluster_map = isl_union_map_empty(space);
6270 	for (i = 0; i < graph->scc; ++i) {
6271 		int start, n;
6272 		isl_id *id;
6273 
6274 		if (!c->scc_in_merge[i])
6275 			continue;
6276 
6277 		id = cluster_id(ctx, c->scc_cluster[i]);
6278 		start = c->scc[i].band_start;
6279 		n = c->scc[i].n_total_row - start;
6280 		for (j = 0; j < c->scc[i].n; ++j) {
6281 			isl_multi_aff *ma;
6282 			isl_map *map;
6283 			struct isl_sched_node *node = &c->scc[i].node[j];
6284 
6285 			ma = node_extract_partial_schedule_multi_aff(node,
6286 								    start, n);
6287 			ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out,
6288 							    isl_id_copy(id));
6289 			map = isl_map_from_multi_aff(ma);
6290 			cluster_map = isl_union_map_add_map(cluster_map, map);
6291 		}
6292 		isl_id_free(id);
6293 	}
6294 
6295 	return cluster_map;
6296 }
6297 
6298 /* Add "umap" to the schedule constraints "sc" of all types of "edge"
6299  * that are not isl_edge_condition or isl_edge_conditional_validity.
6300  */
6301 static __isl_give isl_schedule_constraints *add_non_conditional_constraints(
6302 	struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
6303 	__isl_take isl_schedule_constraints *sc)
6304 {
6305 	enum isl_edge_type t;
6306 
6307 	if (!sc)
6308 		return NULL;
6309 
6310 	for (t = isl_edge_first; t <= isl_edge_last; ++t) {
6311 		if (t == isl_edge_condition ||
6312 		    t == isl_edge_conditional_validity)
6313 			continue;
6314 		if (!is_type(edge, t))
6315 			continue;
6316 		sc = isl_schedule_constraints_add(sc, t,
6317 						    isl_union_map_copy(umap));
6318 	}
6319 
6320 	return sc;
6321 }
6322 
6323 /* Add schedule constraints of types isl_edge_condition and
6324  * isl_edge_conditional_validity to "sc" by applying "umap" to
6325  * the domains of the wrapped relations in domain and range
6326  * of the corresponding tagged constraints of "edge".
6327  */
6328 static __isl_give isl_schedule_constraints *add_conditional_constraints(
6329 	struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
6330 	__isl_take isl_schedule_constraints *sc)
6331 {
6332 	enum isl_edge_type t;
6333 	isl_union_map *tagged;
6334 
6335 	for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) {
6336 		if (!is_type(edge, t))
6337 			continue;
6338 		if (t == isl_edge_condition)
6339 			tagged = isl_union_map_copy(edge->tagged_condition);
6340 		else
6341 			tagged = isl_union_map_copy(edge->tagged_validity);
6342 		tagged = isl_union_map_zip(tagged);
6343 		tagged = isl_union_map_apply_domain(tagged,
6344 					isl_union_map_copy(umap));
6345 		tagged = isl_union_map_zip(tagged);
6346 		sc = isl_schedule_constraints_add(sc, t, tagged);
6347 		if (!sc)
6348 			return NULL;
6349 	}
6350 
6351 	return sc;
6352 }
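
/* Editorial sketch (not part of isl): the zip/apply/zip idiom used by
 * add_conditional_constraints() above, spelled out on the public API
 * with a small hand-written tagged relation.  The relation strings and
 * the helper name are made up for illustration only.
 */
static __isl_give isl_union_map *example_transform_tagged(isl_ctx *ctx)
{
	isl_union_map *tagged, *pair_map;

	/* Tagged dependence: [instance -> reference] -> [instance -> reference]. */
	tagged = isl_union_map_read_from_str(ctx,
	    "{ [S[i] -> ref_a[]] -> [T[i] -> ref_b[]] : 0 <= i < 10 }");
	/* Mapping on pairs of instances, as built by isl_union_map_product()
	 * in collect_edge_constraints() below. */
	pair_map = isl_union_map_read_from_str(ctx,
	    "{ [S[i] -> T[j]] -> [cluster_0[i] -> cluster_0[j]] }");

	/* [S -> ref_a] -> [T -> ref_b] becomes [S -> T] -> [ref_a -> ref_b]. */
	tagged = isl_union_map_zip(tagged);
	/* Replace the instance pair by the corresponding cluster pair. */
	tagged = isl_union_map_apply_domain(tagged, pair_map);
	/* Zip back to [cluster_0 -> ref_a] -> [cluster_0 -> ref_b]. */
	return isl_union_map_zip(tagged);
}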
6353 
6354 /* Given a mapping "cluster_map" from the original instances to
6355  * the cluster instances, add schedule constraints on the clusters
6356  * to "sc" corresponding to the original constraints represented by "edge".
6357  *
6358  * For non-tagged dependence constraints, the cluster constraints
6359  * are obtained by applying "cluster_map" to the edge->map.
6360  *
6361  * For tagged dependence constraints, "cluster_map" needs to be applied
6362  * to the domains of the wrapped relations in domain and range
6363  * of the tagged dependence constraints.  Pick out the mappings
6364  * from these domains from "cluster_map" and construct their product.
6365  * This mapping can then be applied to the pair of domains.
6366  */
6367 static __isl_give isl_schedule_constraints *collect_edge_constraints(
6368 	struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map,
6369 	__isl_take isl_schedule_constraints *sc)
6370 {
6371 	isl_union_map *umap;
6372 	isl_space *space;
6373 	isl_union_set *uset;
6374 	isl_union_map *umap1, *umap2;
6375 
6376 	if (!sc)
6377 		return NULL;
6378 
6379 	umap = isl_union_map_from_map(isl_map_copy(edge->map));
6380 	umap = isl_union_map_apply_domain(umap,
6381 				isl_union_map_copy(cluster_map));
6382 	umap = isl_union_map_apply_range(umap,
6383 				isl_union_map_copy(cluster_map));
6384 	sc = add_non_conditional_constraints(edge, umap, sc);
6385 	isl_union_map_free(umap);
6386 
6387 	if (!sc || (!is_condition(edge) && !is_conditional_validity(edge)))
6388 		return sc;
6389 
6390 	space = isl_space_domain(isl_map_get_space(edge->map));
6391 	uset = isl_union_set_from_set(isl_set_universe(space));
6392 	umap1 = isl_union_map_copy(cluster_map);
6393 	umap1 = isl_union_map_intersect_domain(umap1, uset);
6394 	space = isl_space_range(isl_map_get_space(edge->map));
6395 	uset = isl_union_set_from_set(isl_set_universe(space));
6396 	umap2 = isl_union_map_copy(cluster_map);
6397 	umap2 = isl_union_map_intersect_domain(umap2, uset);
6398 	umap = isl_union_map_product(umap1, umap2);
6399 
6400 	sc = add_conditional_constraints(edge, umap, sc);
6401 
6402 	isl_union_map_free(umap);
6403 	return sc;
6404 }
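
/* Editorial sketch (not part of isl): for the non-tagged constraints,
 * collect_edge_constraints() above simply maps both sides of the
 * dependence into cluster instances.  A stand-alone version on the
 * public API, with made-up relation strings, looks as follows.
 */
static __isl_give isl_union_map *example_cluster_dependence(isl_ctx *ctx)
{
	isl_union_map *dep, *cluster_map;

	/* Original dependence between statement instances. */
	dep = isl_union_map_read_from_str(ctx,
	    "{ S[i] -> T[i + 1] : 0 <= i < 99 }");
	/* Current-band schedules of the two clusters involved. */
	cluster_map = isl_union_map_read_from_str(ctx,
	    "{ S[i] -> cluster_0[i]; T[i] -> cluster_1[i] }");

	dep = isl_union_map_apply_domain(dep,
				isl_union_map_copy(cluster_map));
	dep = isl_union_map_apply_range(dep, cluster_map);

	/* "dep" is now { cluster_0[i] -> cluster_1[i + 1] : 0 <= i < 99 }. */
	return dep;
}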
6405 
6406 /* Given a mapping "cluster_map" from the original instances to
6407  * the cluster instances, add schedule constraints on the clusters
6408  * to "sc" corresponding to all edges in "graph" between nodes that
6409  * belong to SCCs that are marked for merging in "scc_in_merge".
6410  */
6411 static __isl_give isl_schedule_constraints *collect_constraints(
6412 	struct isl_sched_graph *graph, int *scc_in_merge,
6413 	__isl_keep isl_union_map *cluster_map,
6414 	__isl_take isl_schedule_constraints *sc)
6415 {
6416 	int i;
6417 
6418 	for (i = 0; i < graph->n_edge; ++i) {
6419 		struct isl_sched_edge *edge = &graph->edge[i];
6420 
6421 		if (!scc_in_merge[edge->src->scc])
6422 			continue;
6423 		if (!scc_in_merge[edge->dst->scc])
6424 			continue;
6425 		sc = collect_edge_constraints(edge, cluster_map, sc);
6426 	}
6427 
6428 	return sc;
6429 }
6430 
6431 /* Construct a dependence graph for scheduling clusters with respect
6432  * to each other and store the result in "merge_graph".
6433  * In particular, the nodes of the graph correspond to the schedule
6434  * dimensions of the current bands of those clusters that have been
6435  * marked for merging in "c".
6436  *
6437  * First construct an isl_schedule_constraints object for this domain
6438  * by transforming the edges in "graph" to the domain.
6439  * Then initialize a dependence graph for scheduling from these
6440  * constraints.
6441  */
6442 static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
6443 	struct isl_clustering *c, struct isl_sched_graph *merge_graph)
6444 {
6445 	isl_union_set *domain;
6446 	isl_union_map *cluster_map;
6447 	isl_schedule_constraints *sc;
6448 	isl_stat r;
6449 
6450 	domain = collect_domain(ctx, graph, c);
6451 	sc = isl_schedule_constraints_on_domain(domain);
6452 	if (!sc)
6453 		return isl_stat_error;
6454 	cluster_map = collect_cluster_map(ctx, graph, c);
6455 	sc = collect_constraints(graph, c->scc_in_merge, cluster_map, sc);
6456 	isl_union_map_free(cluster_map);
6457 
6458 	r = graph_init(merge_graph, sc);
6459 
6460 	isl_schedule_constraints_free(sc);
6461 
6462 	return r;
6463 }
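
/* Editorial sketch (not part of isl): init_merge_graph() above performs,
 * on internal data structures, what the following public-API fragment
 * does end to end: build schedule constraints over a (cluster) domain
 * and compute a schedule for it.  The domain and dependence strings are
 * made up for illustration.
 */
static __isl_give isl_schedule *example_schedule_clusters(isl_ctx *ctx)
{
	isl_union_set *domain;
	isl_union_map *dep;
	isl_schedule_constraints *sc;

	domain = isl_union_set_read_from_str(ctx,
	    "{ cluster_0[i] : 0 <= i < 100; cluster_1[i] : 0 <= i < 100 }");
	dep = isl_union_map_read_from_str(ctx,
	    "{ cluster_0[i] -> cluster_1[i] }");

	sc = isl_schedule_constraints_on_domain(domain);
	sc = isl_schedule_constraints_set_validity(sc,
					isl_union_map_copy(dep));
	sc = isl_schedule_constraints_set_proximity(sc, dep);

	return isl_schedule_constraints_compute_schedule(sc);
}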
6464 
6465 /* Compute the maximal number of remaining schedule rows that still need
6466  * to be computed for the nodes that belong to clusters with the maximal
6467  * dimension for the current band (i.e., the band that is to be merged).
6468  * Only clusters that are about to be merged are considered.
6469  * "maxvar" is the maximal dimension for the current band.
6470  * "c" contains information about the clusters.
6471  *
6472  * Return the maximal number of remaining schedule rows or -1 on error.
6473  */
6474 static int compute_maxvar_max_slack(int maxvar, struct isl_clustering *c)
6475 {
6476 	int i, j;
6477 	int max_slack;
6478 
6479 	max_slack = 0;
6480 	for (i = 0; i < c->n; ++i) {
6481 		int nvar;
6482 		struct isl_sched_graph *scc;
6483 
6484 		if (!c->scc_in_merge[i])
6485 			continue;
6486 		scc = &c->scc[i];
6487 		nvar = scc->n_total_row - scc->band_start;
6488 		if (nvar != maxvar)
6489 			continue;
6490 		for (j = 0; j < scc->n; ++j) {
6491 			struct isl_sched_node *node = &scc->node[j];
6492 			int slack;
6493 
6494 			if (node_update_vmap(node) < 0)
6495 				return -1;
6496 			slack = node->nvar - node->rank;
6497 			if (slack > max_slack)
6498 				max_slack = slack;
6499 		}
6500 	}
6501 
6502 	return max_slack;
6503 }
6504 
6505 /* If there are any clusters where the dimension of the current band
6506  * (i.e., the band that is to be merged) is smaller than "maxvar" and
6507  * if there are any nodes in such a cluster where the number
6508  * of remaining schedule rows that still need to be computed
6509  * is greater than "max_slack", then return the smallest current band
6510  * dimension of all these clusters.  Otherwise return the original value
6511  * of "maxvar".  Return -1 in case of any error.
6512  * Only clusters that are about to be merged are considered.
6513  * "c" contains information about the clusters.
6514  */
6515 static int limit_maxvar_to_slack(int maxvar, int max_slack,
6516 	struct isl_clustering *c)
6517 {
6518 	int i, j;
6519 
6520 	for (i = 0; i < c->n; ++i) {
6521 		int nvar;
6522 		struct isl_sched_graph *scc;
6523 
6524 		if (!c->scc_in_merge[i])
6525 			continue;
6526 		scc = &c->scc[i];
6527 		nvar = scc->n_total_row - scc->band_start;
6528 		if (nvar >= maxvar)
6529 			continue;
6530 		for (j = 0; j < scc->n; ++j) {
6531 			struct isl_sched_node *node = &scc->node[j];
6532 			int slack;
6533 
6534 			if (node_update_vmap(node) < 0)
6535 				return -1;
6536 			slack = node->nvar - node->rank;
6537 			if (slack > max_slack) {
6538 				maxvar = nvar;
6539 				break;
6540 			}
6541 		}
6542 	}
6543 
6544 	return maxvar;
6545 }
6546 
6547 /* Adjust merge_graph->maxvar based on the number of remaining schedule rows
6548  * that still need to be computed.  In particular, if there is a node
6549  * in a cluster where the dimension of the current band is smaller
6550  * than merge_graph->maxvar, but the number of remaining schedule rows
6551  * is greater than that of any node in a cluster with the maximal
6552  * dimension for the current band (i.e., merge_graph->maxvar),
6553  * then adjust merge_graph->maxvar to the (smallest) current band dimension
6554  * of those clusters.  Without this adjustment, the total number of
6555  * schedule dimensions would be increased, resulting in a skewed view
6556  * of the number of coincident dimensions.
6557  * "c" contains information about the clusters.
6558  *
6559  * If the maximize_band_depth option is set and merge_graph->maxvar is reduced,
6560  * then there is no point in attempting any merge since it will be rejected
6561  * anyway.  Set merge_graph->maxvar to zero in such cases.
6562  */
6563 static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx,
6564 	struct isl_sched_graph *merge_graph, struct isl_clustering *c)
6565 {
6566 	int max_slack, maxvar;
6567 
6568 	max_slack = compute_maxvar_max_slack(merge_graph->maxvar, c);
6569 	if (max_slack < 0)
6570 		return isl_stat_error;
6571 	maxvar = limit_maxvar_to_slack(merge_graph->maxvar, max_slack, c);
6572 	if (maxvar < 0)
6573 		return isl_stat_error;
6574 
6575 	if (maxvar < merge_graph->maxvar) {
6576 		if (isl_options_get_schedule_maximize_band_depth(ctx))
6577 			merge_graph->maxvar = 0;
6578 		else
6579 			merge_graph->maxvar = maxvar;
6580 	}
6581 
6582 	return isl_stat_ok;
6583 }
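
/* Editorial note: a worked example of the adjustment above, with made-up
 * numbers.  Suppose merge_graph->maxvar is 3 because the largest current
 * band among the merged clusters has 3 rows, and the nodes in those
 * maximal clusters still need at most 1 more schedule row (max_slack = 1).
 * If another cluster in the merge has a current band of only 2 rows but
 * contains a node that still needs 2 more rows, then limit_maxvar_to_slack()
 * lowers maxvar to 2, so that merging does not force more total schedule
 * dimensions than the clusters would need on their own.
 */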
6584 
6585 /* Return the number of coincident dimensions in the current band of "graph",
6586  * where the nodes of "graph" are assumed to be scheduled by a single band.
6587  */
6588 static int get_n_coincident(struct isl_sched_graph *graph)
6589 {
6590 	int i;
6591 
6592 	for (i = graph->band_start; i < graph->n_total_row; ++i)
6593 		if (!graph->node[0].coincident[i])
6594 			break;
6595 
6596 	return i - graph->band_start;
6597 }
6598 
6599 /* Should the clusters be merged based on the cluster schedule
6600  * in the current (and only) band of "merge_graph", given that
6601  * coincidence should be maximized?
6602  *
6603  * If the number of coincident schedule dimensions in the merged band
6604  * would be less than the maximal number of coincident schedule dimensions
6605  * in any of the merged clusters, then the clusters should not be merged.
6606  */
6607 static isl_bool ok_to_merge_coincident(struct isl_clustering *c,
6608 	struct isl_sched_graph *merge_graph)
6609 {
6610 	int i;
6611 	int n_coincident;
6612 	int max_coincident;
6613 
6614 	max_coincident = 0;
6615 	for (i = 0; i < c->n; ++i) {
6616 		if (!c->scc_in_merge[i])
6617 			continue;
6618 		n_coincident = get_n_coincident(&c->scc[i]);
6619 		if (n_coincident > max_coincident)
6620 			max_coincident = n_coincident;
6621 	}
6622 
6623 	n_coincident = get_n_coincident(merge_graph);
6624 
6625 	return isl_bool_ok(n_coincident >= max_coincident);
6626 }
6627 
6628 /* Return the transformation on "node" expressed by the current (and only)
6629  * band of "merge_graph" applied to the clusters in "c".
6630  *
6631  * First find the representation of "node" in its SCC in "c" and
6632  * extract the transformation expressed by the current band.
6633  * Then extract the transformation applied by "merge_graph"
6634  * to the cluster to which this SCC belongs.
6635  * Combine the two to obtain the complete transformation on the node.
6636  *
6637  * Note that the range of the first transformation is an anonymous space,
6638  * while the domain of the second is named "cluster_X".  The range
6639  * of the former therefore needs to be adjusted before the two
6640  * can be combined.
6641  */
6642 static __isl_give isl_map *extract_node_transformation(isl_ctx *ctx,
6643 	struct isl_sched_node *node, struct isl_clustering *c,
6644 	struct isl_sched_graph *merge_graph)
6645 {
6646 	struct isl_sched_node *scc_node, *cluster_node;
6647 	int start, n;
6648 	isl_id *id;
6649 	isl_space *space;
6650 	isl_multi_aff *ma, *ma2;
6651 
6652 	scc_node = graph_find_node(ctx, &c->scc[node->scc], node->space);
6653 	if (scc_node && !is_node(&c->scc[node->scc], scc_node))
6654 		isl_die(ctx, isl_error_internal, "unable to find node",
6655 			return NULL);
6656 	start = c->scc[node->scc].band_start;
6657 	n = c->scc[node->scc].n_total_row - start;
6658 	ma = node_extract_partial_schedule_multi_aff(scc_node, start, n);
6659 	space = cluster_space(&c->scc[node->scc], c->scc_cluster[node->scc]);
6660 	cluster_node = graph_find_node(ctx, merge_graph, space);
6661 	if (cluster_node && !is_node(merge_graph, cluster_node))
6662 		isl_die(ctx, isl_error_internal, "unable to find cluster",
6663 			space = isl_space_free(space));
6664 	id = isl_space_get_tuple_id(space, isl_dim_set);
6665 	ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, id);
6666 	isl_space_free(space);
6667 	n = merge_graph->n_total_row;
6668 	ma2 = node_extract_partial_schedule_multi_aff(cluster_node, 0, n);
6669 	ma = isl_multi_aff_pullback_multi_aff(ma2, ma);
6670 
6671 	return isl_map_from_multi_aff(ma);
6672 }
6673 
6674 /* Given a set of distances "set", are they bounded by a small constant
6675  * in direction "pos"?
6676  * In practice, check if they are bounded by 2 by checking that there
6677  * are no elements with a value greater than or equal to 3 or
6678  * smaller than or equal to -3.
6679  */
6680 static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos)
6681 {
6682 	isl_bool bounded;
6683 	isl_set *test;
6684 
6685 	if (!set)
6686 		return isl_bool_error;
6687 
6688 	test = isl_set_copy(set);
6689 	test = isl_set_lower_bound_si(test, isl_dim_set, pos, 3);
6690 	bounded = isl_set_is_empty(test);
6691 	isl_set_free(test);
6692 
6693 	if (bounded < 0 || !bounded)
6694 		return bounded;
6695 
6696 	test = isl_set_copy(set);
6697 	test = isl_set_upper_bound_si(test, isl_dim_set, pos, -3);
6698 	bounded = isl_set_is_empty(test);
6699 	isl_set_free(test);
6700 
6701 	return bounded;
6702 }
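
/* Editorial sketch (not part of isl): the bound check above applied to a
 * small hand-written distance set with the public API.  The set below only
 * contains the distances -1, 0 and 1 in dimension 0, so the check succeeds;
 * a set such as "{ [i] }" (unbounded) would make it fail.  The helper name
 * is made up for illustration.
 */
static isl_bool example_distance_is_bounded(isl_ctx *ctx)
{
	isl_bool bounded;
	isl_set *dist, *test;

	dist = isl_set_read_from_str(ctx, "{ [i] : -1 <= i <= 1 }");

	/* No distance greater than or equal to 3 ... */
	test = isl_set_lower_bound_si(isl_set_copy(dist), isl_dim_set, 0, 3);
	bounded = isl_set_is_empty(test);
	isl_set_free(test);

	/* ... and no distance smaller than or equal to -3. */
	if (bounded == isl_bool_true) {
		test = isl_set_upper_bound_si(isl_set_copy(dist),
						isl_dim_set, 0, -3);
		bounded = isl_set_is_empty(test);
		isl_set_free(test);
	}

	isl_set_free(dist);
	return bounded;
}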
6703 
6704 /* Does the set "set" have a fixed (but possibly parametric) value
6705  * at dimension "pos"?
6706  */
6707 static isl_bool has_single_value(__isl_keep isl_set *set, int pos)
6708 {
6709 	isl_size n;
6710 	isl_bool single;
6711 
6712 	n = isl_set_dim(set, isl_dim_set);
6713 	if (n < 0)
6714 		return isl_bool_error;
6715 	set = isl_set_copy(set);
6716 	set = isl_set_project_out(set, isl_dim_set, pos + 1, n - (pos + 1));
6717 	set = isl_set_project_out(set, isl_dim_set, 0, pos);
6718 	single = isl_set_is_singleton(set);
6719 	isl_set_free(set);
6720 
6721 	return single;
6722 }
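
/* Editorial sketch (not part of isl): the single-value test above on a
 * hand-written set, using the public API.  In the set below, dimension 1
 * is fixed to the parameter n, so projecting out the other dimension
 * leaves a (parametric) singleton and the test succeeds; dimension 0
 * would not pass.  The set string and helper name are made up.
 */
static isl_bool example_has_single_value(isl_ctx *ctx)
{
	isl_set *set;
	isl_bool single;

	set = isl_set_read_from_str(ctx,
	    "[n] -> { [i, j] : 0 <= i < 10 and j = n }");
	/* Keep only dimension 1 and check whether it is a single point. */
	set = isl_set_project_out(set, isl_dim_set, 0, 1);
	single = isl_set_is_singleton(set);
	isl_set_free(set);

	return single;
}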
6723 
6724 /* Does "map" have a fixed (but possibly parametric) value
6725  * at dimension "pos" of either its domain or its range?
6726  */
6727 static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos)
6728 {
6729 	isl_set *set;
6730 	isl_bool single;
6731 
6732 	set = isl_map_domain(isl_map_copy(map));
6733 	single = has_single_value(set, pos);
6734 	isl_set_free(set);
6735 
6736 	if (single < 0 || single)
6737 		return single;
6738 
6739 	set = isl_map_range(isl_map_copy(map));
6740 	single = has_single_value(set, pos);
6741 	isl_set_free(set);
6742 
6743 	return single;
6744 }
6745 
6746 /* Does the edge "edge" from "graph" have bounded dependence distances
6747  * in the merged graph "merge_graph" of a selection of clusters in "c"?
6748  *
6749  * Extract the complete transformations of the source and destination
6750  * nodes of the edge, apply them to the edge constraints and
6751  * compute the differences.  Finally, check if these differences are bounded
6752  * in each direction.
6753  *
6754  * If the dimension of the band is greater than the number of
6755  * dimensions that can be expected to be optimized by the edge
6756  * (based on its weight), then also allow the differences to be unbounded
6757  * in the remaining dimensions, but only if either the source or
6758  * the destination has a fixed value in that direction.
6759  * This allows a statement that produces values that are used by
6760  * several instances of another statement to be merged with that
6761  * other statement.
6762  * However, merging such clusters will introduce an inherently
6763  * large proximity distance inside the merged cluster, meaning
6764  * that proximity distances will no longer be optimized in
6765  * subsequent merges.  These merges are therefore only allowed
6766  * after all other possible merges have been tried.
6767  * The first time such a merge is encountered, the weight of the edge
6768  * is replaced by a negative weight.  The second time (i.e., after
6769  * all merges over edges with a non-negative weight have been tried),
6770  * the merge is allowed.
6771  */
6772 static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge,
6773 	struct isl_sched_graph *graph, struct isl_clustering *c,
6774 	struct isl_sched_graph *merge_graph)
6775 {
6776 	int i, n_slack;
6777 	isl_size n;
6778 	isl_bool bounded;
6779 	isl_map *map, *t;
6780 	isl_set *dist;
6781 
6782 	map = isl_map_copy(edge->map);
6783 	t = extract_node_transformation(ctx, edge->src, c, merge_graph);
6784 	map = isl_map_apply_domain(map, t);
6785 	t = extract_node_transformation(ctx, edge->dst, c, merge_graph);
6786 	map = isl_map_apply_range(map, t);
6787 	dist = isl_map_deltas(isl_map_copy(map));
6788 
6789 	bounded = isl_bool_true;
6790 	n = isl_set_dim(dist, isl_dim_set);
6791 	if (n < 0)
6792 		goto error;
6793 	n_slack = n - edge->weight;
6794 	if (edge->weight < 0)
6795 		n_slack -= graph->max_weight + 1;
6796 	for (i = 0; i < n; ++i) {
6797 		isl_bool bounded_i, singular_i;
6798 
6799 		bounded_i = distance_is_bounded(dist, i);
6800 		if (bounded_i < 0)
6801 			goto error;
6802 		if (bounded_i)
6803 			continue;
6804 		if (edge->weight >= 0)
6805 			bounded = isl_bool_false;
6806 		n_slack--;
6807 		if (n_slack < 0)
6808 			break;
6809 		singular_i = has_singular_src_or_dst(map, i);
6810 		if (singular_i < 0)
6811 			goto error;
6812 		if (singular_i)
6813 			continue;
6814 		bounded = isl_bool_false;
6815 		break;
6816 	}
6817 	if (!bounded && i >= n && edge->weight >= 0)
6818 		edge->weight -= graph->max_weight + 1;
6819 	isl_map_free(map);
6820 	isl_set_free(dist);
6821 
6822 	return bounded;
6823 error:
6824 	isl_map_free(map);
6825 	isl_set_free(dist);
6826 	return isl_bool_error;
6827 }
6828 
6829 /* Should the clusters be merged based on the cluster schedule
6830  * in the current (and only) band of "merge_graph"?
6831  * "graph" is the original dependence graph, while "c" records
6832  * which SCCs are involved in the latest merge.
6833  *
6834  * In particular, is there at least one proximity constraint
6835  * that is optimized by the merge?
6836  *
6837  * A proximity constraint is considered to be optimized
6838  * if the dependence distances are small.
6839  */
6840 static isl_bool ok_to_merge_proximity(isl_ctx *ctx,
6841 	struct isl_sched_graph *graph, struct isl_clustering *c,
6842 	struct isl_sched_graph *merge_graph)
6843 {
6844 	int i;
6845 
6846 	for (i = 0; i < graph->n_edge; ++i) {
6847 		struct isl_sched_edge *edge = &graph->edge[i];
6848 		isl_bool bounded;
6849 
6850 		if (!is_proximity(edge))
6851 			continue;
6852 		if (!c->scc_in_merge[edge->src->scc])
6853 			continue;
6854 		if (!c->scc_in_merge[edge->dst->scc])
6855 			continue;
6856 		if (c->scc_cluster[edge->dst->scc] ==
6857 		    c->scc_cluster[edge->src->scc])
6858 			continue;
6859 		bounded = has_bounded_distances(ctx, edge, graph, c,
6860 						merge_graph);
6861 		if (bounded < 0 || bounded)
6862 			return bounded;
6863 	}
6864 
6865 	return isl_bool_false;
6866 }
6867 
6868 /* Should the clusters be merged based on the cluster schedule
6869  * in the current (and only) band of "merge_graph"?
6870  * "graph" is the original dependence graph, while "c" records
6871  * which SCCs are involved in the latest merge.
6872  *
6873  * If the current band is empty, then the clusters should not be merged.
6874  *
6875  * If the band depth should be maximized and the merge schedule
6876  * is incomplete (meaning that the dimension of some of the schedule
6877  * bands in the original schedule will be reduced), then the clusters
6878  * should not be merged.
6879  *
6880  * If the schedule_maximize_coincidence option is set, then check that
6881  * the number of coincident schedule dimensions is not reduced.
6882  *
6883  * Finally, only allow the merge if at least one proximity
6884  * constraint is optimized.
6885  */
6886 static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
6887 	struct isl_clustering *c, struct isl_sched_graph *merge_graph)
6888 {
6889 	if (merge_graph->n_total_row == merge_graph->band_start)
6890 		return isl_bool_false;
6891 
6892 	if (isl_options_get_schedule_maximize_band_depth(ctx) &&
6893 	    merge_graph->n_total_row < merge_graph->maxvar)
6894 		return isl_bool_false;
6895 
6896 	if (isl_options_get_schedule_maximize_coincidence(ctx)) {
6897 		isl_bool ok;
6898 
6899 		ok = ok_to_merge_coincident(c, merge_graph);
6900 		if (ok < 0 || !ok)
6901 			return ok;
6902 	}
6903 
6904 	return ok_to_merge_proximity(ctx, graph, c, merge_graph);
6905 }
6906 
6907 /* Apply the schedule in "t_node" to the "n" rows starting at "first"
6908  * of the schedule in "node" and return the result.
6909  *
6910  * That is, essentially compute
6911  *
6912  *	T * N(first:first+n-1)
6913  *
6914  * taking into account the constant term and the parameter coefficients
6915  * in "t_node".
6916  */
6917 static __isl_give isl_mat *node_transformation(isl_ctx *ctx,
6918 	struct isl_sched_node *t_node, struct isl_sched_node *node,
6919 	int first, int n)
6920 {
6921 	int i, j;
6922 	isl_mat *t;
6923 	isl_size n_row, n_col;
6924 	int n_param, n_var;
6925 
6926 	n_param = node->nparam;
6927 	n_var = node->nvar;
6928 	n_row = isl_mat_rows(t_node->sched);
6929 	n_col = isl_mat_cols(node->sched);
6930 	if (n_row < 0 || n_col < 0)
6931 		return NULL;
6932 	t = isl_mat_alloc(ctx, n_row, n_col);
6933 	if (!t)
6934 		return NULL;
6935 	for (i = 0; i < n_row; ++i) {
6936 		isl_seq_cpy(t->row[i], t_node->sched->row[i], 1 + n_param);
6937 		isl_seq_clr(t->row[i] + 1 + n_param, n_var);
6938 		for (j = 0; j < n; ++j)
6939 			isl_seq_addmul(t->row[i],
6940 					t_node->sched->row[i][1 + n_param + j],
6941 					node->sched->row[first + j],
6942 					1 + n_param + n_var);
6943 	}
6944 	return t;
6945 }
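
/* Editorial note: a worked example of the combination above, with made-up
 * numbers.  Suppose "node" has no parameters and the two selected schedule
 * rows, over (1, i, j) (constant term followed by the node variables), are
 *
 *	N(0) = [0 1 0]	(schedule i)
 *	N(1) = [0 0 1]	(schedule j)
 *
 * and the cluster schedule row in "t_node" is [1 2 1] over (1, c0, c1),
 * i.e., 1 + 2 c0 + c1.  The constant (and parameter) part is copied and
 * the variable coefficients 2 and 1 multiply the original rows, giving
 *
 *	[1 0 0] + 2 * N(0) + 1 * N(1) = [1 2 1]
 *
 * over (1, i, j), i.e., the schedule 1 + 2 i + j on the original node.
 */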
6946 
6947 /* Apply the cluster schedule in "t_node" to the current band
6948  * schedule of the nodes in "graph".
6949  *
6950  * In particular, replace the rows starting at band_start
6951  * by the result of applying the cluster schedule in "t_node"
6952  * to the original rows.
6953  *
6954  * The coincidence of the schedule is determined by the coincidence
6955  * of the cluster schedule.
6956  */
6957 static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph,
6958 	struct isl_sched_node *t_node)
6959 {
6960 	int i, j;
6961 	isl_size n_new;
6962 	int start, n;
6963 
6964 	start = graph->band_start;
6965 	n = graph->n_total_row - start;
6966 
6967 	n_new = isl_mat_rows(t_node->sched);
6968 	if (n_new < 0)
6969 		return isl_stat_error;
6970 	for (i = 0; i < graph->n; ++i) {
6971 		struct isl_sched_node *node = &graph->node[i];
6972 		isl_mat *t;
6973 
6974 		t = node_transformation(ctx, t_node, node, start, n);
6975 		node->sched = isl_mat_drop_rows(node->sched, start, n);
6976 		node->sched = isl_mat_concat(node->sched, t);
6977 		node->sched_map = isl_map_free(node->sched_map);
6978 		if (!node->sched)
6979 			return isl_stat_error;
6980 		for (j = 0; j < n_new; ++j)
6981 			node->coincident[start + j] = t_node->coincident[j];
6982 	}
6983 	graph->n_total_row -= n;
6984 	graph->n_row -= n;
6985 	graph->n_total_row += n_new;
6986 	graph->n_row += n_new;
6987 
6988 	return isl_stat_ok;
6989 }
6990 
6991 /* Merge the clusters marked for merging in "c" into a single
6992  * cluster using the cluster schedule in the current band of "merge_graph".
6993  * The representative SCC for the new cluster is the SCC with
6994  * the smallest index.
6995  *
6996  * The current band schedule of each SCC in the new cluster is obtained
6997  * by applying the schedule of the corresponding original cluster
6998  * to the original band schedule.
6999  * All SCCs in the new cluster have the same number of schedule rows.
7000  */
7001 static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c,
7002 	struct isl_sched_graph *merge_graph)
7003 {
7004 	int i;
7005 	int cluster = -1;
7006 	isl_space *space;
7007 
7008 	for (i = 0; i < c->n; ++i) {
7009 		struct isl_sched_node *node;
7010 
7011 		if (!c->scc_in_merge[i])
7012 			continue;
7013 		if (cluster < 0)
7014 			cluster = i;
7015 		space = cluster_space(&c->scc[i], c->scc_cluster[i]);
7016 		node = graph_find_node(ctx, merge_graph, space);
7017 		isl_space_free(space);
7018 		if (!node)
7019 			return isl_stat_error;
7020 		if (!is_node(merge_graph, node))
7021 			isl_die(ctx, isl_error_internal,
7022 				"unable to find cluster",
7023 				return isl_stat_error);
7024 		if (transform(ctx, &c->scc[i], node) < 0)
7025 			return isl_stat_error;
7026 		c->scc_cluster[i] = cluster;
7027 	}
7028 
7029 	return isl_stat_ok;
7030 }
7031 
7032 /* Try and merge the clusters of SCCs marked in c->scc_in_merge
7033  * by scheduling the current cluster bands with respect to each other.
7034  *
7035  * Construct a dependence graph with a space for each cluster and
7036  * with the coordinates of each space corresponding to the schedule
7037  * dimensions of the current band of that cluster.
7038  * Construct a cluster schedule in this cluster dependence graph and
7039  * apply it to the current cluster bands if it is applicable
7040  * according to ok_to_merge.
7041  *
7042  * If the number of remaining schedule dimensions in a cluster
7043  * with a non-maximal current schedule dimension is greater than
7044  * the number of remaining schedule dimensions in clusters
7045  * with a maximal current schedule dimension, then restrict
7046  * the number of rows to be computed in the cluster schedule
7047  * to the minimal such non-maximal current schedule dimension.
7048  * Do this by adjusting merge_graph.maxvar.
7049  *
7050  * Return isl_bool_true if the clusters have effectively been merged
7051  * into a single cluster.
7052  *
7053  * Note that since the standard scheduling algorithm minimizes the maximal
7054  * distance over proximity constraints, the proximity constraints between
7055  * the merged clusters may not be optimized any further than what is
7056  * sufficient to bring the distances within the limits of the internal
7057  * proximity constraints inside the individual clusters.
7058  * It may therefore make sense to perform an additional translation step
7059  * to bring the clusters closer to each other, while maintaining
7060  * the linear part of the merging schedule found using the standard
7061  * scheduling algorithm.
7062  */
7063 static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
7064 	struct isl_clustering *c)
7065 {
7066 	struct isl_sched_graph merge_graph = { 0 };
7067 	isl_bool merged;
7068 
7069 	if (init_merge_graph(ctx, graph, c, &merge_graph) < 0)
7070 		goto error;
7071 
7072 	if (compute_maxvar(&merge_graph) < 0)
7073 		goto error;
7074 	if (adjust_maxvar_to_slack(ctx, &merge_graph, c) < 0)
7075 		goto error;
7076 	if (compute_schedule_wcc_band(ctx, &merge_graph) < 0)
7077 		goto error;
7078 	merged = ok_to_merge(ctx, graph, c, &merge_graph);
7079 	if (merged && merge(ctx, c, &merge_graph) < 0)
7080 		goto error;
7081 
7082 	graph_free(ctx, &merge_graph);
7083 	return merged;
7084 error:
7085 	graph_free(ctx, &merge_graph);
7086 	return isl_bool_error;
7087 }
7088 
7089 /* Is there any edge marked "no_merge" between two SCCs that are
7090  * about to be merged (i.e., that are set in "scc_in_merge")?
7091  * "merge_edge" is the proximity edge along which the clusters of SCCs
7092  * are going to be merged.
7093  *
7094  * If there is any edge between two SCCs with a negative weight,
7095  * while the weight of "merge_edge" is non-negative, then this
7096  * means that the edge was postponed.  "merge_edge" should then
7097  * also be postponed since merging along the edge with negative weight should
7098  * be postponed until all edges with non-negative weight have been tried.
7099  * Replace the weight of "merge_edge" by a negative weight as well and
7100  * tell the caller not to attempt a merge.
7101  */
7102 static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge,
7103 	struct isl_sched_edge *merge_edge)
7104 {
7105 	int i;
7106 
7107 	for (i = 0; i < graph->n_edge; ++i) {
7108 		struct isl_sched_edge *edge = &graph->edge[i];
7109 
7110 		if (!scc_in_merge[edge->src->scc])
7111 			continue;
7112 		if (!scc_in_merge[edge->dst->scc])
7113 			continue;
7114 		if (edge->no_merge)
7115 			return 1;
7116 		if (merge_edge->weight >= 0 && edge->weight < 0) {
7117 			merge_edge->weight -= graph->max_weight + 1;
7118 			return 1;
7119 		}
7120 	}
7121 
7122 	return 0;
7123 }
7124 
7125 /* Merge the two clusters in "c" connected by the edge in "graph"
7126  * with index "edge" into a single cluster.
7127  * If it turns out to be impossible to merge these two clusters,
7128  * then mark the edge as "no_merge" such that it will not be
7129  * considered again.
7130  *
7131  * First mark all SCCs that need to be merged.  This includes the SCCs
7132  * in the two clusters, but it may also include the SCCs
7133  * of intermediate clusters.
7134  * If there is already a no_merge edge between any pair of such SCCs,
7135  * then simply mark the current edge as no_merge as well.
7136  * Likewise, if any of those edges was postponed by has_bounded_distances,
7137  * then postpone the current edge as well.
7138  * Otherwise, try and merge the clusters and mark "edge" as "no_merge"
7139  * if the clusters did not end up getting merged, unless the non-merge
7140  * is due to the fact that the edge was postponed.  This postponement
7141  * can be recognized by a change in weight (from non-negative to negative).
7142  */
7143 static isl_stat merge_clusters_along_edge(isl_ctx *ctx,
7144 	struct isl_sched_graph *graph, int edge, struct isl_clustering *c)
7145 {
7146 	isl_bool merged;
7147 	int edge_weight = graph->edge[edge].weight;
7148 
7149 	if (mark_merge_sccs(ctx, graph, edge, c) < 0)
7150 		return isl_stat_error;
7151 
7152 	if (any_no_merge(graph, c->scc_in_merge, &graph->edge[edge]))
7153 		merged = isl_bool_false;
7154 	else
7155 		merged = try_merge(ctx, graph, c);
7156 	if (merged < 0)
7157 		return isl_stat_error;
7158 	if (!merged && edge_weight == graph->edge[edge].weight)
7159 		graph->edge[edge].no_merge = 1;
7160 
7161 	return isl_stat_ok;
7162 }
7163 
7164 /* Does "node" belong to the cluster identified by "cluster"?
7165  */
7166 static int node_cluster_exactly(struct isl_sched_node *node, int cluster)
7167 {
7168 	return node->cluster == cluster;
7169 }
7170 
7171 /* Does "edge" connect two nodes belonging to the cluster
7172  * identified by "cluster"?
7173  */
7174 static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster)
7175 {
7176 	return edge->src->cluster == cluster && edge->dst->cluster == cluster;
7177 }
7178 
7179 /* Swap the schedule of "node1" and "node2".
7180  * Both nodes have been derived from the same node in a common parent graph.
7181  * Since the "coincident" field is shared with that node
7182  * in the parent graph, there is no need to also swap this field.
7183  */
7184 static void swap_sched(struct isl_sched_node *node1,
7185 	struct isl_sched_node *node2)
7186 {
7187 	isl_mat *sched;
7188 	isl_map *sched_map;
7189 
7190 	sched = node1->sched;
7191 	node1->sched = node2->sched;
7192 	node2->sched = sched;
7193 
7194 	sched_map = node1->sched_map;
7195 	node1->sched_map = node2->sched_map;
7196 	node2->sched_map = sched_map;
7197 }
7198 
7199 /* Copy the current band schedule from the SCCs that form the cluster
7200  * with index "pos" to the actual cluster at position "pos".
7201  * By construction, the index of the first SCC that belongs to the cluster
7202  * is also "pos".
7203  *
7204  * The order of the nodes inside both the SCCs and the cluster
7205  * is assumed to be the same as the order in the original "graph".
7206  *
7207  * Since the SCC graphs will no longer be used after this function,
7208  * the schedules are actually swapped rather than copied.
7209  */
7210 static isl_stat copy_partial(struct isl_sched_graph *graph,
7211 	struct isl_clustering *c, int pos)
7212 {
7213 	int i, j;
7214 
7215 	c->cluster[pos].n_total_row = c->scc[pos].n_total_row;
7216 	c->cluster[pos].n_row = c->scc[pos].n_row;
7217 	c->cluster[pos].maxvar = c->scc[pos].maxvar;
7218 	j = 0;
7219 	for (i = 0; i < graph->n; ++i) {
7220 		int k;
7221 		int s;
7222 
7223 		if (graph->node[i].cluster != pos)
7224 			continue;
7225 		s = graph->node[i].scc;
7226 		k = c->scc_node[s]++;
7227 		swap_sched(&c->cluster[pos].node[j], &c->scc[s].node[k]);
7228 		if (c->scc[s].maxvar > c->cluster[pos].maxvar)
7229 			c->cluster[pos].maxvar = c->scc[s].maxvar;
7230 		++j;
7231 	}
7232 
7233 	return isl_stat_ok;
7234 }
7235 
7236 /* Is there a (conditional) validity dependence from node[j] to node[i],
7237  * forcing node[i] to follow node[j] or do the nodes belong to the same
7238  * cluster?
7239  */
7240 static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user)
7241 {
7242 	struct isl_sched_graph *graph = user;
7243 
7244 	if (graph->node[i].cluster == graph->node[j].cluster)
7245 		return isl_bool_true;
7246 	return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
7247 }
7248 
7249 /* Extract the merged clusters of SCCs in "graph", sort them, and
7250  * store them in c->cluster.  Update c->scc_cluster accordingly.
7251  *
7252  * First keep track of the cluster containing the SCC to which a node
7253  * belongs in the node itself.
7254  * Then extract the clusters into c->cluster, copying the current
7255  * band schedule from the SCCs that belong to the cluster.
7256  * Do this only once per cluster.
7257  *
7258  * Finally, topologically sort the clusters and update c->scc_cluster
7259  * to match the new scc numbering.  While the SCCs were originally
7260  * sorted already, some SCCs that depend on some other SCCs may
7261  * have been merged with SCCs that appear before these other SCCs.
7262  * A reordering may therefore be required.
7263  */
7264 static isl_stat extract_clusters(isl_ctx *ctx, struct isl_sched_graph *graph,
7265 	struct isl_clustering *c)
7266 {
7267 	int i;
7268 
7269 	for (i = 0; i < graph->n; ++i)
7270 		graph->node[i].cluster = c->scc_cluster[graph->node[i].scc];
7271 
7272 	for (i = 0; i < graph->scc; ++i) {
7273 		if (c->scc_cluster[i] != i)
7274 			continue;
7275 		if (extract_sub_graph(ctx, graph, &node_cluster_exactly,
7276 				&edge_cluster_exactly, i, &c->cluster[i]) < 0)
7277 			return isl_stat_error;
7278 		c->cluster[i].src_scc = -1;
7279 		c->cluster[i].dst_scc = -1;
7280 		if (copy_partial(graph, c, i) < 0)
7281 			return isl_stat_error;
7282 	}
7283 
7284 	if (detect_ccs(ctx, graph, &node_follows_strong_or_same_cluster) < 0)
7285 		return isl_stat_error;
7286 	for (i = 0; i < graph->n; ++i)
7287 		c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster;
7288 
7289 	return isl_stat_ok;
7290 }
7291 
7292 /* Compute weights on the proximity edges of "graph" that can
7293  * be used by find_proximity to find the most appropriate
7294  * proximity edge to use to merge two clusters in "c".
7295  * The weights are also used by has_bounded_distances to determine
7296  * whether the merge should be allowed.
7297  * Store the maximum of the computed weights in graph->max_weight.
7298  *
7299  * The computed weight is a measure for the number of remaining schedule
7300  * dimensions that can still be completely aligned.
7301  * In particular, compute the number of equalities between
7302  * input dimensions and output dimensions in the proximity constraints.
7303  * The directions that are already handled by outer schedule bands
7304  * are projected out prior to determining this number.
7305  *
7306  * Edges that will never be considered by find_proximity are ignored.
7307  */
7308 static isl_stat compute_weights(struct isl_sched_graph *graph,
7309 	struct isl_clustering *c)
7310 {
7311 	int i;
7312 
7313 	graph->max_weight = 0;
7314 
7315 	for (i = 0; i < graph->n_edge; ++i) {
7316 		struct isl_sched_edge *edge = &graph->edge[i];
7317 		struct isl_sched_node *src = edge->src;
7318 		struct isl_sched_node *dst = edge->dst;
7319 		isl_basic_map *hull;
7320 		isl_bool prox;
7321 		isl_size n_in, n_out;
7322 
7323 		prox = is_non_empty_proximity(edge);
7324 		if (prox < 0)
7325 			return isl_stat_error;
7326 		if (!prox)
7327 			continue;
7328 		if (bad_cluster(&c->scc[edge->src->scc]) ||
7329 		    bad_cluster(&c->scc[edge->dst->scc]))
7330 			continue;
7331 		if (c->scc_cluster[edge->dst->scc] ==
7332 		    c->scc_cluster[edge->src->scc])
7333 			continue;
7334 
7335 		hull = isl_map_affine_hull(isl_map_copy(edge->map));
7336 		hull = isl_basic_map_transform_dims(hull, isl_dim_in, 0,
7337 						    isl_mat_copy(src->vmap));
7338 		hull = isl_basic_map_transform_dims(hull, isl_dim_out, 0,
7339 						    isl_mat_copy(dst->vmap));
7340 		hull = isl_basic_map_project_out(hull,
7341 						isl_dim_in, 0, src->rank);
7342 		hull = isl_basic_map_project_out(hull,
7343 						isl_dim_out, 0, dst->rank);
7344 		hull = isl_basic_map_remove_divs(hull);
7345 		n_in = isl_basic_map_dim(hull, isl_dim_in);
7346 		n_out = isl_basic_map_dim(hull, isl_dim_out);
7347 		if (n_in < 0 || n_out < 0)
7348 			hull = isl_basic_map_free(hull);
7349 		hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
7350 							isl_dim_in, 0, n_in);
7351 		hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
7352 							isl_dim_out, 0, n_out);
7353 		if (!hull)
7354 			return isl_stat_error;
7355 		edge->weight = isl_basic_map_n_equality(hull);
7356 		isl_basic_map_free(hull);
7357 
7358 		if (edge->weight > graph->max_weight)
7359 			graph->max_weight = edge->weight;
7360 	}
7361 
7362 	return isl_stat_ok;
7363 }
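
/* Editorial sketch (not part of isl): the core of the weight computation
 * above is counting equalities in the affine hull of a proximity relation.
 * On the public API, with a made-up relation, this looks as follows; the
 * relation forces out0 = in0 + 1 and out1 = in1, so the returned weight
 * is 2.  The full computation above additionally changes basis with the
 * node "vmap"s and projects out directions handled by outer bands.
 */
static int example_proximity_weight(isl_ctx *ctx)
{
	isl_map *map;
	isl_basic_map *hull;
	int weight;

	map = isl_map_read_from_str(ctx,
	    "{ S[i, j] -> T[i + 1, j] : 0 <= i < 100 and 0 <= j < 100 }");
	hull = isl_map_affine_hull(map);
	weight = isl_basic_map_n_equality(hull);
	isl_basic_map_free(hull);

	return weight;
}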
7364 
7365 /* Call compute_schedule_finish_band on each of the clusters in "c"
7366  * in their topological order.  This order is determined by the scc
7367  * fields of the nodes in "graph".
7368  * Combine the results in a sequence expressing the topological order.
7369  *
7370  * If there is only one cluster left, then there is no need to introduce
7371  * a sequence node.  Also, in this case, the cluster necessarily contains
7372  * the SCC at position 0 in the original graph and is therefore also
7373  * stored in the first cluster of "c".
7374  */
7375 static __isl_give isl_schedule_node *finish_bands_clustering(
7376 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
7377 	struct isl_clustering *c)
7378 {
7379 	int i;
7380 	isl_ctx *ctx;
7381 	isl_union_set_list *filters;
7382 
7383 	if (graph->scc == 1)
7384 		return compute_schedule_finish_band(node, &c->cluster[0], 0);
7385 
7386 	ctx = isl_schedule_node_get_ctx(node);
7387 
7388 	filters = extract_sccs(ctx, graph);
7389 	node = isl_schedule_node_insert_sequence(node, filters);
7390 
7391 	for (i = 0; i < graph->scc; ++i) {
7392 		int j = c->scc_cluster[i];
7393 		node = isl_schedule_node_child(node, i);
7394 		node = isl_schedule_node_child(node, 0);
7395 		node = compute_schedule_finish_band(node, &c->cluster[j], 0);
7396 		node = isl_schedule_node_parent(node);
7397 		node = isl_schedule_node_parent(node);
7398 	}
7399 
7400 	return node;
7401 }
7402 
7403 /* Compute a schedule for a connected dependence graph by first considering
7404  * each strongly connected component (SCC) in the graph separately and then
7405  * incrementally combining them into clusters.
7406  * Return the updated schedule node.
7407  *
7408  * Initially, each cluster consists of a single SCC, each with its
7409  * own band schedule.  The algorithm then tries to merge pairs
7410  * of clusters along a proximity edge until no more suitable
7411  * proximity edges can be found.  During this merging, the schedule
7412  * is maintained in the individual SCCs.
7413  * After the merging is completed, the full resulting clusters
7414  * are extracted and in finish_bands_clustering,
7415  * compute_schedule_finish_band is called on each of them to integrate
7416  * the band into "node" and to continue the computation.
7417  *
7418  * compute_weights initializes the weights that are used by find_proximity.
7419  */
7420 static __isl_give isl_schedule_node *compute_schedule_wcc_clustering(
7421 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
7422 {
7423 	isl_ctx *ctx;
7424 	struct isl_clustering c;
7425 	int i;
7426 
7427 	ctx = isl_schedule_node_get_ctx(node);
7428 
7429 	if (clustering_init(ctx, &c, graph) < 0)
7430 		goto error;
7431 
7432 	if (compute_weights(graph, &c) < 0)
7433 		goto error;
7434 
7435 	for (;;) {
7436 		i = find_proximity(graph, &c);
7437 		if (i < 0)
7438 			goto error;
7439 		if (i >= graph->n_edge)
7440 			break;
7441 		if (merge_clusters_along_edge(ctx, graph, i, &c) < 0)
7442 			goto error;
7443 	}
7444 
7445 	if (extract_clusters(ctx, graph, &c) < 0)
7446 		goto error;
7447 
7448 	node = finish_bands_clustering(node, graph, &c);
7449 
7450 	clustering_free(ctx, &c);
7451 	return node;
7452 error:
7453 	clustering_free(ctx, &c);
7454 	return isl_schedule_node_free(node);
7455 }
7456 
7457 /* Compute a schedule for a connected dependence graph and return
7458  * the updated schedule node.
7459  *
7460  * If Feautrier's algorithm is selected, we first recursively try to satisfy
7461  * as many validity dependences as possible. When all validity dependences
7462  * are satisfied, we extend the schedule to a full-dimensional schedule.
7463  *
7464  * Call compute_schedule_wcc_whole or compute_schedule_wcc_clustering
7465  * depending on whether the user has selected the option to try and
7466  * compute a schedule for the entire (weakly connected) component first.
7467  * If there is only a single strongly connected component (SCC), then
7468  * there is no point in trying to combine SCCs
7469  * in compute_schedule_wcc_clustering, so compute_schedule_wcc_whole
7470  * is called instead.
7471  */
7472 static __isl_give isl_schedule_node *compute_schedule_wcc(
7473 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
7474 {
7475 	isl_ctx *ctx;
7476 
7477 	if (!node)
7478 		return NULL;
7479 
7480 	ctx = isl_schedule_node_get_ctx(node);
7481 	if (detect_sccs(ctx, graph) < 0)
7482 		return isl_schedule_node_free(node);
7483 
7484 	if (compute_maxvar(graph) < 0)
7485 		return isl_schedule_node_free(node);
7486 
7487 	if (need_feautrier_step(ctx, graph))
7488 		return compute_schedule_wcc_feautrier(node, graph);
7489 
7490 	if (graph->scc <= 1 || isl_options_get_schedule_whole_component(ctx))
7491 		return compute_schedule_wcc_whole(node, graph);
7492 	else
7493 		return compute_schedule_wcc_clustering(node, graph);
7494 }
7495 
/* Compute a schedule for each group of nodes identified by node->scc
 * separately and then combine them in a sequence node (or as a set node
 * if graph->weak is set) inserted at position "node" of the schedule tree.
 * Return the updated schedule node.
 *
 * If "wcc" is set then each of the groups belongs to a single
 * weakly connected component in the dependence graph so that
 * there is no need for compute_sub_schedule to look for weakly
 * connected components.
 *
 * If a set node would be introduced and if the number of components
 * is equal to the number of nodes, then check if the schedule
 * is already complete.  If so, a redundant set node would be introduced
 * (without any further descendants) stating that the statements
 * can be executed in arbitrary order, which is also expressed
 * by the absence of any node.  Refrain from inserting any nodes
 * in this case and simply return.
 */
static __isl_give isl_schedule_node *compute_component_schedule(
	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
	int wcc)
{
	int component;
	isl_ctx *ctx;
	isl_union_set_list *filters;

	if (!node)
		return NULL;

	if (graph->weak && graph->scc == graph->n) {
		if (compute_maxvar(graph) < 0)
			return isl_schedule_node_free(node);
		if (graph->n_row >= graph->maxvar)
			return node;
	}

	ctx = isl_schedule_node_get_ctx(node);
	filters = extract_sccs(ctx, graph);
	if (graph->weak)
		node = isl_schedule_node_insert_set(node, filters);
	else
		node = isl_schedule_node_insert_sequence(node, filters);

	for (component = 0; component < graph->scc; ++component) {
		node = isl_schedule_node_child(node, component);
		node = isl_schedule_node_child(node, 0);
		node = compute_sub_schedule(node, ctx, graph,
				    &node_scc_exactly,
				    &edge_scc_exactly, component, wcc);
		node = isl_schedule_node_parent(node);
		node = isl_schedule_node_parent(node);
	}

	return node;
}

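/* Illustrative sketch, not part of the scheduler: two statements without
 * any dependences between them end up in separate components, so
 * compute_component_schedule above combines their schedules under
 * a set node.  The statement names and the helper are made up for this
 * example; it assumes the public isl/union_set.h and isl/schedule.h
 * interfaces.
 */
static void example_independent_statements(isl_ctx *ctx)
{
	const char *str = "{ A[i] : 0 <= i < 10; B[j] : 0 <= j < 20 }";
	isl_schedule_constraints *sc;
	isl_schedule *schedule;

	sc = isl_schedule_constraints_on_domain(
		isl_union_set_read_from_str(ctx, str));
	schedule = isl_schedule_constraints_compute_schedule(sc);
	/* The printed tree is expected to contain a set node with
	 * one filter per component.
	 */
	isl_schedule_dump(schedule);
	isl_schedule_free(schedule);
}
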
/* Compute a schedule for the given dependence graph and insert it at "node".
 * Return the updated schedule node.
 *
 * We first check if the graph is connected (through validity and conditional
 * validity dependences) and, if not, compute a schedule
 * for each component separately.
 * If the schedule_serialize_sccs option is set, then we check for strongly
 * connected components instead and compute a separate schedule for
 * each such strongly connected component.
 */
static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
	struct isl_sched_graph *graph)
{
	isl_ctx *ctx;

	if (!node)
		return NULL;

	ctx = isl_schedule_node_get_ctx(node);
	if (isl_options_get_schedule_serialize_sccs(ctx)) {
		if (detect_sccs(ctx, graph) < 0)
			return isl_schedule_node_free(node);
	} else {
		if (detect_wccs(ctx, graph) < 0)
			return isl_schedule_node_free(node);
	}

	if (graph->scc > 1)
		return compute_component_schedule(node, graph, 1);

	return compute_schedule_wcc(node, graph);
}

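/* Illustrative sketch, not part of the scheduler: with the
 * schedule_serialize_sccs option set, compute_schedule above detects
 * strongly connected components instead of weakly connected ones, and
 * the components are combined in a sequence node.  The statement names
 * and the helper are made up for this example.
 */
static void example_serialize_sccs(isl_ctx *ctx)
{
	const char *domain = "{ S[i] : 0 <= i < 100; T[i] : 0 <= i < 100 }";
	const char *dep = "{ S[i] -> T[i] }";
	isl_schedule_constraints *sc;
	isl_schedule *schedule;

	isl_options_set_schedule_serialize_sccs(ctx, 1);

	sc = isl_schedule_constraints_on_domain(
		isl_union_set_read_from_str(ctx, domain));
	sc = isl_schedule_constraints_set_validity(sc,
		isl_union_map_read_from_str(ctx, dep));
	schedule = isl_schedule_constraints_compute_schedule(sc);
	/* S and T form two SCCs, so the tree is expected to start with
	 * a sequence node scheduling all of S before all of T.
	 */
	isl_schedule_dump(schedule);
	isl_schedule_free(schedule);
}
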
/* Compute a schedule on sc->domain that respects the given schedule
 * constraints.
 *
 * In particular, the schedule respects all the validity dependences.
 * If the default isl scheduling algorithm is used, it tries to minimize
 * the dependence distances over the proximity dependences.
 * If Feautrier's scheduling algorithm is used, the proximity dependence
 * distances are only minimized during the extension to a full-dimensional
 * schedule.
 *
 * If there are any condition and conditional validity dependences,
 * then the conditional validity dependences may be violated inside
 * a tilable band, provided they have no adjacent non-local
 * condition dependences.
 */
__isl_give isl_schedule *isl_schedule_constraints_compute_schedule(
	__isl_take isl_schedule_constraints *sc)
{
	isl_ctx *ctx = isl_schedule_constraints_get_ctx(sc);
	struct isl_sched_graph graph = { 0 };
	isl_schedule *sched;
	isl_schedule_node *node;
	isl_union_set *domain;
	isl_size n;

	sc = isl_schedule_constraints_align_params(sc);

	domain = isl_schedule_constraints_get_domain(sc);
	n = isl_union_set_n_set(domain);
	if (n == 0) {
		isl_schedule_constraints_free(sc);
		return isl_schedule_from_domain(domain);
	}

	if (n < 0 || graph_init(&graph, sc) < 0)
		domain = isl_union_set_free(domain);

	node = isl_schedule_node_from_domain(domain);
	node = isl_schedule_node_child(node, 0);
	if (graph.n > 0)
		node = compute_schedule(node, &graph);
	sched = isl_schedule_node_get_schedule(node);
	isl_schedule_node_free(node);

	graph_free(ctx, &graph);
	isl_schedule_constraints_free(sc);

	return sched;
}

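/* Illustrative sketch, not part of the scheduler: a typical call to
 * isl_schedule_constraints_compute_schedule, constructing the constraints
 * from string descriptions and extracting the schedule as a union map.
 * The statement names, the dependences and the helper are made up for
 * this example; it assumes the public isl/union_set.h, isl/union_map.h
 * and isl/schedule.h interfaces.
 */
static __isl_give isl_union_map *example_compute_schedule_map(isl_ctx *ctx)
{
	const char *domain = "{ A[i] : 0 <= i < 100; B[i] : 0 <= i < 100 }";
	const char *deps = "{ A[i] -> B[i] }";
	isl_schedule_constraints *sc;
	isl_schedule *schedule;
	isl_union_map *map;

	sc = isl_schedule_constraints_on_domain(
		isl_union_set_read_from_str(ctx, domain));
	sc = isl_schedule_constraints_set_validity(sc,
		isl_union_map_read_from_str(ctx, deps));
	sc = isl_schedule_constraints_set_proximity(sc,
		isl_union_map_read_from_str(ctx, deps));

	schedule = isl_schedule_constraints_compute_schedule(sc);
	map = isl_schedule_get_map(schedule);
	isl_schedule_free(schedule);

	return map;
}
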
/* Compute a schedule for the given union of domains that respects
 * all the validity dependences and minimizes
 * the dependence distances over the proximity dependences.
 *
 * This function is kept for backward compatibility.
 */
__isl_give isl_schedule *isl_union_set_compute_schedule(
	__isl_take isl_union_set *domain,
	__isl_take isl_union_map *validity,
	__isl_take isl_union_map *proximity)
{
	isl_schedule_constraints *sc;

	sc = isl_schedule_constraints_on_domain(domain);
	sc = isl_schedule_constraints_set_validity(sc, validity);
	sc = isl_schedule_constraints_set_proximity(sc, proximity);

	return isl_schedule_constraints_compute_schedule(sc);
}

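/* Illustrative sketch, not part of the scheduler: a call through the
 * backward-compatibility interface above.  It is equivalent to building
 * the schedule constraints explicitly, so new code would typically call
 * isl_schedule_constraints_compute_schedule directly.  The inputs and
 * the helper are made up for this example.
 */
static __isl_give isl_schedule *example_old_style_schedule(isl_ctx *ctx)
{
	isl_union_set *domain;
	isl_union_map *validity, *proximity;

	domain = isl_union_set_read_from_str(ctx,
		"{ S[i] : 0 <= i < 10 }");
	validity = isl_union_map_read_from_str(ctx,
		"{ S[i] -> S[i + 1] : 0 <= i < 9 }");
	proximity = isl_union_map_copy(validity);

	return isl_union_set_compute_schedule(domain, validity, proximity);
}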