1 /*
2  * Copyright 2012 INRIA Paris-Rocquencourt
3  * Copyright 2012 Ecole Normale Superieure
4  *
5  * Use of this software is governed by the MIT license
6  *
7  * Written by Tobias Grosser, INRIA Paris-Rocquencourt,
8  * Domaine de Voluceau, Rocquenqourt, B.P. 105,
9  * 78153 Le Chesnay Cedex France
10  * and Sven Verdoolaege,
11  * Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
12  */
13 
14 #include <limits.h>
15 #include <stdio.h>
16 #include <string.h>
17 
18 #include <isl/aff.h>
19 #include <isl/ctx.h>
20 #include <isl/flow.h>
21 #include <isl/map.h>
22 #include <isl/ast_build.h>
23 #include <isl/schedule.h>
24 #include <isl/schedule_node.h>
25 #include <pet.h>
26 
27 #include "ppcg.h"
28 #include "ppcg_options.h"
29 #include "cpu.h"
30 #include "print.h"
31 #include "schedule.h"
32 #include "util.h"
33 
34 /* Representation of a statement inside a generated AST.
35  *
36  * "stmt" refers to the original statement.
37  * "ref2expr" maps the reference identifier of each access in
38  * the statement to an AST expression that should be printed
39  * at the place of the access.
40  */
41 struct ppcg_stmt {
42 	struct pet_stmt *stmt;
43 
44 	isl_id_to_ast_expr *ref2expr;
45 };
46 
ppcg_stmt_free(void * user)47 static void ppcg_stmt_free(void *user)
48 {
49 	struct ppcg_stmt *stmt = user;
50 
51 	if (!stmt)
52 		return;
53 
54 	isl_id_to_ast_expr_free(stmt->ref2expr);
55 
56 	free(stmt);
57 }
58 
59 /* Derive the output file name from the input file name.
60  * 'input' is the entire path of the input file. The output
61  * is the file name plus the additional extension.
62  *
63  * We will basically replace everything after the last point
64  * with '.ppcg.c'. This means file.c becomes file.ppcg.c
65  */
get_output_file(const char * input,const char * output)66 static FILE *get_output_file(const char *input, const char *output)
67 {
68 	char name[PATH_MAX];
69 	const char *ext;
70 	const char ppcg_marker[] = ".ppcg";
71 	int len;
72 	FILE *file;
73 
74 	len = ppcg_extract_base_name(name, input);
75 
76 	strcpy(name + len, ppcg_marker);
77 	ext = strrchr(input, '.');
78 	strcpy(name + len + sizeof(ppcg_marker) - 1, ext ? ext : ".c");
79 
80 	if (!output)
81 		output = name;
82 
83 	file = fopen(output, "w");
84 	if (!file) {
85 		fprintf(stderr, "Unable to open '%s' for writing\n", output);
86 		return NULL;
87 	}
88 
89 	return file;
90 }
91 
92 /* Data used to annotate for nodes in the ast.
93  */
94 struct ast_node_userinfo {
95 	/* The for node is an openmp parallel for node. */
96 	int is_openmp;
97 };
98 
99 /* Information used while building the ast.
100  */
101 struct ast_build_userinfo {
102 	/* The current ppcg scop. */
103 	struct ppcg_scop *scop;
104 
105 	/* Are we currently in a parallel for loop? */
106 	int in_parallel_for;
107 };
108 
109 /* Check if the current scheduling dimension is parallel.
110  *
111  * We check for parallelism by verifying that the loop does not carry any
112  * dependences.
113  * If the live_range_reordering option is set, then this currently
114  * includes the order dependences.  In principle, non-zero order dependences
115  * could be allowed, but this would require privatization and/or expansion.
116  *
117  * Parallelism test: if the distance is zero in all outer dimensions, then it
118  * has to be zero in the current dimension as well.
119  * Implementation: first, translate dependences into time space, then force
120  * outer dimensions to be equal.  If the distance is zero in the current
121  * dimension, then the loop is parallel.
122  * The distance is zero in the current dimension if it is a subset of a map
123  * with equal values for the current dimension.
124  */
ast_schedule_dim_is_parallel(__isl_keep isl_ast_build * build,struct ppcg_scop * scop)125 static int ast_schedule_dim_is_parallel(__isl_keep isl_ast_build *build,
126 	struct ppcg_scop *scop)
127 {
128 	isl_union_map *schedule, *deps;
129 	isl_map *schedule_deps, *test;
130 	isl_space *schedule_space;
131 	unsigned i, dimension, is_parallel;
132 
133 	schedule = isl_ast_build_get_schedule(build);
134 	schedule_space = isl_ast_build_get_schedule_space(build);
135 
136 	dimension = isl_space_dim(schedule_space, isl_dim_out) - 1;
137 
138 	deps = isl_union_map_copy(scop->dep_flow);
139 	deps = isl_union_map_union(deps, isl_union_map_copy(scop->dep_false));
140 	if (scop->options->live_range_reordering) {
141 		isl_union_map *order = isl_union_map_copy(scop->dep_order);
142 		deps = isl_union_map_union(deps, order);
143 	}
144 	deps = isl_union_map_apply_range(deps, isl_union_map_copy(schedule));
145 	deps = isl_union_map_apply_domain(deps, schedule);
146 
147 	if (isl_union_map_is_empty(deps)) {
148 		isl_union_map_free(deps);
149 		isl_space_free(schedule_space);
150 		return 1;
151 	}
152 
153 	schedule_deps = isl_map_from_union_map(deps);
154 
155 	for (i = 0; i < dimension; i++)
156 		schedule_deps = isl_map_equate(schedule_deps, isl_dim_out, i,
157 					       isl_dim_in, i);
158 
159 	test = isl_map_universe(isl_map_get_space(schedule_deps));
160 	test = isl_map_equate(test, isl_dim_out, dimension, isl_dim_in,
161 			      dimension);
162 	is_parallel = isl_map_is_subset(schedule_deps, test);
163 
164 	isl_space_free(schedule_space);
165 	isl_map_free(test);
166 	isl_map_free(schedule_deps);
167 
168 	return is_parallel;
169 }
170 
171 /* Mark a for node openmp parallel, if it is the outermost parallel for node.
172  */
mark_openmp_parallel(__isl_keep isl_ast_build * build,struct ast_build_userinfo * build_info,struct ast_node_userinfo * node_info)173 static void mark_openmp_parallel(__isl_keep isl_ast_build *build,
174 	struct ast_build_userinfo *build_info,
175 	struct ast_node_userinfo *node_info)
176 {
177 	if (build_info->in_parallel_for)
178 		return;
179 
180 	if (ast_schedule_dim_is_parallel(build, build_info->scop)) {
181 		build_info->in_parallel_for = 1;
182 		node_info->is_openmp = 1;
183 	}
184 }
185 
186 /* Allocate an ast_node_info structure and initialize it with default values.
187  */
allocate_ast_node_userinfo()188 static struct ast_node_userinfo *allocate_ast_node_userinfo()
189 {
190 	struct ast_node_userinfo *node_info;
191 	node_info = (struct ast_node_userinfo *)
192 		malloc(sizeof(struct ast_node_userinfo));
193 	node_info->is_openmp = 0;
194 	return node_info;
195 }
196 
197 /* Free an ast_node_info structure.
198  */
free_ast_node_userinfo(void * ptr)199 static void free_ast_node_userinfo(void *ptr)
200 {
201 	struct ast_node_userinfo *info;
202 	info = (struct ast_node_userinfo *) ptr;
203 	free(info);
204 }
205 
206 /* This method is executed before the construction of a for node. It creates
207  * an isl_id that is used to annotate the subsequently generated ast for nodes.
208  *
209  * In this function we also run the following analyses:
210  *
211  * 	- Detection of openmp parallel loops
212  */
ast_build_before_for(__isl_keep isl_ast_build * build,void * user)213 static __isl_give isl_id *ast_build_before_for(
214 	__isl_keep isl_ast_build *build, void *user)
215 {
216 	isl_id *id;
217 	struct ast_build_userinfo *build_info;
218 	struct ast_node_userinfo *node_info;
219 
220 	build_info = (struct ast_build_userinfo *) user;
221 	node_info = allocate_ast_node_userinfo();
222 	id = isl_id_alloc(isl_ast_build_get_ctx(build), "", node_info);
223 	id = isl_id_set_free_user(id, free_ast_node_userinfo);
224 
225 	mark_openmp_parallel(build, build_info, node_info);
226 
227 	return id;
228 }
229 
230 /* This method is executed after the construction of a for node.
231  *
232  * It performs the following actions:
233  *
234  * 	- Reset the 'in_parallel_for' flag, as soon as we leave a for node,
235  * 	  that is marked as openmp parallel.
236  *
237  */
ast_build_after_for(__isl_take isl_ast_node * node,__isl_keep isl_ast_build * build,void * user)238 static __isl_give isl_ast_node *ast_build_after_for(
239 	__isl_take isl_ast_node *node, __isl_keep isl_ast_build *build,
240 	void *user)
241 {
242 	isl_id *id;
243 	struct ast_build_userinfo *build_info;
244 	struct ast_node_userinfo *info;
245 
246 	id = isl_ast_node_get_annotation(node);
247 	info = isl_id_get_user(id);
248 
249 	if (info && info->is_openmp) {
250 		build_info = (struct ast_build_userinfo *) user;
251 		build_info->in_parallel_for = 0;
252 	}
253 
254 	isl_id_free(id);
255 
256 	return node;
257 }
258 
259 /* Find the element in scop->stmts that has the given "id".
260  */
find_stmt(struct ppcg_scop * scop,__isl_keep isl_id * id)261 static struct pet_stmt *find_stmt(struct ppcg_scop *scop, __isl_keep isl_id *id)
262 {
263 	int i;
264 
265 	for (i = 0; i < scop->pet->n_stmt; ++i) {
266 		struct pet_stmt *stmt = scop->pet->stmts[i];
267 		isl_id *id_i;
268 
269 		id_i = isl_set_get_tuple_id(stmt->domain);
270 		isl_id_free(id_i);
271 
272 		if (id_i == id)
273 			return stmt;
274 	}
275 
276 	isl_die(isl_id_get_ctx(id), isl_error_internal,
277 		"statement not found", return NULL);
278 }
279 
280 /* Print a user statement in the generated AST.
281  * The ppcg_stmt has been attached to the node in at_each_domain.
282  */
print_user(__isl_take isl_printer * p,__isl_take isl_ast_print_options * print_options,__isl_keep isl_ast_node * node,void * user)283 static __isl_give isl_printer *print_user(__isl_take isl_printer *p,
284 	__isl_take isl_ast_print_options *print_options,
285 	__isl_keep isl_ast_node *node, void *user)
286 {
287 	struct ppcg_stmt *stmt;
288 	isl_id *id;
289 
290 	id = isl_ast_node_get_annotation(node);
291 	stmt = isl_id_get_user(id);
292 	isl_id_free(id);
293 
294 	p = pet_stmt_print_body(stmt->stmt, p, stmt->ref2expr);
295 
296 	isl_ast_print_options_free(print_options);
297 
298 	return p;
299 }
300 
301 
302 /* Print a for loop node as an openmp parallel loop.
303  *
304  * To print an openmp parallel loop we print a normal for loop, but add
305  * "#pragma openmp parallel for" in front.
306  *
307  * Variables that are declared within the body of this for loop are
308  * automatically openmp 'private'. Iterators declared outside of the
309  * for loop are automatically openmp 'shared'. As ppcg declares all iterators
310  * at the position where they are assigned, there is no need to explicitly mark
311  * variables. Their automatically assigned type is already correct.
312  *
313  * This function only generates valid OpenMP code, if the ast was generated
314  * with the 'atomic-bounds' option enabled.
315  *
316  */
print_for_with_openmp(__isl_keep isl_ast_node * node,__isl_take isl_printer * p,__isl_take isl_ast_print_options * print_options)317 static __isl_give isl_printer *print_for_with_openmp(
318 	__isl_keep isl_ast_node *node, __isl_take isl_printer *p,
319 	__isl_take isl_ast_print_options *print_options)
320 {
321 	p = isl_printer_start_line(p);
322 	p = isl_printer_print_str(p, "#pragma omp parallel for");
323 	p = isl_printer_end_line(p);
324 
325 	p = isl_ast_node_for_print(node, p, print_options);
326 
327 	return p;
328 }
329 
330 /* Print a for node.
331  *
332  * Depending on how the node is annotated, we either print a normal
333  * for node or an openmp parallel for node.
334  */
print_for(__isl_take isl_printer * p,__isl_take isl_ast_print_options * print_options,__isl_keep isl_ast_node * node,void * user)335 static __isl_give isl_printer *print_for(__isl_take isl_printer *p,
336 	__isl_take isl_ast_print_options *print_options,
337 	__isl_keep isl_ast_node *node, void *user)
338 {
339 	isl_id *id;
340 	int openmp;
341 
342 	openmp = 0;
343 	id = isl_ast_node_get_annotation(node);
344 
345 	if (id) {
346 		struct ast_node_userinfo *info;
347 
348 		info = (struct ast_node_userinfo *) isl_id_get_user(id);
349 		if (info && info->is_openmp)
350 			openmp = 1;
351 	}
352 
353 	if (openmp)
354 		p = print_for_with_openmp(node, p, print_options);
355 	else
356 		p = isl_ast_node_for_print(node, p, print_options);
357 
358 	isl_id_free(id);
359 
360 	return p;
361 }
362 
363 /* Index transformation callback for pet_stmt_build_ast_exprs.
364  *
365  * "index" expresses the array indices in terms of statement iterators
366  * "iterator_map" expresses the statement iterators in terms of
367  * AST loop iterators.
368  *
369  * The result expresses the array indices in terms of
370  * AST loop iterators.
371  */
pullback_index(__isl_take isl_multi_pw_aff * index,__isl_keep isl_id * id,void * user)372 static __isl_give isl_multi_pw_aff *pullback_index(
373 	__isl_take isl_multi_pw_aff *index, __isl_keep isl_id *id, void *user)
374 {
375 	isl_pw_multi_aff *iterator_map = user;
376 
377 	iterator_map = isl_pw_multi_aff_copy(iterator_map);
378 	return isl_multi_pw_aff_pullback_pw_multi_aff(index, iterator_map);
379 }
380 
381 /* Transform the accesses in the statement associated to the domain
382  * called by "node" to refer to the AST loop iterators, construct
383  * corresponding AST expressions using "build",
384  * collect them in a ppcg_stmt and annotate the node with the ppcg_stmt.
385  */
at_each_domain(__isl_take isl_ast_node * node,__isl_keep isl_ast_build * build,void * user)386 static __isl_give isl_ast_node *at_each_domain(__isl_take isl_ast_node *node,
387 	__isl_keep isl_ast_build *build, void *user)
388 {
389 	struct ppcg_scop *scop = user;
390 	isl_ast_expr *expr, *arg;
391 	isl_ctx *ctx;
392 	isl_id *id;
393 	isl_map *map;
394 	isl_pw_multi_aff *iterator_map;
395 	struct ppcg_stmt *stmt;
396 
397 	ctx = isl_ast_node_get_ctx(node);
398 	stmt = isl_calloc_type(ctx, struct ppcg_stmt);
399 	if (!stmt)
400 		goto error;
401 
402 	expr = isl_ast_node_user_get_expr(node);
403 	arg = isl_ast_expr_get_op_arg(expr, 0);
404 	isl_ast_expr_free(expr);
405 	id = isl_ast_expr_get_id(arg);
406 	isl_ast_expr_free(arg);
407 	stmt->stmt = find_stmt(scop, id);
408 	isl_id_free(id);
409 	if (!stmt->stmt)
410 		goto error;
411 
412 	map = isl_map_from_union_map(isl_ast_build_get_schedule(build));
413 	map = isl_map_reverse(map);
414 	iterator_map = isl_pw_multi_aff_from_map(map);
415 	stmt->ref2expr = pet_stmt_build_ast_exprs(stmt->stmt, build,
416 				    &pullback_index, iterator_map, NULL, NULL);
417 	isl_pw_multi_aff_free(iterator_map);
418 
419 	id = isl_id_alloc(isl_ast_node_get_ctx(node), NULL, stmt);
420 	id = isl_id_set_free_user(id, &ppcg_stmt_free);
421 	return isl_ast_node_set_annotation(node, id);
422 error:
423 	ppcg_stmt_free(stmt);
424 	return isl_ast_node_free(node);
425 }
426 
427 /* Set *depth (initialized to 0 by the caller) to the maximum
428  * of the schedule depths of the leaf nodes for which this function is called.
429  */
update_depth(__isl_keep isl_schedule_node * node,void * user)430 static isl_bool update_depth(__isl_keep isl_schedule_node *node, void *user)
431 {
432 	int *depth = user;
433 	int node_depth;
434 
435 	if (isl_schedule_node_get_type(node) != isl_schedule_node_leaf)
436 		return isl_bool_true;
437 	node_depth = isl_schedule_node_get_schedule_depth(node);
438 	if (node_depth > *depth)
439 		*depth = node_depth;
440 
441 	return isl_bool_false;
442 }
443 
444 /* This function is called for each node in a CPU AST.
445  * In case of a user node, print the macro definitions required
446  * for printing the AST expressions in the annotation, if any.
447  * For other nodes, return true such that descendants are also
448  * visited.
449  *
450  * In particular, print the macro definitions needed for the substitutions
451  * of the original user statements.
452  */
at_node(__isl_keep isl_ast_node * node,void * user)453 static isl_bool at_node(__isl_keep isl_ast_node *node, void *user)
454 {
455 	struct ppcg_stmt *stmt;
456 	isl_id *id;
457 	isl_printer **p = user;
458 
459 	if (isl_ast_node_get_type(node) != isl_ast_node_user)
460 		return isl_bool_true;
461 
462 	id = isl_ast_node_get_annotation(node);
463 	stmt = isl_id_get_user(id);
464 	isl_id_free(id);
465 
466 	if (!stmt)
467 		return isl_bool_error;
468 
469 	*p = ppcg_print_body_macros(*p, stmt->ref2expr);
470 	if (!*p)
471 		return isl_bool_error;
472 
473 	return isl_bool_false;
474 }
475 
476 /* Print the required macros for the CPU AST "node" to "p",
477  * including those needed for the user statements inside the AST.
478  */
cpu_print_macros(__isl_take isl_printer * p,__isl_keep isl_ast_node * node)479 static __isl_give isl_printer *cpu_print_macros(__isl_take isl_printer *p,
480 	__isl_keep isl_ast_node *node)
481 {
482 	if (isl_ast_node_foreach_descendant_top_down(node, &at_node, &p) < 0)
483 		return isl_printer_free(p);
484 	p = ppcg_print_macros(p, node);
485 	return p;
486 }
487 
488 /* Code generate the scop 'scop' using "schedule"
489  * and print the corresponding C code to 'p'.
490  */
print_scop(struct ppcg_scop * scop,__isl_take isl_schedule * schedule,__isl_take isl_printer * p,struct ppcg_options * options)491 static __isl_give isl_printer *print_scop(struct ppcg_scop *scop,
492 	__isl_take isl_schedule *schedule, __isl_take isl_printer *p,
493 	struct ppcg_options *options)
494 {
495 	isl_ctx *ctx = isl_printer_get_ctx(p);
496 	isl_ast_build *build;
497 	isl_ast_print_options *print_options;
498 	isl_ast_node *tree;
499 	isl_id_list *iterators;
500 	struct ast_build_userinfo build_info;
501 	int depth;
502 
503 	depth = 0;
504 	if (isl_schedule_foreach_schedule_node_top_down(schedule, &update_depth,
505 						&depth) < 0)
506 		goto error;
507 
508 	build = isl_ast_build_alloc(ctx);
509 	iterators = ppcg_scop_generate_names(scop, depth, "c");
510 	build = isl_ast_build_set_iterators(build, iterators);
511 	build = isl_ast_build_set_at_each_domain(build, &at_each_domain, scop);
512 
513 	if (options->openmp) {
514 		build_info.scop = scop;
515 		build_info.in_parallel_for = 0;
516 
517 		build = isl_ast_build_set_before_each_for(build,
518 							&ast_build_before_for,
519 							&build_info);
520 		build = isl_ast_build_set_after_each_for(build,
521 							&ast_build_after_for,
522 							&build_info);
523 	}
524 
525 	tree = isl_ast_build_node_from_schedule(build, schedule);
526 	isl_ast_build_free(build);
527 
528 	print_options = isl_ast_print_options_alloc(ctx);
529 	print_options = isl_ast_print_options_set_print_user(print_options,
530 							&print_user, NULL);
531 
532 	print_options = isl_ast_print_options_set_print_for(print_options,
533 							&print_for, NULL);
534 
535 	p = cpu_print_macros(p, tree);
536 	p = isl_ast_node_print(tree, p, print_options);
537 
538 	isl_ast_node_free(tree);
539 
540 	return p;
541 error:
542 	isl_schedule_free(schedule);
543 	isl_printer_free(p);
544 	return NULL;
545 }
546 
547 /* Tile the band node "node" with tile sizes "sizes" and
548  * mark all members of the resulting tile node as "atomic".
549  */
tile(__isl_take isl_schedule_node * node,__isl_take isl_multi_val * sizes)550 static __isl_give isl_schedule_node *tile(__isl_take isl_schedule_node *node,
551 	__isl_take isl_multi_val *sizes)
552 {
553 	node = isl_schedule_node_band_tile(node, sizes);
554 	node = ppcg_set_schedule_node_type(node, isl_ast_loop_atomic);
555 
556 	return node;
557 }
558 
559 /* Tile "node", if it is a band node with at least 2 members.
560  * The tile sizes are set from the "tile_size" option.
561  */
tile_band(__isl_take isl_schedule_node * node,void * user)562 static __isl_give isl_schedule_node *tile_band(
563 	__isl_take isl_schedule_node *node, void *user)
564 {
565 	struct ppcg_scop *scop = user;
566 	int n;
567 	isl_space *space;
568 	isl_multi_val *sizes;
569 
570 	if (isl_schedule_node_get_type(node) != isl_schedule_node_band)
571 		return node;
572 
573 	n = isl_schedule_node_band_n_member(node);
574 	if (n <= 1)
575 		return node;
576 
577 	space = isl_schedule_node_band_get_space(node);
578 	sizes = ppcg_multi_val_from_int(space, scop->options->tile_size);
579 
580 	return tile(node, sizes);
581 }
582 
583 /* Construct schedule constraints from the dependences in ps
584  * for the purpose of computing a schedule for a CPU.
585  *
586  * The proximity constraints are set to the flow dependences.
587  *
588  * If live-range reordering is allowed then the conditional validity
589  * constraints are set to the order dependences with the flow dependences
590  * as condition.  That is, a live-range (flow dependence) will be either
591  * local to an iteration of a band or all adjacent order dependences
592  * will be respected by the band.
593  * The validity constraints are set to the union of the flow dependences
594  * and the forced dependences, while the coincidence constraints
595  * are set to the union of the flow dependences, the forced dependences and
596  * the order dependences.
597  *
598  * If live-range reordering is not allowed, then both the validity
599  * and the coincidence constraints are set to the union of the flow
600  * dependences and the false dependences.
601  *
602  * Note that the coincidence constraints are only set when the "openmp"
603  * options is set.  Even though the way openmp pragmas are introduced
604  * does not rely on the coincident property of the schedule band members,
605  * the coincidence constraints do affect the way the schedule is constructed,
606  * such that more schedule dimensions should be detected as parallel
607  * by ast_schedule_dim_is_parallel.
608  * Since the order dependences are also taken into account by
609  * ast_schedule_dim_is_parallel, they are also added to
610  * the coincidence constraints.  If the openmp handling learns
611  * how to privatize some memory, then the corresponding order
612  * dependences can be removed from the coincidence constraints.
613  */
construct_cpu_schedule_constraints(struct ppcg_scop * ps)614 static __isl_give isl_schedule_constraints *construct_cpu_schedule_constraints(
615 	struct ppcg_scop *ps)
616 {
617 	isl_schedule_constraints *sc;
618 	isl_union_map *validity, *coincidence;
619 
620 	sc = isl_schedule_constraints_on_domain(isl_union_set_copy(ps->domain));
621 	if (ps->options->live_range_reordering) {
622 		sc = isl_schedule_constraints_set_conditional_validity(sc,
623 				isl_union_map_copy(ps->tagged_dep_flow),
624 				isl_union_map_copy(ps->tagged_dep_order));
625 		validity = isl_union_map_copy(ps->dep_flow);
626 		validity = isl_union_map_union(validity,
627 				isl_union_map_copy(ps->dep_forced));
628 		if (ps->options->openmp) {
629 			coincidence = isl_union_map_copy(validity);
630 			coincidence = isl_union_map_union(coincidence,
631 					isl_union_map_copy(ps->dep_order));
632 		}
633 	} else {
634 		validity = isl_union_map_copy(ps->dep_flow);
635 		validity = isl_union_map_union(validity,
636 				isl_union_map_copy(ps->dep_false));
637 		if (ps->options->openmp)
638 			coincidence = isl_union_map_copy(validity);
639 	}
640 	if (ps->options->openmp)
641 		sc = isl_schedule_constraints_set_coincidence(sc, coincidence);
642 	sc = isl_schedule_constraints_set_validity(sc, validity);
643 	sc = isl_schedule_constraints_set_proximity(sc,
644 					isl_union_map_copy(ps->dep_flow));
645 
646 	return sc;
647 }
648 
649 /* Compute a schedule for the scop "ps".
650  *
651  * First derive the appropriate schedule constraints from the dependences
652  * in "ps" and then compute a schedule from those schedule constraints,
653  * possibly grouping statement instances based on the input schedule.
654  */
compute_cpu_schedule(struct ppcg_scop * ps)655 static __isl_give isl_schedule *compute_cpu_schedule(struct ppcg_scop *ps)
656 {
657 	isl_schedule_constraints *sc;
658 	isl_schedule *schedule;
659 
660 	if (!ps)
661 		return NULL;
662 
663 	sc = construct_cpu_schedule_constraints(ps);
664 
665 	if (ps->options->debug->dump_schedule_constraints)
666 		isl_schedule_constraints_dump(sc);
667 	schedule = ppcg_compute_schedule(sc, ps->schedule, ps->options);
668 
669 	return schedule;
670 }
671 
672 /* Compute a new schedule to the scop "ps" if the reschedule option is set.
673  * Otherwise, return a copy of the original schedule.
674  */
optionally_compute_schedule(void * user)675 static __isl_give isl_schedule *optionally_compute_schedule(void *user)
676 {
677 	struct ppcg_scop *ps = user;
678 
679 	if (!ps)
680 		return NULL;
681 	if (!ps->options->reschedule)
682 		return isl_schedule_copy(ps->schedule);
683 	return compute_cpu_schedule(ps);
684 }
685 
686 /* Compute a schedule based on the dependences in "ps" and
687  * tile it if requested by the user.
688  */
get_schedule(struct ppcg_scop * ps,struct ppcg_options * options)689 static __isl_give isl_schedule *get_schedule(struct ppcg_scop *ps,
690 	struct ppcg_options *options)
691 {
692 	isl_ctx *ctx;
693 	isl_schedule *schedule;
694 
695 	if (!ps)
696 		return NULL;
697 
698 	ctx = isl_union_set_get_ctx(ps->domain);
699 	schedule = ppcg_get_schedule(ctx, options,
700 				    &optionally_compute_schedule, ps);
701 	if (ps->options->tile)
702 		schedule = isl_schedule_map_schedule_node_bottom_up(schedule,
703 							&tile_band, ps);
704 
705 	return schedule;
706 }
707 
708 /* Generate CPU code for the scop "ps" using "schedule" and
709  * print the corresponding C code to "p", including variable declarations.
710  */
print_cpu_with_schedule(__isl_take isl_printer * p,struct ppcg_scop * ps,__isl_take isl_schedule * schedule,struct ppcg_options * options)711 static __isl_give isl_printer *print_cpu_with_schedule(
712 	__isl_take isl_printer *p, struct ppcg_scop *ps,
713 	__isl_take isl_schedule *schedule, struct ppcg_options *options)
714 {
715 	int hidden;
716 	isl_set *context;
717 
718 	p = isl_printer_start_line(p);
719 	p = isl_printer_print_str(p, "/* ppcg generated CPU code */");
720 	p = isl_printer_end_line(p);
721 
722 	p = isl_printer_start_line(p);
723 	p = isl_printer_end_line(p);
724 
725 	p = ppcg_set_macro_names(p);
726 	p = ppcg_print_exposed_declarations(p, ps);
727 	hidden = ppcg_scop_any_hidden_declarations(ps);
728 	if (hidden) {
729 		p = ppcg_start_block(p);
730 		p = ppcg_print_hidden_declarations(p, ps);
731 	}
732 
733 	context = isl_set_copy(ps->context);
734 	context = isl_set_from_params(context);
735 	schedule = isl_schedule_insert_context(schedule, context);
736 	if (options->debug->dump_final_schedule)
737 		isl_schedule_dump(schedule);
738 	p = print_scop(ps, schedule, p, options);
739 	if (hidden)
740 		p = ppcg_end_block(p);
741 
742 	return p;
743 }
744 
745 /* Generate CPU code for the scop "ps" and print the corresponding C code
746  * to "p", including variable declarations.
747  */
print_cpu(__isl_take isl_printer * p,struct ppcg_scop * ps,struct ppcg_options * options)748 __isl_give isl_printer *print_cpu(__isl_take isl_printer *p,
749 	struct ppcg_scop *ps, struct ppcg_options *options)
750 {
751 	isl_schedule *schedule;
752 
753 	schedule = isl_schedule_copy(ps->schedule);
754 	return print_cpu_with_schedule(p, ps, schedule, options);
755 }
756 
757 /* Generate CPU code for "scop" and print it to "p".
758  *
759  * First obtain a schedule for "scop" and then print code for "scop"
760  * using that schedule.
761  */
generate(__isl_take isl_printer * p,struct ppcg_scop * scop,struct ppcg_options * options)762 static __isl_give isl_printer *generate(__isl_take isl_printer *p,
763 	struct ppcg_scop *scop, struct ppcg_options *options)
764 {
765 	isl_schedule *schedule;
766 
767 	schedule = get_schedule(scop, options);
768 
769 	return print_cpu_with_schedule(p, scop, schedule, options);
770 }
771 
772 /* Wrapper around generate for use as a ppcg_transform callback.
773  */
print_cpu_wrap(__isl_take isl_printer * p,struct ppcg_scop * scop,void * user)774 static __isl_give isl_printer *print_cpu_wrap(__isl_take isl_printer *p,
775 	struct ppcg_scop *scop, void *user)
776 {
777 	struct ppcg_options *options = user;
778 
779 	return generate(p, scop, options);
780 }
781 
782 /* Transform the code in the file called "input" by replacing
783  * all scops by corresponding CPU code and write the results to a file
784  * called "output".
785  */
generate_cpu(isl_ctx * ctx,struct ppcg_options * options,const char * input,const char * output)786 int generate_cpu(isl_ctx *ctx, struct ppcg_options *options,
787 	const char *input, const char *output)
788 {
789 	FILE *output_file;
790 	int r;
791 
792 	output_file = get_output_file(input, output);
793 	if (!output_file)
794 		return -1;
795 
796 	r = ppcg_transform(ctx, input, output_file, options,
797 					&print_cpu_wrap, options);
798 
799 	fclose(output_file);
800 
801 	return r;
802 }
803