1 /*
2 * Copyright 2012 INRIA Paris-Rocquencourt
3 * Copyright 2012 Ecole Normale Superieure
4 *
5 * Use of this software is governed by the MIT license
6 *
7 * Written by Tobias Grosser, INRIA Paris-Rocquencourt,
8 * Domaine de Voluceau, Rocquenqourt, B.P. 105,
9 * 78153 Le Chesnay Cedex France
10 * and Sven Verdoolaege,
11 * Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
12 */
13
14 #include <limits.h>
15 #include <stdio.h>
16 #include <string.h>
17
18 #include <isl/aff.h>
19 #include <isl/ctx.h>
20 #include <isl/flow.h>
21 #include <isl/map.h>
22 #include <isl/ast_build.h>
23 #include <isl/schedule.h>
24 #include <isl/schedule_node.h>
25 #include <pet.h>
26
27 #include "ppcg.h"
28 #include "ppcg_options.h"
29 #include "cpu.h"
30 #include "print.h"
31 #include "schedule.h"
32 #include "util.h"
33
34 /* Representation of a statement inside a generated AST.
35 *
36 * "stmt" refers to the original statement.
37 * "ref2expr" maps the reference identifier of each access in
38 * the statement to an AST expression that should be printed
39 * at the place of the access.
40 */
41 struct ppcg_stmt {
42 struct pet_stmt *stmt;
43
44 isl_id_to_ast_expr *ref2expr;
45 };
46
ppcg_stmt_free(void * user)47 static void ppcg_stmt_free(void *user)
48 {
49 struct ppcg_stmt *stmt = user;
50
51 if (!stmt)
52 return;
53
54 isl_id_to_ast_expr_free(stmt->ref2expr);
55
56 free(stmt);
57 }
58
59 /* Derive the output file name from the input file name.
60 * 'input' is the entire path of the input file. The output
61 * is the file name plus the additional extension.
62 *
63 * We will basically replace everything after the last point
64 * with '.ppcg.c'. This means file.c becomes file.ppcg.c
65 */
get_output_file(const char * input,const char * output)66 static FILE *get_output_file(const char *input, const char *output)
67 {
68 char name[PATH_MAX];
69 const char *ext;
70 const char ppcg_marker[] = ".ppcg";
71 int len;
72 FILE *file;
73
74 len = ppcg_extract_base_name(name, input);
75
76 strcpy(name + len, ppcg_marker);
77 ext = strrchr(input, '.');
78 strcpy(name + len + sizeof(ppcg_marker) - 1, ext ? ext : ".c");
79
80 if (!output)
81 output = name;
82
83 file = fopen(output, "w");
84 if (!file) {
85 fprintf(stderr, "Unable to open '%s' for writing\n", output);
86 return NULL;
87 }
88
89 return file;
90 }
91
92 /* Data used to annotate for nodes in the ast.
93 */
94 struct ast_node_userinfo {
95 /* The for node is an openmp parallel for node. */
96 int is_openmp;
97 };
98
99 /* Information used while building the ast.
100 */
101 struct ast_build_userinfo {
102 /* The current ppcg scop. */
103 struct ppcg_scop *scop;
104
105 /* Are we currently in a parallel for loop? */
106 int in_parallel_for;
107 };
108
109 /* Check if the current scheduling dimension is parallel.
110 *
111 * We check for parallelism by verifying that the loop does not carry any
112 * dependences.
113 * If the live_range_reordering option is set, then this currently
114 * includes the order dependences. In principle, non-zero order dependences
115 * could be allowed, but this would require privatization and/or expansion.
116 *
117 * Parallelism test: if the distance is zero in all outer dimensions, then it
118 * has to be zero in the current dimension as well.
119 * Implementation: first, translate dependences into time space, then force
120 * outer dimensions to be equal. If the distance is zero in the current
121 * dimension, then the loop is parallel.
122 * The distance is zero in the current dimension if it is a subset of a map
123 * with equal values for the current dimension.
124 */
ast_schedule_dim_is_parallel(__isl_keep isl_ast_build * build,struct ppcg_scop * scop)125 static int ast_schedule_dim_is_parallel(__isl_keep isl_ast_build *build,
126 struct ppcg_scop *scop)
127 {
128 isl_union_map *schedule, *deps;
129 isl_map *schedule_deps, *test;
130 isl_space *schedule_space;
131 unsigned i, dimension, is_parallel;
132
133 schedule = isl_ast_build_get_schedule(build);
134 schedule_space = isl_ast_build_get_schedule_space(build);
135
136 dimension = isl_space_dim(schedule_space, isl_dim_out) - 1;
137
138 deps = isl_union_map_copy(scop->dep_flow);
139 deps = isl_union_map_union(deps, isl_union_map_copy(scop->dep_false));
140 if (scop->options->live_range_reordering) {
141 isl_union_map *order = isl_union_map_copy(scop->dep_order);
142 deps = isl_union_map_union(deps, order);
143 }
144 deps = isl_union_map_apply_range(deps, isl_union_map_copy(schedule));
145 deps = isl_union_map_apply_domain(deps, schedule);
146
147 if (isl_union_map_is_empty(deps)) {
148 isl_union_map_free(deps);
149 isl_space_free(schedule_space);
150 return 1;
151 }
152
153 schedule_deps = isl_map_from_union_map(deps);
154
155 for (i = 0; i < dimension; i++)
156 schedule_deps = isl_map_equate(schedule_deps, isl_dim_out, i,
157 isl_dim_in, i);
158
159 test = isl_map_universe(isl_map_get_space(schedule_deps));
160 test = isl_map_equate(test, isl_dim_out, dimension, isl_dim_in,
161 dimension);
162 is_parallel = isl_map_is_subset(schedule_deps, test);
163
164 isl_space_free(schedule_space);
165 isl_map_free(test);
166 isl_map_free(schedule_deps);
167
168 return is_parallel;
169 }
170
171 /* Mark a for node openmp parallel, if it is the outermost parallel for node.
172 */
mark_openmp_parallel(__isl_keep isl_ast_build * build,struct ast_build_userinfo * build_info,struct ast_node_userinfo * node_info)173 static void mark_openmp_parallel(__isl_keep isl_ast_build *build,
174 struct ast_build_userinfo *build_info,
175 struct ast_node_userinfo *node_info)
176 {
177 if (build_info->in_parallel_for)
178 return;
179
180 if (ast_schedule_dim_is_parallel(build, build_info->scop)) {
181 build_info->in_parallel_for = 1;
182 node_info->is_openmp = 1;
183 }
184 }
185
186 /* Allocate an ast_node_info structure and initialize it with default values.
187 */
allocate_ast_node_userinfo()188 static struct ast_node_userinfo *allocate_ast_node_userinfo()
189 {
190 struct ast_node_userinfo *node_info;
191 node_info = (struct ast_node_userinfo *)
192 malloc(sizeof(struct ast_node_userinfo));
193 node_info->is_openmp = 0;
194 return node_info;
195 }
196
197 /* Free an ast_node_info structure.
198 */
free_ast_node_userinfo(void * ptr)199 static void free_ast_node_userinfo(void *ptr)
200 {
201 struct ast_node_userinfo *info;
202 info = (struct ast_node_userinfo *) ptr;
203 free(info);
204 }
205
206 /* This method is executed before the construction of a for node. It creates
207 * an isl_id that is used to annotate the subsequently generated ast for nodes.
208 *
209 * In this function we also run the following analyses:
210 *
211 * - Detection of openmp parallel loops
212 */
ast_build_before_for(__isl_keep isl_ast_build * build,void * user)213 static __isl_give isl_id *ast_build_before_for(
214 __isl_keep isl_ast_build *build, void *user)
215 {
216 isl_id *id;
217 struct ast_build_userinfo *build_info;
218 struct ast_node_userinfo *node_info;
219
220 build_info = (struct ast_build_userinfo *) user;
221 node_info = allocate_ast_node_userinfo();
222 id = isl_id_alloc(isl_ast_build_get_ctx(build), "", node_info);
223 id = isl_id_set_free_user(id, free_ast_node_userinfo);
224
225 mark_openmp_parallel(build, build_info, node_info);
226
227 return id;
228 }
229
230 /* This method is executed after the construction of a for node.
231 *
232 * It performs the following actions:
233 *
234 * - Reset the 'in_parallel_for' flag, as soon as we leave a for node,
235 * that is marked as openmp parallel.
236 *
237 */
ast_build_after_for(__isl_take isl_ast_node * node,__isl_keep isl_ast_build * build,void * user)238 static __isl_give isl_ast_node *ast_build_after_for(
239 __isl_take isl_ast_node *node, __isl_keep isl_ast_build *build,
240 void *user)
241 {
242 isl_id *id;
243 struct ast_build_userinfo *build_info;
244 struct ast_node_userinfo *info;
245
246 id = isl_ast_node_get_annotation(node);
247 info = isl_id_get_user(id);
248
249 if (info && info->is_openmp) {
250 build_info = (struct ast_build_userinfo *) user;
251 build_info->in_parallel_for = 0;
252 }
253
254 isl_id_free(id);
255
256 return node;
257 }
258
259 /* Find the element in scop->stmts that has the given "id".
260 */
find_stmt(struct ppcg_scop * scop,__isl_keep isl_id * id)261 static struct pet_stmt *find_stmt(struct ppcg_scop *scop, __isl_keep isl_id *id)
262 {
263 int i;
264
265 for (i = 0; i < scop->pet->n_stmt; ++i) {
266 struct pet_stmt *stmt = scop->pet->stmts[i];
267 isl_id *id_i;
268
269 id_i = isl_set_get_tuple_id(stmt->domain);
270 isl_id_free(id_i);
271
272 if (id_i == id)
273 return stmt;
274 }
275
276 isl_die(isl_id_get_ctx(id), isl_error_internal,
277 "statement not found", return NULL);
278 }
279
280 /* Print a user statement in the generated AST.
281 * The ppcg_stmt has been attached to the node in at_each_domain.
282 */
print_user(__isl_take isl_printer * p,__isl_take isl_ast_print_options * print_options,__isl_keep isl_ast_node * node,void * user)283 static __isl_give isl_printer *print_user(__isl_take isl_printer *p,
284 __isl_take isl_ast_print_options *print_options,
285 __isl_keep isl_ast_node *node, void *user)
286 {
287 struct ppcg_stmt *stmt;
288 isl_id *id;
289
290 id = isl_ast_node_get_annotation(node);
291 stmt = isl_id_get_user(id);
292 isl_id_free(id);
293
294 p = pet_stmt_print_body(stmt->stmt, p, stmt->ref2expr);
295
296 isl_ast_print_options_free(print_options);
297
298 return p;
299 }
300
301
302 /* Print a for loop node as an openmp parallel loop.
303 *
304 * To print an openmp parallel loop we print a normal for loop, but add
305 * "#pragma openmp parallel for" in front.
306 *
307 * Variables that are declared within the body of this for loop are
308 * automatically openmp 'private'. Iterators declared outside of the
309 * for loop are automatically openmp 'shared'. As ppcg declares all iterators
310 * at the position where they are assigned, there is no need to explicitly mark
311 * variables. Their automatically assigned type is already correct.
312 *
313 * This function only generates valid OpenMP code, if the ast was generated
314 * with the 'atomic-bounds' option enabled.
315 *
316 */
print_for_with_openmp(__isl_keep isl_ast_node * node,__isl_take isl_printer * p,__isl_take isl_ast_print_options * print_options)317 static __isl_give isl_printer *print_for_with_openmp(
318 __isl_keep isl_ast_node *node, __isl_take isl_printer *p,
319 __isl_take isl_ast_print_options *print_options)
320 {
321 p = isl_printer_start_line(p);
322 p = isl_printer_print_str(p, "#pragma omp parallel for");
323 p = isl_printer_end_line(p);
324
325 p = isl_ast_node_for_print(node, p, print_options);
326
327 return p;
328 }
329
330 /* Print a for node.
331 *
332 * Depending on how the node is annotated, we either print a normal
333 * for node or an openmp parallel for node.
334 */
print_for(__isl_take isl_printer * p,__isl_take isl_ast_print_options * print_options,__isl_keep isl_ast_node * node,void * user)335 static __isl_give isl_printer *print_for(__isl_take isl_printer *p,
336 __isl_take isl_ast_print_options *print_options,
337 __isl_keep isl_ast_node *node, void *user)
338 {
339 isl_id *id;
340 int openmp;
341
342 openmp = 0;
343 id = isl_ast_node_get_annotation(node);
344
345 if (id) {
346 struct ast_node_userinfo *info;
347
348 info = (struct ast_node_userinfo *) isl_id_get_user(id);
349 if (info && info->is_openmp)
350 openmp = 1;
351 }
352
353 if (openmp)
354 p = print_for_with_openmp(node, p, print_options);
355 else
356 p = isl_ast_node_for_print(node, p, print_options);
357
358 isl_id_free(id);
359
360 return p;
361 }
362
363 /* Index transformation callback for pet_stmt_build_ast_exprs.
364 *
365 * "index" expresses the array indices in terms of statement iterators
366 * "iterator_map" expresses the statement iterators in terms of
367 * AST loop iterators.
368 *
369 * The result expresses the array indices in terms of
370 * AST loop iterators.
371 */
pullback_index(__isl_take isl_multi_pw_aff * index,__isl_keep isl_id * id,void * user)372 static __isl_give isl_multi_pw_aff *pullback_index(
373 __isl_take isl_multi_pw_aff *index, __isl_keep isl_id *id, void *user)
374 {
375 isl_pw_multi_aff *iterator_map = user;
376
377 iterator_map = isl_pw_multi_aff_copy(iterator_map);
378 return isl_multi_pw_aff_pullback_pw_multi_aff(index, iterator_map);
379 }
380
381 /* Transform the accesses in the statement associated to the domain
382 * called by "node" to refer to the AST loop iterators, construct
383 * corresponding AST expressions using "build",
384 * collect them in a ppcg_stmt and annotate the node with the ppcg_stmt.
385 */
at_each_domain(__isl_take isl_ast_node * node,__isl_keep isl_ast_build * build,void * user)386 static __isl_give isl_ast_node *at_each_domain(__isl_take isl_ast_node *node,
387 __isl_keep isl_ast_build *build, void *user)
388 {
389 struct ppcg_scop *scop = user;
390 isl_ast_expr *expr, *arg;
391 isl_ctx *ctx;
392 isl_id *id;
393 isl_map *map;
394 isl_pw_multi_aff *iterator_map;
395 struct ppcg_stmt *stmt;
396
397 ctx = isl_ast_node_get_ctx(node);
398 stmt = isl_calloc_type(ctx, struct ppcg_stmt);
399 if (!stmt)
400 goto error;
401
402 expr = isl_ast_node_user_get_expr(node);
403 arg = isl_ast_expr_get_op_arg(expr, 0);
404 isl_ast_expr_free(expr);
405 id = isl_ast_expr_get_id(arg);
406 isl_ast_expr_free(arg);
407 stmt->stmt = find_stmt(scop, id);
408 isl_id_free(id);
409 if (!stmt->stmt)
410 goto error;
411
412 map = isl_map_from_union_map(isl_ast_build_get_schedule(build));
413 map = isl_map_reverse(map);
414 iterator_map = isl_pw_multi_aff_from_map(map);
415 stmt->ref2expr = pet_stmt_build_ast_exprs(stmt->stmt, build,
416 &pullback_index, iterator_map, NULL, NULL);
417 isl_pw_multi_aff_free(iterator_map);
418
419 id = isl_id_alloc(isl_ast_node_get_ctx(node), NULL, stmt);
420 id = isl_id_set_free_user(id, &ppcg_stmt_free);
421 return isl_ast_node_set_annotation(node, id);
422 error:
423 ppcg_stmt_free(stmt);
424 return isl_ast_node_free(node);
425 }
426
427 /* Set *depth (initialized to 0 by the caller) to the maximum
428 * of the schedule depths of the leaf nodes for which this function is called.
429 */
update_depth(__isl_keep isl_schedule_node * node,void * user)430 static isl_bool update_depth(__isl_keep isl_schedule_node *node, void *user)
431 {
432 int *depth = user;
433 int node_depth;
434
435 if (isl_schedule_node_get_type(node) != isl_schedule_node_leaf)
436 return isl_bool_true;
437 node_depth = isl_schedule_node_get_schedule_depth(node);
438 if (node_depth > *depth)
439 *depth = node_depth;
440
441 return isl_bool_false;
442 }
443
444 /* This function is called for each node in a CPU AST.
445 * In case of a user node, print the macro definitions required
446 * for printing the AST expressions in the annotation, if any.
447 * For other nodes, return true such that descendants are also
448 * visited.
449 *
450 * In particular, print the macro definitions needed for the substitutions
451 * of the original user statements.
452 */
at_node(__isl_keep isl_ast_node * node,void * user)453 static isl_bool at_node(__isl_keep isl_ast_node *node, void *user)
454 {
455 struct ppcg_stmt *stmt;
456 isl_id *id;
457 isl_printer **p = user;
458
459 if (isl_ast_node_get_type(node) != isl_ast_node_user)
460 return isl_bool_true;
461
462 id = isl_ast_node_get_annotation(node);
463 stmt = isl_id_get_user(id);
464 isl_id_free(id);
465
466 if (!stmt)
467 return isl_bool_error;
468
469 *p = ppcg_print_body_macros(*p, stmt->ref2expr);
470 if (!*p)
471 return isl_bool_error;
472
473 return isl_bool_false;
474 }
475
476 /* Print the required macros for the CPU AST "node" to "p",
477 * including those needed for the user statements inside the AST.
478 */
cpu_print_macros(__isl_take isl_printer * p,__isl_keep isl_ast_node * node)479 static __isl_give isl_printer *cpu_print_macros(__isl_take isl_printer *p,
480 __isl_keep isl_ast_node *node)
481 {
482 if (isl_ast_node_foreach_descendant_top_down(node, &at_node, &p) < 0)
483 return isl_printer_free(p);
484 p = ppcg_print_macros(p, node);
485 return p;
486 }
487
488 /* Code generate the scop 'scop' using "schedule"
489 * and print the corresponding C code to 'p'.
490 */
print_scop(struct ppcg_scop * scop,__isl_take isl_schedule * schedule,__isl_take isl_printer * p,struct ppcg_options * options)491 static __isl_give isl_printer *print_scop(struct ppcg_scop *scop,
492 __isl_take isl_schedule *schedule, __isl_take isl_printer *p,
493 struct ppcg_options *options)
494 {
495 isl_ctx *ctx = isl_printer_get_ctx(p);
496 isl_ast_build *build;
497 isl_ast_print_options *print_options;
498 isl_ast_node *tree;
499 isl_id_list *iterators;
500 struct ast_build_userinfo build_info;
501 int depth;
502
503 depth = 0;
504 if (isl_schedule_foreach_schedule_node_top_down(schedule, &update_depth,
505 &depth) < 0)
506 goto error;
507
508 build = isl_ast_build_alloc(ctx);
509 iterators = ppcg_scop_generate_names(scop, depth, "c");
510 build = isl_ast_build_set_iterators(build, iterators);
511 build = isl_ast_build_set_at_each_domain(build, &at_each_domain, scop);
512
513 if (options->openmp) {
514 build_info.scop = scop;
515 build_info.in_parallel_for = 0;
516
517 build = isl_ast_build_set_before_each_for(build,
518 &ast_build_before_for,
519 &build_info);
520 build = isl_ast_build_set_after_each_for(build,
521 &ast_build_after_for,
522 &build_info);
523 }
524
525 tree = isl_ast_build_node_from_schedule(build, schedule);
526 isl_ast_build_free(build);
527
528 print_options = isl_ast_print_options_alloc(ctx);
529 print_options = isl_ast_print_options_set_print_user(print_options,
530 &print_user, NULL);
531
532 print_options = isl_ast_print_options_set_print_for(print_options,
533 &print_for, NULL);
534
535 p = cpu_print_macros(p, tree);
536 p = isl_ast_node_print(tree, p, print_options);
537
538 isl_ast_node_free(tree);
539
540 return p;
541 error:
542 isl_schedule_free(schedule);
543 isl_printer_free(p);
544 return NULL;
545 }
546
547 /* Tile the band node "node" with tile sizes "sizes" and
548 * mark all members of the resulting tile node as "atomic".
549 */
tile(__isl_take isl_schedule_node * node,__isl_take isl_multi_val * sizes)550 static __isl_give isl_schedule_node *tile(__isl_take isl_schedule_node *node,
551 __isl_take isl_multi_val *sizes)
552 {
553 node = isl_schedule_node_band_tile(node, sizes);
554 node = ppcg_set_schedule_node_type(node, isl_ast_loop_atomic);
555
556 return node;
557 }
558
559 /* Tile "node", if it is a band node with at least 2 members.
560 * The tile sizes are set from the "tile_size" option.
561 */
tile_band(__isl_take isl_schedule_node * node,void * user)562 static __isl_give isl_schedule_node *tile_band(
563 __isl_take isl_schedule_node *node, void *user)
564 {
565 struct ppcg_scop *scop = user;
566 int n;
567 isl_space *space;
568 isl_multi_val *sizes;
569
570 if (isl_schedule_node_get_type(node) != isl_schedule_node_band)
571 return node;
572
573 n = isl_schedule_node_band_n_member(node);
574 if (n <= 1)
575 return node;
576
577 space = isl_schedule_node_band_get_space(node);
578 sizes = ppcg_multi_val_from_int(space, scop->options->tile_size);
579
580 return tile(node, sizes);
581 }
582
583 /* Construct schedule constraints from the dependences in ps
584 * for the purpose of computing a schedule for a CPU.
585 *
586 * The proximity constraints are set to the flow dependences.
587 *
588 * If live-range reordering is allowed then the conditional validity
589 * constraints are set to the order dependences with the flow dependences
590 * as condition. That is, a live-range (flow dependence) will be either
591 * local to an iteration of a band or all adjacent order dependences
592 * will be respected by the band.
593 * The validity constraints are set to the union of the flow dependences
594 * and the forced dependences, while the coincidence constraints
595 * are set to the union of the flow dependences, the forced dependences and
596 * the order dependences.
597 *
598 * If live-range reordering is not allowed, then both the validity
599 * and the coincidence constraints are set to the union of the flow
600 * dependences and the false dependences.
601 *
602 * Note that the coincidence constraints are only set when the "openmp"
603 * options is set. Even though the way openmp pragmas are introduced
604 * does not rely on the coincident property of the schedule band members,
605 * the coincidence constraints do affect the way the schedule is constructed,
606 * such that more schedule dimensions should be detected as parallel
607 * by ast_schedule_dim_is_parallel.
608 * Since the order dependences are also taken into account by
609 * ast_schedule_dim_is_parallel, they are also added to
610 * the coincidence constraints. If the openmp handling learns
611 * how to privatize some memory, then the corresponding order
612 * dependences can be removed from the coincidence constraints.
613 */
construct_cpu_schedule_constraints(struct ppcg_scop * ps)614 static __isl_give isl_schedule_constraints *construct_cpu_schedule_constraints(
615 struct ppcg_scop *ps)
616 {
617 isl_schedule_constraints *sc;
618 isl_union_map *validity, *coincidence;
619
620 sc = isl_schedule_constraints_on_domain(isl_union_set_copy(ps->domain));
621 if (ps->options->live_range_reordering) {
622 sc = isl_schedule_constraints_set_conditional_validity(sc,
623 isl_union_map_copy(ps->tagged_dep_flow),
624 isl_union_map_copy(ps->tagged_dep_order));
625 validity = isl_union_map_copy(ps->dep_flow);
626 validity = isl_union_map_union(validity,
627 isl_union_map_copy(ps->dep_forced));
628 if (ps->options->openmp) {
629 coincidence = isl_union_map_copy(validity);
630 coincidence = isl_union_map_union(coincidence,
631 isl_union_map_copy(ps->dep_order));
632 }
633 } else {
634 validity = isl_union_map_copy(ps->dep_flow);
635 validity = isl_union_map_union(validity,
636 isl_union_map_copy(ps->dep_false));
637 if (ps->options->openmp)
638 coincidence = isl_union_map_copy(validity);
639 }
640 if (ps->options->openmp)
641 sc = isl_schedule_constraints_set_coincidence(sc, coincidence);
642 sc = isl_schedule_constraints_set_validity(sc, validity);
643 sc = isl_schedule_constraints_set_proximity(sc,
644 isl_union_map_copy(ps->dep_flow));
645
646 return sc;
647 }
648
649 /* Compute a schedule for the scop "ps".
650 *
651 * First derive the appropriate schedule constraints from the dependences
652 * in "ps" and then compute a schedule from those schedule constraints,
653 * possibly grouping statement instances based on the input schedule.
654 */
compute_cpu_schedule(struct ppcg_scop * ps)655 static __isl_give isl_schedule *compute_cpu_schedule(struct ppcg_scop *ps)
656 {
657 isl_schedule_constraints *sc;
658 isl_schedule *schedule;
659
660 if (!ps)
661 return NULL;
662
663 sc = construct_cpu_schedule_constraints(ps);
664
665 if (ps->options->debug->dump_schedule_constraints)
666 isl_schedule_constraints_dump(sc);
667 schedule = ppcg_compute_schedule(sc, ps->schedule, ps->options);
668
669 return schedule;
670 }
671
672 /* Compute a new schedule to the scop "ps" if the reschedule option is set.
673 * Otherwise, return a copy of the original schedule.
674 */
optionally_compute_schedule(void * user)675 static __isl_give isl_schedule *optionally_compute_schedule(void *user)
676 {
677 struct ppcg_scop *ps = user;
678
679 if (!ps)
680 return NULL;
681 if (!ps->options->reschedule)
682 return isl_schedule_copy(ps->schedule);
683 return compute_cpu_schedule(ps);
684 }
685
686 /* Compute a schedule based on the dependences in "ps" and
687 * tile it if requested by the user.
688 */
get_schedule(struct ppcg_scop * ps,struct ppcg_options * options)689 static __isl_give isl_schedule *get_schedule(struct ppcg_scop *ps,
690 struct ppcg_options *options)
691 {
692 isl_ctx *ctx;
693 isl_schedule *schedule;
694
695 if (!ps)
696 return NULL;
697
698 ctx = isl_union_set_get_ctx(ps->domain);
699 schedule = ppcg_get_schedule(ctx, options,
700 &optionally_compute_schedule, ps);
701 if (ps->options->tile)
702 schedule = isl_schedule_map_schedule_node_bottom_up(schedule,
703 &tile_band, ps);
704
705 return schedule;
706 }
707
708 /* Generate CPU code for the scop "ps" using "schedule" and
709 * print the corresponding C code to "p", including variable declarations.
710 */
print_cpu_with_schedule(__isl_take isl_printer * p,struct ppcg_scop * ps,__isl_take isl_schedule * schedule,struct ppcg_options * options)711 static __isl_give isl_printer *print_cpu_with_schedule(
712 __isl_take isl_printer *p, struct ppcg_scop *ps,
713 __isl_take isl_schedule *schedule, struct ppcg_options *options)
714 {
715 int hidden;
716 isl_set *context;
717
718 p = isl_printer_start_line(p);
719 p = isl_printer_print_str(p, "/* ppcg generated CPU code */");
720 p = isl_printer_end_line(p);
721
722 p = isl_printer_start_line(p);
723 p = isl_printer_end_line(p);
724
725 p = ppcg_set_macro_names(p);
726 p = ppcg_print_exposed_declarations(p, ps);
727 hidden = ppcg_scop_any_hidden_declarations(ps);
728 if (hidden) {
729 p = ppcg_start_block(p);
730 p = ppcg_print_hidden_declarations(p, ps);
731 }
732
733 context = isl_set_copy(ps->context);
734 context = isl_set_from_params(context);
735 schedule = isl_schedule_insert_context(schedule, context);
736 if (options->debug->dump_final_schedule)
737 isl_schedule_dump(schedule);
738 p = print_scop(ps, schedule, p, options);
739 if (hidden)
740 p = ppcg_end_block(p);
741
742 return p;
743 }
744
745 /* Generate CPU code for the scop "ps" and print the corresponding C code
746 * to "p", including variable declarations.
747 */
print_cpu(__isl_take isl_printer * p,struct ppcg_scop * ps,struct ppcg_options * options)748 __isl_give isl_printer *print_cpu(__isl_take isl_printer *p,
749 struct ppcg_scop *ps, struct ppcg_options *options)
750 {
751 isl_schedule *schedule;
752
753 schedule = isl_schedule_copy(ps->schedule);
754 return print_cpu_with_schedule(p, ps, schedule, options);
755 }
756
757 /* Generate CPU code for "scop" and print it to "p".
758 *
759 * First obtain a schedule for "scop" and then print code for "scop"
760 * using that schedule.
761 */
generate(__isl_take isl_printer * p,struct ppcg_scop * scop,struct ppcg_options * options)762 static __isl_give isl_printer *generate(__isl_take isl_printer *p,
763 struct ppcg_scop *scop, struct ppcg_options *options)
764 {
765 isl_schedule *schedule;
766
767 schedule = get_schedule(scop, options);
768
769 return print_cpu_with_schedule(p, scop, schedule, options);
770 }
771
772 /* Wrapper around generate for use as a ppcg_transform callback.
773 */
print_cpu_wrap(__isl_take isl_printer * p,struct ppcg_scop * scop,void * user)774 static __isl_give isl_printer *print_cpu_wrap(__isl_take isl_printer *p,
775 struct ppcg_scop *scop, void *user)
776 {
777 struct ppcg_options *options = user;
778
779 return generate(p, scop, options);
780 }
781
782 /* Transform the code in the file called "input" by replacing
783 * all scops by corresponding CPU code and write the results to a file
784 * called "output".
785 */
generate_cpu(isl_ctx * ctx,struct ppcg_options * options,const char * input,const char * output)786 int generate_cpu(isl_ctx *ctx, struct ppcg_options *options,
787 const char *input, const char *output)
788 {
789 FILE *output_file;
790 int r;
791
792 output_file = get_output_file(input, output);
793 if (!output_file)
794 return -1;
795
796 r = ppcg_transform(ctx, input, output_file, options,
797 &print_cpu_wrap, options);
798
799 fclose(output_file);
800
801 return r;
802 }
803