1 /*
2 * Copyright (c) 1994-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18 /**
19 \file
20 \brief Fortran communications optimizer module
21 */
22
23 #include "gbldefs.h"
24 #include "global.h"
25 #include "error.h"
26 #include "comm.h"
27 #include "symtab.h"
28 #include "dtypeutl.h"
29 #include "soc.h"
30 #include "semant.h"
31 #include "ast.h"
32 #include "gramtk.h"
33 #include "transfrm.h"
34 #include "extern.h"
35 #include "commopt.h"
36 #include "ccffinfo.h"
37 #include "optimize.h"
38 #include "nme.h"
39 #include "rte.h"
40 #include "hlvect.h"
41
42 extern int rewrite_opfields;
43
44 static void commopt(void);
45 static void shmem_opt1(void);
46 static void shmem_opt2(void);
47 static void shmemopt1(void);
48 static void shmemopt2(void);
49 static void comm_optimize_init(void);
50 static LOGICAL same_forall_bnds(int, int, int);
51 static LOGICAL is_fusable(int, int, int);
52 static LOGICAL smp_conflict(int, int);
53 static LOGICAL is_in_block(int, int);
54 static LOGICAL is_different_scalar_mask(int, int);
55 static LOGICAL Conflict(int, int, int, LOGICAL, int, int);
56 static LOGICAL is_branch_between(int, int);
57 static LOGICAL is_contains_ast_between(int, int, int);
58 static LOGICAL is_same_align_shape(int, int, int, int);
59
60 static void comm_optimize_end(void);
61 static void eliminate_rt(int);
62 static void eliminate_bnds(int, int, int, int);
63 static void eliminate_alloc(int, int, int, int);
64 static void eliminate_sect(int, int, int, int);
65 static void eliminate_copy(int, int, int, int);
66 static void eliminate_gather(int, int, int, int);
67 static void eliminate_shift(int, int, int, int);
68 static void eliminate_start(int, int, int, int);
69 static void eliminate_get_scalar(void);
70 static void fuse_forall(int);
71 static LOGICAL is_same_descr_for_bnds(int, int);
72 static LOGICAL Conflict_(int);
73 static LOGICAL is_same_idx(int, int);
74 static LOGICAL is_dominator_fg(int, int);
75 static LOGICAL must_follow(int, int, int, int);
76 static int find_lp(int);
77 static void init_optsum(void);
78 #if DEBUG
79 static void optsummary(void);
80 #endif
81 static void alloc2ast(void);
82 static void opt_allocate(void);
83 static LOGICAL is_same_def(int, int);
84 static LOGICAL is_safe_copy(int);
85 static LOGICAL is_allocatable_assign(int ast);
86 static int propagate_bound(LITEMF *defs_to_propagate, int bound);
87 static void rewrite_all_shape(LITEMF *);
88 static void decompose_expression(int, int[], int, int *, int *);
89 static LOGICAL is_avail_expr_with_list(int, int, int, int, int, int[], int);
90 static void put_forall_fg(void);
91 static LOGICAL independent_commtype(int, int);
92 static LOGICAL is_olap_conflict(int, int);
93 static LOGICAL is_pcalls(int, int);
94 static void forall_make_same_idx(int);
95 static void eliminate_ownerproc(void);
96 static void gather_ownerproc(int atyp, int lp);
97 static void share_ownerproc(void);
98 static void transform_ownerproc(void);
99 static void barrier_opt(void);
100 static void opt_anti_barrier(void);
101 static void opt_flow_barrier(void);
102 static LOGICAL can_reach_no_barrier(int fg1, int fg2);
103 static LOGICAL _can_reach(int fg1, int fg2);
104 static void omem_barrier(void);
105
106 #if DEBUG
107 static int dodebug = 0;
108 #define TR(str) \
109 if (dodebug) { \
110 fprintf(gbl.dbgfil, str); \
111 fflush(gbl.dbgfil); \
112 }
113 #define TR1(str) \
114 if (DBGBIT(0, 512)) \
115 dump_stg_stat(str)
116 #else
117 #define TR(str)
118 #define TR1(str)
119 #endif
120
121 INT *lpsort;
122 FTB ftb;
123 OPTSUM optsum;
124
125 static void
selection_sort(void)126 selection_sort(void)
127 {
128 int i, j;
129 int hd, hd1;
130 int dom;
131 int t;
132 int ast, ast1;
133 int std, std1;
134
135 lpsort = (INT *)getitem(FORALL_AREA, (opt.nloops + 1) * sizeof(INT));
136 for (i = 1; i <= opt.nloops; ++i)
137 lpsort[i] = i;
138
139 for (i = 1; i <= opt.nloops - 1; ++i) {
140 dom = i;
141 for (j = i + 1; j <= opt.nloops; ++j) {
142 hd = LP_HEAD(lpsort[dom]);
143 hd1 = LP_HEAD(lpsort[j]);
144 std = FG_STDFIRST(hd);
145 ast = STD_AST(std);
146 std1 = FG_STDFIRST(hd1);
147 ast1 = STD_AST(std1);
148 if (is_dominator(hd1, hd))
149 dom = j;
150 }
151 t = lpsort[dom];
152 lpsort[dom] = lpsort[i];
153 lpsort[i] = t;
154 }
155 }
156
157 void
comm_optimize_post(void)158 comm_optimize_post(void)
159 {
160 alloc2ast();
161 comm_optimize_init();
162 flowgraph(); /* build the flowgraph for the function */
163 #if DEBUG
164 if (DBGBIT(35, 1))
165 dump_flowgraph();
166 #endif
167
168 findloop(HLOPT_ALL); /* find the loops */
169
170 rewrite_opfields = 0x3; /* copy opt1 and opt2 fields */
171 flow(); /* do flow analysis on the loops */
172 rewrite_opfields = 0; /* reset */
173
174 #if DEBUG
175 if (DBGBIT(35, 4)) {
176 dump_flowgraph();
177 dump_loops();
178 }
179 #endif
180
181 commopt();
182
183 #if DEBUG
184 if (DBGBIT(59, 1))
185 optsummary();
186 #endif
187 comm_optimize_end();
188 }
189
190 void
comm_optimize_pre(void)191 comm_optimize_pre(void)
192 {
193
194 comm_optimize_init();
195 flowgraph(); /* build the flowgraph for the function */
196 #if DEBUG
197 if (DBGBIT(35, 1))
198 dump_flowgraph();
199 #endif
200
201 findloop(HLOPT_ALL); /* find the loops */
202
203 flow(); /* do flow analysis on the loops */
204
205 #if DEBUG
206 if (DBGBIT(35, 4)) {
207 dump_flowgraph();
208 dump_loops();
209 }
210 #endif
211 selection_sort();
212 fuse_forall(0);
213 comm_optimize_end();
214 }
215
216 void
forall_init(void)217 forall_init(void)
218 {
219 int std;
220 int ast;
221 int parallel_depth;
222 int task_depth;
223
224 init_ftb();
225 init_dtb();
226 init_optsum();
227 init_bnd();
228
229 for (std = STD_NEXT(0); std;) {
230 ast = STD_AST(std);
231 if (A_TYPEG(ast) == A_FORALL) {
232 forall_opt1(ast);
233 }
234 std = STD_NEXT(std);
235 }
236 /* put calls into calls table */
237 parallel_depth = 0;
238 task_depth = 0;
239 for (std = STD_NEXT(0); std;) {
240 ast = STD_AST(std);
241 switch (A_TYPEG(ast)) {
242 case A_FORALL:
243 put_forall_pcalls(std);
244 forall_make_same_idx(std);
245 break;
246 case A_MP_PARALLEL:
247 ++parallel_depth;
248 /*symutl.sc = SC_PRIVATE;*/
249 set_descriptor_sc(SC_PRIVATE);
250 break;
251 case A_MP_ENDPARALLEL:
252 --parallel_depth;
253 if (parallel_depth == 0 && task_depth == 0) {
254 /*symutl.sc = SC_LOCAL;*/
255 set_descriptor_sc(SC_LOCAL);
256 }
257 break;
258 case A_MP_TASK:
259 case A_MP_TASKLOOP:
260 ++task_depth;
261 set_descriptor_sc(SC_PRIVATE);
262 break;
263 case A_MP_ENDTASK:
264 case A_MP_ETASKLOOP:
265 --task_depth;
266 if (parallel_depth == 0 && task_depth == 0) {
267 set_descriptor_sc(SC_LOCAL);
268 }
269 break;
270 default:
271 break;
272 }
273 std = STD_NEXT(std);
274 }
275 }
276
277 static void
init_optsum(void)278 init_optsum(void)
279 {
280 optsum.fuse = 0;
281 optsum.bnd = 0;
282 optsum.alloc = 0;
283 optsum.sect = 0;
284 optsum.copysection = 0;
285 optsum.gatherx = 0;
286 optsum.scatterx = 0;
287 optsum.shift = 0;
288 optsum.start = 0;
289 }
290
291 #if DEBUG
292 static void
optsummary(void)293 optsummary(void)
294 {
295
296 static char msg0[] = "===== comm opt summary for %s ====\n";
297 static char msg1[] = "%5d loops fused\n";
298 static char msg2[] = "%5d pghpf_localize_bnd calls eliminated\n";
299 static char msg3[] = "%5d allocates eliminated\n";
300 static char msg4[] = "%5d pghpf_sect calls eliminated\n";
301 static char msg5[] = "%5d pghpf_comm_copy calls eliminated\n";
302 static char msg6[] = "%5d pghpf_comm_gatherx calls eliminated\n";
303 static char msg7[] = "%5d pghpf_comm_scatterx calls eliminated\n";
304 static char msg8[] = "%5d pghpf_comm_shift calls eliminated\n";
305 static char msg9[] = "%5d pghpf_comm_start calls eliminated\n";
306 FILE *fil;
307
308 fil = gbl.dbgfil;
309 if (fil == NULL)
310 fil = stderr;
311 fprintf(fil, msg0, SYMNAME(gbl.currsub));
312 fprintf(fil, msg1, optsum.fuse);
313 fprintf(fil, msg2, optsum.bnd);
314 fprintf(fil, msg3, optsum.alloc);
315 fprintf(fil, msg4, optsum.sect);
316 fprintf(fil, msg5, optsum.copysection);
317 fprintf(fil, msg6, optsum.gatherx);
318 fprintf(fil, msg7, optsum.scatterx);
319 fprintf(fil, msg8, optsum.shift);
320 fprintf(fil, msg9, optsum.start);
321 }
322 #endif
323
324 static void
commopt(void)325 commopt(void)
326 {
327 int i;
328 int forall;
329 int std;
330 selection_sort();
331 put_forall_fg();
332
333 if (!XBIT(47, 0x2000))
334 eliminate_rt(1);
335 if (!XBIT(47, 0x4000))
336 eliminate_rt(2);
337 if (!XBIT(47, 0x8000))
338 eliminate_rt(3);
339 if (!XBIT(47, 0x10000))
340 eliminate_rt(4);
341 if (!XBIT(47, 0x20000))
342 eliminate_get_scalar();
343 }
344
345 static void
fuse_forall(int nested)346 fuse_forall(int nested)
347 {
348 int i, j, k, l;
349 int std, std1;
350 int forall, forall1;
351 int nd, nd1;
352 int rt, rt1;
353 int rt_std, rt1_std;
354 int hd, hd1;
355 int type, type1;
356 int ii, jj;
357 int expr, expr1;
358 int newexpr1, newrhs1;
359 int asn, asn1;
360 int rhs, rhs1;
361 int lhs, lhs1;
362 int nidx, nidx1;
363 int list, list1, listp;
364 int isptr;
365 int fg, fg1;
366 int number_of_try;
367
368 for (ii = 1; ii <= opt.nloops; ++ii) {
369 i = lpsort[ii];
370 hd = LP_HEAD(i); /*the flow graph node which is the loop head*/
371 fg = LP_FG(i);
372 std = FG_STDFIRST(hd);
373 forall = STD_AST(std);
374 if (LP_FORALL(i)) {
375
376 hd = LP_HEAD(i); /*the flow graph node which is the loop head*/
377 fg = LP_FG(i);
378 std = FG_STDFIRST(hd);
379 forall = STD_AST(std);
380 nd = A_OPT1G(forall);
381 if (FT_NFUSE(nd, nested) >= MAXFUSE)
382 continue;
383 if (FT_FUSED(nd))
384 continue;
385 number_of_try = 0;
386 for (jj = ii + 1; jj <= opt.nloops; ++jj) {
387 j = lpsort[jj];
388 if (LP_PARENT(i) != LP_PARENT(j))
389 continue;
390 if (FT_NFUSE(nd, nested) >= MAXFUSE)
391 break;
392 if (number_of_try > 25)
393 continue;
394 if (LP_FORALL(j)) {
395 /*the flow graph node which is the loop head */
396 hd1 = LP_HEAD(j);
397 fg1 = LP_FG(j);
398 if (!is_dominator(hd, hd1))
399 continue;
400 std1 = FG_STDFIRST(hd1);
401 if (STD_ACCEL(std) != STD_ACCEL(std1))
402 continue;
403 forall1 = STD_AST(std1);
404 nd1 = A_OPT1G(forall1);
405 if (FT_FUSED(nd1))
406 continue;
407 if (!same_forall_bnds(i, j, nested))
408 continue;
409 if (!is_same_idx(std, std1))
410 continue;
411 number_of_try++;
412 if (!is_fusable(i, j, nested))
413 continue;
414
415 FT_FUSELP(nd, nested, FT_NFUSE(nd, nested)) = j;
416 FT_FUSEDSTD(nd, nested, FT_NFUSE(nd, nested)) = std1;
417 FT_NFUSE(nd, nested)++;
418 FT_FUSED(nd1) = 1;
419 FT_HEADER(nd1) = FT_HEADER(nd);
420 FT_FUSED(nd) = 1;
421 optsum.fuse++;
422 }
423 }
424 }
425 }
426 }
427
428 static LOGICAL
is_same_idx(int std,int std1)429 is_same_idx(int std, int std1)
430 {
431 int idx[7], idx1[7];
432 int list, list1, listp;
433 int forall, forall1;
434 int nidx, nidx1;
435 int isptr, isptr1;
436 int i;
437 int nd;
438 int oldast, newast;
439 int newforall, newforall1;
440 int af, st, nc;
441
442 forall = STD_AST(std);
443 list = A_LISTG(forall);
444 nidx = 0;
445 for (listp = list; listp != 0; listp = ASTLI_NEXT(listp)) {
446 idx[nidx] = listp;
447 nidx++;
448 }
449
450 forall1 = STD_AST(std1);
451 list1 = A_LISTG(forall1);
452 nidx1 = 0;
453 for (listp = list1; listp != 0; listp = ASTLI_NEXT(listp)) {
454 idx1[nidx1] = listp;
455 nidx1++;
456 }
457
458 if (nidx != nidx1)
459 return FALSE;
460
461 for (i = 0; i < nidx; i++) {
462 isptr = ASTLI_SPTR(idx[i]);
463 isptr1 = ASTLI_SPTR(idx1[i]);
464 if (isptr != isptr1)
465 return FALSE;
466 }
467 return TRUE;
468 }
469
470 LOGICAL
same_forall_size(int lp1,int lp2,int nested)471 same_forall_size(int lp1, int lp2, int nested)
472 {
473 int idx1[7], idx2[7];
474 int itriple1, itriple2;
475 int lb1, ub1, st1;
476 int lb2, ub2, st2;
477 int std1, std2;
478 int fg1, fg2;
479 int hd1, hd2;
480 int list1, list2, listp;
481 int forall1, forall2;
482 int nidx1, nidx2;
483 int i, k;
484 int asd1, asd2;
485 int ndim1, ndim2;
486 int order2[7];
487 int no;
488 int lhs1, lhs2, newlhs2, l, l2;
489 int sptr1, sptr2;
490 int isptr1, isptr2;
491 int dim1, dim2;
492 int newast, oldast;
493 LOGICAL same = FALSE;
494
495 hd1 = LP_HEAD(lp1); /*the flow graph node which is the loop head*/
496 fg1 = LP_FG(lp1);
497 std1 = FG_STDFIRST(hd1);
498 forall1 = STD_AST(std1);
499 lhs1 = A_DESTG(A_IFSTMTG(forall1));
500 list1 = A_LISTG(forall1);
501 nidx1 = 0;
502 for (listp = list1; listp != 0; listp = ASTLI_NEXT(listp)) {
503 idx1[nidx1] = listp;
504 ++nidx1;
505 }
506
507 hd2 = LP_HEAD(lp2);
508 fg2 = LP_FG(lp2);
509 if (!is_dominator(hd1, hd2))
510 return FALSE;
511 std2 = FG_STDFIRST(hd2);
512 forall2 = STD_AST(std2);
513 lhs2 = A_DESTG(A_IFSTMTG(forall2));
514 list2 = A_LISTG(forall2);
515 nidx2 = 0;
516 for (listp = list2; listp != 0; listp = ASTLI_NEXT(listp)) {
517 idx2[nidx2] = listp;
518 nidx2++;
519 }
520
521 if (nidx1 != nidx2)
522 return FALSE;
523
524 for (i = 0; i < nidx1 - nested; i++) {
525 itriple1 = ASTLI_TRIPLE(idx1[i]);
526 lb1 = A_LBDG(itriple1);
527 ub1 = A_UPBDG(itriple1);
528 st1 = A_STRIDEG(itriple1);
529 if (st1 == 0)
530 st1 = astb.i1;
531 itriple2 = ASTLI_TRIPLE(idx2[i]);
532 lb2 = A_LBDG(itriple2);
533 ub2 = A_UPBDG(itriple2);
534 st2 = A_STRIDEG(itriple2);
535 if (st2 == 0)
536 st2 = astb.i1;
537 same = FALSE;
538
539 /*is_same_size(lb1,lb2, ub1, ub2, st1, st2) */
540
541 if ((lb1 == lb2 && ub1 == ub2 && st1 == st2) ||
542 ((mk_binop(OP_SUB, ub1, lb1, astb.bnd.dtype) ==
543 mk_binop(OP_SUB, ub2, lb2, astb.bnd.dtype)) &&
544 st1 == st2)) {
545 if ((is_avail_expr(lb1, std1, fg1, std2, fg2) &&
546 is_avail_expr(ub1, std1, fg1, std2, fg2) &&
547 is_avail_expr(st1, std1, fg1, std2, fg2)))
548 same = TRUE;
549 }
550 if (!same)
551 return FALSE;
552 }
553
554 if (!same)
555 return FALSE;
556 return TRUE;
557 }
558
559 static LOGICAL
same_forall_bnds(int lp1,int lp2,int nested)560 same_forall_bnds(int lp1, int lp2, int nested)
561 {
562 int idx1[7], idx2[7];
563 int itriple1, itriple2;
564 int lb1, ub1, st1;
565 int lb2, ub2, st2;
566 int std1, std2;
567 int fg1, fg2;
568 int hd1, hd2;
569 int list1, list2, listp;
570 int forall1, forall2;
571 int nidx1, nidx2;
572 int i, k;
573 int asd1, asd2;
574 int ndim1, ndim2;
575 int order2[7];
576 int no;
577 int lhs1, lhs2, newlhs2, l, l2;
578 int sptr1, sptr2;
579 int isptr1, isptr2;
580 int dim1, dim2;
581 int newast, oldast;
582 LOGICAL same;
583
584 hd1 = LP_HEAD(lp1); /*the flow graph node which is the loop head*/
585 fg1 = LP_FG(lp1);
586 std1 = FG_STDFIRST(hd1);
587 forall1 = STD_AST(std1);
588 lhs1 = A_DESTG(A_IFSTMTG(forall1));
589 list1 = A_LISTG(forall1);
590 nidx1 = 0;
591 for (listp = list1; listp != 0; listp = ASTLI_NEXT(listp)) {
592 idx1[nidx1] = listp;
593 ++nidx1;
594 }
595
596 hd2 = LP_HEAD(lp2);
597 fg2 = LP_FG(lp2);
598 if (!is_dominator(hd1, hd2))
599 return FALSE;
600 std2 = FG_STDFIRST(hd2);
601 forall2 = STD_AST(std2);
602 lhs2 = A_DESTG(A_IFSTMTG(forall2));
603 list2 = A_LISTG(forall2);
604 nidx2 = 0;
605 for (listp = list2; listp != 0; listp = ASTLI_NEXT(listp)) {
606 idx2[nidx2] = listp;
607 nidx2++;
608 }
609
610 if (nidx1 != nidx2)
611 return FALSE;
612
613 for (i = 0; i < nidx1 - nested; i++) {
614 itriple1 = ASTLI_TRIPLE(idx1[i]);
615 lb1 = A_LBDG(itriple1);
616 ub1 = A_UPBDG(itriple1);
617 st1 = A_STRIDEG(itriple1);
618 if (st1 == 0)
619 st1 = astb.i1;
620 itriple2 = ASTLI_TRIPLE(idx2[i]);
621 lb2 = A_LBDG(itriple2);
622 ub2 = A_UPBDG(itriple2);
623 st2 = A_STRIDEG(itriple2);
624 if (st2 == 0)
625 st2 = astb.i1;
626 same = FALSE;
627 if (lb1 == lb2 && ub1 == ub2 && st1 == st2) {
628 if ((is_avail_expr(lb1, std1, fg1, std2, fg2) &&
629 is_avail_expr(ub1, std1, fg1, std2, fg2) &&
630 is_avail_expr(st1, std1, fg1, std2, fg2)))
631 same = TRUE;
632 }
633 if (!same)
634 return FALSE;
635 }
636
637 if (is_duplicate(lhs1, list1))
638 return FALSE;
639 if (is_duplicate(lhs2, list2))
640 return FALSE;
641
642 ast_visit(1, 1);
643 for (i = 0; i < nidx1; i++) {
644 isptr1 = ASTLI_SPTR(idx1[i]);
645 newast = mk_id(isptr1);
646 isptr2 = ASTLI_SPTR(idx2[i]);
647 oldast = mk_id(isptr2);
648 ast_replace(oldast, newast);
649 }
650 newlhs2 = ast_rewrite(lhs2);
651 ast_unvisit();
652 if (!is_ordered(lhs1, newlhs2, list1, order2, &no))
653 return FALSE;
654
655 /* has to have same distribution */
656 l = left_subscript_ast(lhs1);
657 sptr1 = left_array_symbol(lhs1);
658 l2 = left_subscript_ast(newlhs2);
659 sptr2 = left_array_symbol(newlhs2);
660
661 assert(A_TYPEG(l) == A_SUBSCR, "same_forall_bnds: not a subscript", l, 4);
662 asd1 = A_ASDG(l);
663 ndim1 = ASD_NDIM(asd1);
664 assert(A_TYPEG(l2) == A_SUBSCR, "same_forall_bnds: not a subscript", l, 4);
665 asd2 = A_ASDG(l2);
666 ndim2 = ASD_NDIM(asd2);
667
668 for (i = 0; i < nidx1; i++) {
669 isptr1 = ASTLI_SPTR(idx1[i]);
670 dim1 = -1;
671 dim2 = -1;
672 for (k = 0; k < ndim1; k++)
673 if (is_name_in_expr(ASD_SUBS(asd1, k), isptr1))
674 dim1 = k;
675 for (k = 0; k < ndim2; k++)
676 if (is_name_in_expr(ASD_SUBS(asd2, k), isptr1))
677 dim2 = k;
678 if (dim1 == -1 || dim2 == -1) {
679 /* index does not appear in any distributed dimensions;
680 * until I figure out just what this is doing, be safe */
681 return FALSE;
682 }
683 if (!is_same_align_shape(sptr1, dim1, sptr2, dim2))
684 return FALSE;
685 if (ASD_SUBS(asd1, dim1) != ASD_SUBS(asd2, dim2))
686 return FALSE;
687 }
688
689 return TRUE;
690 }
691
692 static struct {
693 int otherlhs;
694 int src;
695 int sink;
696 int list;
697 int after;
698 int order;
699 int forcomm;
700 } conf;
701
702 /* Return TRUE if there is conflict for loop fusion.
703 * order is just for swap for src and sink at Conflict_.
704 * forcomm is that there is Isno_comm TRUE, no need to test iff forcomm set. */
705 static LOGICAL
Conflict(int list,int src,int sink,int after,int order,int forcomm)706 Conflict(int list, int src, int sink, int after, int order, int forcomm)
707 {
708 LOGICAL result = FALSE;
709
710 conf.src = src;
711 conf.sink = sink;
712 conf.list = list;
713 conf.after = after;
714 conf.order = order;
715 conf.forcomm = forcomm;
716 return Conflict_(sink);
717 }
718
719 /* This routine will return TRUE iff,
720 * lhs array appears at sink and their subscripts are different.
721 */
722 static LOGICAL
Conflict_(int sink)723 Conflict_(int sink)
724 {
725 LOGICAL l, r;
726 int argt;
727 int nargs;
728 int asd, asd_lhs;
729 int numdim;
730 int i;
731 int result;
732 int src, sptr, sptr1;
733
734 if (sink == 0)
735 return FALSE;
736 switch (A_TYPEG(sink)) {
737 case A_CMPLXC:
738 case A_CNST:
739 case A_SUBSTR:
740 return FALSE;
741 case A_MEM:
742 if (Conflict_(A_PARENTG(sink))) {
743 /* see if this 'member' appears in the 'conf.src' tree */
744 int a, p, member;
745 a = conf.src;
746 member = A_SPTRG(A_MEMG(sink));
747 while (1) {
748 switch (A_TYPEG(a)) {
749 case A_SUBSCR:
750 a = A_LOPG(a);
751 break;
752 case A_ID:
753 return TRUE;
754 break;
755 case A_MEM:
756 p = A_PARENTG(a);
757 if (DDTG(A_DTYPEG(p)) == ENCLDTYPEG(member)) {
758 /* same member? different? */
759 if (A_MEMG(a) == A_MEMG(sink))
760 return TRUE;
761 return FALSE;
762 }
763 a = p;
764 break;
765 default:
766 interr("Conflict_: unexpected AST in member tree", a, 3);
767 return FALSE;
768 }
769 }
770 }
771 return FALSE;
772 case A_ID:
773 if (A_SPTRG(sink) == sym_of_ast(conf.src))
774 return TRUE;
775 else
776 return FALSE;
777 case A_BINOP:
778 l = Conflict_(A_LOPG(sink));
779 if (l)
780 return TRUE;
781 r = Conflict_(A_ROPG(sink));
782 if (r)
783 return TRUE;
784 return FALSE;
785 case A_UNOP:
786 return Conflict_(A_LOPG(sink));
787 case A_PAREN:
788 case A_CONV:
789 return Conflict_(A_LOPG(sink));
790 case A_SUBSCR:
791 if (Conflict_(A_LOPG(sink))) {
792 if (conf.order)
793 result = dd_array_conflict(conf.list, sink, conf.src, conf.after);
794 else
795 result = dd_array_conflict(conf.list, conf.src, sink, conf.after);
796 return result;
797 }
798 return FALSE;
799 case A_TRIPLE:
800 l = Conflict_(A_LBDG(sink));
801 if (l)
802 return TRUE;
803 r = Conflict_(A_UPBDG(sink));
804 if (r)
805 return TRUE;
806 return Conflict_(A_STRIDEG(sink));
807 case A_INTR:
808 case A_FUNC:
809 nargs = A_ARGCNTG(sink);
810 argt = A_ARGSG(sink);
811 /* dd_array_conflict does not work if the constructed section is
812 * out of bound. a(10);forall(i=1:10) b(i) = cshift(a(i+2))
813 * here a(3:12) is not in bounds.
814 */
815 if (A_OPTYPEG(sink) == I_CSHIFT || A_OPTYPEG(sink) == I_EOSHIFT) {
816 src = ARGT_ARG(argt, 0);
817 sptr = sym_of_ast(src);
818 sptr1 = sym_of_ast(conf.src);
819 if (sptr == sptr1)
820 return TRUE;
821 }
822 for (i = 0; i < nargs; ++i) {
823 l = Conflict_(ARGT_ARG(argt, i));
824 if (l)
825 return TRUE;
826 }
827 return FALSE;
828 case A_LABEL:
829 default:
830 interr("Conflict_: unexpected ast", sink, 2);
831 return FALSE;
832 }
833 }
834
835 static LOGICAL
_olap_conflict(int expr,int expr1)836 _olap_conflict(int expr, int expr1)
837 {
838 LOGICAL l, r;
839 int argt;
840 int nargs;
841 int asd, asd_lhs;
842 int numdim;
843 int i;
844 int result;
845
846 if (expr == 0)
847 return FALSE;
848 switch (A_TYPEG(expr)) {
849 case A_CMPLXC:
850 case A_CNST:
851 case A_ID:
852 case A_SUBSTR:
853 case A_MEM:
854 return FALSE;
855 case A_BINOP:
856 l = _olap_conflict(A_LOPG(expr), expr1);
857 if (l)
858 return TRUE;
859 r = _olap_conflict(A_ROPG(expr), expr1);
860 if (r)
861 return TRUE;
862 return FALSE;
863 case A_UNOP:
864 return _olap_conflict(A_LOPG(expr), expr1);
865 case A_PAREN:
866 case A_CONV:
867 return _olap_conflict(A_LOPG(expr), expr1);
868 case A_SUBSCR:
869 return FALSE;
870 case A_TRIPLE:
871 l = _olap_conflict(A_LBDG(expr), expr1);
872 if (l)
873 return TRUE;
874 r = _olap_conflict(A_UPBDG(expr), expr1);
875 if (r)
876 return TRUE;
877 return _olap_conflict(A_STRIDEG(expr), expr1);
878 case A_INTR:
879 case A_FUNC:
880 nargs = A_ARGCNTG(expr);
881 argt = A_ARGSG(expr);
882 for (i = 0; i < nargs; ++i) {
883 l = _olap_conflict(ARGT_ARG(argt, i), expr1);
884 if (l)
885 return TRUE;
886 }
887 if (A_OPTYPEG(expr) == I_CSHIFT || A_OPTYPEG(expr) == I_EOSHIFT)
888 return is_shift_conflict(expr, argt, expr1);
889 return FALSE;
890 case A_LABEL:
891 default:
892 interr("_olap_conflict: unexpected ast", expr, 2);
893 return FALSE;
894 }
895 }
896
897 static LOGICAL
is_olap_conflict(int forall,int forall1)898 is_olap_conflict(int forall, int forall1)
899 {
900 int expr, rhs;
901 int expr1, rhs1;
902
903 expr = A_IFEXPRG(forall);
904 rhs = A_SRCG(A_IFSTMTG(forall));
905 expr1 = A_IFEXPRG(forall1);
906 rhs1 = A_SRCG(A_IFSTMTG(forall1));
907
908 if (_olap_conflict(expr, expr1))
909 return TRUE;
910 if (_olap_conflict(expr, rhs1))
911 return TRUE;
912 if (_olap_conflict(expr1, expr))
913 return TRUE;
914 if (_olap_conflict(expr1, rhs))
915 return TRUE;
916
917 if (_olap_conflict(rhs, expr1))
918 return TRUE;
919 if (_olap_conflict(rhs, rhs1))
920 return TRUE;
921 if (_olap_conflict(rhs1, expr))
922 return TRUE;
923 if (_olap_conflict(rhs1, rhs))
924 return TRUE;
925 return FALSE;
926 }
927
928 /**
929 \brief ...
930
931 <pre>
932 S1: A(i) = ...
933 S2: ... = ... A(f(i)) ...
934 </pre>
935
936 The loops cannot be fused if either of the following calls to
937 dd_array_conflict() return TRUE:
938 <pre>
939 dd_array_conflict(tripletList, A(f(i)), A(i), FALSE)
940 dd_array_conflict(tripletList, A(i), A(f(i)), TRUE)
941 </pre>
942
943 Alternatively, the statements within the original loops may be of the
944 following form:
945
946 <pre>
947 S1: ... = ... A(f(i))
948 S2: A(i) = ...
949 </pre>
950
951 The loops cannot be fused if the following call to dd_array_conflict()
952 returns TRUE.
953 <pre>
954 dd_array_conflict(tripletList, A(i), A(f(i)), FALSE)
955 </pre>
956
957 The back end recognizes loops with 0 RHS and transforms these into calls
958 to _mzero, so don't fuse assignments with just 0 on the RHS.
959 */
960 static LOGICAL
is_fusable(int lp,int lp1,int nested)961 is_fusable(int lp, int lp1, int nested)
962 {
963
964 int i, j, k, l;
965 int std, std1;
966 int forall, forall1;
967 int nd, nd1;
968 int rt, rt1;
969 int rt_std, rt1_std;
970 int hd, hd1;
971 int type, type1;
972 int ii, jj;
973 int expr, expr1;
974 int newexpr1, newrhs1, newlhs1;
975 int newexpr, newrhs;
976 int asn, asn1;
977 int rhs, rhs1;
978 int rhssptr;
979 int lhs, lhs_array, lhs1, lhs1_array;
980 int lhs_sptr, lhs_sptr1;
981 int nidx, nidx1;
982 int list, list1, listp;
983 int isptr;
984 int fg, fg1;
985 LOGICAL fuseable;
986 int fuselp;
987 int oldast, newast;
988 int triple;
989 int idx[7];
990 int cnt;
991 LOGICAL fuse_cnst_rhs;
992
993 if (XBIT(47, 0x80000000))
994 return FALSE;
995
996 hd = LP_HEAD(lp); /*the flow graph node which is the loop head*/
997 fg = LP_FG(lp);
998 std = FG_STDFIRST(hd);
999 forall = STD_AST(std);
1000 list = A_LISTG(forall);
1001 /*
1002 * NOTES:
1003 * XBIT(47,0x4000000) - fuse even if constant RHS
1004 * XBIT(8,0x8000000) - inhibit mem idioms
1005 */
1006 fuse_cnst_rhs = FALSE;
1007 if (XBIT(47, 0x4000000))
1008 fuse_cnst_rhs = TRUE;
1009 else if (XBIT(8, 0x8000000))
1010 fuse_cnst_rhs = TRUE;
1011 else {
1012 }
1013
1014 expr = A_IFEXPRG(forall);
1015 asn = A_IFSTMTG(forall);
1016 rhs = A_SRCG(asn);
1017 lhs = A_DESTG(asn);
1018 for (lhs_array = lhs; A_TYPEG(lhs_array) == A_MEM;
1019 lhs_array = A_PARENTG(lhs_array))
1020 ;
1021 lhs_sptr = sym_of_ast(lhs);
1022 nd = A_OPT1G(forall);
1023
1024 if (LP_PARENT(lp) != LP_PARENT(lp1))
1025 return FALSE;
1026
1027 if (A_TYPEG(rhs) == A_CONV)
1028 rhs = A_LOPG(rhs);
1029 if (flg.opt >= 2 && !fuse_cnst_rhs && A_TYPEG(rhs) == A_CNST) {
1030 /*
1031 * prefer calling a tuned mzero/mem rather fusing.
1032 */
1033 return FALSE;
1034 }
1035 /*the flow graph node which is the loop head */
1036 hd1 = LP_HEAD(lp1);
1037 fg1 = LP_FG(lp1);
1038 if (!is_dominator(hd, hd1))
1039 return FALSE;
1040 if (smp_conflict(hd, hd1))
1041 return FALSE;
1042 std1 = FG_STDFIRST(hd1);
1043 forall1 = STD_AST(std1);
1044 nd1 = A_OPT1G(forall1);
1045 list1 = A_LISTG(forall1);
1046 expr1 = A_IFEXPRG(forall1);
1047 asn1 = A_IFSTMTG(forall1);
1048 rhs1 = A_SRCG(asn1);
1049 lhs1 = A_DESTG(asn1);
1050 for (lhs1_array = lhs1; A_TYPEG(lhs1_array) == A_MEM;
1051 lhs1_array = A_PARENTG(lhs1_array))
1052 ;
1053 lhs_sptr1 = sym_of_ast(lhs1);
1054 if (A_TYPEG(rhs1) == A_CONV)
1055 rhs1 = A_LOPG(rhs1);
1056 if (flg.opt >= 2 && !fuse_cnst_rhs && A_TYPEG(rhs1) == A_CNST) {
1057 /*
1058 * prefer calling a tuned mzero/mem rather fusing.
1059 */
1060 return FALSE;
1061 }
1062
1063 /* forall1 values should not changed between std and std1 */
1064 /* Except that if they are in block, nothing change them */
1065 if (!is_in_block(std, std1)) {
1066 cnt = 0;
1067 for (listp = list1; listp != 0; listp = ASTLI_NEXT(listp)) {
1068 isptr = ASTLI_SPTR(listp);
1069 idx[cnt] = mk_id(isptr);
1070 cnt++;
1071 }
1072 if (!is_avail_expr_with_list(expr1, std, fg, std1, fg1, idx, cnt) ||
1073 !is_avail_expr_with_list(rhs1, std, fg, std1, fg1, idx, cnt) ||
1074 !is_avail_expr_with_list(lhs1, std, fg, std1, fg1, idx, cnt))
1075 return FALSE;
1076 }
1077
1078 /* dependency */
1079 if (subscr_dependent(rhs1, lhs, std1, std))
1080 return FALSE;
1081 if (rhs1 && A_LOPG(rhs1) && A_TYPEG(rhs1) == A_SUBSCR &&
1082 A_TYPEG(A_LOPG(rhs1)) == A_MEM)
1083 if (subscr_dependent(A_LOPG(rhs1), lhs, std1, std))
1084 return FALSE;
1085 if (subscr_dependent(expr1, lhs, std1, std))
1086 return FALSE;
1087 if (expr1 && A_LOPG(expr1) && A_TYPEG(expr1) == A_SUBSCR &&
1088 A_TYPEG(A_LOPG(expr1)) == A_MEM)
1089 if (subscr_dependent(A_LOPG(expr1), lhs, std1, std))
1090 return FALSE;
1091 if (subscr_dependent(lhs1, lhs, std1, std))
1092 return FALSE;
1093 if (lhs1 && A_LOPG(lhs1) && A_TYPEG(lhs1) == A_SUBSCR &&
1094 A_TYPEG(A_LOPG(lhs1)) == A_MEM)
1095 if (subscr_dependent(A_LOPG(lhs1), lhs, std1, std))
1096 return FALSE;
1097 if (rhs && lhs1 && A_TYPEG(rhs) == A_SUBSCR && A_LOPG(lhs1) &&
1098 A_TYPEG(lhs1) == A_SUBSCR)
1099 if (subscr_dependent(rhs, A_LOPG(lhs1), std1, std))
1100 return FALSE;
1101
1102 /* don't let LHSs appears at FORALL list */
1103 /* forall(i=x(1):x(2)) x(i) = */
1104 for (listp = list1; listp != 0; listp = ASTLI_NEXT(listp)) {
1105 triple = ASTLI_TRIPLE(listp);
1106 if (A_TYPEG(lhs_array) == A_SUBSCR &&
1107 contains_ast(triple, A_LOPG(lhs_array)))
1108 return FALSE;
1109 if (A_TYPEG(lhs1_array) == A_SUBSCR &&
1110 contains_ast(triple, A_LOPG(lhs1_array)))
1111 return FALSE;
1112 }
1113
1114 /* because of using mask pghpf_vsub_gather */
1115 /* this can be optimized, iff expr1 and rhs1
1116 * does not have indirections */
1117 if (A_TYPEG(lhs_array) == A_SUBSCR && contains_ast(expr1, A_LOPG(lhs_array)))
1118 return FALSE;
1119
1120 if (expr1)
1121 if (Conflict(A_LISTG(forall), lhs, expr1, FALSE, 1, 0))
1122 return FALSE;
1123 if (Conflict(A_LISTG(forall), lhs, rhs1, FALSE, 1, 0))
1124 return FALSE;
1125
1126 /* for communication */
1127 conf.otherlhs = lhs1;
1128 if (expr1)
1129 if (Conflict(A_LISTG(forall), lhs, expr1, TRUE, 0, 1))
1130 return FALSE;
1131 if (Conflict(A_LISTG(forall), lhs, rhs1, TRUE, 0, 1))
1132 return FALSE;
1133
1134 if (expr)
1135 if (Conflict(A_LISTG(forall), lhs1, expr, FALSE, 0, 0))
1136 return FALSE;
1137 if (Conflict(A_LISTG(forall), lhs1, rhs, FALSE, 0, 0))
1138 return FALSE;
1139
1140 if (is_branch_between(lp, lp1))
1141 return FALSE;
1142 if (!is_in_block(std, std1))
1143 if (A_TYPEG(lhs1_array) == A_SUBSCR &&
1144 is_contains_ast_between(lp, lp1, A_LOPG(lhs1_array)))
1145 return FALSE;
1146
1147 if (is_olap_conflict(forall, forall1))
1148 return FALSE;
1149 fuseable = TRUE;
1150 for (k = 0; k < FT_NFUSE(nd, nested); k++) {
1151 fuselp = FT_FUSELP(nd, nested, k);
1152 if (!is_fusable(fuselp, lp1, nested))
1153 fuseable = FALSE;
1154 }
1155 if (!fuseable)
1156 return FALSE;
1157
1158 return TRUE;
1159 }
1160
1161 /*
1162 * Determine if two flowgraph nodes conflict with respect to smp
1163 * execution given that fg1 dominates fg2.
1164 */
1165 static LOGICAL
smp_conflict(int fg1,int fg2)1166 smp_conflict(int fg1, int fg2)
1167 {
1168 int fg;
1169 int std;
1170 int ast;
1171
1172 if (!flg.smp)
1173 return FALSE;
1174 if (fg1 == fg2)
1175 /* no conflict if the same node */
1176 return FALSE;
1177 if ((FG_PAR(fg1) && !FG_PAR(fg2)) || (!FG_PAR(fg1) && FG_PAR(fg2)))
1178 /* fg1 is serial & fg2 is parallel, or vice versa */
1179 return TRUE;
1180 if (FG_PAR(fg2)) {
1181 /* both are within a parallel region; determine if it's the
1182 * same region.
1183 */
1184 for (fg = fg1; fg != fg2; fg = FG_LNEXT(fg)) {
1185 rdilts(fg);
1186 for (std = FG_STDFIRST(fg); std; std = STD_NEXT(std)) {
1187 ast = STD_AST(std);
1188 if (A_TYPEG(ast) == A_MP_ENDPARALLEL) {
1189 /* endparallel was seen in a node before fg2 - it must
1190 * be a different parallel region.
1191 */
1192 wrilts(fg);
1193 return TRUE;
1194 }
1195 }
1196 wrilts(fg);
1197 }
1198 }
1199 if (FG_CS(fg1) ^ FG_CS(fg2))
1200 /* fg1 is in a critical section & fg2 is not, or vice versa */
1201 return TRUE;
1202 if (FG_CS(fg2)) {
1203 /* both are within a critical section; determine if it's the
1204 * same critical section.
1205 */
1206 for (fg = fg1; fg != fg2; fg = FG_LNEXT(fg)) {
1207 rdilts(fg);
1208 for (std = FG_STDFIRST(fg); std; std = STD_NEXT(std)) {
1209 ast = STD_AST(std);
1210 if (A_TYPEG(ast) == A_MP_ENDCRITICAL) {
1211 wrilts(fg);
1212 return TRUE;
1213 }
1214 }
1215 wrilts(fg);
1216 }
1217 }
1218 if (FG_PARSECT(fg1) ^ FG_PARSECT(fg2))
1219 /* fg1 is in a parallel section (master, single, sections) & fg2 is
1220 * not, or vice versa
1221 */
1222 return TRUE;
1223 if (FG_PARSECT(fg2)) {
1224 /* both are within a parallel section; determine if it's the
1225 * same parallel section.
1226 */
1227 for (fg = fg1; fg != fg2; fg = FG_LNEXT(fg)) {
1228 rdilts(fg);
1229 for (std = FG_STDFIRST(fg); std; std = STD_NEXT(std)) {
1230 ast = STD_AST(std);
1231 switch (A_TYPEG(ast)) {
1232 case A_MP_ENDMASTER:
1233 case A_MP_ENDSINGLE:
1234 case A_MP_ENDSECTIONS:
1235 wrilts(fg);
1236 return TRUE;
1237 }
1238 }
1239 wrilts(fg);
1240 }
1241 }
1242
1243 return FALSE;
1244 }
1245
1246 static LOGICAL
is_in_block(int std,int std1)1247 is_in_block(int std, int std1)
1248 {
1249 int forall, forall1, forallh;
1250 int header, nextstd, fusedstd;
1251 int nd, k;
1252
1253 forall = STD_AST(std);
1254 forall1 = STD_AST(std1);
1255 nd = A_OPT1G(forall);
1256 header = FT_HEADER(nd);
1257 forallh = STD_AST(header);
1258 assert(A_TYPEG(forallh) == A_FORALL, "is_in_block: expecting forall", forallh,
1259 3);
1260 nd = A_OPT1G(forallh);
1261 assert(nd, "is_in_block: nd is 0", forallh, 3);
1262 for (k = 0; k < FT_NFUSE(nd, 0); k++) {
1263 fusedstd = FT_FUSEDSTD(nd, 0, k);
1264 nextstd = STD_NEXT(header);
1265 while (A_TYPEG(STD_AST(nextstd)) == A_CONTINUE)
1266 nextstd = STD_NEXT(nextstd);
1267 header = nextstd;
1268 if (nextstd == fusedstd)
1269 continue;
1270 return FALSE;
1271 }
1272 nextstd = STD_NEXT(header);
1273 while (A_TYPEG(STD_AST(nextstd)) == A_CONTINUE)
1274 nextstd = STD_NEXT(nextstd);
1275 return (nextstd == std1);
1276 }
1277
1278 static LOGICAL
is_branch_between(int lp,int lp1)1279 is_branch_between(int lp, int lp1)
1280 {
1281 int ast, std;
1282 int hd, fg;
1283 int hd1, fg1;
1284 int type;
1285
1286 hd = LP_HEAD(lp);
1287 fg = LP_FG(lp);
1288 hd1 = LP_HEAD(lp1);
1289 fg1 = LP_FG(lp1);
1290
1291 if (lp == lp1)
1292 return FALSE;
1293
1294 while (TRUE) {
1295 fg = FG_LNEXT(fg);
1296 if (fg == fg1)
1297 return FALSE;
1298 rdilts(fg);
1299 for (std = FG_STDFIRST(fg); std; std = STD_NEXT(std)) {
1300 ast = STD_AST(std);
1301 type = A_TYPEG(ast);
1302 if (type != A_FORALL && type != A_DO && type != A_ENDDO) {
1303 if (STD_BR(std)) {
1304 wrilts(fg);
1305 return TRUE;
1306 }
1307 }
1308 }
1309 wrilts(fg);
1310 }
1311 }
1312
1313 static LOGICAL
is_contains_ast_between(int lp,int lp1,int a)1314 is_contains_ast_between(int lp, int lp1, int a)
1315 {
1316 int ast, std;
1317 int hd, fg;
1318 int hd1, fg1;
1319 int type;
1320
1321 hd = LP_HEAD(lp);
1322 fg = LP_FG(lp);
1323 hd1 = LP_HEAD(lp1);
1324 fg1 = LP_FG(lp1);
1325
1326 while (TRUE) {
1327 fg = FG_LNEXT(fg);
1328 if (fg == fg1)
1329 return FALSE;
1330 rdilts(fg);
1331 for (std = FG_STDFIRST(fg); std; std = STD_NEXT(std)) {
1332 ast = STD_AST(std);
1333 if (contains_ast(ast, a)) {
1334 wrilts(fg);
1335 return TRUE;
1336 }
1337 }
1338 wrilts(fg);
1339 }
1340 }
1341
1342 /* This routine is same as is_avail_expr except that
1343 * it will look at the cnt number of variables given by idx.
1344 * idx has varaible A_ID ast. it will decompose_expression
1345 * and eliminate idx variable from list.
1346 * it helps forall triplet variables ignored.
1347 */
1348 static LOGICAL
is_avail_expr_with_list(int expr,int std,int fg,int std1,int fg1,int idx[],int cnt)1349 is_avail_expr_with_list(int expr, int std, int fg, int std1, int fg1, int idx[],
1350 int cnt)
1351 {
1352 int lst[100], lst1[100];
1353 int size, nvar;
1354 int ele;
1355 int found;
1356 int i, j;
1357 int nvar1;
1358
1359 if (!expr)
1360 return TRUE;
1361 size = 100;
1362 nvar = 0;
1363
1364 decompose_expression(expr, lst, size, &nvar, NULL);
1365 if (nvar > size)
1366 return FALSE;
1367
1368 /* eliminate duplicate variables from the lst */
1369 nvar1 = 0;
1370 for (i = 0; i < nvar; i++) {
1371 found = 0;
1372 for (j = 0; j < nvar1; j++)
1373 if (lst[i] == lst1[j])
1374 found = 1;
1375 if (found)
1376 continue;
1377 lst1[nvar1] = lst[i];
1378 nvar1++;
1379 }
1380
1381 for (i = 0; i < nvar1; i++) {
1382 ele = lst1[i];
1383 found = 0;
1384 for (j = 0; j < cnt; j++)
1385 if (idx[j] == ele)
1386 found = 1;
1387 if (found)
1388 continue;
1389 if (!is_avail_expr(ele, std, fg, std1, fg1))
1390 return FALSE;
1391 }
1392 return TRUE;
1393 }
1394
1395 static void
put_forall_fg(void)1396 put_forall_fg(void)
1397 {
1398 int i, ii;
1399 int std, hd;
1400 int forall;
1401 int nd;
1402 for (ii = 1; ii <= opt.nloops; ++ii) {
1403 i = lpsort[ii];
1404 if (LP_FORALL(i)) {
1405 hd = LP_HEAD(i); /*the flow graph node which is the loop head*/
1406 std = FG_STDFIRST(hd);
1407 forall = STD_AST(std);
1408 nd = A_OPT1G(forall);
1409 FT_FG(nd) = hd;
1410 }
1411 }
1412 }
1413
1414 static void
eliminate_rt(int type0)1415 eliminate_rt(int type0)
1416 {
1417 int i, j, k, l;
1418 int std, std1;
1419 int forall, forall1;
1420 int nd, nd1;
1421 int rt, rt1;
1422 int rt_std, rt1_std;
1423 int hd, hd1;
1424 int type, type1;
1425 int ii, jj;
1426 int header, header1;
1427 int header_forall, header_forall1;
1428 int header_fg, header_fg1;
1429 LITEMF *list_rt, *list_rt1;
1430
1431 for (ii = 1; ii <= opt.nloops; ++ii) {
1432 i = lpsort[ii];
1433 if (LP_FORALL(i)) {
1434 hd = LP_HEAD(i); /*the flow graph node which is the loop head*/
1435 std = FG_STDFIRST(hd);
1436 forall = STD_AST(std);
1437 nd = A_OPT1G(forall);
1438 if (FT_NRT(nd) == 0)
1439 continue;
1440 for (jj = ii; jj <= opt.nloops; ++jj) {
1441 j = lpsort[jj];
1442 if (LP_FORALL(j)) {
1443 /*the flow graph node which is the loop head */
1444 hd1 = LP_HEAD(j);
1445 if (!is_dominator(hd, hd1))
1446 continue;
1447 std1 = FG_STDFIRST(hd1);
1448 forall1 = STD_AST(std1);
1449 nd1 = A_OPT1G(forall1);
1450 if (FT_NRT(nd1) == 0)
1451 continue;
1452
1453 /* when foralls are fused,
1454 * first header should dominate the second header
1455 */
1456 header = FT_HEADER(nd);
1457 header1 = FT_HEADER(nd1);
1458 header_forall = STD_AST(header);
1459 header_forall1 = STD_AST(header1);
1460 assert(A_TYPEG(header_forall) == A_FORALL,
1461 "eliminate_rt:expecting forall", header_forall, 2);
1462 assert(A_TYPEG(header_forall1) == A_FORALL,
1463 "eliminate_rt:expecting forall1", header_forall1, 2);
1464 header_fg = FT_FG(A_OPT1G(header_forall));
1465 header_fg1 = FT_FG(A_OPT1G(header_forall1));
1466 if (header != header1)
1467 if (!is_dominator(header_fg, header_fg1))
1468 continue;
1469
1470 list_rt = FT_RTL(nd);
1471 for (k = 0; k < FT_NRT(nd); k++) {
1472 rt_std = list_rt->item;
1473 list_rt = list_rt->next;
1474 rt = STD_AST(rt_std);
1475 if (STD_DELETE(rt_std))
1476 continue;
1477 list_rt1 = FT_RTL(nd1);
1478 for (l = 0; l < FT_NRT(nd1); l++) {
1479 rt1_std = list_rt1->item;
1480 list_rt1 = list_rt1->next;
1481
1482 rt1 = STD_AST(rt1_std);
1483 type = A_TYPEG(rt);
1484 if (type == A_ASN)
1485 type = A_TYPEG(A_SRCG(rt));
1486 type1 = A_TYPEG(rt1);
1487 if (type1 == A_ASN)
1488 type1 = A_TYPEG(A_SRCG(rt1));
1489 if (STD_DELETE(rt1_std))
1490 continue;
1491 if (rt == rt1)
1492 continue;
1493 if (type != type1)
1494 continue;
1495 if (i == j)
1496 if (k > l)
1497 continue;
1498 if (type == A_HCYCLICLP)
1499 type = A_HLOCALIZEBNDS;
1500 /* to share alloc after sharing comm_start
1501 * since comm_start can change alloc
1502 */
1503
1504 if (!independent_commtype(type, type0))
1505 continue;
1506
1507 /* all run-tme calls require some freeing
1508 * except localize bounds
1509 * a = b
1510 * if(i.eq.0) then
1511 * a =b
1512 * endif
1513 */
1514 /* You need post_dominator here for much better
1515 * optimization
1516 */
1517 if (type != A_HLOCALIZEBNDS && type != A_HCYCLICLP &&
1518 is_branch_between(i, j))
1519 continue;
1520
1521 switch (type) {
1522 case A_HLOCALIZEBNDS: /* type0==4 */
1523 case A_HCYCLICLP: /* type0==4 */
1524 eliminate_bnds(i, j, rt_std, rt1_std);
1525 break;
1526 case A_HALLOBNDS: /* type0==3 */
1527 eliminate_alloc(i, j, rt_std, rt1_std);
1528 break;
1529 case A_HSECT: /* type0==4 */
1530 eliminate_sect(i, j, rt_std, rt1_std);
1531 break;
1532 case A_HCOPYSECT: /* type0==1 */
1533 eliminate_copy(i, j, rt_std, rt1_std);
1534 break;
1535 case A_HGATHER: /* type0==1 */
1536 case A_HSCATTER: /* type0==1 */
1537 eliminate_gather(i, j, rt_std, rt1_std);
1538 break;
1539 case A_HOVLPSHIFT: /* type0==1 */
1540 eliminate_shift(i, j, rt_std, rt1_std);
1541 break;
1542 case A_HCSTART: /* type0==2 */
1543 eliminate_start(i, j, rt_std, rt1_std);
1544 break;
1545 default:
1546 break;
1547 }
1548 }
1549 }
1550 }
1551 }
1552 }
1553 }
1554 }
1555
1556 /* This routine is used by eliminate_rt.
1557 * It is designed to eliminate independent rt at the same
1558 * call of eliminate_rt to speed up compilation.
1559 */
1560 static LOGICAL
independent_commtype(int type,int type0)1561 independent_commtype(int type, int type0)
1562 {
1563 if (type0 == 1) {
1564 if (type == A_HGATHER || type == A_HSCATTER || type == A_HCOPYSECT ||
1565 type == A_HOVLPSHIFT)
1566 return TRUE;
1567 else
1568 return FALSE;
1569 } else if (type0 == 2) {
1570 if (type == A_HCSTART)
1571 return TRUE;
1572 else
1573 return FALSE;
1574 } else if (type0 == 3) {
1575 if (type == A_HALLOBNDS)
1576 return TRUE;
1577 else
1578 return FALSE;
1579 } else if (type0 == 4) {
1580 if (type == A_HSECT || type == A_HLOCALIZEBNDS)
1581 return TRUE;
1582 else
1583 return FALSE;
1584 } else
1585 assert(0, "merged_commtype:wrong-type", type0, 2);
1586 return FALSE;
1587 }
1588
1589 static LOGICAL
is_same_descr_for_bnds(int rt,int rt1)1590 is_same_descr_for_bnds(int rt, int rt1)
1591 {
1592 int lhs, lhs1;
1593 int nd, nd1;
1594 int sptr, sptr1;
1595 int dim, dim1;
1596
1597 nd = A_OPT1G(rt);
1598 nd1 = A_OPT1G(rt1);
1599
1600 lhs = FT_BND_LHS(nd);
1601 sptr = left_array_symbol(lhs);
1602 dim = A_DIMG(rt);
1603 dim = get_int_cval(A_SPTRG(A_ALIASG(dim))) - 1;
1604
1605 lhs1 = FT_BND_LHS(nd1);
1606 sptr1 = left_array_symbol(lhs1);
1607 dim1 = A_DIMG(rt1);
1608 dim1 = get_int_cval(A_SPTRG(A_ALIASG(dim1))) - 1;
1609 if (POINTERG(sptr) || POINTERG(sptr1))
1610 return FALSE;
1611 return is_same_align_shape(sptr, dim, sptr1, dim1);
1612 }
1613
1614 static LOGICAL
is_same_align_shape(int sptr,int dim,int sptr1,int dim1)1615 is_same_align_shape(int sptr, int dim, int sptr1, int dim1)
1616 {
1617 return TRUE;
1618 }
1619
1620 static void
eliminate_bnds(int lp,int lp1,int rt_std,int rt1_std)1621 eliminate_bnds(int lp, int lp1, int rt_std, int rt1_std)
1622 {
1623 int itriple, itriple1;
1624 int lb, ub, st;
1625 int lb1, ub1, st1;
1626 int std, std1;
1627 int fg, fg1;
1628 int rt, rt1;
1629 int nd, nd1;
1630 int hd, hd1;
1631
1632 rt = STD_AST(rt_std);
1633 rt1 = STD_AST(rt1_std);
1634 assert(LP_FORALL(lp), "eliminate_bnds: forall LP not set", lp, 2);
1635 hd = LP_HEAD(lp);
1636 std = FG_STDFIRST(hd);
1637 fg = LP_FG(lp);
1638
1639 assert(LP_FORALL(lp1), "eliminate_bnds: forall1 LP not set", lp1, 2);
1640 hd1 = LP_HEAD(lp1);
1641 std1 = FG_STDFIRST(hd1);
1642 fg1 = LP_FG(lp1);
1643
1644 if (!is_same_descr_for_bnds(rt, rt1))
1645 return;
1646
1647 nd = A_OPT1G(rt);
1648 nd1 = A_OPT1G(rt1);
1649 itriple = A_ITRIPLEG(rt);
1650 lb = A_LBDG(itriple);
1651 ub = A_UPBDG(itriple);
1652 st = A_STRIDEG(itriple);
1653 if (st == 0)
1654 st = astb.i1;
1655
1656 itriple1 = A_ITRIPLEG(rt1);
1657 lb1 = A_LBDG(itriple1);
1658 ub1 = A_UPBDG(itriple1);
1659 st1 = A_STRIDEG(itriple1);
1660 if (st1 == 0)
1661 st1 = astb.i1;
1662 if (lb == lb1 && ub == ub1 && st == st1) {
1663 if (is_avail_expr(lb, std, fg, std1, fg1) &&
1664 is_avail_expr(ub, std, fg, std1, fg1) &&
1665 is_avail_expr(st, std, fg, std1, fg1)) {
1666 FT_BND_SAME(nd1) = rt;
1667 STD_DELETE(rt1_std) = 1;
1668 optsum.bnd++;
1669 }
1670 }
1671 }
1672
1673 static LOGICAL
is_same_array_bounds(int sub,int sub1,int std,int std1,int fg,int fg1)1674 is_same_array_bounds(int sub, int sub1, int std, int std1, int fg, int fg1)
1675 {
1676 int ndim, ndim1;
1677 int asd, asd1;
1678 int count;
1679 int lb, ub, st;
1680 int lb1, ub1, st1;
1681 int i;
1682 int itriple, itriple1;
1683
1684 while (1) {
1685 if (A_TYPEG(sub) != A_TYPEG(sub1))
1686 return FALSE;
1687 switch (A_TYPEG(sub)) {
1688 case A_ID:
1689 return TRUE;
1690 case A_MEM:
1691 sub = A_PARENTG(sub);
1692 sub1 = A_PARENTG(sub1);
1693 break;
1694 case A_SUBSTR:
1695 sub = A_LOPG(sub);
1696 sub1 = A_LOPG(sub1);
1697 break;
1698 case A_SUBSCR:
1699 asd = A_ASDG(sub);
1700 ndim = ASD_NDIM(asd);
1701 asd1 = A_ASDG(sub1);
1702 ndim1 = ASD_NDIM(asd1);
1703 if (ndim != ndim1)
1704 return FALSE;
1705 count = 0;
1706 for (i = 0; i < ndim; i++) {
1707 itriple = ASD_SUBS(asd, i);
1708 if (A_TYPEG(itriple) == A_TRIPLE) {
1709 lb = A_LBDG(itriple);
1710 ub = A_UPBDG(itriple);
1711 st = A_STRIDEG(itriple);
1712 if (st == 0)
1713 st = astb.i1;
1714 } else {
1715 lb = itriple;
1716 ub = astb.i1;
1717 st = astb.i1;
1718 }
1719
1720 itriple1 = ASD_SUBS(asd1, i);
1721 if (A_TYPEG(itriple1) == A_TRIPLE) {
1722 lb1 = A_LOPG(itriple1);
1723 ub1 = A_UPBDG(itriple1);
1724 st1 = A_STRIDEG(itriple1);
1725 if (st1 == 0)
1726 st1 = astb.i1;
1727 } else {
1728 lb1 = itriple1;
1729 ub1 = astb.i1;
1730 st1 = astb.i1;
1731 }
1732
1733 if (lb == lb1 && ub == ub1 && st == st1) {
1734 if (is_avail_expr(lb, std, fg, std1, fg1) &&
1735 is_avail_expr(ub, std, fg, std1, fg1) &&
1736 is_avail_expr(st, std, fg, std1, fg1))
1737 count++;
1738 }
1739 }
1740 if (count != ndim)
1741 return FALSE;
1742 sub = A_LOPG(sub);
1743 sub1 = A_LOPG(sub1);
1744 break;
1745 default:
1746 interr("is_same_array_bounds: unexpected AST type ", sub, 0);
1747 return FALSE;
1748 }
1749 }
1750 }
1751
1752 /* This routine checks whether sub and sub1 arrays have same subscripts
1753 * it can allows that they can have different subscripts iff
1754 * 1-if subscript is scalar and collapsed.
1755 * 2-if trailing subscripts are scalars and they are collapsed.
1756 * Also, each array dimension extent must be the same, up to the
1757 * last nonscalar/nondistributed dimension.
1758 */
1759 static LOGICAL
is_same_array_bounds_for_schedule(int sub,int sub1,int std,int std1,int fg,int fg1)1760 is_same_array_bounds_for_schedule(int sub, int sub1, int std, int std1, int fg,
1761 int fg1)
1762 {
1763 return TRUE;
1764 }
1765
1766 LOGICAL
is_same_array_alignment(int sptr,int sptr1)1767 is_same_array_alignment(int sptr, int sptr1)
1768 {
1769 return TRUE;
1770 }
1771
1772 /* This routine is used for schedule elimination.
1773 * It checks if the arrays are aligned to the same template
1774 * with the same number of distribution and
1775 * aligned to the same template axis.
1776 * And also overlap area has to be the same.
1777 */
1778 static LOGICAL
is_same_array_alignment_for_schedule(int sptr,int sptr1)1779 is_same_array_alignment_for_schedule(int sptr, int sptr1)
1780 {
1781 return TRUE;
1782 }
1783
1784 static int
find_lp(int std)1785 find_lp(int std)
1786 {
1787 int i;
1788 int hd1;
1789 int std1;
1790
1791 for (i = 1; i <= opt.nloops; ++i) {
1792 hd1 = LP_HEAD(i);
1793 std1 = FG_STDFIRST(hd1);
1794 if (std == std1)
1795 return i;
1796 }
1797 assert(0, "find_lp: loop not found", std, 3);
1798 return 0;
1799 }
1800
1801 static void
eliminate_alloc(int lp,int lp1,int rt_std,int rt1_std)1802 eliminate_alloc(int lp, int lp1, int rt_std, int rt1_std)
1803 {
1804 int std, std1;
1805 int fg, fg1;
1806 int rt, rt1;
1807 int nd, nd1;
1808 int i;
1809 int sub, ndim, asd;
1810 int sub1, ndim1, asd1;
1811 int count;
1812 int sptr;
1813 int sptr1;
1814 int hd, hd1;
1815 int freestd, freestd1;
1816 int lp2, hd2;
1817 int lp3, hd3;
1818
1819 if (LP_PARENT(lp) != LP_PARENT(lp1))
1820 return;
1821 rt = STD_AST(rt_std);
1822 rt1 = STD_AST(rt1_std);
1823 assert(LP_FORALL(lp), "eliminate_alloc: expecting forall", lp, 2);
1824 hd = LP_HEAD(lp);
1825 std = FG_STDFIRST(hd);
1826 fg = LP_FG(lp);
1827
1828 assert(LP_FORALL(lp1), "eliminate_alloc: expecting forall1", lp1, 2);
1829 hd1 = LP_HEAD(lp1);
1830 std1 = FG_STDFIRST(hd1);
1831 fg1 = LP_FG(lp1);
1832
1833 /* has to have same distribution */
1834 nd = A_OPT1G(rt);
1835 nd1 = A_OPT1G(rt1);
1836 if (FT_ALLOC_USED(nd1))
1837 return;
1838 if (FT_ALLOC_USED(nd))
1839 return;
1840
1841 /* has to be disjoint live time */
1842 /* first free has to dominate the second alloc */
1843 if (lp == lp1)
1844 return;
1845 freestd = FT_ALLOC_FREE(nd);
1846 if (freestd == std1)
1847 return;
1848 if (!is_dominator_fg(freestd, std1))
1849 return;
1850
1851 freestd1 = FT_ALLOC_FREE(nd1);
1852 if (freestd == freestd1)
1853 return;
1854 if (!is_dominator_fg(freestd, freestd1))
1855 return;
1856
1857 sptr = FT_ALLOC_SPTR(nd);
1858 sptr1 = FT_ALLOC_SPTR(nd1);
1859 if (!is_same_array_alignment(sptr, sptr1))
1860 return;
1861
1862 /* has to have the same type */
1863 if (DTY(DTYPEG(sptr) + 1) != DTY(DTYPEG(sptr1) + 1))
1864 return;
1865
1866 /* can not be re-used at the same loop twice */
1867 if (FT_ALLOC_FREE(nd) == FT_ALLOC_FREE(nd1))
1868 return;
1869
1870 /* has to have the same alloc bounds */
1871 sub = A_LOPG(rt);
1872 sub1 = A_LOPG(rt1);
1873 if (is_same_array_bounds(sub, sub1, std, std1, fg, fg1)) {
1874 FT_ALLOC_SAME(nd1) = rt;
1875 FT_ALLOC_OUT(nd1) = FT_ALLOC_OUT(nd);
1876 if (is_dominator_fg(FT_ALLOC_FREE(nd), FT_ALLOC_FREE(nd1)))
1877 FT_ALLOC_FREE(nd) = FT_ALLOC_FREE(nd1);
1878 FT_ALLOC_REUSE(nd) = 1;
1879 STD_DELETE(rt1_std) = 1;
1880 optsum.alloc++;
1881 }
1882 }
1883
1884 LOGICAL
is_same_array_shape(int sptr,int sptr1)1885 is_same_array_shape(int sptr, int sptr1)
1886 {
1887 LOGICAL result;
1888 int dist, align;
1889 ADSC *ad, *ad1;
1890 int ndim, ndim1;
1891 int lb, lb1;
1892 int ub, ub1;
1893 int i;
1894
1895 result = TRUE;
1896 if (sptr == sptr1)
1897 return TRUE;
1898
1899 if (ALLOCG(sptr) || ALLOCG(sptr1))
1900 result = FALSE;
1901
1902 ad = AD_DPTR(DTYPEG(sptr));
1903 ad1 = AD_DPTR(DTYPEG(sptr1));
1904 ndim = rank_of_sym(sptr);
1905 ndim1 = rank_of_sym(sptr1);
1906
1907 if (ndim != ndim1)
1908 result = FALSE;
1909
1910 for (i = 0; i < ndim; i++) {
1911 lb = AD_LWAST(ad, i);
1912 lb1 = AD_LWAST(ad1, i);
1913 if (lb != lb1)
1914 result = FALSE;
1915 ub = AD_UPAST(ad, i);
1916 ub1 = AD_UPAST(ad1, i);
1917 if (ub != ub1)
1918 result = FALSE;
1919 }
1920
1921 return result;
1922 }
1923
1924 static void
eliminate_sect(int lp,int lp1,int rt_std,int rt1_std)1925 eliminate_sect(int lp, int lp1, int rt_std, int rt1_std)
1926 {
1927 int std, std1;
1928 int fg, fg1;
1929 int rt, rt1;
1930 int nd, nd1;
1931 int i;
1932 int sub, ndim, asd;
1933 int sub1, ndim1, asd1;
1934 int count;
1935 int sptr;
1936 int sptr1;
1937 int hd, hd1;
1938 int sect, sect1;
1939 int allocstd, allocstd1;
1940 int alloc, alloc1;
1941 int nd3;
1942 int arr, arr1;
1943 int sectflag, sectflag1;
1944 int bogus, bogus1;
1945
1946 if (LP_PARENT(lp) != LP_PARENT(lp1))
1947 return;
1948 rt = STD_AST(rt_std);
1949 rt1 = STD_AST(rt1_std);
1950 assert(LP_FORALL(lp), "eliminate_sect: expecting forall", lp, 2);
1951 hd = LP_HEAD(lp);
1952 std = FG_STDFIRST(hd);
1953 fg = LP_FG(lp);
1954
1955 assert(LP_FORALL(lp1), "eliminate_sect: expecting forall1", lp1, 2);
1956 hd1 = LP_HEAD(lp1);
1957 std1 = FG_STDFIRST(hd1);
1958 fg1 = LP_FG(lp1);
1959
1960 sect = A_SRCG(rt);
1961 sect1 = A_SRCG(rt1);
1962 nd = A_OPT1G(sect);
1963 nd1 = A_OPT1G(sect1);
1964 arr = A_LOPG(sect);
1965 arr1 = A_LOPG(sect1);
1966
1967 /* has to have same flag */
1968 sectflag = FT_SECT_FLAG(nd);
1969 sectflag1 = FT_SECT_FLAG(nd1);
1970 if (sectflag != sectflag1)
1971 return;
1972
1973 allocstd = FT_SECT_ALLOC(nd);
1974 if (allocstd) {
1975 alloc = STD_AST(allocstd);
1976 nd3 = A_OPT1G(alloc);
1977 sptr = FT_ALLOC_OUT(nd3);
1978 FT_SECT_SPTR(nd) = sptr;
1979 bogus = getbit(sectflag, 8);
1980 if (is_whole_array(arr) && !bogus) {
1981 DESCUSEDP(sptr, 1);
1982 FT_SECT_OUT(nd) = DESCRG(sptr);
1983 }
1984 }
1985
1986 allocstd1 = FT_SECT_ALLOC(nd1);
1987 if (allocstd1) {
1988 alloc1 = STD_AST(allocstd1);
1989 nd3 = A_OPT1G(alloc1);
1990 sptr = FT_ALLOC_OUT(nd3);
1991 FT_SECT_SPTR(nd1) = sptr;
1992 bogus1 = getbit(sectflag1, 8);
1993 if (is_whole_array(arr1) && !bogus1) {
1994 DESCUSEDP(sptr, 1);
1995 FT_SECT_OUT(nd1) = DESCRG(sptr);
1996 }
1997 }
1998
1999 /* has to have same distribution */
2000 sptr = FT_SECT_SPTR(nd);
2001 if (STYPEG(sptr) == ST_MEMBER)
2002 return;
2003 sptr1 = FT_SECT_SPTR(nd1);
2004 if (!is_same_array_alignment(sptr, sptr1))
2005 return;
2006
2007 /* has to have the same type */
2008 if (DTY(DTYPEG(sptr) + 1) != DTY(DTYPEG(sptr1) + 1))
2009 return;
2010
2011 if (!is_same_array_shape(sptr, sptr1))
2012 return;
2013
2014 /* has to have the same section bounds */
2015 sub = A_LOPG(sect);
2016 sub1 = A_LOPG(sect1);
2017 if (is_same_array_bounds(sub, sub1, std, std1, fg, fg1)) {
2018 FT_SECT_SAME(nd1) = rt;
2019 FT_SECT_OUT(nd1) = FT_SECT_OUT(nd);
2020 if (is_dominator_fg(FT_SECT_FREE(nd), FT_SECT_FREE(nd1)))
2021 FT_SECT_FREE(nd) = FT_SECT_FREE(nd1);
2022 FT_SECT_REUSE(nd) = 1;
2023 STD_DELETE(rt1_std) = 1;
2024 optsum.sect++;
2025 }
2026 }
2027
2028 static LOGICAL
is_dominator_fg(int std,int std1)2029 is_dominator_fg(int std, int std1)
2030 {
2031 int std_lp, std_lp1;
2032 int hd, hd1;
2033
2034 hd = STD_FG(std);
2035 hd1 = STD_FG(std1);
2036 if (is_dominator(hd, hd1))
2037 return TRUE;
2038 return FALSE;
2039 }
2040
2041 /*
2042 * if fg1 == fg2, then return TRUE if std1 preceeds std2 and FALSE otherwise
2043 * if fg1 != fg2, then return TRUE if fg1 dominates fg2,
2044 * or the postdominator of fg1 dominates fg2,
2045 * or some postdominator of fg1 dominates fg2, and so on
2046 */
2047 static LOGICAL
must_follow(int fg1,int std1,int fg2,int std2)2048 must_follow(int fg1, int std1, int fg2, int std2)
2049 {
2050 int std, found1, fg;
2051 if (fg1 == fg2) {
2052 rdilts(fg1);
2053 found1 = FALSE;
2054 for (std = FG_STDFIRST(fg1); std; std = STD_NEXT(std)) {
2055 if (std == std1) {
2056 found1 = TRUE;
2057 }
2058 if (std == std2) {
2059 wrilts(fg1);
2060 return found1;
2061 }
2062 }
2063 wrilts(fg1);
2064 /* didn't find std2 */
2065 } else {
2066 for (fg = fg1; fg > 0; fg = FG_PDOM(fg)) {
2067 if (is_dominator(fg, fg2))
2068 return TRUE;
2069 }
2070 }
2071 return FALSE;
2072 } /* must_follow */
2073
2074 static void
eliminate_copy(int lp,int lp1,int rt_std,int rt1_std)2075 eliminate_copy(int lp, int lp1, int rt_std, int rt1_std)
2076 {
2077 int std, std1;
2078 int fg, fg1;
2079 int rt, rt1;
2080 int nd, nd1, nd2;
2081 int i;
2082 int sub, ndim, asd;
2083 int sub1, ndim1, asd1;
2084 int count;
2085 int sptr;
2086 int sptr1;
2087 int hd, hd1;
2088 int copy, copy1;
2089 int lhs, lhs1;
2090 int rhs, rhs1;
2091 int sect;
2092
2093 if (XBIT(47, 0x40000))
2094 return;
2095 if (LP_PARENT(lp) != LP_PARENT(lp1))
2096 return;
2097 rt = STD_AST(rt_std);
2098 rt1 = STD_AST(rt1_std);
2099 assert(LP_FORALL(lp), "eliminate_copy: expecting forall", lp, 2);
2100 hd = LP_HEAD(lp);
2101 std = FG_STDFIRST(hd);
2102 fg = LP_FG(lp);
2103
2104 assert(LP_FORALL(lp1), "eliminate_copy: expecting forall1", lp1, 2);
2105 hd1 = LP_HEAD(lp1);
2106 std1 = FG_STDFIRST(hd1);
2107 fg1 = LP_FG(lp1);
2108
2109 copy = A_SRCG(rt);
2110 copy1 = A_SRCG(rt1);
2111 nd = A_OPT1G(copy);
2112 nd1 = A_OPT1G(copy1);
2113
2114 /* has to have same left-hand-side distribution */
2115 lhs = FT_CCOPY_LHS(nd);
2116 lhs1 = FT_CCOPY_LHS(nd1);
2117 sptr = left_array_symbol(lhs);
2118 sptr1 = left_array_symbol(lhs1);
2119 if (!is_same_array_alignment_for_schedule(sptr, sptr1))
2120 return;
2121
2122 /* has to have same temp distribution */
2123 sptr = FT_CCOPY_TSPTR(nd);
2124 sptr1 = FT_CCOPY_TSPTR(nd1);
2125 if (!is_same_array_alignment_for_schedule(sptr, sptr1))
2126 return;
2127
2128 /* has to have same right-hand-side distribution */
2129 rhs = FT_CCOPY_RHS(nd);
2130 rhs1 = FT_CCOPY_RHS(nd1);
2131 sptr = left_array_symbol(rhs);
2132 sptr1 = left_array_symbol(rhs1);
2133 if (!is_same_array_alignment_for_schedule(sptr, sptr1))
2134 return;
2135
2136 /* has to have the same tmps bounds */
2137 sub = A_DESTG(copy);
2138 sub1 = A_DESTG(copy1);
2139 if (!is_same_array_bounds_for_schedule(sub, sub1, std, std1, fg, fg1))
2140 return;
2141
2142 /* has to have the same rhs bounds */
2143 sub = A_SRCG(copy);
2144 sub1 = A_SRCG(copy1);
2145 if (!is_same_array_bounds_for_schedule(sub, sub1, std, std1, fg, fg1))
2146 return;
2147
2148 /* neither source or destination sections can have bogus flags */
2149 sect = FT_CCOPY_SECTL(nd);
2150 sub = STD_AST(sect);
2151 nd2 = A_OPT1G(A_SRCG(sub));
2152 if (FT_SECT_FLAG(nd2) & BOGUSFLAG)
2153 return;
2154 sect = FT_CCOPY_SECTR(nd);
2155 sub = STD_AST(sect);
2156 nd2 = A_OPT1G(A_SRCG(sub));
2157 if (FT_SECT_FLAG(nd2) & BOGUSFLAG)
2158 return;
2159 sect = FT_CCOPY_SECTL(nd1);
2160 sub = STD_AST(sect);
2161 nd2 = A_OPT1G(A_SRCG(sub));
2162 if (FT_SECT_FLAG(nd2) & BOGUSFLAG)
2163 return;
2164 sect = FT_CCOPY_SECTR(nd1);
2165 sub = STD_AST(sect);
2166 nd2 = A_OPT1G(A_SRCG(sub));
2167 if (FT_SECT_FLAG(nd2) & BOGUSFLAG)
2168 return;
2169
2170 /* has to have the same lhs bounds */
2171 sub = FT_CCOPY_LHSSEC(nd);
2172 sub1 = FT_CCOPY_LHSSEC(nd1);
2173 if (sub && sub1 &&
2174 !is_same_array_bounds_for_schedule(sub, sub1, std, std1, fg, fg1)) {
2175 FT_CCOPY_NOTLHS(nd) = 1;
2176 FT_CCOPY_NOTLHS(nd1) = 1;
2177 }
2178
2179 if (FT_CCOPY_NOTLHS(nd) || FT_CCOPY_NOTLHS(nd1)) {
2180 FT_CCOPY_NOTLHS(nd) = 1;
2181 FT_CCOPY_NOTLHS(nd1) = 1;
2182 }
2183
2184 FT_CCOPY_SAME(nd1) = rt;
2185 FT_CCOPY_OUT(nd1) = FT_CCOPY_OUT(nd);
2186 if (is_dominator_fg(FT_CCOPY_FREE(nd), FT_CCOPY_FREE(nd1)))
2187 FT_CCOPY_FREE(nd) = FT_CCOPY_FREE(nd1);
2188 FT_CCOPY_REUSE(nd) = 1;
2189 STD_DELETE(rt1_std) = 1;
2190 optsum.copysection++;
2191 }
2192
2193 static void
eliminate_gather(int lp,int lp1,int rt_std,int rt1_std)2194 eliminate_gather(int lp, int lp1, int rt_std, int rt1_std)
2195 {
2196 int std, std1;
2197 int fg, fg1;
2198 int rt, rt1;
2199 int nd, nd1, nd3;
2200 int i;
2201 int vsub, ndim, asd;
2202 int vsub1, ndim1, asd1;
2203 int hd, hd1;
2204 int gather, gather1;
2205 int nvsub, nvsub1;
2206 int mask, mask1;
2207 int sptr, sptr1;
2208 int v, v1;
2209 int per, per1;
2210 int sub, sub1;
2211
2212 if (XBIT(47, 0x80000))
2213 return;
2214 if (LP_PARENT(lp) != LP_PARENT(lp1))
2215 return;
2216 rt = STD_AST(rt_std);
2217 rt1 = STD_AST(rt1_std);
2218 assert(LP_FORALL(lp), "eliminate_gather: expecting forall", lp, 2);
2219 hd = LP_HEAD(lp);
2220 std = FG_STDFIRST(hd);
2221 fg = LP_FG(lp);
2222
2223 assert(LP_FORALL(lp1), "eliminate_gather: expecting forall1", lp1, 2);
2224 hd1 = LP_HEAD(lp1);
2225 std1 = FG_STDFIRST(hd1);
2226 fg1 = LP_FG(lp1);
2227
2228 gather = A_SRCG(rt);
2229 gather1 = A_SRCG(rt1);
2230 nd = A_OPT1G(gather);
2231 nd1 = A_OPT1G(gather1);
2232
2233 /* flags hast to be same */
2234 if (FT_CGATHER_TYPE(nd) != FT_CGATHER_TYPE(nd1))
2235 return;
2236 if (FT_CGATHER_VFLAG(nd) != FT_CGATHER_VFLAG(nd1))
2237 return;
2238 if (FT_CGATHER_PFLAG(nd) != FT_CGATHER_PFLAG(nd1))
2239 return;
2240
2241 /* vsub has to have same distribution */
2242 vsub = FT_CGATHER_VSUB(nd);
2243 vsub1 = FT_CGATHER_VSUB(nd1);
2244 sptr = left_array_symbol(vsub);
2245 sptr1 = left_array_symbol(vsub1);
2246 if (!is_same_array_alignment_for_schedule(sptr, sptr1))
2247 return;
2248
2249 /* nvsub has to have same distribution */
2250 nvsub = FT_CGATHER_NVSUB(nd);
2251 nvsub1 = FT_CGATHER_NVSUB(nd1);
2252 sptr = left_array_symbol(nvsub);
2253 sptr1 = left_array_symbol(nvsub1);
2254 if (!is_same_array_alignment_for_schedule(sptr, sptr1))
2255 return;
2256
2257 /* has to have the same vsub bounds */
2258 if (!is_same_array_bounds_for_schedule(vsub, vsub1, std, std1, fg, fg1))
2259 return;
2260
2261 /* has to have the same nvsub bounds */
2262 if (!is_same_array_bounds_for_schedule(nvsub, nvsub1, std, std1, fg, fg1))
2263 return;
2264
2265 /* masks have to have contents */
2266 mask = FT_CGATHER_MASK(nd);
2267 mask1 = FT_CGATHER_MASK(nd1);
2268 if (mask != mask1)
2269 return;
2270 if (mask) {
2271 if (!is_avail_expr(mask, std, fg, std1, fg1))
2272 return;
2273 }
2274
2275 asd = A_ASDG(left_subscript_ast(vsub));
2276 ndim = ASD_NDIM(asd);
2277
2278 asd1 = A_ASDG(left_subscript_ast(vsub1));
2279 ndim1 = ASD_NDIM(asd1);
2280
2281 if (ndim != ndim1)
2282 return;
2283
2284 for (i = 0; i < ndim; i++) {
2285 v = FT_CGATHER_V(nd, i);
2286 v1 = FT_CGATHER_V(nd1, i);
2287 if (v != v1)
2288 return;
2289 if (v) {
2290 if (!is_avail_expr(v, std, fg, std1, fg1))
2291 return;
2292 }
2293 /* has to have same permute */
2294 per = FT_CGATHER_PERMUTE(nd, i);
2295 per1 = FT_CGATHER_PERMUTE(nd1, i);
2296 if (per != per1)
2297 return;
2298 }
2299
2300 /* has to have the same lhs bounds */
2301 sub = FT_CGATHER_LHSSEC(nd);
2302 sub1 = FT_CGATHER_LHSSEC(nd1);
2303 if (sub && sub1 &&
2304 !is_same_array_bounds_for_schedule(sub, sub1, std, std1, fg, fg1)) {
2305 FT_CGATHER_NOTLHS(nd) = 1;
2306 FT_CGATHER_NOTLHS(nd1) = 1;
2307 }
2308
2309 if (FT_CGATHER_NOTLHS(nd) || FT_CGATHER_NOTLHS(nd1)) {
2310 FT_CGATHER_NOTLHS(nd) = 1;
2311 FT_CGATHER_NOTLHS(nd1) = 1;
2312 }
2313
2314 FT_CGATHER_SAME(nd1) = rt;
2315 FT_CGATHER_OUT(nd1) = FT_CGATHER_OUT(nd);
2316 FT_CGATHER_FREE(nd) = FT_CGATHER_FREE(nd1);
2317 FT_CGATHER_REUSE(nd) = 1;
2318 STD_DELETE(rt1_std) = 1;
2319 if (FT_CGATHER_TYPE(nd) == A_HGATHER)
2320 optsum.gatherx++;
2321 else
2322 optsum.scatterx++;
2323 }
2324
2325 static void
eliminate_shift(int lp,int lp1,int rt_std,int rt1_std)2326 eliminate_shift(int lp, int lp1, int rt_std, int rt1_std)
2327 {
2328 int std, std1;
2329 int fg, fg1;
2330 int rt, rt1;
2331 int nd, nd1, nd3;
2332 int i;
2333 int ndim, asd;
2334 int ndim1, asd1;
2335 int count;
2336 int src, src1, srcl;
2337 int hd, hd1;
2338 int shift, shift1;
2339 int sptr, sptr1;
2340 int v, v1, cv, cv1, ns, ns1;
2341 int nmax, pmax;
2342 int sub[7];
2343 int new;
2344
2345 if (XBIT(47, 0x100000))
2346 return;
2347 if (LP_PARENT(lp) != LP_PARENT(lp1))
2348 return;
2349 rt = STD_AST(rt_std);
2350 rt1 = STD_AST(rt1_std);
2351 assert(LP_FORALL(lp), "eliminate_shift: expecting forall", lp, 2);
2352 hd = LP_HEAD(lp);
2353 std = FG_STDFIRST(hd);
2354 fg = LP_FG(lp);
2355
2356 assert(LP_FORALL(lp1), "eliminate_shift: expecting forall1", lp1, 2);
2357 hd1 = LP_HEAD(lp1);
2358 std1 = FG_STDFIRST(hd1);
2359 fg1 = LP_FG(lp1);
2360
2361 shift = A_SRCG(rt);
2362 shift1 = A_SRCG(rt1);
2363 nd = A_OPT1G(shift);
2364 nd1 = A_OPT1G(shift1);
2365
2366 if (FT_SHIFT_TYPE(nd1) != FT_SHIFT_TYPE(nd))
2367 return;
2368 if (FT_SHIFT_BOUNDARY(nd1) != FT_SHIFT_BOUNDARY(nd))
2369 return;
2370 if (FT_SHIFT_BOUNDARY(nd) &&
2371 !is_avail_expr(FT_SHIFT_BOUNDARY(nd), std, fg, std1, fg1))
2372 return;
2373 src = A_SRCG(shift);
2374 sptr = left_array_symbol(src);
2375
2376 src1 = A_SRCG(shift1);
2377 sptr1 = left_array_symbol(src1);
2378
2379 if (!is_same_array_alignment(sptr, sptr1))
2380 return;
2381
2382 if (!is_same_array_shape(sptr, sptr1))
2383 return;
2384
2385 /* has to have the same shift values */
2386 /* second shift has to be less than equal to first shift
2387 * at all dimension negative and pozitive direction
2388 */
2389
2390 srcl = left_subscript_ast(src);
2391 asd = A_ASDG(srcl);
2392 asd1 = A_ASDG(left_subscript_ast(src1));
2393 ndim = ASD_NDIM(asd);
2394 ndim1 = ASD_NDIM(asd1);
2395 if (ndim != ndim1)
2396 return;
2397 for (i = 0; i < ndim; i++) {
2398 v = ASD_SUBS(asd, i);
2399 v1 = ASD_SUBS(asd1, i);
2400 assert(A_TYPEG(v) == A_TRIPLE, "eliminate_shift: expecting triple", v, 3);
2401 assert(A_TYPEG(v1) == A_TRIPLE, "eliminate_shift: expecting triple1", v1,
2402 3);
2403 ns = A_LBDG(v);
2404 ns1 = A_LBDG(v1);
2405 assert(A_TYPEG(ns) == A_CNST, "eliminate_shift:expecting constant", v, 3);
2406 assert(A_TYPEG(ns1) == A_CNST, "eliminate_shift:expecting constant1", v1,
2407 3);
2408 cv = get_int_cval(A_SPTRG(A_ALIASG(ns)));
2409 cv1 = get_int_cval(A_SPTRG(A_ALIASG(ns1)));
2410 nmax = ns;
2411 if (cv < cv1)
2412 nmax = ns1;
2413
2414 ns = A_UPBDG(v);
2415 ns1 = A_UPBDG(v1);
2416 assert(A_TYPEG(ns) == A_CNST, "eliminate_shift:expecting constant2", v, 3);
2417 assert(A_TYPEG(ns1) == A_CNST, "eliminate_shift:expecting constant3", v, 3);
2418 cv = get_int_cval(A_SPTRG(A_ALIASG(ns)));
2419 cv1 = get_int_cval(A_SPTRG(A_ALIASG(ns1)));
2420 pmax = ns;
2421 if (cv < cv1)
2422 pmax = ns1;
2423 sub[i] = mk_triple(nmax, pmax, 0);
2424 }
2425
2426 new = mk_subscr(A_LOPG(srcl), sub, ndim, DTY(DTYPEG(sptr) + 1));
2427 new = replace_ast_subtree(src, srcl, new);
2428 A_SRCP(shift, new);
2429
2430 FT_SHIFT_SAME(nd1) = rt;
2431 FT_SHIFT_OUT(nd1) = FT_SHIFT_OUT(nd);
2432 if (is_dominator_fg(FT_SHIFT_FREE(nd), FT_SHIFT_FREE(nd1)))
2433 FT_SHIFT_FREE(nd) = FT_SHIFT_FREE(nd1);
2434 FT_SHIFT_REUSE(nd) = 1;
2435 STD_DELETE(rt1_std) = 1;
2436 optsum.shift++;
2437 }
2438
2439 static void
eliminate_start(int lp,int lp1,int rt_std,int rt1_std)2440 eliminate_start(int lp, int lp1, int rt_std, int rt1_std)
2441 {
2442 int std, std1;
2443 int fg, fg1;
2444 int rt, rt1;
2445 int nd, nd1;
2446 int i;
2447 int sub, ndim, asd;
2448 int sub1, ndim1, asd1;
2449 int count;
2450 int sptr, sptr1;
2451 int ast, ast1;
2452 int hd, hd1;
2453 int start, start1;
2454 int cp, cp1;
2455 int lhs, lhs1;
2456 int rhs, rhs1;
2457 int comm, commstd;
2458 int comm1, commstd1;
2459 int asn, asn1;
2460 int nd3, nd4;
2461 int stype, stype1;
2462 int alloc_std, alloc;
2463 int nd5;
2464 int src, src1;
2465 int dest, dest1;
2466
2467 rt = STD_AST(rt_std);
2468 rt1 = STD_AST(rt1_std);
2469 assert(LP_FORALL(lp), "eliminate_start: expecting forall", lp, 2);
2470 hd = LP_HEAD(lp);
2471 std = FG_STDFIRST(hd);
2472 fg = LP_FG(lp);
2473
2474 assert(LP_FORALL(lp1), "eliminate_start: expecting forall1", lp1, 2);
2475 hd1 = LP_HEAD(lp1);
2476 std1 = FG_STDFIRST(hd1);
2477 fg1 = LP_FG(lp1);
2478
2479 start = A_SRCG(rt);
2480 start1 = A_SRCG(rt1);
2481 nd = A_OPT1G(start);
2482 nd1 = A_OPT1G(start1);
2483
2484 stype = FT_CSTART_TYPE(nd);
2485 stype1 = FT_CSTART_TYPE(nd1);
2486 if (stype != stype1)
2487 return;
2488
2489 /* has to have same cp */
2490 commstd = FT_CSTART_COMM(nd);
2491 asn = STD_AST(commstd);
2492 comm = A_SRCG(asn);
2493 nd3 = A_OPT1G(comm);
2494 if (stype == A_HCOPYSECT)
2495 cp = FT_CCOPY_OUT(nd3);
2496 if (stype == A_HOVLPSHIFT)
2497 cp = FT_SHIFT_OUT(nd3);
2498 if (stype == A_HGATHER)
2499 cp = FT_CGATHER_OUT(nd3);
2500 if (stype == A_HSCATTER)
2501 cp = FT_CGATHER_OUT(nd3);
2502
2503 commstd1 = FT_CSTART_COMM(nd1);
2504 asn1 = STD_AST(commstd1);
2505 comm1 = A_SRCG(asn1);
2506 nd4 = A_OPT1G(comm1);
2507 if (stype1 == A_HCOPYSECT)
2508 cp1 = FT_CCOPY_OUT(nd4);
2509 if (stype1 == A_HOVLPSHIFT)
2510 cp1 = FT_SHIFT_OUT(nd4);
2511 if (stype1 == A_HGATHER)
2512 cp1 = FT_CGATHER_OUT(nd4);
2513 if (stype1 == A_HSCATTER)
2514 cp1 = FT_CGATHER_OUT(nd4);
2515
2516 if (cp != cp1)
2517 return;
2518
2519 /* has to have the same source array */
2520 sub = A_SRCG(start);
2521 sub1 = A_SRCG(start1);
2522 ast = left_subscript_ast(sub);
2523 ast1 = left_subscript_ast(sub1);
2524 if (A_LOPG(ast) != A_LOPG(ast1))
2525 return;
2526
2527 rhs = A_LOPG(FT_CSTART_RHS(nd1));
2528 if (std != std1)
2529 if (!is_avail_expr(rhs, std, fg, std1, fg1))
2530 return;
2531
2532 /* scatterx needs also destination is same */
2533 if (stype1 == A_HSCATTER) {
2534 sub = A_DESTG(start);
2535 sub1 = A_DESTG(start1);
2536 ast = left_subscript_ast(sub);
2537 ast1 = left_subscript_ast(sub1);
2538 if (A_LOPG(ast) != A_LOPG(ast1))
2539 return;
2540 }
2541
2542 src = A_SRCG(start);
2543 src1 = A_SRCG(start1);
2544
2545 dest = A_DESTG(start);
2546 dest1 = A_DESTG(start1);
2547
2548 if (stype == A_HCOPYSECT || stype == A_HGATHER || stype == A_HSCATTER) {
2549
2550 /* has to have same destination distribution */
2551 sptr = left_array_symbol(dest);
2552 sptr1 = left_array_symbol(dest1);
2553 if (!is_same_array_alignment(sptr, sptr1))
2554 return;
2555
2556 /* has to have same src distribution */
2557 sptr = left_array_symbol(src);
2558 sptr1 = left_array_symbol(src1);
2559 if (!is_same_array_alignment(sptr, sptr1))
2560 return;
2561
2562 /* has to have the same dest bounds */
2563 if (!is_same_array_bounds(dest, dest1, std, std1, fg, fg1))
2564 return;
2565
2566 /* has to have the same src bounds */
2567 if (!is_same_array_bounds(src, src1, std, std1, fg, fg1))
2568 return;
2569 }
2570
2571 FT_CSTART_SAME(nd1) = rt;
2572 FT_CSTART_OUT(nd1) = FT_CSTART_OUT(nd);
2573 if (FT_CSTART_ALLOC(nd1) && FT_CSTART_ALLOC(nd)) {
2574 FT_CSTART_ALLOC(nd1) = alloc_std = FT_CSTART_ALLOC(nd);
2575 alloc = STD_AST(alloc_std);
2576 nd5 = A_OPT1G(alloc);
2577 if (is_dominator_fg(FT_ALLOC_FREE(nd5), FT_CSTART_FREE(nd1)))
2578 FT_ALLOC_FREE(nd5) = FT_CSTART_FREE(nd1);
2579 FT_ALLOC_USED(nd5) = 1;
2580 }
2581 if (is_dominator_fg(FT_CSTART_FREE(nd), FT_CSTART_FREE(nd1)))
2582 FT_CSTART_FREE(nd) = FT_CSTART_FREE(nd1);
2583 FT_CSTART_REUSE(nd) = 1;
2584 STD_DELETE(rt1_std) = 1;
2585 optsum.start++;
2586 }
2587
2588 static void
eliminate_get_scalar(void)2589 eliminate_get_scalar(void)
2590 {
2591 int i, j;
2592 int src, src1;
2593 int ast, ast1;
2594 int fg, fg1;
2595 int commstd, commstd1;
2596 int rt, rt1;
2597 int nd, nd1;
2598
2599 init_gstbl();
2600 find_get_scalar();
2601
2602 for (i = 0; i < gstbl.avl; i++) {
2603 commstd = gstbl.base[i].f1;
2604 if (STD_DELETE(commstd))
2605 continue;
2606 rt = STD_AST(commstd);
2607 nd = A_OPT1G(rt);
2608 assert(A_TYPEG(rt) == A_HGETSCLR, "generate_get_scalar: wrong ast type", 2,
2609 rt);
2610 assert(nd, "generate_get_scalar: nd is 0", 2, rt);
2611 for (j = i + 1; j < gstbl.avl; j++) {
2612 commstd1 = gstbl.base[j].f1;
2613 if (STD_DELETE(commstd1))
2614 continue;
2615 rt1 = STD_AST(commstd1);
2616 src = A_SRCG(rt);
2617 src1 = A_SRCG(rt1);
2618 if (src != src1)
2619 continue;
2620
2621 nd1 = A_OPT1G(rt1);
2622 fg = STD_FG(commstd);
2623 fg1 = STD_FG(commstd1);
2624 if (!is_dominator(fg, fg1))
2625 continue;
2626 if (!is_avail_expr(src, commstd, fg, commstd1, fg1))
2627 continue;
2628 FT_GETSCLR_SAME(nd1) = rt;
2629 FT_GETSCLR_REUSE(nd) = 1;
2630 STD_DELETE(commstd1) = 1;
2631 }
2632 }
2633 free_gstbl();
2634 }
2635
2636 void
init_gstbl(void)2637 init_gstbl(void)
2638 {
2639 gstbl.size = 200;
2640 NEW(gstbl.base, TABLE, gstbl.size);
2641 gstbl.avl = 0;
2642 }
2643
2644 void
free_gstbl(void)2645 free_gstbl(void)
2646 {
2647
2648 FREE(gstbl.base);
2649 }
2650
2651 int
get_gstbl(void)2652 get_gstbl(void)
2653 {
2654 int nd;
2655
2656 nd = gstbl.avl++;
2657 NEED(gstbl.avl, gstbl.base, TABLE, gstbl.size, gstbl.size + 100);
2658 if (nd > SPTR_MAX || gstbl.base == NULL)
2659 errfatal(7);
2660 return nd;
2661 }
2662
2663 void
init_brtbl(void)2664 init_brtbl(void)
2665 {
2666 brtbl.size = 200;
2667 NEW(brtbl.base, TABLE, brtbl.size);
2668 brtbl.avl = 0;
2669 }
2670
2671 void
free_brtbl(void)2672 free_brtbl(void)
2673 {
2674 FREE(brtbl.base);
2675 }
2676
2677 int
get_brtbl(void)2678 get_brtbl(void)
2679 {
2680 int nd;
2681
2682 nd = brtbl.avl++;
2683 NEED(brtbl.avl, brtbl.base, TABLE, brtbl.size, brtbl.size + 100);
2684 if (nd > SPTR_MAX || brtbl.base == NULL)
2685 errfatal(7);
2686 return nd;
2687 }
2688
2689 static void
comm_optimize_init(void)2690 comm_optimize_init(void)
2691 {
2692 optshrd_init();
2693 induction_init();
2694 optshrd_finit();
2695 }
2696
2697 static void
comm_optimize_end(void)2698 comm_optimize_end(void)
2699 {
2700 optshrd_fend();
2701 optshrd_end();
2702 induction_end();
2703 }
2704
2705 /* optimization table */
2706
2707 void
init_ftb(void)2708 init_ftb(void)
2709 {
2710 ftb.size = 240;
2711 NEW(ftb.base, FT, ftb.size);
2712 ftb.avl = 1;
2713 }
2714
2715 int
mk_ftb(void)2716 mk_ftb(void)
2717 {
2718 int nd;
2719
2720 nd = ftb.avl++;
2721 NEED(ftb.avl, ftb.base, FT, ftb.size, ftb.size + 240);
2722 if (ftb.base == NULL)
2723 errfatal(7);
2724 return nd;
2725 }
2726
2727 LITEMF *
clist(void)2728 clist(void)
2729 {
2730 LITEMF *list;
2731
2732 list = (LITEMF *)getitem(FORALL_AREA, sizeof(LITEMF));
2733 list->nitem = 0;
2734 list->next = 0;
2735 return list;
2736 }
2737
2738 void
plist(LITEMF * list,int item)2739 plist(LITEMF *list, int item)
2740 {
2741 LITEMF *listp, *last;
2742 assert(list, "plist: list is NULL", 0, 3);
2743 if (list->nitem == 0) {
2744 list->item = item;
2745 list->next = 0;
2746 list->nitem = 1;
2747 return;
2748 }
2749 for (listp = list; listp != 0; listp = listp->next)
2750 last = listp;
2751 listp = (LITEMF *)getitem(FORALL_AREA, sizeof(LITEMF));
2752 listp->item = item;
2753 listp->next = 0;
2754 last->next = listp;
2755 list->nitem++;
2756 }
2757
2758 int
glist(LITEMF * list,int n)2759 glist(LITEMF *list, int n)
2760 {
2761 LITEMF *listp;
2762 int i;
2763
2764 assert(list->nitem > n, "glist: nitem not >", n, 0);
2765 listp = list;
2766 for (i = 0; i < list->nitem; i++) {
2767 if (i == n)
2768 return listp->item;
2769 listp = listp->next;
2770 }
2771 return 0;
2772 }
2773
2774 /* Is this item in the list? */
2775 LOGICAL
inlist(LITEMF * list,int item)2776 inlist(LITEMF *list, int item)
2777 {
2778 LITEMF *p;
2779 if (list->nitem == 0)
2780 return FALSE; /* list->item is invalid */
2781 for (p = list; p != 0; p = p->next) {
2782 if (p->item == item)
2783 return TRUE;
2784 }
2785 return FALSE;
2786 }
2787
2788 /* Dump this list of ints. */
2789 void
dlist(LITEMF * list)2790 dlist(LITEMF *list)
2791 {
2792 LITEMF *p;
2793 FILE *dfile = gbl.dbgfil ? gbl.dbgfil : stderr;
2794 fprintf(dfile, "List of %d items:", list->nitem);
2795 if (list->nitem > 0) {
2796 for (p = list; p != 0; p = p->next) {
2797 fprintf(dfile, " %d", p->item);
2798 }
2799 }
2800 fprintf(dfile, "\n");
2801 }
2802
2803 static int
common_compute_point(LITEMF * nm_list,int fg,int std)2804 common_compute_point(LITEMF *nm_list, int fg, int std)
2805 {
2806 LITEMF *fg_list, *std_list;
2807 int i, j, n;
2808 int nme;
2809 int use;
2810 DU *du;
2811 int def;
2812 int fg1, fg2;
2813 int count;
2814
2815 fg_list = clist();
2816 std_list = clist();
2817
2818 n = nm_list->nitem;
2819 for (i = 0; i < n; i++) {
2820 nme = A_NMEG(glist(nm_list, i));
2821 def = NME_DEF(nme);
2822 if (def)
2823 for (du = DEF_DU(def); du != 0; du = du->next) {
2824 use = du->use;
2825 if (USE_STD(use) == std) {
2826 if (only_one_ud(use)) {
2827 plist(fg_list, DEF_FG(def));
2828 plist(std_list, DEF_STD(def));
2829 } else {
2830 plist(fg_list, fg);
2831 plist(fg_list, std);
2832 }
2833 }
2834 }
2835 else {
2836 plist(fg_list, 0);
2837 plist(std_list, 0);
2838 }
2839 }
2840
2841 count = 0;
2842 for (i = 0; i < n; i++) {
2843 fg1 = glist(fg_list, i);
2844 count = 0;
2845 for (j = 0; j < n; j++) {
2846 fg2 = glist(fg_list, j);
2847 if (is_dominator(fg2, fg1))
2848 count++;
2849 }
2850 if (count == n)
2851 break;
2852 }
2853 if (glist(std_list, i) != std)
2854 return glist(std_list, i);
2855 else
2856 return STD_PREV(std);
2857 }
2858
2859 /* This routine is to change all allocate statements into
2860 * allocate ast which is defined for hpf communication ast.
2861 * By this way, any allocate may benefit of comm_invar.
2862 */
2863 static void
alloc2ast(void)2864 alloc2ast(void)
2865 {
2866 int start, i;
2867 int std, stdnext;
2868 int allocast, newallocast;
2869 int deallocast, ast;
2870 int nd;
2871 LOGICAL found;
2872 int sptr;
2873
2874 start = ftb.avl;
2875 for (std = STD_NEXT(0); std; std = stdnext) {
2876 stdnext = STD_NEXT(std);
2877 if (STD_IGNORE(std))
2878 continue;
2879 ast = STD_AST(std);
2880 if (A_TYPEG(ast) == A_ALLOC) {
2881 if (A_TKNG(ast) == TK_ALLOCATE) {
2882 allocast = STD_AST(std);
2883 ast = A_SRCG(allocast);
2884 if (A_TYPEG(ast) != A_SUBSCR)
2885 continue;
2886 if (A_TYPEG(A_LOPG(ast)) != A_ID)
2887 continue;
2888 sptr = A_SPTRG(A_LOPG(ast));
2889 if (!HCCSYMG(sptr))
2890 continue;
2891 if (ADJLENG(sptr))
2892 continue;
2893 newallocast = new_node(A_HALLOBNDS);
2894 A_LOPP(newallocast, ast);
2895
2896 nd = mk_ftb();
2897 FT_STD(nd) = 0;
2898 FT_FORALL(nd) = 0;
2899 FT_ALLOC_SPTR(nd) = sptr;
2900 FT_ALLOC_FREE(nd) = 0;
2901 FT_ALLOC_SAME(nd) = 0;
2902 FT_ALLOC_REUSE(nd) = 0;
2903 FT_ALLOC_USED(nd) = 0;
2904 FT_ALLOC_OUT(nd) = sptr;
2905 A_OPT1P(newallocast, nd);
2906 STD_AST(std) = newallocast;
2907 A_STDP(newallocast, std);
2908 /*
2909 add_stmt_before(newallocast, std);
2910 delete_stmt(std);
2911 */
2912 } else {
2913 assert(A_TKNG(ast) == TK_DEALLOCATE, "alloc2ast: bad dealloc", std, 4);
2914 deallocast = STD_AST(std);
2915 ast = A_SRCG(deallocast);
2916 if (A_TYPEG(ast) != A_ID)
2917 continue;
2918 sptr = A_SPTRG(ast);
2919 if (!HCCSYMG(sptr) || STYPEG(sptr) != ST_ARRAY)
2920 continue;
2921 if (ADJLENG(sptr))
2922 continue;
2923 found = FALSE;
2924 for (i = start; i < ftb.avl; i++) {
2925 if (FT_ALLOC_FREE(i))
2926 continue;
2927 if (FT_ALLOC_SPTR(i) == sptr) {
2928 FT_ALLOC_FREE(i) = STD_PREV(std);
2929 FT_ALLOC_PTASGN(i) = STD_PTASGN(std);
2930 delete_stmt(std);
2931 found = TRUE;
2932 }
2933 }
2934 assert(found, "alloc2ast: missing allocate", std, 2);
2935 }
2936 }
2937 }
2938 }
2939
2940 /** \brief Try to optimize allocate statements.
2941
2942 If they have same bounds, give them the same bound variable.
2943 This will help the compiler to reduce # of communication
2944 for allocatable variables.
2945 */
2946 void
optimize_alloc(void)2947 optimize_alloc(void)
2948 {
2949 comm_optimize_init();
2950 flowgraph(); /* build the flowgraph for the function */
2951 postdominators(); /* need these as well */
2952 #if DEBUG
2953 if (DBGBIT(35, 1))
2954 dump_flowgraph();
2955 #endif
2956
2957 findloop(HLOPT_ALL); /* find the loops */
2958
2959 flow(); /* do flow analysis on the loops */
2960
2961 #if DEBUG
2962 if (DBGBIT(35, 4)) {
2963 dump_flowgraph();
2964 dump_loops();
2965 }
2966 #endif
2967 opt_allocate();
2968 comm_optimize_end();
2969 }
2970
2971 static void
opt_allocate(void)2972 opt_allocate(void)
2973 {
2974 ADSC *ad;
2975 int allocast;
2976 int ast;
2977 int std;
2978 int sptr;
2979 int i;
2980 int ndim;
2981 int stdnext;
2982 int sub[MAXSUBS];
2983 LITEMF *defs_to_propagate = clist();
2984 LITEMF *shape_exceptions = clist(); /* don't propagate into these shapes */
2985
2986 for (std = STD_NEXT(0); std; std = stdnext) {
2987 LOGICAL changed;
2988 int asd;
2989 stdnext = STD_NEXT(std);
2990 allocast = STD_AST(std);
2991 if (STD_PAR(std))
2992 continue;
2993 if (is_allocatable_assign(allocast)) {
2994 /* don't propagate shape bounds -- may be changed in transform() */
2995 int shape = A_SHAPEG(A_DESTG(allocast));
2996 plist(shape_exceptions, shape);
2997 continue;
2998 }
2999 if (A_TYPEG(allocast) != A_ALLOC)
3000 continue;
3001 if (A_TKNG(allocast) != TK_ALLOCATE)
3002 continue;
3003 ast = A_SRCG(allocast);
3004 if (A_TYPEG(ast) != A_SUBSCR)
3005 continue;
3006 /* member is busted -- lfm */
3007 if (A_TYPEG(A_LOPG(ast)) != A_ID)
3008 continue;
3009 sptr = A_SPTRG(A_LOPG(ast));
3010 /* pointer lb, ub is not A_ID,
3011 it may array static_descriptor */
3012 if (NOALLOOPTG(sptr))
3013 continue;
3014 if (POINTERG(sptr))
3015 continue;
3016 if (CMBLKG(sptr))
3017 continue;
3018 if (MDALLOCG(sptr))
3019 continue;
3020 if (SAVEG(sptr))
3021 continue;
3022
3023 /* a SAVEd allocatable will not appear itself in a common block,
3024 * but its pointer offset variable will. */
3025 if (PTROFFG(sptr) && SCG(PTROFFG(sptr)) == SC_CMBLK)
3026 continue;
3027
3028 /* put bounds into NME table */
3029 ad = AD_DPTR(DTYPEG(sptr));
3030 ndim = rank_of_sym(sptr);
3031 asd = A_ASDG(ast);
3032
3033 changed = FALSE; /* did any bounds change? */
3034 for (i = 0; i < ndim; i++) {
3035 int lw, up, lw2, up2;
3036 int ss = ASD_SUBS(asd, i);
3037 if (A_TYPEG(ss) == A_TRIPLE) {
3038 lw = A_LBDG(ss);
3039 up = A_UPBDG(ss);
3040 } else {
3041 lw = astb.i1;
3042 up = ss;
3043 }
3044 lw2 = propagate_bound(defs_to_propagate, lw);
3045 up2 = propagate_bound(defs_to_propagate, up);
3046 if (lw2 != lw || up2 != up) {
3047 sub[i] = mk_triple(lw2, up2, 0);
3048 changed = TRUE;
3049 } else {
3050 sub[i] = ss;
3051 }
3052 }
3053
3054 /* change allocate too */
3055 if (changed) {
3056 int new = mk_subscr(mk_id(sptr), sub, ndim, DTY(DTYPEG(sptr) + 1));
3057 A_SRCP(allocast, new);
3058 }
3059
3060 /* optmize allocatable alignment */
3061 }
3062
3063 ast_visit(1, 1);
3064 for (i = 0; i < defs_to_propagate->nitem; i++) {
3065 int def = glist(defs_to_propagate, i);
3066 int stddef = DEF_STD(def);
3067 int astdef = STD_AST(stddef);
3068 int src, dest;
3069 assert(A_TYPEG(astdef) == A_ASN, "expecting ASN ast", astdef, ERR_Fatal);
3070 src = A_SRCG(astdef);
3071 dest = A_DESTG(astdef);
3072 ast_replace(dest, src);
3073 }
3074
3075 /* change all shape*/
3076 rewrite_all_shape(shape_exceptions);
3077 ast_unvisit();
3078 freearea(FORALL_AREA);
3079 }
3080
3081 /* Is this an assignment with F2003 allocatable semantics? */
3082 static LOGICAL
is_allocatable_assign(int ast)3083 is_allocatable_assign(int ast)
3084 {
3085 int dest, src;
3086 LOGICAL dest_is_mem = FALSE;
3087 if (A_TYPEG(ast) != A_ASN) return FALSE;
3088 dest = A_DESTG(ast);
3089 src = A_SRCG(ast);
3090 while (A_TYPEG(dest) == A_MEM) {
3091 dest = A_MEMG(dest);
3092 dest_is_mem = TRUE;
3093 }
3094 if (!dest_is_mem && !XBIT(54, 0x1))
3095 return FALSE;
3096 while (A_TYPEG(src) == A_MEM) {
3097 src = A_MEMG(src);
3098 }
3099 if (A_TYPEG(dest) == A_ID && A_TYPEG(src) == A_ID) {
3100 int dest_sym = sym_of_ast(dest);
3101 if (ALLOCATTRG(dest_sym)) {
3102 return TRUE;
3103 }
3104 }
3105 return FALSE;
3106 }
3107
3108 /* If possible, propagate the assignment to bound, add it to defs_to_propagate
3109 * and return the new value. */
3110 static int
propagate_bound(LITEMF * defs_to_propagate,int bound)3111 propagate_bound(LITEMF *defs_to_propagate, int bound)
3112 {
3113 if (A_TYPEG(bound) == A_ID) {
3114 int nme = addnme(NT_VAR, A_SPTRG(bound), 0, (INT)0);
3115 int def = NME_DEF(nme);
3116 if (is_safe_copy(def)) {
3117 int std = DEF_STD(def);
3118 int ast = STD_AST(std);
3119 int sptr;
3120 assert(A_TYPEG(ast) == A_ASN, "expecting ASN ast", ast, ERR_Fatal);
3121 sptr = sym_of_ast(A_DESTG(ast));
3122 if (!GSCOPEG(sptr))
3123 plist(defs_to_propagate, def);
3124 return A_SRCG(ast);
3125 }
3126 }
3127 return bound;
3128 }
3129
3130 /* this routine is to decide whether it is safe to
3131 * propagate the definition for an array bound variable to its use
3132 * example, z_b_1=n*m
3133 * The list of definitions for the variable is passed as 'def'.
3134 * The requirements are:
3135 * 1-) All definitions are assignments, with the same RHS
3136 * 2-) All assignments to variables used in the RHS must
3137 * be post-dominated by a dominator of these uses
3138 */
3139 static LOGICAL
is_safe_copy(int deflist)3140 is_safe_copy(int deflist)
3141 {
3142 int std, fg, ast, src, dest;
3143 int def, defstd, deffg, defast;
3144 int nvar, onlyvar, a[10];
3145 int v, defv;
3146 int stdv, fgv;
3147 int i;
3148
3149 if (deflist == 0)
3150 return FALSE;
3151 std = DEF_STD(deflist);
3152 fg = DEF_FG(deflist);
3153
3154 /* must be A_ASN */
3155 ast = STD_AST(std);
3156 if (A_TYPEG(ast) != A_ASN)
3157 return FALSE;
3158 src = A_SRCG(ast);
3159 dest = A_DESTG(ast);
3160
3161 for (def = deflist; def; def = DEF_NEXT(def)) {
3162 defstd = DEF_STD(def);
3163 deffg = DEF_FG(def);
3164 defast = STD_AST(defstd);
3165 if (A_TYPEG(defast) != A_TYPEG(ast))
3166 return FALSE;
3167 if (A_SRCG(defast) != src)
3168 return FALSE;
3169 if (A_DESTG(defast) != dest)
3170 return FALSE;
3171 }
3172
3173 /* at this point, all assignments have the same RHS */
3174
3175 if (A_TYPEG(src) != A_CNST) {
3176 /* decompose the expression into the variables that comprise it */
3177 nvar = 0;
3178 onlyvar = 1;
3179
3180 decompose_expression(src, a, 10, &nvar, &onlyvar);
3181
3182 if (nvar > 10) /* too many variables in RHS? */
3183 return FALSE;
3184 if (!onlyvar) /* something complex in RHS? */
3185 return FALSE;
3186 for (i = 0; i < nvar; ++i) {
3187 v = a[i];
3188 v = addnme(NT_VAR, A_SPTRG(v), 0, (INT)0); /* find NME */
3189 /* go through defs of this variable;
3190 * all the uses here must be dominated by a postdominator
3191 * of the definition of the variable.
3192 * This allows:
3193 * n = xxx
3194 * if(..)then
3195 * m = yyy
3196 * endif
3197 * z_b_1 = n * m
3198 * allocate(foo(z_b_1))
3199 * but disallows:
3200 * z_b_1 = n * m
3201 * allocate(foo(z_b_1))
3202 * n = xxx
3203 * if(..)then
3204 * m = yyy
3205 * endif
3206 */
3207
3208 for (defv = NME_DEF(v); defv; defv = DEF_NEXT(defv)) {
3209 stdv = DEF_STD(defv);
3210 fgv = DEF_FG(defv);
3211 for (def = deflist; def; def = DEF_NEXT(def)) {
3212 defstd = DEF_STD(def);
3213 deffg = DEF_FG(def);
3214 if (!must_follow(fgv, stdv, deffg, defstd))
3215 return FALSE;
3216 }
3217 }
3218 }
3219 }
3220 return TRUE;
3221 } /* is_safe_copy */
3222
3223 static LOGICAL
is_same_def(int def,int def1)3224 is_same_def(int def, int def1)
3225 {
3226 int std, std1;
3227 int fg, fg1;
3228 int next, next1;
3229 int ast, ast1;
3230 int expr, expr1;
3231
3232 if (def == 0 || def1 == 0)
3233 return FALSE;
3234 std = DEF_STD(def);
3235 std1 = DEF_STD(def1);
3236 fg = DEF_FG(def);
3237 fg1 = DEF_FG(def1);
3238
3239 next = DEF_NEXT(def);
3240 next1 = DEF_NEXT(def1);
3241
3242 /* there should be only one defs */
3243 if (next || next1)
3244 return FALSE;
3245
3246 /* they have to be A_ASN and same A_SRC */
3247 ast = STD_AST(std);
3248 ast1 = STD_AST(std1);
3249 if (A_TYPEG(ast) != A_ASN || A_TYPEG(ast1) != A_ASN)
3250 return FALSE;
3251 expr = A_SRCG(ast);
3252 expr1 = A_SRCG(ast1);
3253 if (expr != expr1)
3254 return FALSE;
3255
3256 /* value is not changed between them */
3257 if (!is_dominator(fg, fg1))
3258 return FALSE;
3259 if (!is_avail_expr(expr, std, fg, std1, fg1))
3260 return FALSE;
3261
3262 return TRUE;
3263 }
3264
3265 /* This routine is checks that def has only one definition and
3266 * src of that definition is a constant; if so, it returns
3267 * the ast of the difference, else it returns 'defaultval'
3268 */
3269 static int
diff_def_cnst(int cnstAst,int def,int defaultval)3270 diff_def_cnst(int cnstAst, int def, int defaultval)
3271 {
3272 int std;
3273 int fg;
3274 int next;
3275 int ast;
3276 int expr;
3277 int condef, conast;
3278
3279 if (def == 0)
3280 return defaultval;
3281 std = DEF_STD(def);
3282 fg = DEF_FG(def);
3283 next = DEF_NEXT(def);
3284 /* there should be only one defs */
3285 if (next)
3286 return defaultval;
3287
3288 /* they have to be A_ASN and same A_SRC */
3289 ast = STD_AST(std);
3290 if (A_TYPEG(ast) != A_ASN)
3291 return defaultval;
3292 expr = A_SRCG(ast);
3293 if (A_ALIASG(expr))
3294 expr = A_ALIASG(expr);
3295 if (A_TYPEG(expr) != A_CNST)
3296 return defaultval;
3297 if (A_DTYPEG(expr) != DT_INT)
3298 return defaultval;
3299 condef = A_SPTRG(expr);
3300 condef = CONVAL2G(condef);
3301 conast = A_SPTRG(cnstAst);
3302 conast = CONVAL2G(conast);
3303 condef = conast - condef;
3304 ast = mk_cval(condef, DT_INT);
3305
3306 return ast;
3307 }
3308
3309 static void
rewrite_all_shape(LITEMF * exceptions)3310 rewrite_all_shape(LITEMF *exceptions)
3311 {
3312 int shape;
3313 int ndim;
3314 int ii, i;
3315 int old_lwb, old_upb, old_st;
3316 int new_lwb, new_upb, new_st;
3317 int old_sptr, new_sptr;
3318
3319 for (ii = 1; ii < MAXDIMS; ii++) {
3320 ndim = ii;
3321 /* search the existing SHDs with the same number of dimensions
3322 */
3323 for (shape = astb.shd.hash[ndim - 1]; shape; shape = SHD_NEXT(shape)) {
3324 if (inlist(exceptions, shape))
3325 continue;
3326 for (i = 0; i < ndim; i++) {
3327 old_lwb = SHD_LWB(shape, i);
3328 old_upb = SHD_UPB(shape, i);
3329 old_st = SHD_STRIDE(shape, i);
3330 new_lwb = ast_rewrite(SHD_LWB(shape, i));
3331 new_upb = ast_rewrite(SHD_UPB(shape, i));
3332 new_st = ast_rewrite(SHD_STRIDE(shape, i));
3333 SHD_LWB(shape, i) = new_lwb;
3334 SHD_UPB(shape, i) = new_upb;
3335 SHD_STRIDE(shape, i) = new_st;
3336 if (flg.smp) {
3337 if (A_TYPEG(old_lwb) == A_ID) {
3338 old_sptr = sym_of_ast(old_lwb);
3339 new_sptr = 0;
3340 if (ast_is_sym(new_lwb)) {
3341 new_sptr = sym_of_ast(new_lwb);
3342 }
3343 if (new_sptr && new_sptr != old_sptr &&
3344 PARREFG(old_sptr) && STYPEG(new_sptr) != ST_CONST) {
3345 set_parref_flag2(new_sptr, old_sptr, 0);
3346 }
3347 }
3348 if (A_TYPEG(old_upb) == A_ID) {
3349 old_sptr = sym_of_ast(old_upb);
3350 new_sptr = 0;
3351 if (ast_is_sym(new_upb)) {
3352 new_sptr = sym_of_ast(new_upb);
3353 }
3354 if (new_sptr && new_sptr != old_sptr &&
3355 PARREFG(old_sptr) && STYPEG(new_sptr) != ST_CONST) {
3356 set_parref_flag2(new_sptr, old_sptr, 0);
3357 }
3358 }
3359 if (A_TYPEG(old_st) == A_ID) {
3360 old_sptr = sym_of_ast(old_st);
3361 new_sptr = 0;
3362 if (ast_is_sym(new_st)) {
3363 new_sptr = sym_of_ast(new_st);
3364 }
3365 if (new_sptr && new_sptr != old_sptr &&
3366 PARREFG(old_sptr) && STYPEG(new_sptr) != ST_CONST) {
3367 set_parref_flag2(new_sptr, old_sptr, 0);
3368 }
3369 }
3370 }
3371 }
3372 }
3373 }
3374 }
3375
3376 static void
decompose_expression(int expr,int a[],int size,int * nvar,int * onlyvar)3377 decompose_expression(int expr, int a[], int size, int *nvar, int *onlyvar)
3378 {
3379 int i;
3380 int asd;
3381 int ndim, n;
3382 int arr;
3383 int argt;
3384
3385 if (expr == 0)
3386 return;
3387 switch (A_TYPEG(expr)) {
3388 case A_ID:
3389 if (*nvar >= size) {
3390 *nvar = size + 1;
3391 return;
3392 }
3393 a[*nvar] = expr;
3394 (*nvar)++;
3395 return;
3396 case A_BINOP:
3397 decompose_expression(A_LOPG(expr), a, size, nvar, onlyvar);
3398 decompose_expression(A_ROPG(expr), a, size, nvar, onlyvar);
3399 return;
3400 case A_CONV:
3401 case A_UNOP:
3402 case A_PAREN:
3403 decompose_expression(A_LOPG(expr), a, size, nvar, onlyvar);
3404 return;
3405 case A_LABEL:
3406 case A_CMPLXC:
3407 case A_CNST:
3408 return;
3409 case A_MEM:
3410 decompose_expression((int)A_PARENTG(expr), a, size, nvar, onlyvar);
3411 return;
3412 case A_SUBSTR:
3413 if (onlyvar) {
3414 *onlyvar = 0;
3415 return;
3416 }
3417 decompose_expression((int)A_LOPG(expr), a, size, nvar, onlyvar);
3418 decompose_expression((int)A_LEFTG(expr), a, size, nvar, onlyvar);
3419 decompose_expression((int)A_RIGHTG(expr), a, size, nvar, onlyvar);
3420 return;
3421 case A_ICALL:
3422 case A_INTR:
3423 case A_FUNC:
3424 if (onlyvar) {
3425 *onlyvar = 0;
3426 return;
3427 }
3428 argt = A_ARGSG(expr);
3429 n = A_ARGCNTG(expr);
3430 for (i = 0; i < n; ++i)
3431 decompose_expression(ARGT_ARG(argt, i), a, size, nvar, onlyvar);
3432 return;
3433 case A_TRIPLE:
3434 if (onlyvar) {
3435 *onlyvar = 0;
3436 return;
3437 }
3438 decompose_expression(A_LBDG(expr), a, size, nvar, onlyvar);
3439 decompose_expression(A_UPBDG(expr), a, size, nvar, onlyvar);
3440 decompose_expression(A_STRIDEG(expr), a, size, nvar, onlyvar);
3441 return;
3442 case A_SUBSCR:
3443 if (onlyvar) {
3444 *onlyvar = 0;
3445 return;
3446 }
3447 arr = A_LOPG(expr);
3448 decompose_expression(A_LOPG(expr), a, size, nvar, onlyvar);
3449 asd = A_ASDG(expr);
3450 ndim = ASD_NDIM(asd);
3451 for (i = 0; i < ndim; i++)
3452 decompose_expression(ASD_SUBS(asd, i), a, size, nvar, onlyvar);
3453 return;
3454 default:
3455 interr("decompose_expression: unknown type", expr, 3);
3456 return;
3457 }
3458 }
3459
3460 /** \brief Put forall calls into forall tables.
3461
3462 FT_MCALL will have calls used in the mask.<br>
3463 FT_SCALL will have calls used in the statement.<br>
3464 These later will be used to parallel calls.
3465 */
3466 void
put_forall_pcalls(int fstd)3467 put_forall_pcalls(int fstd)
3468 {
3469 int forall;
3470 int topstd;
3471 int i, j;
3472 int mask, expr;
3473 int nd, nd1, nd2;
3474 int nargs, argt, arg;
3475 int pstd, past, psptr;
3476 int pstd1, past1, psptr1;
3477 int asn;
3478
3479 forall = STD_AST(fstd);
3480 assert(A_TYPEG(forall) == A_FORALL, "put_forall_pcalls: must be forall",
3481 forall, 4);
3482
3483 mask = A_IFEXPRG(forall);
3484 asn = A_IFSTMTG(forall);
3485 nd = A_OPT1G(forall);
3486 topstd = A_SRCG(forall);
3487 if (!topstd)
3488 return;
3489 if (A_ARRASNG(forall))
3490 return;
3491 /* put statements calls */
3492 for (i = topstd; i != fstd; i = STD_NEXT(i)) {
3493 if (is_pcalls(i, fstd)) {
3494 plist(FT_PCALL(nd), i);
3495 STD_PURE(i) = TRUE;
3496 FT_NPCALL(nd)++;
3497 }
3498 }
3499
3500 for (i = 0; i < FT_NPCALL(nd); i++) {
3501 pstd = glist(FT_PCALL(nd), i);
3502 past = STD_AST(pstd);
3503 nargs = A_ARGCNTG(past);
3504 argt = A_ARGSG(past);
3505 arg = ARGT_ARG(argt, 0);
3506 switch (A_TYPEG(arg)) { /* FS#18714 - skip over non-symbol arg */
3507 case A_ID:
3508 case A_LABEL:
3509 case A_ENTRY:
3510 case A_SUBSCR:
3511 case A_SUBSTR:
3512 case A_MEM:
3513 break;
3514 default:
3515 continue;
3516 }
3517 psptr = sym_of_ast(arg);
3518 if (mask && expr_dependent(mask, arg, fstd, fstd)) {
3519 plist(FT_MCALL(nd), pstd);
3520 FT_NMCALL(nd)++;
3521 } else if (expr_dependent(A_SRCG(asn), arg, fstd, fstd) ||
3522 expr_dependent(A_DESTG(asn), arg, fstd, fstd)) {
3523 plist(FT_SCALL(nd), pstd);
3524 FT_NSCALL(nd)++;
3525 }
3526 }
3527
3528 for (i = 0; i < FT_NPCALL(nd); i++) {
3529 pstd = glist(FT_PCALL(nd), i);
3530 past = STD_AST(pstd);
3531 nargs = A_ARGCNTG(past);
3532 argt = A_ARGSG(past);
3533 arg = ARGT_ARG(argt, 0);
3534 switch (A_TYPEG(arg)) { /* FS#18714 - skip over non-symbol arg */
3535 case A_ID:
3536 case A_LABEL:
3537 case A_ENTRY:
3538 case A_SUBSCR:
3539 case A_SUBSTR:
3540 case A_MEM:
3541 break;
3542 default:
3543 continue;
3544 }
3545 psptr = sym_of_ast(arg);
3546 nd1 = mk_ftb();
3547 FT_CALL_SPTR(nd1) = psptr;
3548 /* don't distribute PURE result */
3549 FT_CALL_NCALL(nd1) = 0;
3550 FT_CALL_CALL(nd1) = clist();
3551 FT_CALL_POS(nd1) = 0;
3552 A_OPT1P(past, nd1);
3553 }
3554
3555 for (i = 0; i < FT_NPCALL(nd); i++) {
3556 pstd = glist(FT_PCALL(nd), i);
3557 past = STD_AST(pstd);
3558 nd1 = A_OPT1G(past);
3559 if (nd1 == 0)
3560 continue;
3561 psptr = FT_CALL_SPTR(nd1);
3562 for (j = 0; j < FT_NPCALL(nd); j++) {
3563 if (i == j)
3564 continue;
3565 pstd1 = glist(FT_PCALL(nd), j);
3566 past1 = STD_AST(pstd1);
3567 nd2 = A_OPT1G(past1);
3568 if (contains_ast(past1, mk_id(psptr))) {
3569 plist(FT_CALL_CALL(nd2), pstd);
3570 FT_CALL_NCALL(nd2)++;
3571 }
3572 }
3573 }
3574 }
3575
3576 /* This checks:
3577 * Whether std is call which may be pure
3578 */
3579 static LOGICAL
is_pcalls(int std,int fstd)3580 is_pcalls(int std, int fstd)
3581 {
3582 int ast;
3583 int sptr;
3584 ast = STD_AST(std);
3585 if (A_TYPEG(ast) == A_CALL) {
3586 sptr = A_SPTRG(A_LOPG(ast));
3587 if (is_impure(sptr))
3588 error(488, 4, STD_LINENO(fstd), "subprogram call in FORALL",
3589 SYMNAME(sptr));
3590 else
3591 return TRUE;
3592 }
3593 if (A_TYPEG(ast) == A_ICALL)
3594 return TRUE;
3595 return FALSE;
3596 }
3597
3598 static void
forall_make_same_idx(int std)3599 forall_make_same_idx(int std)
3600 {
3601
3602 int idx[7];
3603 int list, list1, listp;
3604 int forall;
3605 int nidx;
3606 int isptr, isptr1;
3607 int dtype;
3608 int i;
3609 int nd, nd1;
3610 int oldast, newast;
3611 int newforall;
3612 int af, st, nc, bd;
3613 int cnt;
3614 int ip, pstd, past;
3615 LITEMF *plist;
3616
3617 forall = STD_AST(std);
3618 assert(A_TYPEG(forall) == A_FORALL, "make_same_idx: not forall", 2, forall);
3619 list = A_LISTG(forall);
3620 nidx = 0;
3621 for (listp = list; listp != 0; listp = ASTLI_NEXT(listp)) {
3622 idx[nidx] = listp;
3623 nidx++;
3624 }
3625 assert(nidx <= 7, "make_same_idx: illegal forall", 2, forall);
3626
3627 /* if it is already changed, don't do any thing */
3628 cnt = 0;
3629 for (i = 0; i < nidx; i++) {
3630 isptr1 = ASTLI_SPTR(idx[i]);
3631 dtype = DTYPEG(isptr1);
3632 isptr = get_init_idx(i, dtype);
3633 if (isptr == isptr1)
3634 cnt++;
3635 if (STD_TASK(std) && SCG(isptr) == SC_PRIVATE) {
3636 TASKP(isptr, 1);
3637 }
3638 }
3639 if (cnt == nidx)
3640 return;
3641 ast_visit(1, 1);
3642
3643 /* change forall */
3644 for (i = 0; i < nidx; i++) {
3645 isptr = ASTLI_SPTR(idx[i]);
3646 dtype = DTYPEG(isptr);
3647 oldast = mk_id(isptr);
3648 isptr = get_init_idx(i, dtype);
3649 newast = mk_id(isptr);
3650 if (STD_TASK(std) && SCG(isptr) == SC_PRIVATE) {
3651 TASKP(isptr, 1);
3652 }
3653 ast_replace(oldast, newast);
3654 }
3655
3656 nd = A_OPT1G(forall);
3657 af = A_ARRASNG(forall);
3658 st = A_STARTG(forall);
3659 nc = A_NCOUNTG(forall);
3660 bd = A_CONSTBNDG(forall);
3661 newforall = ast_rewrite(forall);
3662 A_OPT1P(newforall, nd);
3663 A_ARRASNP(newforall, af);
3664 A_STARTP(newforall, st);
3665 A_NCOUNTP(newforall, nc);
3666 A_CONSTBNDP(newforall, bd);
3667
3668 A_STDP(newforall, std);
3669 STD_AST(std) = newforall;
3670
3671 /* change also pcalls */
3672 plist = FT_PCALL(nd);
3673 for (ip = 0; ip < FT_NPCALL(nd); ip++) {
3674 pstd = plist->item;
3675 plist = plist->next;
3676 past = STD_AST(pstd);
3677 nd1 = A_OPT1G(past);
3678 past = ast_rewrite(past);
3679 A_OPT1P(past, nd1);
3680 STD_AST(pstd) = past;
3681 A_STDP(past, pstd);
3682 }
3683 ast_unvisit();
3684 }
3685
3686 /* SHARED MEMORY OPTIMIZATION START HERE */
3687
3688 /** \brief Put all get_scalar into table to easily process */
3689 void
find_get_scalar(void)3690 find_get_scalar(void)
3691 {
3692 int std, stdnext;
3693 int ast;
3694 int i;
3695 int type;
3696 int nd;
3697
3698 for (std = STD_NEXT(0); std; std = stdnext) {
3699 stdnext = STD_NEXT(std);
3700 ast = STD_AST(std);
3701 type = A_TYPEG(ast);
3702 if (type != A_HGETSCLR)
3703 continue;
3704 nd = A_OPT1G(ast);
3705 if (nd == 0) {
3706 nd = mk_ftb();
3707 FT_STD(nd) = 0;
3708 FT_GETSCLR_SAME(nd) = 0;
3709 FT_GETSCLR_REUSE(nd) = 0;
3710 FT_GETSCLR_NOMEM(nd) = 0;
3711 FT_GETSCLR_OMEMED(nd) = 0;
3712 }
3713 A_OPT1P(ast, nd);
3714 i = get_gstbl();
3715 gstbl.base[i].f1 = std;
3716 }
3717 }
3718