1 /*
2  * Copyright (c) 1994-2019, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 /**
19    \file
20    \brief Fortran communications optimizer module
21  */
22 
23 #include "gbldefs.h"
24 #include "global.h"
25 #include "error.h"
26 #include "comm.h"
27 #include "symtab.h"
28 #include "dtypeutl.h"
29 #include "soc.h"
30 #include "semant.h"
31 #include "ast.h"
32 #include "gramtk.h"
33 #include "transfrm.h"
34 #include "extern.h"
35 #include "commopt.h"
36 #include "ccffinfo.h"
37 #include "optimize.h"
38 #include "nme.h"
39 #include "rte.h"
40 #include "hlvect.h"
41 
42 extern int rewrite_opfields;
43 
44 static void commopt(void);
45 static void shmem_opt1(void);
46 static void shmem_opt2(void);
47 static void shmemopt1(void);
48 static void shmemopt2(void);
49 static void comm_optimize_init(void);
50 static LOGICAL same_forall_bnds(int, int, int);
51 static LOGICAL is_fusable(int, int, int);
52 static LOGICAL smp_conflict(int, int);
53 static LOGICAL is_in_block(int, int);
54 static LOGICAL is_different_scalar_mask(int, int);
55 static LOGICAL Conflict(int, int, int, LOGICAL, int, int);
56 static LOGICAL is_branch_between(int, int);
57 static LOGICAL is_contains_ast_between(int, int, int);
58 static LOGICAL is_same_align_shape(int, int, int, int);
59 
60 static void comm_optimize_end(void);
61 static void eliminate_rt(int);
62 static void eliminate_bnds(int, int, int, int);
63 static void eliminate_alloc(int, int, int, int);
64 static void eliminate_sect(int, int, int, int);
65 static void eliminate_copy(int, int, int, int);
66 static void eliminate_gather(int, int, int, int);
67 static void eliminate_shift(int, int, int, int);
68 static void eliminate_start(int, int, int, int);
69 static void eliminate_get_scalar(void);
70 static void fuse_forall(int);
71 static LOGICAL is_same_descr_for_bnds(int, int);
72 static LOGICAL Conflict_(int);
73 static LOGICAL is_same_idx(int, int);
74 static LOGICAL is_dominator_fg(int, int);
75 static LOGICAL must_follow(int, int, int, int);
76 static int find_lp(int);
77 static void init_optsum(void);
78 #if DEBUG
79 static void optsummary(void);
80 #endif
81 static void alloc2ast(void);
82 static void opt_allocate(void);
83 static LOGICAL is_same_def(int, int);
84 static LOGICAL is_safe_copy(int);
85 static LOGICAL is_allocatable_assign(int ast);
86 static int propagate_bound(LITEMF *defs_to_propagate, int bound);
87 static void rewrite_all_shape(LITEMF *);
88 static void decompose_expression(int, int[], int, int *, int *);
89 static LOGICAL is_avail_expr_with_list(int, int, int, int, int, int[], int);
90 static void put_forall_fg(void);
91 static LOGICAL independent_commtype(int, int);
92 static LOGICAL is_olap_conflict(int, int);
93 static LOGICAL is_pcalls(int, int);
94 static void forall_make_same_idx(int);
95 static void eliminate_ownerproc(void);
96 static void gather_ownerproc(int atyp, int lp);
97 static void share_ownerproc(void);
98 static void transform_ownerproc(void);
99 static void barrier_opt(void);
100 static void opt_anti_barrier(void);
101 static void opt_flow_barrier(void);
102 static LOGICAL can_reach_no_barrier(int fg1, int fg2);
103 static LOGICAL _can_reach(int fg1, int fg2);
104 static void omem_barrier(void);
105 
106 #if DEBUG
107 static int dodebug = 0;
108 #define TR(str)               \
109   if (dodebug) {              \
110     fprintf(gbl.dbgfil, str); \
111     fflush(gbl.dbgfil);       \
112   }
113 #define TR1(str)      \
114   if (DBGBIT(0, 512)) \
115   dump_stg_stat(str)
116 #else
117 #define TR(str)
118 #define TR1(str)
119 #endif
120 
121 INT *lpsort;
122 FTB ftb;
123 OPTSUM optsum;
124 
125 static void
selection_sort(void)126 selection_sort(void)
127 {
128   int i, j;
129   int hd, hd1;
130   int dom;
131   int t;
132   int ast, ast1;
133   int std, std1;
134 
135   lpsort = (INT *)getitem(FORALL_AREA, (opt.nloops + 1) * sizeof(INT));
136   for (i = 1; i <= opt.nloops; ++i)
137     lpsort[i] = i;
138 
139   for (i = 1; i <= opt.nloops - 1; ++i) {
140     dom = i;
141     for (j = i + 1; j <= opt.nloops; ++j) {
142       hd = LP_HEAD(lpsort[dom]);
143       hd1 = LP_HEAD(lpsort[j]);
144       std = FG_STDFIRST(hd);
145       ast = STD_AST(std);
146       std1 = FG_STDFIRST(hd1);
147       ast1 = STD_AST(std1);
148       if (is_dominator(hd1, hd))
149         dom = j;
150     }
151     t = lpsort[dom];
152     lpsort[dom] = lpsort[i];
153     lpsort[i] = t;
154   }
155 }
156 
157 void
comm_optimize_post(void)158 comm_optimize_post(void)
159 {
160   alloc2ast();
161   comm_optimize_init();
162   flowgraph(); /* build the flowgraph for the function */
163 #if DEBUG
164   if (DBGBIT(35, 1))
165     dump_flowgraph();
166 #endif
167 
168   findloop(HLOPT_ALL); /* find the loops */
169 
170   rewrite_opfields = 0x3; /* copy opt1 and opt2 fields */
171   flow();                 /* do flow analysis on the loops  */
172   rewrite_opfields = 0;   /* reset */
173 
174 #if DEBUG
175   if (DBGBIT(35, 4)) {
176     dump_flowgraph();
177     dump_loops();
178   }
179 #endif
180 
181   commopt();
182 
183 #if DEBUG
184   if (DBGBIT(59, 1))
185     optsummary();
186 #endif
187   comm_optimize_end();
188 }
189 
190 void
comm_optimize_pre(void)191 comm_optimize_pre(void)
192 {
193 
194   comm_optimize_init();
195   flowgraph(); /* build the flowgraph for the function */
196 #if DEBUG
197   if (DBGBIT(35, 1))
198     dump_flowgraph();
199 #endif
200 
201   findloop(HLOPT_ALL); /* find the loops */
202 
203   flow(); /* do flow analysis on the loops  */
204 
205 #if DEBUG
206   if (DBGBIT(35, 4)) {
207     dump_flowgraph();
208     dump_loops();
209   }
210 #endif
211   selection_sort();
212   fuse_forall(0);
213   comm_optimize_end();
214 }
215 
216 void
forall_init(void)217 forall_init(void)
218 {
219   int std;
220   int ast;
221   int parallel_depth;
222   int task_depth;
223 
224   init_ftb();
225   init_dtb();
226   init_optsum();
227   init_bnd();
228 
229   for (std = STD_NEXT(0); std;) {
230     ast = STD_AST(std);
231     if (A_TYPEG(ast) == A_FORALL) {
232       forall_opt1(ast);
233     }
234     std = STD_NEXT(std);
235   }
236   /* put calls into calls table */
237   parallel_depth = 0;
238   task_depth = 0;
239   for (std = STD_NEXT(0); std;) {
240     ast = STD_AST(std);
241     switch (A_TYPEG(ast)) {
242     case A_FORALL:
243       put_forall_pcalls(std);
244       forall_make_same_idx(std);
245       break;
246     case A_MP_PARALLEL:
247       ++parallel_depth;
248       /*symutl.sc = SC_PRIVATE;*/
249       set_descriptor_sc(SC_PRIVATE);
250       break;
251     case A_MP_ENDPARALLEL:
252       --parallel_depth;
253       if (parallel_depth == 0 && task_depth == 0) {
254         /*symutl.sc = SC_LOCAL;*/
255         set_descriptor_sc(SC_LOCAL);
256       }
257       break;
258     case A_MP_TASK:
259     case A_MP_TASKLOOP:
260       ++task_depth;
261       set_descriptor_sc(SC_PRIVATE);
262       break;
263     case A_MP_ENDTASK:
264     case A_MP_ETASKLOOP:
265       --task_depth;
266       if (parallel_depth == 0 && task_depth == 0) {
267         set_descriptor_sc(SC_LOCAL);
268       }
269       break;
270     default:
271       break;
272     }
273     std = STD_NEXT(std);
274   }
275 }
276 
277 static void
init_optsum(void)278 init_optsum(void)
279 {
280   optsum.fuse = 0;
281   optsum.bnd = 0;
282   optsum.alloc = 0;
283   optsum.sect = 0;
284   optsum.copysection = 0;
285   optsum.gatherx = 0;
286   optsum.scatterx = 0;
287   optsum.shift = 0;
288   optsum.start = 0;
289 }
290 
291 #if DEBUG
292 static void
optsummary(void)293 optsummary(void)
294 {
295 
296   static char msg0[] = "===== comm opt summary for %s ====\n";
297   static char msg1[] = "%5d loops fused\n";
298   static char msg2[] = "%5d pghpf_localize_bnd calls eliminated\n";
299   static char msg3[] = "%5d allocates eliminated\n";
300   static char msg4[] = "%5d pghpf_sect calls eliminated\n";
301   static char msg5[] = "%5d pghpf_comm_copy calls eliminated\n";
302   static char msg6[] = "%5d pghpf_comm_gatherx calls eliminated\n";
303   static char msg7[] = "%5d pghpf_comm_scatterx calls eliminated\n";
304   static char msg8[] = "%5d pghpf_comm_shift calls eliminated\n";
305   static char msg9[] = "%5d pghpf_comm_start calls eliminated\n";
306   FILE *fil;
307 
308   fil = gbl.dbgfil;
309   if (fil == NULL)
310     fil = stderr;
311   fprintf(fil, msg0, SYMNAME(gbl.currsub));
312   fprintf(fil, msg1, optsum.fuse);
313   fprintf(fil, msg2, optsum.bnd);
314   fprintf(fil, msg3, optsum.alloc);
315   fprintf(fil, msg4, optsum.sect);
316   fprintf(fil, msg5, optsum.copysection);
317   fprintf(fil, msg6, optsum.gatherx);
318   fprintf(fil, msg7, optsum.scatterx);
319   fprintf(fil, msg8, optsum.shift);
320   fprintf(fil, msg9, optsum.start);
321 }
322 #endif
323 
324 static void
commopt(void)325 commopt(void)
326 {
327   int i;
328   int forall;
329   int std;
330   selection_sort();
331   put_forall_fg();
332 
333   if (!XBIT(47, 0x2000))
334     eliminate_rt(1);
335   if (!XBIT(47, 0x4000))
336     eliminate_rt(2);
337   if (!XBIT(47, 0x8000))
338     eliminate_rt(3);
339   if (!XBIT(47, 0x10000))
340     eliminate_rt(4);
341   if (!XBIT(47, 0x20000))
342     eliminate_get_scalar();
343 }
344 
345 static void
fuse_forall(int nested)346 fuse_forall(int nested)
347 {
348   int i, j, k, l;
349   int std, std1;
350   int forall, forall1;
351   int nd, nd1;
352   int rt, rt1;
353   int rt_std, rt1_std;
354   int hd, hd1;
355   int type, type1;
356   int ii, jj;
357   int expr, expr1;
358   int newexpr1, newrhs1;
359   int asn, asn1;
360   int rhs, rhs1;
361   int lhs, lhs1;
362   int nidx, nidx1;
363   int list, list1, listp;
364   int isptr;
365   int fg, fg1;
366   int number_of_try;
367 
368   for (ii = 1; ii <= opt.nloops; ++ii) {
369     i = lpsort[ii];
370     hd = LP_HEAD(i); /*the flow graph node which is the loop head*/
371     fg = LP_FG(i);
372     std = FG_STDFIRST(hd);
373     forall = STD_AST(std);
374     if (LP_FORALL(i)) {
375 
376       hd = LP_HEAD(i); /*the flow graph node which is the loop head*/
377       fg = LP_FG(i);
378       std = FG_STDFIRST(hd);
379       forall = STD_AST(std);
380       nd = A_OPT1G(forall);
381       if (FT_NFUSE(nd, nested) >= MAXFUSE)
382         continue;
383       if (FT_FUSED(nd))
384         continue;
385       number_of_try = 0;
386       for (jj = ii + 1; jj <= opt.nloops; ++jj) {
387         j = lpsort[jj];
388         if (LP_PARENT(i) != LP_PARENT(j))
389           continue;
390         if (FT_NFUSE(nd, nested) >= MAXFUSE)
391           break;
392         if (number_of_try > 25)
393           continue;
394         if (LP_FORALL(j)) {
395           /*the flow graph node which is the loop head */
396           hd1 = LP_HEAD(j);
397           fg1 = LP_FG(j);
398           if (!is_dominator(hd, hd1))
399             continue;
400           std1 = FG_STDFIRST(hd1);
401           if (STD_ACCEL(std) != STD_ACCEL(std1))
402             continue;
403           forall1 = STD_AST(std1);
404           nd1 = A_OPT1G(forall1);
405           if (FT_FUSED(nd1))
406             continue;
407           if (!same_forall_bnds(i, j, nested))
408             continue;
409           if (!is_same_idx(std, std1))
410             continue;
411           number_of_try++;
412           if (!is_fusable(i, j, nested))
413             continue;
414 
415           FT_FUSELP(nd, nested, FT_NFUSE(nd, nested)) = j;
416           FT_FUSEDSTD(nd, nested, FT_NFUSE(nd, nested)) = std1;
417           FT_NFUSE(nd, nested)++;
418           FT_FUSED(nd1) = 1;
419           FT_HEADER(nd1) = FT_HEADER(nd);
420           FT_FUSED(nd) = 1;
421           optsum.fuse++;
422         }
423       }
424     }
425   }
426 }
427 
428 static LOGICAL
is_same_idx(int std,int std1)429 is_same_idx(int std, int std1)
430 {
431   int idx[7], idx1[7];
432   int list, list1, listp;
433   int forall, forall1;
434   int nidx, nidx1;
435   int isptr, isptr1;
436   int i;
437   int nd;
438   int oldast, newast;
439   int newforall, newforall1;
440   int af, st, nc;
441 
442   forall = STD_AST(std);
443   list = A_LISTG(forall);
444   nidx = 0;
445   for (listp = list; listp != 0; listp = ASTLI_NEXT(listp)) {
446     idx[nidx] = listp;
447     nidx++;
448   }
449 
450   forall1 = STD_AST(std1);
451   list1 = A_LISTG(forall1);
452   nidx1 = 0;
453   for (listp = list1; listp != 0; listp = ASTLI_NEXT(listp)) {
454     idx1[nidx1] = listp;
455     nidx1++;
456   }
457 
458   if (nidx != nidx1)
459     return FALSE;
460 
461   for (i = 0; i < nidx; i++) {
462     isptr = ASTLI_SPTR(idx[i]);
463     isptr1 = ASTLI_SPTR(idx1[i]);
464     if (isptr != isptr1)
465       return FALSE;
466   }
467   return TRUE;
468 }
469 
470 LOGICAL
same_forall_size(int lp1,int lp2,int nested)471 same_forall_size(int lp1, int lp2, int nested)
472 {
473   int idx1[7], idx2[7];
474   int itriple1, itriple2;
475   int lb1, ub1, st1;
476   int lb2, ub2, st2;
477   int std1, std2;
478   int fg1, fg2;
479   int hd1, hd2;
480   int list1, list2, listp;
481   int forall1, forall2;
482   int nidx1, nidx2;
483   int i, k;
484   int asd1, asd2;
485   int ndim1, ndim2;
486   int order2[7];
487   int no;
488   int lhs1, lhs2, newlhs2, l, l2;
489   int sptr1, sptr2;
490   int isptr1, isptr2;
491   int dim1, dim2;
492   int newast, oldast;
493   LOGICAL same = FALSE;
494 
495   hd1 = LP_HEAD(lp1); /*the flow graph node which is the loop head*/
496   fg1 = LP_FG(lp1);
497   std1 = FG_STDFIRST(hd1);
498   forall1 = STD_AST(std1);
499   lhs1 = A_DESTG(A_IFSTMTG(forall1));
500   list1 = A_LISTG(forall1);
501   nidx1 = 0;
502   for (listp = list1; listp != 0; listp = ASTLI_NEXT(listp)) {
503     idx1[nidx1] = listp;
504     ++nidx1;
505   }
506 
507   hd2 = LP_HEAD(lp2);
508   fg2 = LP_FG(lp2);
509   if (!is_dominator(hd1, hd2))
510     return FALSE;
511   std2 = FG_STDFIRST(hd2);
512   forall2 = STD_AST(std2);
513   lhs2 = A_DESTG(A_IFSTMTG(forall2));
514   list2 = A_LISTG(forall2);
515   nidx2 = 0;
516   for (listp = list2; listp != 0; listp = ASTLI_NEXT(listp)) {
517     idx2[nidx2] = listp;
518     nidx2++;
519   }
520 
521   if (nidx1 != nidx2)
522     return FALSE;
523 
524   for (i = 0; i < nidx1 - nested; i++) {
525     itriple1 = ASTLI_TRIPLE(idx1[i]);
526     lb1 = A_LBDG(itriple1);
527     ub1 = A_UPBDG(itriple1);
528     st1 = A_STRIDEG(itriple1);
529     if (st1 == 0)
530       st1 = astb.i1;
531     itriple2 = ASTLI_TRIPLE(idx2[i]);
532     lb2 = A_LBDG(itriple2);
533     ub2 = A_UPBDG(itriple2);
534     st2 = A_STRIDEG(itriple2);
535     if (st2 == 0)
536       st2 = astb.i1;
537     same = FALSE;
538 
539     /*is_same_size(lb1,lb2, ub1, ub2, st1, st2) */
540 
541     if ((lb1 == lb2 && ub1 == ub2 && st1 == st2) ||
542         ((mk_binop(OP_SUB, ub1, lb1, astb.bnd.dtype) ==
543           mk_binop(OP_SUB, ub2, lb2, astb.bnd.dtype)) &&
544          st1 == st2)) {
545       if ((is_avail_expr(lb1, std1, fg1, std2, fg2) &&
546            is_avail_expr(ub1, std1, fg1, std2, fg2) &&
547            is_avail_expr(st1, std1, fg1, std2, fg2)))
548         same = TRUE;
549     }
550     if (!same)
551       return FALSE;
552   }
553 
554   if (!same)
555     return FALSE;
556   return TRUE;
557 }
558 
559 static LOGICAL
same_forall_bnds(int lp1,int lp2,int nested)560 same_forall_bnds(int lp1, int lp2, int nested)
561 {
562   int idx1[7], idx2[7];
563   int itriple1, itriple2;
564   int lb1, ub1, st1;
565   int lb2, ub2, st2;
566   int std1, std2;
567   int fg1, fg2;
568   int hd1, hd2;
569   int list1, list2, listp;
570   int forall1, forall2;
571   int nidx1, nidx2;
572   int i, k;
573   int asd1, asd2;
574   int ndim1, ndim2;
575   int order2[7];
576   int no;
577   int lhs1, lhs2, newlhs2, l, l2;
578   int sptr1, sptr2;
579   int isptr1, isptr2;
580   int dim1, dim2;
581   int newast, oldast;
582   LOGICAL same;
583 
584   hd1 = LP_HEAD(lp1); /*the flow graph node which is the loop head*/
585   fg1 = LP_FG(lp1);
586   std1 = FG_STDFIRST(hd1);
587   forall1 = STD_AST(std1);
588   lhs1 = A_DESTG(A_IFSTMTG(forall1));
589   list1 = A_LISTG(forall1);
590   nidx1 = 0;
591   for (listp = list1; listp != 0; listp = ASTLI_NEXT(listp)) {
592     idx1[nidx1] = listp;
593     ++nidx1;
594   }
595 
596   hd2 = LP_HEAD(lp2);
597   fg2 = LP_FG(lp2);
598   if (!is_dominator(hd1, hd2))
599     return FALSE;
600   std2 = FG_STDFIRST(hd2);
601   forall2 = STD_AST(std2);
602   lhs2 = A_DESTG(A_IFSTMTG(forall2));
603   list2 = A_LISTG(forall2);
604   nidx2 = 0;
605   for (listp = list2; listp != 0; listp = ASTLI_NEXT(listp)) {
606     idx2[nidx2] = listp;
607     nidx2++;
608   }
609 
610   if (nidx1 != nidx2)
611     return FALSE;
612 
613   for (i = 0; i < nidx1 - nested; i++) {
614     itriple1 = ASTLI_TRIPLE(idx1[i]);
615     lb1 = A_LBDG(itriple1);
616     ub1 = A_UPBDG(itriple1);
617     st1 = A_STRIDEG(itriple1);
618     if (st1 == 0)
619       st1 = astb.i1;
620     itriple2 = ASTLI_TRIPLE(idx2[i]);
621     lb2 = A_LBDG(itriple2);
622     ub2 = A_UPBDG(itriple2);
623     st2 = A_STRIDEG(itriple2);
624     if (st2 == 0)
625       st2 = astb.i1;
626     same = FALSE;
627     if (lb1 == lb2 && ub1 == ub2 && st1 == st2) {
628       if ((is_avail_expr(lb1, std1, fg1, std2, fg2) &&
629            is_avail_expr(ub1, std1, fg1, std2, fg2) &&
630            is_avail_expr(st1, std1, fg1, std2, fg2)))
631         same = TRUE;
632     }
633     if (!same)
634       return FALSE;
635   }
636 
637   if (is_duplicate(lhs1, list1))
638     return FALSE;
639   if (is_duplicate(lhs2, list2))
640     return FALSE;
641 
642   ast_visit(1, 1);
643   for (i = 0; i < nidx1; i++) {
644     isptr1 = ASTLI_SPTR(idx1[i]);
645     newast = mk_id(isptr1);
646     isptr2 = ASTLI_SPTR(idx2[i]);
647     oldast = mk_id(isptr2);
648     ast_replace(oldast, newast);
649   }
650   newlhs2 = ast_rewrite(lhs2);
651   ast_unvisit();
652   if (!is_ordered(lhs1, newlhs2, list1, order2, &no))
653     return FALSE;
654 
655   /* has to have same distribution */
656   l = left_subscript_ast(lhs1);
657   sptr1 = left_array_symbol(lhs1);
658   l2 = left_subscript_ast(newlhs2);
659   sptr2 = left_array_symbol(newlhs2);
660 
661   assert(A_TYPEG(l) == A_SUBSCR, "same_forall_bnds: not a subscript", l, 4);
662   asd1 = A_ASDG(l);
663   ndim1 = ASD_NDIM(asd1);
664   assert(A_TYPEG(l2) == A_SUBSCR, "same_forall_bnds: not a subscript", l, 4);
665   asd2 = A_ASDG(l2);
666   ndim2 = ASD_NDIM(asd2);
667 
668   for (i = 0; i < nidx1; i++) {
669     isptr1 = ASTLI_SPTR(idx1[i]);
670     dim1 = -1;
671     dim2 = -1;
672     for (k = 0; k < ndim1; k++)
673       if (is_name_in_expr(ASD_SUBS(asd1, k), isptr1))
674         dim1 = k;
675     for (k = 0; k < ndim2; k++)
676       if (is_name_in_expr(ASD_SUBS(asd2, k), isptr1))
677         dim2 = k;
678     if (dim1 == -1 || dim2 == -1) {
679       /* index does not appear in any distributed dimensions;
680        * until I figure out just what this is doing, be safe */
681       return FALSE;
682     }
683     if (!is_same_align_shape(sptr1, dim1, sptr2, dim2))
684       return FALSE;
685     if (ASD_SUBS(asd1, dim1) != ASD_SUBS(asd2, dim2))
686       return FALSE;
687   }
688 
689   return TRUE;
690 }
691 
692 static struct {
693   int otherlhs;
694   int src;
695   int sink;
696   int list;
697   int after;
698   int order;
699   int forcomm;
700 } conf;
701 
702 /* Return TRUE if there is conflict for loop fusion.
703  * order is just for swap for src and sink at Conflict_.
704  * forcomm is that there is Isno_comm TRUE, no need to test iff forcomm set. */
705 static LOGICAL
Conflict(int list,int src,int sink,int after,int order,int forcomm)706 Conflict(int list, int src, int sink, int after, int order, int forcomm)
707 {
708   LOGICAL result = FALSE;
709 
710   conf.src = src;
711   conf.sink = sink;
712   conf.list = list;
713   conf.after = after;
714   conf.order = order;
715   conf.forcomm = forcomm;
716   return Conflict_(sink);
717 }
718 
719 /* This routine will return TRUE iff,
720  * lhs array appears at sink and their subscripts are different.
721  */
722 static LOGICAL
Conflict_(int sink)723 Conflict_(int sink)
724 {
725   LOGICAL l, r;
726   int argt;
727   int nargs;
728   int asd, asd_lhs;
729   int numdim;
730   int i;
731   int result;
732   int src, sptr, sptr1;
733 
734   if (sink == 0)
735     return FALSE;
736   switch (A_TYPEG(sink)) {
737   case A_CMPLXC:
738   case A_CNST:
739   case A_SUBSTR:
740     return FALSE;
741   case A_MEM:
742     if (Conflict_(A_PARENTG(sink))) {
743       /* see if this 'member' appears in the 'conf.src' tree */
744       int a, p, member;
745       a = conf.src;
746       member = A_SPTRG(A_MEMG(sink));
747       while (1) {
748         switch (A_TYPEG(a)) {
749         case A_SUBSCR:
750           a = A_LOPG(a);
751           break;
752         case A_ID:
753           return TRUE;
754           break;
755         case A_MEM:
756           p = A_PARENTG(a);
757           if (DDTG(A_DTYPEG(p)) == ENCLDTYPEG(member)) {
758             /* same member? different? */
759             if (A_MEMG(a) == A_MEMG(sink))
760               return TRUE;
761             return FALSE;
762           }
763           a = p;
764           break;
765         default:
766           interr("Conflict_: unexpected AST in member tree", a, 3);
767           return FALSE;
768         }
769       }
770     }
771     return FALSE;
772   case A_ID:
773     if (A_SPTRG(sink) == sym_of_ast(conf.src))
774       return TRUE;
775     else
776       return FALSE;
777   case A_BINOP:
778     l = Conflict_(A_LOPG(sink));
779     if (l)
780       return TRUE;
781     r = Conflict_(A_ROPG(sink));
782     if (r)
783       return TRUE;
784     return FALSE;
785   case A_UNOP:
786     return Conflict_(A_LOPG(sink));
787   case A_PAREN:
788   case A_CONV:
789     return Conflict_(A_LOPG(sink));
790   case A_SUBSCR:
791     if (Conflict_(A_LOPG(sink))) {
792       if (conf.order)
793         result = dd_array_conflict(conf.list, sink, conf.src, conf.after);
794       else
795         result = dd_array_conflict(conf.list, conf.src, sink, conf.after);
796       return result;
797     }
798     return FALSE;
799   case A_TRIPLE:
800     l = Conflict_(A_LBDG(sink));
801     if (l)
802       return TRUE;
803     r = Conflict_(A_UPBDG(sink));
804     if (r)
805       return TRUE;
806     return Conflict_(A_STRIDEG(sink));
807   case A_INTR:
808   case A_FUNC:
809     nargs = A_ARGCNTG(sink);
810     argt = A_ARGSG(sink);
811     /* dd_array_conflict does not work if the constructed section is
812      * out of bound. a(10);forall(i=1:10) b(i) = cshift(a(i+2))
813      * here a(3:12) is not in bounds.
814      */
815     if (A_OPTYPEG(sink) == I_CSHIFT || A_OPTYPEG(sink) == I_EOSHIFT) {
816       src = ARGT_ARG(argt, 0);
817       sptr = sym_of_ast(src);
818       sptr1 = sym_of_ast(conf.src);
819       if (sptr == sptr1)
820         return TRUE;
821     }
822     for (i = 0; i < nargs; ++i) {
823       l = Conflict_(ARGT_ARG(argt, i));
824       if (l)
825         return TRUE;
826     }
827     return FALSE;
828   case A_LABEL:
829   default:
830     interr("Conflict_: unexpected ast", sink, 2);
831     return FALSE;
832   }
833 }
834 
835 static LOGICAL
_olap_conflict(int expr,int expr1)836 _olap_conflict(int expr, int expr1)
837 {
838   LOGICAL l, r;
839   int argt;
840   int nargs;
841   int asd, asd_lhs;
842   int numdim;
843   int i;
844   int result;
845 
846   if (expr == 0)
847     return FALSE;
848   switch (A_TYPEG(expr)) {
849   case A_CMPLXC:
850   case A_CNST:
851   case A_ID:
852   case A_SUBSTR:
853   case A_MEM:
854     return FALSE;
855   case A_BINOP:
856     l = _olap_conflict(A_LOPG(expr), expr1);
857     if (l)
858       return TRUE;
859     r = _olap_conflict(A_ROPG(expr), expr1);
860     if (r)
861       return TRUE;
862     return FALSE;
863   case A_UNOP:
864     return _olap_conflict(A_LOPG(expr), expr1);
865   case A_PAREN:
866   case A_CONV:
867     return _olap_conflict(A_LOPG(expr), expr1);
868   case A_SUBSCR:
869     return FALSE;
870   case A_TRIPLE:
871     l = _olap_conflict(A_LBDG(expr), expr1);
872     if (l)
873       return TRUE;
874     r = _olap_conflict(A_UPBDG(expr), expr1);
875     if (r)
876       return TRUE;
877     return _olap_conflict(A_STRIDEG(expr), expr1);
878   case A_INTR:
879   case A_FUNC:
880     nargs = A_ARGCNTG(expr);
881     argt = A_ARGSG(expr);
882     for (i = 0; i < nargs; ++i) {
883       l = _olap_conflict(ARGT_ARG(argt, i), expr1);
884       if (l)
885         return TRUE;
886     }
887     if (A_OPTYPEG(expr) == I_CSHIFT || A_OPTYPEG(expr) == I_EOSHIFT)
888       return is_shift_conflict(expr, argt, expr1);
889     return FALSE;
890   case A_LABEL:
891   default:
892     interr("_olap_conflict: unexpected ast", expr, 2);
893     return FALSE;
894   }
895 }
896 
897 static LOGICAL
is_olap_conflict(int forall,int forall1)898 is_olap_conflict(int forall, int forall1)
899 {
900   int expr, rhs;
901   int expr1, rhs1;
902 
903   expr = A_IFEXPRG(forall);
904   rhs = A_SRCG(A_IFSTMTG(forall));
905   expr1 = A_IFEXPRG(forall1);
906   rhs1 = A_SRCG(A_IFSTMTG(forall1));
907 
908   if (_olap_conflict(expr, expr1))
909     return TRUE;
910   if (_olap_conflict(expr, rhs1))
911     return TRUE;
912   if (_olap_conflict(expr1, expr))
913     return TRUE;
914   if (_olap_conflict(expr1, rhs))
915     return TRUE;
916 
917   if (_olap_conflict(rhs, expr1))
918     return TRUE;
919   if (_olap_conflict(rhs, rhs1))
920     return TRUE;
921   if (_olap_conflict(rhs1, expr))
922     return TRUE;
923   if (_olap_conflict(rhs1, rhs))
924     return TRUE;
925   return FALSE;
926 }
927 
928 /**
929    \brief ...
930 
931    <pre>
932      S1: A(i) = ...
933      S2: ...  = ... A(f(i)) ...
934    </pre>
935 
936    The loops cannot be fused if either of the following calls to
937    dd_array_conflict() return TRUE:
938    <pre>
939      dd_array_conflict(tripletList, A(f(i)), A(i), FALSE)
940      dd_array_conflict(tripletList, A(i), A(f(i)), TRUE)
941    </pre>
942 
943    Alternatively, the statements within the original loops may be of the
944    following form:
945 
946    <pre>
947      S1: ...  = ... A(f(i))
948      S2: A(i) = ...
949    </pre>
950 
951    The loops cannot be fused if the following call to dd_array_conflict()
952    returns TRUE.
953    <pre>
954      dd_array_conflict(tripletList, A(i), A(f(i)), FALSE)
955    </pre>
956 
957    The back end recognizes loops with 0 RHS and transforms these into calls
958    to _mzero, so don't fuse assignments with just 0 on the RHS.
959  */
960 static LOGICAL
is_fusable(int lp,int lp1,int nested)961 is_fusable(int lp, int lp1, int nested)
962 {
963 
964   int i, j, k, l;
965   int std, std1;
966   int forall, forall1;
967   int nd, nd1;
968   int rt, rt1;
969   int rt_std, rt1_std;
970   int hd, hd1;
971   int type, type1;
972   int ii, jj;
973   int expr, expr1;
974   int newexpr1, newrhs1, newlhs1;
975   int newexpr, newrhs;
976   int asn, asn1;
977   int rhs, rhs1;
978   int rhssptr;
979   int lhs, lhs_array, lhs1, lhs1_array;
980   int lhs_sptr, lhs_sptr1;
981   int nidx, nidx1;
982   int list, list1, listp;
983   int isptr;
984   int fg, fg1;
985   LOGICAL fuseable;
986   int fuselp;
987   int oldast, newast;
988   int triple;
989   int idx[7];
990   int cnt;
991   LOGICAL fuse_cnst_rhs;
992 
993   if (XBIT(47, 0x80000000))
994     return FALSE;
995 
996   hd = LP_HEAD(lp); /*the flow graph node which is the loop head*/
997   fg = LP_FG(lp);
998   std = FG_STDFIRST(hd);
999   forall = STD_AST(std);
1000   list = A_LISTG(forall);
1001   /*
1002    * NOTES:
1003    * XBIT(47,0x4000000) - fuse even if constant RHS
1004    * XBIT(8,0x8000000)  - inhibit mem idioms
1005    */
1006   fuse_cnst_rhs = FALSE;
1007   if (XBIT(47, 0x4000000))
1008     fuse_cnst_rhs = TRUE;
1009   else if (XBIT(8, 0x8000000))
1010     fuse_cnst_rhs = TRUE;
1011   else {
1012   }
1013 
1014   expr = A_IFEXPRG(forall);
1015   asn = A_IFSTMTG(forall);
1016   rhs = A_SRCG(asn);
1017   lhs = A_DESTG(asn);
1018   for (lhs_array = lhs; A_TYPEG(lhs_array) == A_MEM;
1019        lhs_array = A_PARENTG(lhs_array))
1020     ;
1021   lhs_sptr = sym_of_ast(lhs);
1022   nd = A_OPT1G(forall);
1023 
1024   if (LP_PARENT(lp) != LP_PARENT(lp1))
1025     return FALSE;
1026 
1027   if (A_TYPEG(rhs) == A_CONV)
1028     rhs = A_LOPG(rhs);
1029   if (flg.opt >= 2 && !fuse_cnst_rhs && A_TYPEG(rhs) == A_CNST) {
1030     /*
1031      * prefer calling a tuned mzero/mem rather fusing.
1032      */
1033     return FALSE;
1034   }
1035   /*the flow graph node which is the loop head */
1036   hd1 = LP_HEAD(lp1);
1037   fg1 = LP_FG(lp1);
1038   if (!is_dominator(hd, hd1))
1039     return FALSE;
1040   if (smp_conflict(hd, hd1))
1041     return FALSE;
1042   std1 = FG_STDFIRST(hd1);
1043   forall1 = STD_AST(std1);
1044   nd1 = A_OPT1G(forall1);
1045   list1 = A_LISTG(forall1);
1046   expr1 = A_IFEXPRG(forall1);
1047   asn1 = A_IFSTMTG(forall1);
1048   rhs1 = A_SRCG(asn1);
1049   lhs1 = A_DESTG(asn1);
1050   for (lhs1_array = lhs1; A_TYPEG(lhs1_array) == A_MEM;
1051        lhs1_array = A_PARENTG(lhs1_array))
1052     ;
1053   lhs_sptr1 = sym_of_ast(lhs1);
1054   if (A_TYPEG(rhs1) == A_CONV)
1055     rhs1 = A_LOPG(rhs1);
1056   if (flg.opt >= 2 && !fuse_cnst_rhs && A_TYPEG(rhs1) == A_CNST) {
1057     /*
1058      * prefer calling a tuned mzero/mem rather fusing.
1059      */
1060     return FALSE;
1061   }
1062 
1063   /* forall1 values should not changed between std and std1 */
1064   /* Except that if they are in block, nothing change them */
1065   if (!is_in_block(std, std1)) {
1066     cnt = 0;
1067     for (listp = list1; listp != 0; listp = ASTLI_NEXT(listp)) {
1068       isptr = ASTLI_SPTR(listp);
1069       idx[cnt] = mk_id(isptr);
1070       cnt++;
1071     }
1072     if (!is_avail_expr_with_list(expr1, std, fg, std1, fg1, idx, cnt) ||
1073         !is_avail_expr_with_list(rhs1, std, fg, std1, fg1, idx, cnt) ||
1074         !is_avail_expr_with_list(lhs1, std, fg, std1, fg1, idx, cnt))
1075       return FALSE;
1076   }
1077 
1078   /* dependency */
1079   if (subscr_dependent(rhs1, lhs, std1, std))
1080     return FALSE;
1081   if (rhs1 && A_LOPG(rhs1) && A_TYPEG(rhs1) == A_SUBSCR &&
1082       A_TYPEG(A_LOPG(rhs1)) == A_MEM)
1083     if (subscr_dependent(A_LOPG(rhs1), lhs, std1, std))
1084       return FALSE;
1085   if (subscr_dependent(expr1, lhs, std1, std))
1086     return FALSE;
1087   if (expr1 && A_LOPG(expr1) && A_TYPEG(expr1) == A_SUBSCR &&
1088       A_TYPEG(A_LOPG(expr1)) == A_MEM)
1089     if (subscr_dependent(A_LOPG(expr1), lhs, std1, std))
1090       return FALSE;
1091   if (subscr_dependent(lhs1, lhs, std1, std))
1092     return FALSE;
1093   if (lhs1 && A_LOPG(lhs1) && A_TYPEG(lhs1) == A_SUBSCR &&
1094       A_TYPEG(A_LOPG(lhs1)) == A_MEM)
1095     if (subscr_dependent(A_LOPG(lhs1), lhs, std1, std))
1096       return FALSE;
1097   if (rhs && lhs1 && A_TYPEG(rhs) == A_SUBSCR && A_LOPG(lhs1) &&
1098       A_TYPEG(lhs1) == A_SUBSCR)
1099     if (subscr_dependent(rhs, A_LOPG(lhs1), std1, std))
1100       return FALSE;
1101 
1102   /* don't let LHSs appears at FORALL list */
1103   /* forall(i=x(1):x(2)) x(i) =   */
1104   for (listp = list1; listp != 0; listp = ASTLI_NEXT(listp)) {
1105     triple = ASTLI_TRIPLE(listp);
1106     if (A_TYPEG(lhs_array) == A_SUBSCR &&
1107         contains_ast(triple, A_LOPG(lhs_array)))
1108       return FALSE;
1109     if (A_TYPEG(lhs1_array) == A_SUBSCR &&
1110         contains_ast(triple, A_LOPG(lhs1_array)))
1111       return FALSE;
1112   }
1113 
1114   /* because of using mask pghpf_vsub_gather	*/
1115   /* this can be optimized, iff expr1 and rhs1
1116    * does not have indirections */
1117   if (A_TYPEG(lhs_array) == A_SUBSCR && contains_ast(expr1, A_LOPG(lhs_array)))
1118     return FALSE;
1119 
1120   if (expr1)
1121     if (Conflict(A_LISTG(forall), lhs, expr1, FALSE, 1, 0))
1122       return FALSE;
1123   if (Conflict(A_LISTG(forall), lhs, rhs1, FALSE, 1, 0))
1124     return FALSE;
1125 
1126   /* for communication */
1127   conf.otherlhs = lhs1;
1128   if (expr1)
1129     if (Conflict(A_LISTG(forall), lhs, expr1, TRUE, 0, 1))
1130       return FALSE;
1131   if (Conflict(A_LISTG(forall), lhs, rhs1, TRUE, 0, 1))
1132     return FALSE;
1133 
1134   if (expr)
1135     if (Conflict(A_LISTG(forall), lhs1, expr, FALSE, 0, 0))
1136       return FALSE;
1137   if (Conflict(A_LISTG(forall), lhs1, rhs, FALSE, 0, 0))
1138     return FALSE;
1139 
1140   if (is_branch_between(lp, lp1))
1141     return FALSE;
1142   if (!is_in_block(std, std1))
1143     if (A_TYPEG(lhs1_array) == A_SUBSCR &&
1144         is_contains_ast_between(lp, lp1, A_LOPG(lhs1_array)))
1145       return FALSE;
1146 
1147   if (is_olap_conflict(forall, forall1))
1148     return FALSE;
1149   fuseable = TRUE;
1150   for (k = 0; k < FT_NFUSE(nd, nested); k++) {
1151     fuselp = FT_FUSELP(nd, nested, k);
1152     if (!is_fusable(fuselp, lp1, nested))
1153       fuseable = FALSE;
1154   }
1155   if (!fuseable)
1156     return FALSE;
1157 
1158   return TRUE;
1159 }
1160 
1161 /*
1162  * Determine if two flowgraph nodes conflict with respect to smp
1163  * execution given that fg1 dominates fg2.
1164  */
1165 static LOGICAL
smp_conflict(int fg1,int fg2)1166 smp_conflict(int fg1, int fg2)
1167 {
1168   int fg;
1169   int std;
1170   int ast;
1171 
1172   if (!flg.smp)
1173     return FALSE;
1174   if (fg1 == fg2)
1175     /* no conflict if the same node */
1176     return FALSE;
1177   if ((FG_PAR(fg1) && !FG_PAR(fg2)) || (!FG_PAR(fg1) && FG_PAR(fg2)))
1178     /* fg1 is serial & fg2 is parallel, or vice versa */
1179     return TRUE;
1180   if (FG_PAR(fg2)) {
1181     /* both are within a parallel region; determine if it's the
1182      * same region.
1183      */
1184     for (fg = fg1; fg != fg2; fg = FG_LNEXT(fg)) {
1185       rdilts(fg);
1186       for (std = FG_STDFIRST(fg); std; std = STD_NEXT(std)) {
1187         ast = STD_AST(std);
1188         if (A_TYPEG(ast) == A_MP_ENDPARALLEL) {
1189           /* endparallel was seen in a node before fg2 - it must
1190            * be a different parallel region.
1191            */
1192           wrilts(fg);
1193           return TRUE;
1194         }
1195       }
1196       wrilts(fg);
1197     }
1198   }
1199   if (FG_CS(fg1) ^ FG_CS(fg2))
1200     /* fg1 is in a critical section & fg2 is not, or vice versa */
1201     return TRUE;
1202   if (FG_CS(fg2)) {
1203     /* both are within a critical section; determine if it's the
1204      * same critical section.
1205      */
1206     for (fg = fg1; fg != fg2; fg = FG_LNEXT(fg)) {
1207       rdilts(fg);
1208       for (std = FG_STDFIRST(fg); std; std = STD_NEXT(std)) {
1209         ast = STD_AST(std);
1210         if (A_TYPEG(ast) == A_MP_ENDCRITICAL) {
1211           wrilts(fg);
1212           return TRUE;
1213         }
1214       }
1215       wrilts(fg);
1216     }
1217   }
1218   if (FG_PARSECT(fg1) ^ FG_PARSECT(fg2))
1219     /* fg1 is in a parallel section (master, single, sections) & fg2 is
1220      * not, or vice versa
1221      */
1222     return TRUE;
1223   if (FG_PARSECT(fg2)) {
1224     /* both are within a parallel section; determine if it's the
1225      * same parallel section.
1226      */
1227     for (fg = fg1; fg != fg2; fg = FG_LNEXT(fg)) {
1228       rdilts(fg);
1229       for (std = FG_STDFIRST(fg); std; std = STD_NEXT(std)) {
1230         ast = STD_AST(std);
1231         switch (A_TYPEG(ast)) {
1232         case A_MP_ENDMASTER:
1233         case A_MP_ENDSINGLE:
1234         case A_MP_ENDSECTIONS:
1235           wrilts(fg);
1236           return TRUE;
1237         }
1238       }
1239       wrilts(fg);
1240     }
1241   }
1242 
1243   return FALSE;
1244 }
1245 
1246 static LOGICAL
is_in_block(int std,int std1)1247 is_in_block(int std, int std1)
1248 {
1249   int forall, forall1, forallh;
1250   int header, nextstd, fusedstd;
1251   int nd, k;
1252 
1253   forall = STD_AST(std);
1254   forall1 = STD_AST(std1);
1255   nd = A_OPT1G(forall);
1256   header = FT_HEADER(nd);
1257   forallh = STD_AST(header);
1258   assert(A_TYPEG(forallh) == A_FORALL, "is_in_block: expecting forall", forallh,
1259          3);
1260   nd = A_OPT1G(forallh);
1261   assert(nd, "is_in_block: nd is 0", forallh, 3);
1262   for (k = 0; k < FT_NFUSE(nd, 0); k++) {
1263     fusedstd = FT_FUSEDSTD(nd, 0, k);
1264     nextstd = STD_NEXT(header);
1265     while (A_TYPEG(STD_AST(nextstd)) == A_CONTINUE)
1266       nextstd = STD_NEXT(nextstd);
1267     header = nextstd;
1268     if (nextstd == fusedstd)
1269       continue;
1270     return FALSE;
1271   }
1272   nextstd = STD_NEXT(header);
1273   while (A_TYPEG(STD_AST(nextstd)) == A_CONTINUE)
1274     nextstd = STD_NEXT(nextstd);
1275   return (nextstd == std1);
1276 }
1277 
1278 static LOGICAL
is_branch_between(int lp,int lp1)1279 is_branch_between(int lp, int lp1)
1280 {
1281   int ast, std;
1282   int hd, fg;
1283   int hd1, fg1;
1284   int type;
1285 
1286   hd = LP_HEAD(lp);
1287   fg = LP_FG(lp);
1288   hd1 = LP_HEAD(lp1);
1289   fg1 = LP_FG(lp1);
1290 
1291   if (lp == lp1)
1292     return FALSE;
1293 
1294   while (TRUE) {
1295     fg = FG_LNEXT(fg);
1296     if (fg == fg1)
1297       return FALSE;
1298     rdilts(fg);
1299     for (std = FG_STDFIRST(fg); std; std = STD_NEXT(std)) {
1300       ast = STD_AST(std);
1301       type = A_TYPEG(ast);
1302       if (type != A_FORALL && type != A_DO && type != A_ENDDO) {
1303         if (STD_BR(std)) {
1304           wrilts(fg);
1305           return TRUE;
1306         }
1307       }
1308     }
1309     wrilts(fg);
1310   }
1311 }
1312 
1313 static LOGICAL
is_contains_ast_between(int lp,int lp1,int a)1314 is_contains_ast_between(int lp, int lp1, int a)
1315 {
1316   int ast, std;
1317   int hd, fg;
1318   int hd1, fg1;
1319   int type;
1320 
1321   hd = LP_HEAD(lp);
1322   fg = LP_FG(lp);
1323   hd1 = LP_HEAD(lp1);
1324   fg1 = LP_FG(lp1);
1325 
1326   while (TRUE) {
1327     fg = FG_LNEXT(fg);
1328     if (fg == fg1)
1329       return FALSE;
1330     rdilts(fg);
1331     for (std = FG_STDFIRST(fg); std; std = STD_NEXT(std)) {
1332       ast = STD_AST(std);
1333       if (contains_ast(ast, a)) {
1334         wrilts(fg);
1335         return TRUE;
1336       }
1337     }
1338     wrilts(fg);
1339   }
1340 }
1341 
1342 /* This routine is same as is_avail_expr except that
1343  * it will look at the cnt number of variables given by idx.
1344  * idx has varaible A_ID ast. it will decompose_expression
1345  * and eliminate idx variable from list.
1346  * it helps forall triplet variables ignored.
1347  */
1348 static LOGICAL
is_avail_expr_with_list(int expr,int std,int fg,int std1,int fg1,int idx[],int cnt)1349 is_avail_expr_with_list(int expr, int std, int fg, int std1, int fg1, int idx[],
1350                         int cnt)
1351 {
1352   int lst[100], lst1[100];
1353   int size, nvar;
1354   int ele;
1355   int found;
1356   int i, j;
1357   int nvar1;
1358 
1359   if (!expr)
1360     return TRUE;
1361   size = 100;
1362   nvar = 0;
1363 
1364   decompose_expression(expr, lst, size, &nvar, NULL);
1365   if (nvar > size)
1366     return FALSE;
1367 
1368   /* eliminate duplicate variables from the lst */
1369   nvar1 = 0;
1370   for (i = 0; i < nvar; i++) {
1371     found = 0;
1372     for (j = 0; j < nvar1; j++)
1373       if (lst[i] == lst1[j])
1374         found = 1;
1375     if (found)
1376       continue;
1377     lst1[nvar1] = lst[i];
1378     nvar1++;
1379   }
1380 
1381   for (i = 0; i < nvar1; i++) {
1382     ele = lst1[i];
1383     found = 0;
1384     for (j = 0; j < cnt; j++)
1385       if (idx[j] == ele)
1386         found = 1;
1387     if (found)
1388       continue;
1389     if (!is_avail_expr(ele, std, fg, std1, fg1))
1390       return FALSE;
1391   }
1392   return TRUE;
1393 }
1394 
1395 static void
put_forall_fg(void)1396 put_forall_fg(void)
1397 {
1398   int i, ii;
1399   int std, hd;
1400   int forall;
1401   int nd;
1402   for (ii = 1; ii <= opt.nloops; ++ii) {
1403     i = lpsort[ii];
1404     if (LP_FORALL(i)) {
1405       hd = LP_HEAD(i); /*the flow graph node which is the loop head*/
1406       std = FG_STDFIRST(hd);
1407       forall = STD_AST(std);
1408       nd = A_OPT1G(forall);
1409       FT_FG(nd) = hd;
1410     }
1411   }
1412 }
1413 
1414 static void
eliminate_rt(int type0)1415 eliminate_rt(int type0)
1416 {
1417   int i, j, k, l;
1418   int std, std1;
1419   int forall, forall1;
1420   int nd, nd1;
1421   int rt, rt1;
1422   int rt_std, rt1_std;
1423   int hd, hd1;
1424   int type, type1;
1425   int ii, jj;
1426   int header, header1;
1427   int header_forall, header_forall1;
1428   int header_fg, header_fg1;
1429   LITEMF *list_rt, *list_rt1;
1430 
1431   for (ii = 1; ii <= opt.nloops; ++ii) {
1432     i = lpsort[ii];
1433     if (LP_FORALL(i)) {
1434       hd = LP_HEAD(i); /*the flow graph node which is the loop head*/
1435       std = FG_STDFIRST(hd);
1436       forall = STD_AST(std);
1437       nd = A_OPT1G(forall);
1438       if (FT_NRT(nd) == 0)
1439         continue;
1440       for (jj = ii; jj <= opt.nloops; ++jj) {
1441         j = lpsort[jj];
1442         if (LP_FORALL(j)) {
1443           /*the flow graph node which is the loop head */
1444           hd1 = LP_HEAD(j);
1445           if (!is_dominator(hd, hd1))
1446             continue;
1447           std1 = FG_STDFIRST(hd1);
1448           forall1 = STD_AST(std1);
1449           nd1 = A_OPT1G(forall1);
1450           if (FT_NRT(nd1) == 0)
1451             continue;
1452 
1453           /* when foralls are fused,
1454            *	first header should dominate the second  header
1455            */
1456           header = FT_HEADER(nd);
1457           header1 = FT_HEADER(nd1);
1458           header_forall = STD_AST(header);
1459           header_forall1 = STD_AST(header1);
1460           assert(A_TYPEG(header_forall) == A_FORALL,
1461                  "eliminate_rt:expecting forall", header_forall, 2);
1462           assert(A_TYPEG(header_forall1) == A_FORALL,
1463                  "eliminate_rt:expecting forall1", header_forall1, 2);
1464           header_fg = FT_FG(A_OPT1G(header_forall));
1465           header_fg1 = FT_FG(A_OPT1G(header_forall1));
1466           if (header != header1)
1467             if (!is_dominator(header_fg, header_fg1))
1468               continue;
1469 
1470           list_rt = FT_RTL(nd);
1471           for (k = 0; k < FT_NRT(nd); k++) {
1472             rt_std = list_rt->item;
1473             list_rt = list_rt->next;
1474             rt = STD_AST(rt_std);
1475             if (STD_DELETE(rt_std))
1476               continue;
1477             list_rt1 = FT_RTL(nd1);
1478             for (l = 0; l < FT_NRT(nd1); l++) {
1479               rt1_std = list_rt1->item;
1480               list_rt1 = list_rt1->next;
1481 
1482               rt1 = STD_AST(rt1_std);
1483               type = A_TYPEG(rt);
1484               if (type == A_ASN)
1485                 type = A_TYPEG(A_SRCG(rt));
1486               type1 = A_TYPEG(rt1);
1487               if (type1 == A_ASN)
1488                 type1 = A_TYPEG(A_SRCG(rt1));
1489               if (STD_DELETE(rt1_std))
1490                 continue;
1491               if (rt == rt1)
1492                 continue;
1493               if (type != type1)
1494                 continue;
1495               if (i == j)
1496                 if (k > l)
1497                   continue;
1498               if (type == A_HCYCLICLP)
1499                 type = A_HLOCALIZEBNDS;
1500               /* to share alloc after sharing comm_start
1501                * since comm_start can change alloc
1502                */
1503 
1504               if (!independent_commtype(type, type0))
1505                 continue;
1506 
1507               /* all run-tme calls require some freeing
1508                * except localize bounds
1509                * a = b
1510                * if(i.eq.0) then
1511                *	    a =b
1512                * endif
1513                */
1514               /* You need post_dominator here for much better
1515                * optimization
1516                */
1517               if (type != A_HLOCALIZEBNDS && type != A_HCYCLICLP &&
1518                   is_branch_between(i, j))
1519                 continue;
1520 
1521               switch (type) {
1522               case A_HLOCALIZEBNDS: /* type0==4 */
1523               case A_HCYCLICLP:     /* type0==4 */
1524                 eliminate_bnds(i, j, rt_std, rt1_std);
1525                 break;
1526               case A_HALLOBNDS: /* type0==3 */
1527                 eliminate_alloc(i, j, rt_std, rt1_std);
1528                 break;
1529               case A_HSECT: /* type0==4 */
1530                 eliminate_sect(i, j, rt_std, rt1_std);
1531                 break;
1532               case A_HCOPYSECT: /* type0==1 */
1533                 eliminate_copy(i, j, rt_std, rt1_std);
1534                 break;
1535               case A_HGATHER:  /* type0==1 */
1536               case A_HSCATTER: /* type0==1 */
1537                 eliminate_gather(i, j, rt_std, rt1_std);
1538                 break;
1539               case A_HOVLPSHIFT: /* type0==1 */
1540                 eliminate_shift(i, j, rt_std, rt1_std);
1541                 break;
1542               case A_HCSTART: /* type0==2 */
1543                 eliminate_start(i, j, rt_std, rt1_std);
1544                 break;
1545               default:
1546                 break;
1547               }
1548             }
1549           }
1550         }
1551       }
1552     }
1553   }
1554 }
1555 
1556 /* This routine is used by eliminate_rt.
1557  * It is designed to eliminate independent rt at the same
1558  * call of eliminate_rt to speed up compilation.
1559  */
1560 static LOGICAL
independent_commtype(int type,int type0)1561 independent_commtype(int type, int type0)
1562 {
1563   if (type0 == 1) {
1564     if (type == A_HGATHER || type == A_HSCATTER || type == A_HCOPYSECT ||
1565         type == A_HOVLPSHIFT)
1566       return TRUE;
1567     else
1568       return FALSE;
1569   } else if (type0 == 2) {
1570     if (type == A_HCSTART)
1571       return TRUE;
1572     else
1573       return FALSE;
1574   } else if (type0 == 3) {
1575     if (type == A_HALLOBNDS)
1576       return TRUE;
1577     else
1578       return FALSE;
1579   } else if (type0 == 4) {
1580     if (type == A_HSECT || type == A_HLOCALIZEBNDS)
1581       return TRUE;
1582     else
1583       return FALSE;
1584   } else
1585     assert(0, "merged_commtype:wrong-type", type0, 2);
1586   return FALSE;
1587 }
1588 
1589 static LOGICAL
is_same_descr_for_bnds(int rt,int rt1)1590 is_same_descr_for_bnds(int rt, int rt1)
1591 {
1592   int lhs, lhs1;
1593   int nd, nd1;
1594   int sptr, sptr1;
1595   int dim, dim1;
1596 
1597   nd = A_OPT1G(rt);
1598   nd1 = A_OPT1G(rt1);
1599 
1600   lhs = FT_BND_LHS(nd);
1601   sptr = left_array_symbol(lhs);
1602   dim = A_DIMG(rt);
1603   dim = get_int_cval(A_SPTRG(A_ALIASG(dim))) - 1;
1604 
1605   lhs1 = FT_BND_LHS(nd1);
1606   sptr1 = left_array_symbol(lhs1);
1607   dim1 = A_DIMG(rt1);
1608   dim1 = get_int_cval(A_SPTRG(A_ALIASG(dim1))) - 1;
1609   if (POINTERG(sptr) || POINTERG(sptr1))
1610     return FALSE;
1611   return is_same_align_shape(sptr, dim, sptr1, dim1);
1612 }
1613 
1614 static LOGICAL
is_same_align_shape(int sptr,int dim,int sptr1,int dim1)1615 is_same_align_shape(int sptr, int dim, int sptr1, int dim1)
1616 {
1617   return TRUE;
1618 }
1619 
1620 static void
eliminate_bnds(int lp,int lp1,int rt_std,int rt1_std)1621 eliminate_bnds(int lp, int lp1, int rt_std, int rt1_std)
1622 {
1623   int itriple, itriple1;
1624   int lb, ub, st;
1625   int lb1, ub1, st1;
1626   int std, std1;
1627   int fg, fg1;
1628   int rt, rt1;
1629   int nd, nd1;
1630   int hd, hd1;
1631 
1632   rt = STD_AST(rt_std);
1633   rt1 = STD_AST(rt1_std);
1634   assert(LP_FORALL(lp), "eliminate_bnds: forall LP not set", lp, 2);
1635   hd = LP_HEAD(lp);
1636   std = FG_STDFIRST(hd);
1637   fg = LP_FG(lp);
1638 
1639   assert(LP_FORALL(lp1), "eliminate_bnds: forall1 LP not set", lp1, 2);
1640   hd1 = LP_HEAD(lp1);
1641   std1 = FG_STDFIRST(hd1);
1642   fg1 = LP_FG(lp1);
1643 
1644   if (!is_same_descr_for_bnds(rt, rt1))
1645     return;
1646 
1647   nd = A_OPT1G(rt);
1648   nd1 = A_OPT1G(rt1);
1649   itriple = A_ITRIPLEG(rt);
1650   lb = A_LBDG(itriple);
1651   ub = A_UPBDG(itriple);
1652   st = A_STRIDEG(itriple);
1653   if (st == 0)
1654     st = astb.i1;
1655 
1656   itriple1 = A_ITRIPLEG(rt1);
1657   lb1 = A_LBDG(itriple1);
1658   ub1 = A_UPBDG(itriple1);
1659   st1 = A_STRIDEG(itriple1);
1660   if (st1 == 0)
1661     st1 = astb.i1;
1662   if (lb == lb1 && ub == ub1 && st == st1) {
1663     if (is_avail_expr(lb, std, fg, std1, fg1) &&
1664         is_avail_expr(ub, std, fg, std1, fg1) &&
1665         is_avail_expr(st, std, fg, std1, fg1)) {
1666       FT_BND_SAME(nd1) = rt;
1667       STD_DELETE(rt1_std) = 1;
1668       optsum.bnd++;
1669     }
1670   }
1671 }
1672 
1673 static LOGICAL
is_same_array_bounds(int sub,int sub1,int std,int std1,int fg,int fg1)1674 is_same_array_bounds(int sub, int sub1, int std, int std1, int fg, int fg1)
1675 {
1676   int ndim, ndim1;
1677   int asd, asd1;
1678   int count;
1679   int lb, ub, st;
1680   int lb1, ub1, st1;
1681   int i;
1682   int itriple, itriple1;
1683 
1684   while (1) {
1685     if (A_TYPEG(sub) != A_TYPEG(sub1))
1686       return FALSE;
1687     switch (A_TYPEG(sub)) {
1688     case A_ID:
1689       return TRUE;
1690     case A_MEM:
1691       sub = A_PARENTG(sub);
1692       sub1 = A_PARENTG(sub1);
1693       break;
1694     case A_SUBSTR:
1695       sub = A_LOPG(sub);
1696       sub1 = A_LOPG(sub1);
1697       break;
1698     case A_SUBSCR:
1699       asd = A_ASDG(sub);
1700       ndim = ASD_NDIM(asd);
1701       asd1 = A_ASDG(sub1);
1702       ndim1 = ASD_NDIM(asd1);
1703       if (ndim != ndim1)
1704         return FALSE;
1705       count = 0;
1706       for (i = 0; i < ndim; i++) {
1707         itriple = ASD_SUBS(asd, i);
1708         if (A_TYPEG(itriple) == A_TRIPLE) {
1709           lb = A_LBDG(itriple);
1710           ub = A_UPBDG(itriple);
1711           st = A_STRIDEG(itriple);
1712           if (st == 0)
1713             st = astb.i1;
1714         } else {
1715           lb = itriple;
1716           ub = astb.i1;
1717           st = astb.i1;
1718         }
1719 
1720         itriple1 = ASD_SUBS(asd1, i);
1721         if (A_TYPEG(itriple1) == A_TRIPLE) {
1722           lb1 = A_LOPG(itriple1);
1723           ub1 = A_UPBDG(itriple1);
1724           st1 = A_STRIDEG(itriple1);
1725           if (st1 == 0)
1726             st1 = astb.i1;
1727         } else {
1728           lb1 = itriple1;
1729           ub1 = astb.i1;
1730           st1 = astb.i1;
1731         }
1732 
1733         if (lb == lb1 && ub == ub1 && st == st1) {
1734           if (is_avail_expr(lb, std, fg, std1, fg1) &&
1735               is_avail_expr(ub, std, fg, std1, fg1) &&
1736               is_avail_expr(st, std, fg, std1, fg1))
1737             count++;
1738         }
1739       }
1740       if (count != ndim)
1741         return FALSE;
1742       sub = A_LOPG(sub);
1743       sub1 = A_LOPG(sub1);
1744       break;
1745     default:
1746       interr("is_same_array_bounds: unexpected AST type ", sub, 0);
1747       return FALSE;
1748     }
1749   }
1750 }
1751 
1752 /* This routine checks whether sub and sub1 arrays have same subscripts
1753  * it can allows that they can have different subscripts iff
1754  *    1-if subscript is scalar and collapsed.
1755  *    2-if trailing subscripts are scalars and they are collapsed.
1756  * Also, each array dimension extent must be the same, up to the
1757  * last nonscalar/nondistributed dimension.
1758  */
1759 static LOGICAL
is_same_array_bounds_for_schedule(int sub,int sub1,int std,int std1,int fg,int fg1)1760 is_same_array_bounds_for_schedule(int sub, int sub1, int std, int std1, int fg,
1761                                   int fg1)
1762 {
1763   return TRUE;
1764 }
1765 
1766 LOGICAL
is_same_array_alignment(int sptr,int sptr1)1767 is_same_array_alignment(int sptr, int sptr1)
1768 {
1769   return TRUE;
1770 }
1771 
1772 /* This routine is used for schedule elimination.
1773  * It checks if the arrays are aligned to the same template
1774  * with the same number of distribution and
1775  * aligned to the same template axis.
1776  * And also overlap area has to be the same.
1777  */
1778 static LOGICAL
is_same_array_alignment_for_schedule(int sptr,int sptr1)1779 is_same_array_alignment_for_schedule(int sptr, int sptr1)
1780 {
1781   return TRUE;
1782 }
1783 
1784 static int
find_lp(int std)1785 find_lp(int std)
1786 {
1787   int i;
1788   int hd1;
1789   int std1;
1790 
1791   for (i = 1; i <= opt.nloops; ++i) {
1792     hd1 = LP_HEAD(i);
1793     std1 = FG_STDFIRST(hd1);
1794     if (std == std1)
1795       return i;
1796   }
1797   assert(0, "find_lp: loop not found", std, 3);
1798   return 0;
1799 }
1800 
1801 static void
eliminate_alloc(int lp,int lp1,int rt_std,int rt1_std)1802 eliminate_alloc(int lp, int lp1, int rt_std, int rt1_std)
1803 {
1804   int std, std1;
1805   int fg, fg1;
1806   int rt, rt1;
1807   int nd, nd1;
1808   int i;
1809   int sub, ndim, asd;
1810   int sub1, ndim1, asd1;
1811   int count;
1812   int sptr;
1813   int sptr1;
1814   int hd, hd1;
1815   int freestd, freestd1;
1816   int lp2, hd2;
1817   int lp3, hd3;
1818 
1819   if (LP_PARENT(lp) != LP_PARENT(lp1))
1820     return;
1821   rt = STD_AST(rt_std);
1822   rt1 = STD_AST(rt1_std);
1823   assert(LP_FORALL(lp), "eliminate_alloc: expecting forall", lp, 2);
1824   hd = LP_HEAD(lp);
1825   std = FG_STDFIRST(hd);
1826   fg = LP_FG(lp);
1827 
1828   assert(LP_FORALL(lp1), "eliminate_alloc: expecting forall1", lp1, 2);
1829   hd1 = LP_HEAD(lp1);
1830   std1 = FG_STDFIRST(hd1);
1831   fg1 = LP_FG(lp1);
1832 
1833   /* has to have same distribution */
1834   nd = A_OPT1G(rt);
1835   nd1 = A_OPT1G(rt1);
1836   if (FT_ALLOC_USED(nd1))
1837     return;
1838   if (FT_ALLOC_USED(nd))
1839     return;
1840 
1841   /* has to be disjoint live time */
1842   /* first free has to dominate the second alloc */
1843   if (lp == lp1)
1844     return;
1845   freestd = FT_ALLOC_FREE(nd);
1846   if (freestd == std1)
1847     return;
1848   if (!is_dominator_fg(freestd, std1))
1849     return;
1850 
1851   freestd1 = FT_ALLOC_FREE(nd1);
1852   if (freestd == freestd1)
1853     return;
1854   if (!is_dominator_fg(freestd, freestd1))
1855     return;
1856 
1857   sptr = FT_ALLOC_SPTR(nd);
1858   sptr1 = FT_ALLOC_SPTR(nd1);
1859   if (!is_same_array_alignment(sptr, sptr1))
1860     return;
1861 
1862   /* has to have the same type */
1863   if (DTY(DTYPEG(sptr) + 1) != DTY(DTYPEG(sptr1) + 1))
1864     return;
1865 
1866   /* can not be re-used at the same loop twice */
1867   if (FT_ALLOC_FREE(nd) == FT_ALLOC_FREE(nd1))
1868     return;
1869 
1870   /* has to have the same alloc bounds */
1871   sub = A_LOPG(rt);
1872   sub1 = A_LOPG(rt1);
1873   if (is_same_array_bounds(sub, sub1, std, std1, fg, fg1)) {
1874     FT_ALLOC_SAME(nd1) = rt;
1875     FT_ALLOC_OUT(nd1) = FT_ALLOC_OUT(nd);
1876     if (is_dominator_fg(FT_ALLOC_FREE(nd), FT_ALLOC_FREE(nd1)))
1877       FT_ALLOC_FREE(nd) = FT_ALLOC_FREE(nd1);
1878     FT_ALLOC_REUSE(nd) = 1;
1879     STD_DELETE(rt1_std) = 1;
1880     optsum.alloc++;
1881   }
1882 }
1883 
1884 LOGICAL
is_same_array_shape(int sptr,int sptr1)1885 is_same_array_shape(int sptr, int sptr1)
1886 {
1887   LOGICAL result;
1888   int dist, align;
1889   ADSC *ad, *ad1;
1890   int ndim, ndim1;
1891   int lb, lb1;
1892   int ub, ub1;
1893   int i;
1894 
1895   result = TRUE;
1896   if (sptr == sptr1)
1897     return TRUE;
1898 
1899   if (ALLOCG(sptr) || ALLOCG(sptr1))
1900     result = FALSE;
1901 
1902   ad = AD_DPTR(DTYPEG(sptr));
1903   ad1 = AD_DPTR(DTYPEG(sptr1));
1904   ndim = rank_of_sym(sptr);
1905   ndim1 = rank_of_sym(sptr1);
1906 
1907   if (ndim != ndim1)
1908     result = FALSE;
1909 
1910   for (i = 0; i < ndim; i++) {
1911     lb = AD_LWAST(ad, i);
1912     lb1 = AD_LWAST(ad1, i);
1913     if (lb != lb1)
1914       result = FALSE;
1915     ub = AD_UPAST(ad, i);
1916     ub1 = AD_UPAST(ad1, i);
1917     if (ub != ub1)
1918       result = FALSE;
1919   }
1920 
1921   return result;
1922 }
1923 
1924 static void
eliminate_sect(int lp,int lp1,int rt_std,int rt1_std)1925 eliminate_sect(int lp, int lp1, int rt_std, int rt1_std)
1926 {
1927   int std, std1;
1928   int fg, fg1;
1929   int rt, rt1;
1930   int nd, nd1;
1931   int i;
1932   int sub, ndim, asd;
1933   int sub1, ndim1, asd1;
1934   int count;
1935   int sptr;
1936   int sptr1;
1937   int hd, hd1;
1938   int sect, sect1;
1939   int allocstd, allocstd1;
1940   int alloc, alloc1;
1941   int nd3;
1942   int arr, arr1;
1943   int sectflag, sectflag1;
1944   int bogus, bogus1;
1945 
1946   if (LP_PARENT(lp) != LP_PARENT(lp1))
1947     return;
1948   rt = STD_AST(rt_std);
1949   rt1 = STD_AST(rt1_std);
1950   assert(LP_FORALL(lp), "eliminate_sect: expecting forall", lp, 2);
1951   hd = LP_HEAD(lp);
1952   std = FG_STDFIRST(hd);
1953   fg = LP_FG(lp);
1954 
1955   assert(LP_FORALL(lp1), "eliminate_sect: expecting forall1", lp1, 2);
1956   hd1 = LP_HEAD(lp1);
1957   std1 = FG_STDFIRST(hd1);
1958   fg1 = LP_FG(lp1);
1959 
1960   sect = A_SRCG(rt);
1961   sect1 = A_SRCG(rt1);
1962   nd = A_OPT1G(sect);
1963   nd1 = A_OPT1G(sect1);
1964   arr = A_LOPG(sect);
1965   arr1 = A_LOPG(sect1);
1966 
1967   /* has to have same flag */
1968   sectflag = FT_SECT_FLAG(nd);
1969   sectflag1 = FT_SECT_FLAG(nd1);
1970   if (sectflag != sectflag1)
1971     return;
1972 
1973   allocstd = FT_SECT_ALLOC(nd);
1974   if (allocstd) {
1975     alloc = STD_AST(allocstd);
1976     nd3 = A_OPT1G(alloc);
1977     sptr = FT_ALLOC_OUT(nd3);
1978     FT_SECT_SPTR(nd) = sptr;
1979     bogus = getbit(sectflag, 8);
1980     if (is_whole_array(arr) && !bogus) {
1981       DESCUSEDP(sptr, 1);
1982       FT_SECT_OUT(nd) = DESCRG(sptr);
1983     }
1984   }
1985 
1986   allocstd1 = FT_SECT_ALLOC(nd1);
1987   if (allocstd1) {
1988     alloc1 = STD_AST(allocstd1);
1989     nd3 = A_OPT1G(alloc1);
1990     sptr = FT_ALLOC_OUT(nd3);
1991     FT_SECT_SPTR(nd1) = sptr;
1992     bogus1 = getbit(sectflag1, 8);
1993     if (is_whole_array(arr1) && !bogus1) {
1994       DESCUSEDP(sptr, 1);
1995       FT_SECT_OUT(nd1) = DESCRG(sptr);
1996     }
1997   }
1998 
1999   /* has to have same distribution */
2000   sptr = FT_SECT_SPTR(nd);
2001   if (STYPEG(sptr) == ST_MEMBER)
2002     return;
2003   sptr1 = FT_SECT_SPTR(nd1);
2004   if (!is_same_array_alignment(sptr, sptr1))
2005     return;
2006 
2007   /* has to have the same type */
2008   if (DTY(DTYPEG(sptr) + 1) != DTY(DTYPEG(sptr1) + 1))
2009     return;
2010 
2011   if (!is_same_array_shape(sptr, sptr1))
2012     return;
2013 
2014   /* has to have the same section bounds */
2015   sub = A_LOPG(sect);
2016   sub1 = A_LOPG(sect1);
2017   if (is_same_array_bounds(sub, sub1, std, std1, fg, fg1)) {
2018     FT_SECT_SAME(nd1) = rt;
2019     FT_SECT_OUT(nd1) = FT_SECT_OUT(nd);
2020     if (is_dominator_fg(FT_SECT_FREE(nd), FT_SECT_FREE(nd1)))
2021       FT_SECT_FREE(nd) = FT_SECT_FREE(nd1);
2022     FT_SECT_REUSE(nd) = 1;
2023     STD_DELETE(rt1_std) = 1;
2024     optsum.sect++;
2025   }
2026 }
2027 
2028 static LOGICAL
is_dominator_fg(int std,int std1)2029 is_dominator_fg(int std, int std1)
2030 {
2031   int std_lp, std_lp1;
2032   int hd, hd1;
2033 
2034   hd = STD_FG(std);
2035   hd1 = STD_FG(std1);
2036   if (is_dominator(hd, hd1))
2037     return TRUE;
2038   return FALSE;
2039 }
2040 
2041 /*
2042  * if fg1 == fg2, then return TRUE if std1 preceeds std2 and FALSE otherwise
2043  * if fg1 != fg2, then return TRUE if fg1 dominates fg2,
2044  *  or the postdominator of fg1 dominates fg2,
2045  *  or some postdominator of fg1 dominates fg2, and so on
2046  */
2047 static LOGICAL
must_follow(int fg1,int std1,int fg2,int std2)2048 must_follow(int fg1, int std1, int fg2, int std2)
2049 {
2050   int std, found1, fg;
2051   if (fg1 == fg2) {
2052     rdilts(fg1);
2053     found1 = FALSE;
2054     for (std = FG_STDFIRST(fg1); std; std = STD_NEXT(std)) {
2055       if (std == std1) {
2056         found1 = TRUE;
2057       }
2058       if (std == std2) {
2059         wrilts(fg1);
2060         return found1;
2061       }
2062     }
2063     wrilts(fg1);
2064     /* didn't find std2 */
2065   } else {
2066     for (fg = fg1; fg > 0; fg = FG_PDOM(fg)) {
2067       if (is_dominator(fg, fg2))
2068         return TRUE;
2069     }
2070   }
2071   return FALSE;
2072 } /* must_follow */
2073 
2074 static void
eliminate_copy(int lp,int lp1,int rt_std,int rt1_std)2075 eliminate_copy(int lp, int lp1, int rt_std, int rt1_std)
2076 {
2077   int std, std1;
2078   int fg, fg1;
2079   int rt, rt1;
2080   int nd, nd1, nd2;
2081   int i;
2082   int sub, ndim, asd;
2083   int sub1, ndim1, asd1;
2084   int count;
2085   int sptr;
2086   int sptr1;
2087   int hd, hd1;
2088   int copy, copy1;
2089   int lhs, lhs1;
2090   int rhs, rhs1;
2091   int sect;
2092 
2093   if (XBIT(47, 0x40000))
2094     return;
2095   if (LP_PARENT(lp) != LP_PARENT(lp1))
2096     return;
2097   rt = STD_AST(rt_std);
2098   rt1 = STD_AST(rt1_std);
2099   assert(LP_FORALL(lp), "eliminate_copy: expecting forall", lp, 2);
2100   hd = LP_HEAD(lp);
2101   std = FG_STDFIRST(hd);
2102   fg = LP_FG(lp);
2103 
2104   assert(LP_FORALL(lp1), "eliminate_copy: expecting forall1", lp1, 2);
2105   hd1 = LP_HEAD(lp1);
2106   std1 = FG_STDFIRST(hd1);
2107   fg1 = LP_FG(lp1);
2108 
2109   copy = A_SRCG(rt);
2110   copy1 = A_SRCG(rt1);
2111   nd = A_OPT1G(copy);
2112   nd1 = A_OPT1G(copy1);
2113 
2114   /* has to have same left-hand-side distribution */
2115   lhs = FT_CCOPY_LHS(nd);
2116   lhs1 = FT_CCOPY_LHS(nd1);
2117   sptr = left_array_symbol(lhs);
2118   sptr1 = left_array_symbol(lhs1);
2119   if (!is_same_array_alignment_for_schedule(sptr, sptr1))
2120     return;
2121 
2122   /* has to have same temp distribution */
2123   sptr = FT_CCOPY_TSPTR(nd);
2124   sptr1 = FT_CCOPY_TSPTR(nd1);
2125   if (!is_same_array_alignment_for_schedule(sptr, sptr1))
2126     return;
2127 
2128   /* has to have same right-hand-side distribution */
2129   rhs = FT_CCOPY_RHS(nd);
2130   rhs1 = FT_CCOPY_RHS(nd1);
2131   sptr = left_array_symbol(rhs);
2132   sptr1 = left_array_symbol(rhs1);
2133   if (!is_same_array_alignment_for_schedule(sptr, sptr1))
2134     return;
2135 
2136   /* has to have the same tmps bounds */
2137   sub = A_DESTG(copy);
2138   sub1 = A_DESTG(copy1);
2139   if (!is_same_array_bounds_for_schedule(sub, sub1, std, std1, fg, fg1))
2140     return;
2141 
2142   /* has to have the same rhs bounds */
2143   sub = A_SRCG(copy);
2144   sub1 = A_SRCG(copy1);
2145   if (!is_same_array_bounds_for_schedule(sub, sub1, std, std1, fg, fg1))
2146     return;
2147 
2148   /* neither source or destination sections can have bogus flags */
2149   sect = FT_CCOPY_SECTL(nd);
2150   sub = STD_AST(sect);
2151   nd2 = A_OPT1G(A_SRCG(sub));
2152   if (FT_SECT_FLAG(nd2) & BOGUSFLAG)
2153     return;
2154   sect = FT_CCOPY_SECTR(nd);
2155   sub = STD_AST(sect);
2156   nd2 = A_OPT1G(A_SRCG(sub));
2157   if (FT_SECT_FLAG(nd2) & BOGUSFLAG)
2158     return;
2159   sect = FT_CCOPY_SECTL(nd1);
2160   sub = STD_AST(sect);
2161   nd2 = A_OPT1G(A_SRCG(sub));
2162   if (FT_SECT_FLAG(nd2) & BOGUSFLAG)
2163     return;
2164   sect = FT_CCOPY_SECTR(nd1);
2165   sub = STD_AST(sect);
2166   nd2 = A_OPT1G(A_SRCG(sub));
2167   if (FT_SECT_FLAG(nd2) & BOGUSFLAG)
2168     return;
2169 
2170   /* has to have the same lhs bounds */
2171   sub = FT_CCOPY_LHSSEC(nd);
2172   sub1 = FT_CCOPY_LHSSEC(nd1);
2173   if (sub && sub1 &&
2174       !is_same_array_bounds_for_schedule(sub, sub1, std, std1, fg, fg1)) {
2175     FT_CCOPY_NOTLHS(nd) = 1;
2176     FT_CCOPY_NOTLHS(nd1) = 1;
2177   }
2178 
2179   if (FT_CCOPY_NOTLHS(nd) || FT_CCOPY_NOTLHS(nd1)) {
2180     FT_CCOPY_NOTLHS(nd) = 1;
2181     FT_CCOPY_NOTLHS(nd1) = 1;
2182   }
2183 
2184   FT_CCOPY_SAME(nd1) = rt;
2185   FT_CCOPY_OUT(nd1) = FT_CCOPY_OUT(nd);
2186   if (is_dominator_fg(FT_CCOPY_FREE(nd), FT_CCOPY_FREE(nd1)))
2187     FT_CCOPY_FREE(nd) = FT_CCOPY_FREE(nd1);
2188   FT_CCOPY_REUSE(nd) = 1;
2189   STD_DELETE(rt1_std) = 1;
2190   optsum.copysection++;
2191 }
2192 
2193 static void
eliminate_gather(int lp,int lp1,int rt_std,int rt1_std)2194 eliminate_gather(int lp, int lp1, int rt_std, int rt1_std)
2195 {
2196   int std, std1;
2197   int fg, fg1;
2198   int rt, rt1;
2199   int nd, nd1, nd3;
2200   int i;
2201   int vsub, ndim, asd;
2202   int vsub1, ndim1, asd1;
2203   int hd, hd1;
2204   int gather, gather1;
2205   int nvsub, nvsub1;
2206   int mask, mask1;
2207   int sptr, sptr1;
2208   int v, v1;
2209   int per, per1;
2210   int sub, sub1;
2211 
2212   if (XBIT(47, 0x80000))
2213     return;
2214   if (LP_PARENT(lp) != LP_PARENT(lp1))
2215     return;
2216   rt = STD_AST(rt_std);
2217   rt1 = STD_AST(rt1_std);
2218   assert(LP_FORALL(lp), "eliminate_gather: expecting forall", lp, 2);
2219   hd = LP_HEAD(lp);
2220   std = FG_STDFIRST(hd);
2221   fg = LP_FG(lp);
2222 
2223   assert(LP_FORALL(lp1), "eliminate_gather: expecting forall1", lp1, 2);
2224   hd1 = LP_HEAD(lp1);
2225   std1 = FG_STDFIRST(hd1);
2226   fg1 = LP_FG(lp1);
2227 
2228   gather = A_SRCG(rt);
2229   gather1 = A_SRCG(rt1);
2230   nd = A_OPT1G(gather);
2231   nd1 = A_OPT1G(gather1);
2232 
2233   /* flags hast to be same */
2234   if (FT_CGATHER_TYPE(nd) != FT_CGATHER_TYPE(nd1))
2235     return;
2236   if (FT_CGATHER_VFLAG(nd) != FT_CGATHER_VFLAG(nd1))
2237     return;
2238   if (FT_CGATHER_PFLAG(nd) != FT_CGATHER_PFLAG(nd1))
2239     return;
2240 
2241   /* vsub has to have same distribution */
2242   vsub = FT_CGATHER_VSUB(nd);
2243   vsub1 = FT_CGATHER_VSUB(nd1);
2244   sptr = left_array_symbol(vsub);
2245   sptr1 = left_array_symbol(vsub1);
2246   if (!is_same_array_alignment_for_schedule(sptr, sptr1))
2247     return;
2248 
2249   /* nvsub has to have same distribution */
2250   nvsub = FT_CGATHER_NVSUB(nd);
2251   nvsub1 = FT_CGATHER_NVSUB(nd1);
2252   sptr = left_array_symbol(nvsub);
2253   sptr1 = left_array_symbol(nvsub1);
2254   if (!is_same_array_alignment_for_schedule(sptr, sptr1))
2255     return;
2256 
2257   /* has to have the same vsub bounds */
2258   if (!is_same_array_bounds_for_schedule(vsub, vsub1, std, std1, fg, fg1))
2259     return;
2260 
2261   /* has to have the same nvsub bounds */
2262   if (!is_same_array_bounds_for_schedule(nvsub, nvsub1, std, std1, fg, fg1))
2263     return;
2264 
2265   /* masks have to have contents */
2266   mask = FT_CGATHER_MASK(nd);
2267   mask1 = FT_CGATHER_MASK(nd1);
2268   if (mask != mask1)
2269     return;
2270   if (mask) {
2271     if (!is_avail_expr(mask, std, fg, std1, fg1))
2272       return;
2273   }
2274 
2275   asd = A_ASDG(left_subscript_ast(vsub));
2276   ndim = ASD_NDIM(asd);
2277 
2278   asd1 = A_ASDG(left_subscript_ast(vsub1));
2279   ndim1 = ASD_NDIM(asd1);
2280 
2281   if (ndim != ndim1)
2282     return;
2283 
2284   for (i = 0; i < ndim; i++) {
2285     v = FT_CGATHER_V(nd, i);
2286     v1 = FT_CGATHER_V(nd1, i);
2287     if (v != v1)
2288       return;
2289     if (v) {
2290       if (!is_avail_expr(v, std, fg, std1, fg1))
2291         return;
2292     }
2293     /* has to have same permute */
2294     per = FT_CGATHER_PERMUTE(nd, i);
2295     per1 = FT_CGATHER_PERMUTE(nd1, i);
2296     if (per != per1)
2297       return;
2298   }
2299 
2300   /* has to have the same lhs bounds */
2301   sub = FT_CGATHER_LHSSEC(nd);
2302   sub1 = FT_CGATHER_LHSSEC(nd1);
2303   if (sub && sub1 &&
2304       !is_same_array_bounds_for_schedule(sub, sub1, std, std1, fg, fg1)) {
2305     FT_CGATHER_NOTLHS(nd) = 1;
2306     FT_CGATHER_NOTLHS(nd1) = 1;
2307   }
2308 
2309   if (FT_CGATHER_NOTLHS(nd) || FT_CGATHER_NOTLHS(nd1)) {
2310     FT_CGATHER_NOTLHS(nd) = 1;
2311     FT_CGATHER_NOTLHS(nd1) = 1;
2312   }
2313 
2314   FT_CGATHER_SAME(nd1) = rt;
2315   FT_CGATHER_OUT(nd1) = FT_CGATHER_OUT(nd);
2316   FT_CGATHER_FREE(nd) = FT_CGATHER_FREE(nd1);
2317   FT_CGATHER_REUSE(nd) = 1;
2318   STD_DELETE(rt1_std) = 1;
2319   if (FT_CGATHER_TYPE(nd) == A_HGATHER)
2320     optsum.gatherx++;
2321   else
2322     optsum.scatterx++;
2323 }
2324 
2325 static void
eliminate_shift(int lp,int lp1,int rt_std,int rt1_std)2326 eliminate_shift(int lp, int lp1, int rt_std, int rt1_std)
2327 {
2328   int std, std1;
2329   int fg, fg1;
2330   int rt, rt1;
2331   int nd, nd1, nd3;
2332   int i;
2333   int ndim, asd;
2334   int ndim1, asd1;
2335   int count;
2336   int src, src1, srcl;
2337   int hd, hd1;
2338   int shift, shift1;
2339   int sptr, sptr1;
2340   int v, v1, cv, cv1, ns, ns1;
2341   int nmax, pmax;
2342   int sub[7];
2343   int new;
2344 
2345   if (XBIT(47, 0x100000))
2346     return;
2347   if (LP_PARENT(lp) != LP_PARENT(lp1))
2348     return;
2349   rt = STD_AST(rt_std);
2350   rt1 = STD_AST(rt1_std);
2351   assert(LP_FORALL(lp), "eliminate_shift: expecting forall", lp, 2);
2352   hd = LP_HEAD(lp);
2353   std = FG_STDFIRST(hd);
2354   fg = LP_FG(lp);
2355 
2356   assert(LP_FORALL(lp1), "eliminate_shift: expecting forall1", lp1, 2);
2357   hd1 = LP_HEAD(lp1);
2358   std1 = FG_STDFIRST(hd1);
2359   fg1 = LP_FG(lp1);
2360 
2361   shift = A_SRCG(rt);
2362   shift1 = A_SRCG(rt1);
2363   nd = A_OPT1G(shift);
2364   nd1 = A_OPT1G(shift1);
2365 
2366   if (FT_SHIFT_TYPE(nd1) != FT_SHIFT_TYPE(nd))
2367     return;
2368   if (FT_SHIFT_BOUNDARY(nd1) != FT_SHIFT_BOUNDARY(nd))
2369     return;
2370   if (FT_SHIFT_BOUNDARY(nd) &&
2371       !is_avail_expr(FT_SHIFT_BOUNDARY(nd), std, fg, std1, fg1))
2372     return;
2373   src = A_SRCG(shift);
2374   sptr = left_array_symbol(src);
2375 
2376   src1 = A_SRCG(shift1);
2377   sptr1 = left_array_symbol(src1);
2378 
2379   if (!is_same_array_alignment(sptr, sptr1))
2380     return;
2381 
2382   if (!is_same_array_shape(sptr, sptr1))
2383     return;
2384 
2385   /* has to have the same shift values */
2386   /* second shift has to be less than equal to first shift
2387    * at all dimension negative and pozitive direction
2388    */
2389 
2390   srcl = left_subscript_ast(src);
2391   asd = A_ASDG(srcl);
2392   asd1 = A_ASDG(left_subscript_ast(src1));
2393   ndim = ASD_NDIM(asd);
2394   ndim1 = ASD_NDIM(asd1);
2395   if (ndim != ndim1)
2396     return;
2397   for (i = 0; i < ndim; i++) {
2398     v = ASD_SUBS(asd, i);
2399     v1 = ASD_SUBS(asd1, i);
2400     assert(A_TYPEG(v) == A_TRIPLE, "eliminate_shift: expecting triple", v, 3);
2401     assert(A_TYPEG(v1) == A_TRIPLE, "eliminate_shift: expecting triple1", v1,
2402            3);
2403     ns = A_LBDG(v);
2404     ns1 = A_LBDG(v1);
2405     assert(A_TYPEG(ns) == A_CNST, "eliminate_shift:expecting constant", v, 3);
2406     assert(A_TYPEG(ns1) == A_CNST, "eliminate_shift:expecting constant1", v1,
2407            3);
2408     cv = get_int_cval(A_SPTRG(A_ALIASG(ns)));
2409     cv1 = get_int_cval(A_SPTRG(A_ALIASG(ns1)));
2410     nmax = ns;
2411     if (cv < cv1)
2412       nmax = ns1;
2413 
2414     ns = A_UPBDG(v);
2415     ns1 = A_UPBDG(v1);
2416     assert(A_TYPEG(ns) == A_CNST, "eliminate_shift:expecting constant2", v, 3);
2417     assert(A_TYPEG(ns1) == A_CNST, "eliminate_shift:expecting constant3", v, 3);
2418     cv = get_int_cval(A_SPTRG(A_ALIASG(ns)));
2419     cv1 = get_int_cval(A_SPTRG(A_ALIASG(ns1)));
2420     pmax = ns;
2421     if (cv < cv1)
2422       pmax = ns1;
2423     sub[i] = mk_triple(nmax, pmax, 0);
2424   }
2425 
2426   new = mk_subscr(A_LOPG(srcl), sub, ndim, DTY(DTYPEG(sptr) + 1));
2427   new = replace_ast_subtree(src, srcl, new);
2428   A_SRCP(shift, new);
2429 
2430   FT_SHIFT_SAME(nd1) = rt;
2431   FT_SHIFT_OUT(nd1) = FT_SHIFT_OUT(nd);
2432   if (is_dominator_fg(FT_SHIFT_FREE(nd), FT_SHIFT_FREE(nd1)))
2433     FT_SHIFT_FREE(nd) = FT_SHIFT_FREE(nd1);
2434   FT_SHIFT_REUSE(nd) = 1;
2435   STD_DELETE(rt1_std) = 1;
2436   optsum.shift++;
2437 }
2438 
2439 static void
eliminate_start(int lp,int lp1,int rt_std,int rt1_std)2440 eliminate_start(int lp, int lp1, int rt_std, int rt1_std)
2441 {
2442   int std, std1;
2443   int fg, fg1;
2444   int rt, rt1;
2445   int nd, nd1;
2446   int i;
2447   int sub, ndim, asd;
2448   int sub1, ndim1, asd1;
2449   int count;
2450   int sptr, sptr1;
2451   int ast, ast1;
2452   int hd, hd1;
2453   int start, start1;
2454   int cp, cp1;
2455   int lhs, lhs1;
2456   int rhs, rhs1;
2457   int comm, commstd;
2458   int comm1, commstd1;
2459   int asn, asn1;
2460   int nd3, nd4;
2461   int stype, stype1;
2462   int alloc_std, alloc;
2463   int nd5;
2464   int src, src1;
2465   int dest, dest1;
2466 
2467   rt = STD_AST(rt_std);
2468   rt1 = STD_AST(rt1_std);
2469   assert(LP_FORALL(lp), "eliminate_start: expecting forall", lp, 2);
2470   hd = LP_HEAD(lp);
2471   std = FG_STDFIRST(hd);
2472   fg = LP_FG(lp);
2473 
2474   assert(LP_FORALL(lp1), "eliminate_start: expecting forall1", lp1, 2);
2475   hd1 = LP_HEAD(lp1);
2476   std1 = FG_STDFIRST(hd1);
2477   fg1 = LP_FG(lp1);
2478 
2479   start = A_SRCG(rt);
2480   start1 = A_SRCG(rt1);
2481   nd = A_OPT1G(start);
2482   nd1 = A_OPT1G(start1);
2483 
2484   stype = FT_CSTART_TYPE(nd);
2485   stype1 = FT_CSTART_TYPE(nd1);
2486   if (stype != stype1)
2487     return;
2488 
2489   /* has to have same cp */
2490   commstd = FT_CSTART_COMM(nd);
2491   asn = STD_AST(commstd);
2492   comm = A_SRCG(asn);
2493   nd3 = A_OPT1G(comm);
2494   if (stype == A_HCOPYSECT)
2495     cp = FT_CCOPY_OUT(nd3);
2496   if (stype == A_HOVLPSHIFT)
2497     cp = FT_SHIFT_OUT(nd3);
2498   if (stype == A_HGATHER)
2499     cp = FT_CGATHER_OUT(nd3);
2500   if (stype == A_HSCATTER)
2501     cp = FT_CGATHER_OUT(nd3);
2502 
2503   commstd1 = FT_CSTART_COMM(nd1);
2504   asn1 = STD_AST(commstd1);
2505   comm1 = A_SRCG(asn1);
2506   nd4 = A_OPT1G(comm1);
2507   if (stype1 == A_HCOPYSECT)
2508     cp1 = FT_CCOPY_OUT(nd4);
2509   if (stype1 == A_HOVLPSHIFT)
2510     cp1 = FT_SHIFT_OUT(nd4);
2511   if (stype1 == A_HGATHER)
2512     cp1 = FT_CGATHER_OUT(nd4);
2513   if (stype1 == A_HSCATTER)
2514     cp1 = FT_CGATHER_OUT(nd4);
2515 
2516   if (cp != cp1)
2517     return;
2518 
2519   /* has to have the same source array */
2520   sub = A_SRCG(start);
2521   sub1 = A_SRCG(start1);
2522   ast = left_subscript_ast(sub);
2523   ast1 = left_subscript_ast(sub1);
2524   if (A_LOPG(ast) != A_LOPG(ast1))
2525     return;
2526 
2527   rhs = A_LOPG(FT_CSTART_RHS(nd1));
2528   if (std != std1)
2529     if (!is_avail_expr(rhs, std, fg, std1, fg1))
2530       return;
2531 
2532   /* scatterx needs also destination is same */
2533   if (stype1 == A_HSCATTER) {
2534     sub = A_DESTG(start);
2535     sub1 = A_DESTG(start1);
2536     ast = left_subscript_ast(sub);
2537     ast1 = left_subscript_ast(sub1);
2538     if (A_LOPG(ast) != A_LOPG(ast1))
2539       return;
2540   }
2541 
2542   src = A_SRCG(start);
2543   src1 = A_SRCG(start1);
2544 
2545   dest = A_DESTG(start);
2546   dest1 = A_DESTG(start1);
2547 
2548   if (stype == A_HCOPYSECT || stype == A_HGATHER || stype == A_HSCATTER) {
2549 
2550     /* has to have same destination distribution */
2551     sptr = left_array_symbol(dest);
2552     sptr1 = left_array_symbol(dest1);
2553     if (!is_same_array_alignment(sptr, sptr1))
2554       return;
2555 
2556     /* has to have same src distribution */
2557     sptr = left_array_symbol(src);
2558     sptr1 = left_array_symbol(src1);
2559     if (!is_same_array_alignment(sptr, sptr1))
2560       return;
2561 
2562     /* has to have the same dest bounds */
2563     if (!is_same_array_bounds(dest, dest1, std, std1, fg, fg1))
2564       return;
2565 
2566     /* has to have the same src bounds */
2567     if (!is_same_array_bounds(src, src1, std, std1, fg, fg1))
2568       return;
2569   }
2570 
2571   FT_CSTART_SAME(nd1) = rt;
2572   FT_CSTART_OUT(nd1) = FT_CSTART_OUT(nd);
2573   if (FT_CSTART_ALLOC(nd1) && FT_CSTART_ALLOC(nd)) {
2574     FT_CSTART_ALLOC(nd1) = alloc_std = FT_CSTART_ALLOC(nd);
2575     alloc = STD_AST(alloc_std);
2576     nd5 = A_OPT1G(alloc);
2577     if (is_dominator_fg(FT_ALLOC_FREE(nd5), FT_CSTART_FREE(nd1)))
2578       FT_ALLOC_FREE(nd5) = FT_CSTART_FREE(nd1);
2579     FT_ALLOC_USED(nd5) = 1;
2580   }
2581   if (is_dominator_fg(FT_CSTART_FREE(nd), FT_CSTART_FREE(nd1)))
2582     FT_CSTART_FREE(nd) = FT_CSTART_FREE(nd1);
2583   FT_CSTART_REUSE(nd) = 1;
2584   STD_DELETE(rt1_std) = 1;
2585   optsum.start++;
2586 }
2587 
2588 static void
eliminate_get_scalar(void)2589 eliminate_get_scalar(void)
2590 {
2591   int i, j;
2592   int src, src1;
2593   int ast, ast1;
2594   int fg, fg1;
2595   int commstd, commstd1;
2596   int rt, rt1;
2597   int nd, nd1;
2598 
2599   init_gstbl();
2600   find_get_scalar();
2601 
2602   for (i = 0; i < gstbl.avl; i++) {
2603     commstd = gstbl.base[i].f1;
2604     if (STD_DELETE(commstd))
2605       continue;
2606     rt = STD_AST(commstd);
2607     nd = A_OPT1G(rt);
2608     assert(A_TYPEG(rt) == A_HGETSCLR, "generate_get_scalar: wrong ast type", 2,
2609            rt);
2610     assert(nd, "generate_get_scalar: nd is 0", 2, rt);
2611     for (j = i + 1; j < gstbl.avl; j++) {
2612       commstd1 = gstbl.base[j].f1;
2613       if (STD_DELETE(commstd1))
2614         continue;
2615       rt1 = STD_AST(commstd1);
2616       src = A_SRCG(rt);
2617       src1 = A_SRCG(rt1);
2618       if (src != src1)
2619         continue;
2620 
2621       nd1 = A_OPT1G(rt1);
2622       fg = STD_FG(commstd);
2623       fg1 = STD_FG(commstd1);
2624       if (!is_dominator(fg, fg1))
2625         continue;
2626       if (!is_avail_expr(src, commstd, fg, commstd1, fg1))
2627         continue;
2628       FT_GETSCLR_SAME(nd1) = rt;
2629       FT_GETSCLR_REUSE(nd) = 1;
2630       STD_DELETE(commstd1) = 1;
2631     }
2632   }
2633   free_gstbl();
2634 }
2635 
2636 void
init_gstbl(void)2637 init_gstbl(void)
2638 {
2639   gstbl.size = 200;
2640   NEW(gstbl.base, TABLE, gstbl.size);
2641   gstbl.avl = 0;
2642 }
2643 
2644 void
free_gstbl(void)2645 free_gstbl(void)
2646 {
2647 
2648   FREE(gstbl.base);
2649 }
2650 
2651 int
get_gstbl(void)2652 get_gstbl(void)
2653 {
2654   int nd;
2655 
2656   nd = gstbl.avl++;
2657   NEED(gstbl.avl, gstbl.base, TABLE, gstbl.size, gstbl.size + 100);
2658   if (nd > SPTR_MAX || gstbl.base == NULL)
2659     errfatal(7);
2660   return nd;
2661 }
2662 
2663 void
init_brtbl(void)2664 init_brtbl(void)
2665 {
2666   brtbl.size = 200;
2667   NEW(brtbl.base, TABLE, brtbl.size);
2668   brtbl.avl = 0;
2669 }
2670 
2671 void
free_brtbl(void)2672 free_brtbl(void)
2673 {
2674   FREE(brtbl.base);
2675 }
2676 
2677 int
get_brtbl(void)2678 get_brtbl(void)
2679 {
2680   int nd;
2681 
2682   nd = brtbl.avl++;
2683   NEED(brtbl.avl, brtbl.base, TABLE, brtbl.size, brtbl.size + 100);
2684   if (nd > SPTR_MAX || brtbl.base == NULL)
2685     errfatal(7);
2686   return nd;
2687 }
2688 
2689 static void
comm_optimize_init(void)2690 comm_optimize_init(void)
2691 {
2692   optshrd_init();
2693   induction_init();
2694   optshrd_finit();
2695 }
2696 
2697 static void
comm_optimize_end(void)2698 comm_optimize_end(void)
2699 {
2700   optshrd_fend();
2701   optshrd_end();
2702   induction_end();
2703 }
2704 
2705 /* optimization table */
2706 
2707 void
init_ftb(void)2708 init_ftb(void)
2709 {
2710   ftb.size = 240;
2711   NEW(ftb.base, FT, ftb.size);
2712   ftb.avl = 1;
2713 }
2714 
2715 int
mk_ftb(void)2716 mk_ftb(void)
2717 {
2718   int nd;
2719 
2720   nd = ftb.avl++;
2721   NEED(ftb.avl, ftb.base, FT, ftb.size, ftb.size + 240);
2722   if (ftb.base == NULL)
2723     errfatal(7);
2724   return nd;
2725 }
2726 
2727 LITEMF *
clist(void)2728 clist(void)
2729 {
2730   LITEMF *list;
2731 
2732   list = (LITEMF *)getitem(FORALL_AREA, sizeof(LITEMF));
2733   list->nitem = 0;
2734   list->next = 0;
2735   return list;
2736 }
2737 
2738 void
plist(LITEMF * list,int item)2739 plist(LITEMF *list, int item)
2740 {
2741   LITEMF *listp, *last;
2742   assert(list, "plist: list is NULL", 0, 3);
2743   if (list->nitem == 0) {
2744     list->item = item;
2745     list->next = 0;
2746     list->nitem = 1;
2747     return;
2748   }
2749   for (listp = list; listp != 0; listp = listp->next)
2750     last = listp;
2751   listp = (LITEMF *)getitem(FORALL_AREA, sizeof(LITEMF));
2752   listp->item = item;
2753   listp->next = 0;
2754   last->next = listp;
2755   list->nitem++;
2756 }
2757 
2758 int
glist(LITEMF * list,int n)2759 glist(LITEMF *list, int n)
2760 {
2761   LITEMF *listp;
2762   int i;
2763 
2764   assert(list->nitem > n, "glist: nitem not >", n, 0);
2765   listp = list;
2766   for (i = 0; i < list->nitem; i++) {
2767     if (i == n)
2768       return listp->item;
2769     listp = listp->next;
2770   }
2771   return 0;
2772 }
2773 
2774 /* Is this item in the list? */
2775 LOGICAL
inlist(LITEMF * list,int item)2776 inlist(LITEMF *list, int item)
2777 {
2778   LITEMF *p;
2779   if (list->nitem == 0)
2780     return FALSE;  /* list->item is invalid */
2781   for (p = list; p != 0; p = p->next) {
2782     if (p->item == item)
2783       return TRUE;
2784   }
2785   return FALSE;
2786 }
2787 
2788 /* Dump this list of ints. */
2789 void
dlist(LITEMF * list)2790 dlist(LITEMF *list)
2791 {
2792   LITEMF *p;
2793   FILE *dfile = gbl.dbgfil ? gbl.dbgfil : stderr;
2794   fprintf(dfile, "List of %d items:", list->nitem);
2795   if (list->nitem > 0) {
2796     for (p = list; p != 0; p = p->next) {
2797       fprintf(dfile, " %d", p->item);
2798     }
2799   }
2800   fprintf(dfile, "\n");
2801 }
2802 
2803 static int
common_compute_point(LITEMF * nm_list,int fg,int std)2804 common_compute_point(LITEMF *nm_list, int fg, int std)
2805 {
2806   LITEMF *fg_list, *std_list;
2807   int i, j, n;
2808   int nme;
2809   int use;
2810   DU *du;
2811   int def;
2812   int fg1, fg2;
2813   int count;
2814 
2815   fg_list = clist();
2816   std_list = clist();
2817 
2818   n = nm_list->nitem;
2819   for (i = 0; i < n; i++) {
2820     nme = A_NMEG(glist(nm_list, i));
2821     def = NME_DEF(nme);
2822     if (def)
2823       for (du = DEF_DU(def); du != 0; du = du->next) {
2824         use = du->use;
2825         if (USE_STD(use) == std) {
2826           if (only_one_ud(use)) {
2827             plist(fg_list, DEF_FG(def));
2828             plist(std_list, DEF_STD(def));
2829           } else {
2830             plist(fg_list, fg);
2831             plist(fg_list, std);
2832           }
2833         }
2834       }
2835     else {
2836       plist(fg_list, 0);
2837       plist(std_list, 0);
2838     }
2839   }
2840 
2841   count = 0;
2842   for (i = 0; i < n; i++) {
2843     fg1 = glist(fg_list, i);
2844     count = 0;
2845     for (j = 0; j < n; j++) {
2846       fg2 = glist(fg_list, j);
2847       if (is_dominator(fg2, fg1))
2848         count++;
2849     }
2850     if (count == n)
2851       break;
2852   }
2853   if (glist(std_list, i) != std)
2854     return glist(std_list, i);
2855   else
2856     return STD_PREV(std);
2857 }
2858 
2859 /* This routine is to change all allocate statements into
2860  * allocate ast which is defined for hpf communication ast.
2861  * By this way, any allocate may benefit of comm_invar.
2862  */
2863 static void
alloc2ast(void)2864 alloc2ast(void)
2865 {
2866   int start, i;
2867   int std, stdnext;
2868   int allocast, newallocast;
2869   int deallocast, ast;
2870   int nd;
2871   LOGICAL found;
2872   int sptr;
2873 
2874   start = ftb.avl;
2875   for (std = STD_NEXT(0); std; std = stdnext) {
2876     stdnext = STD_NEXT(std);
2877     if (STD_IGNORE(std))
2878       continue;
2879     ast = STD_AST(std);
2880     if (A_TYPEG(ast) == A_ALLOC) {
2881       if (A_TKNG(ast) == TK_ALLOCATE) {
2882         allocast = STD_AST(std);
2883         ast = A_SRCG(allocast);
2884         if (A_TYPEG(ast) != A_SUBSCR)
2885           continue;
2886         if (A_TYPEG(A_LOPG(ast)) != A_ID)
2887           continue;
2888         sptr = A_SPTRG(A_LOPG(ast));
2889         if (!HCCSYMG(sptr))
2890           continue;
2891         if (ADJLENG(sptr))
2892           continue;
2893         newallocast = new_node(A_HALLOBNDS);
2894         A_LOPP(newallocast, ast);
2895 
2896         nd = mk_ftb();
2897         FT_STD(nd) = 0;
2898         FT_FORALL(nd) = 0;
2899         FT_ALLOC_SPTR(nd) = sptr;
2900         FT_ALLOC_FREE(nd) = 0;
2901         FT_ALLOC_SAME(nd) = 0;
2902         FT_ALLOC_REUSE(nd) = 0;
2903         FT_ALLOC_USED(nd) = 0;
2904         FT_ALLOC_OUT(nd) = sptr;
2905         A_OPT1P(newallocast, nd);
2906         STD_AST(std) = newallocast;
2907         A_STDP(newallocast, std);
2908         /*
2909            add_stmt_before(newallocast, std);
2910            delete_stmt(std);
2911          */
2912       } else {
2913         assert(A_TKNG(ast) == TK_DEALLOCATE, "alloc2ast: bad dealloc", std, 4);
2914         deallocast = STD_AST(std);
2915         ast = A_SRCG(deallocast);
2916         if (A_TYPEG(ast) != A_ID)
2917           continue;
2918         sptr = A_SPTRG(ast);
2919         if (!HCCSYMG(sptr) || STYPEG(sptr) != ST_ARRAY)
2920           continue;
2921         if (ADJLENG(sptr))
2922           continue;
2923         found = FALSE;
2924         for (i = start; i < ftb.avl; i++) {
2925           if (FT_ALLOC_FREE(i))
2926             continue;
2927           if (FT_ALLOC_SPTR(i) == sptr) {
2928             FT_ALLOC_FREE(i) = STD_PREV(std);
2929             FT_ALLOC_PTASGN(i) = STD_PTASGN(std);
2930             delete_stmt(std);
2931             found = TRUE;
2932           }
2933         }
2934         assert(found, "alloc2ast: missing allocate", std, 2);
2935       }
2936     }
2937   }
2938 }
2939 
2940 /** \brief Try to optimize allocate statements.
2941 
2942     If they have same bounds, give them the same bound variable.
2943     This will help the compiler to reduce # of communication
2944     for allocatable variables.
2945  */
2946 void
optimize_alloc(void)2947 optimize_alloc(void)
2948 {
2949   comm_optimize_init();
2950   flowgraph();      /* build the flowgraph for the function */
2951   postdominators(); /* need these as well */
2952 #if DEBUG
2953   if (DBGBIT(35, 1))
2954     dump_flowgraph();
2955 #endif
2956 
2957   findloop(HLOPT_ALL); /* find the loops */
2958 
2959   flow(); /* do flow analysis on the loops  */
2960 
2961 #if DEBUG
2962   if (DBGBIT(35, 4)) {
2963     dump_flowgraph();
2964     dump_loops();
2965   }
2966 #endif
2967   opt_allocate();
2968   comm_optimize_end();
2969 }
2970 
2971 static void
opt_allocate(void)2972 opt_allocate(void)
2973 {
2974   ADSC *ad;
2975   int allocast;
2976   int ast;
2977   int std;
2978   int sptr;
2979   int i;
2980   int ndim;
2981   int stdnext;
2982   int sub[MAXSUBS];
2983   LITEMF *defs_to_propagate = clist();
2984   LITEMF *shape_exceptions = clist(); /* don't propagate into these shapes */
2985 
2986   for (std = STD_NEXT(0); std; std = stdnext) {
2987     LOGICAL changed;
2988     int asd;
2989     stdnext = STD_NEXT(std);
2990     allocast = STD_AST(std);
2991     if (STD_PAR(std))
2992       continue;
2993     if (is_allocatable_assign(allocast)) {
2994       /* don't propagate shape bounds -- may be changed in transform() */
2995       int shape = A_SHAPEG(A_DESTG(allocast));
2996       plist(shape_exceptions, shape);
2997       continue;
2998     }
2999     if (A_TYPEG(allocast) != A_ALLOC)
3000       continue;
3001     if (A_TKNG(allocast) != TK_ALLOCATE)
3002       continue;
3003     ast = A_SRCG(allocast);
3004     if (A_TYPEG(ast) != A_SUBSCR)
3005       continue;
3006     /* member is busted -- lfm */
3007     if (A_TYPEG(A_LOPG(ast)) != A_ID)
3008       continue;
3009     sptr = A_SPTRG(A_LOPG(ast));
3010     /* pointer lb, ub is not A_ID,
3011        it may array static_descriptor */
3012     if (NOALLOOPTG(sptr))
3013       continue;
3014     if (POINTERG(sptr))
3015       continue;
3016     if (CMBLKG(sptr))
3017       continue;
3018     if (MDALLOCG(sptr))
3019       continue;
3020     if (SAVEG(sptr))
3021       continue;
3022 
3023     /* a SAVEd allocatable will not appear itself in a common block,
3024      * but its pointer offset variable will. */
3025     if (PTROFFG(sptr) && SCG(PTROFFG(sptr)) == SC_CMBLK)
3026       continue;
3027 
3028     /* put bounds into NME table */
3029     ad = AD_DPTR(DTYPEG(sptr));
3030     ndim = rank_of_sym(sptr);
3031     asd = A_ASDG(ast);
3032 
3033     changed = FALSE;  /* did any bounds change? */
3034     for (i = 0; i < ndim; i++) {
3035       int lw, up, lw2, up2;
3036       int ss = ASD_SUBS(asd, i);
3037       if (A_TYPEG(ss) == A_TRIPLE) {
3038         lw = A_LBDG(ss);
3039         up = A_UPBDG(ss);
3040       } else {
3041         lw = astb.i1;
3042         up = ss;
3043       }
3044       lw2 = propagate_bound(defs_to_propagate, lw);
3045       up2 = propagate_bound(defs_to_propagate, up);
3046       if (lw2 != lw || up2 != up) {
3047         sub[i] = mk_triple(lw2, up2, 0);
3048         changed = TRUE;
3049       } else {
3050         sub[i] = ss;
3051       }
3052     }
3053 
3054     /* change allocate too */
3055     if (changed) {
3056       int new = mk_subscr(mk_id(sptr), sub, ndim, DTY(DTYPEG(sptr) + 1));
3057       A_SRCP(allocast, new);
3058     }
3059 
3060     /* optmize allocatable alignment */
3061   }
3062 
3063   ast_visit(1, 1);
3064   for (i = 0; i < defs_to_propagate->nitem; i++) {
3065     int def = glist(defs_to_propagate, i);
3066     int stddef = DEF_STD(def);
3067     int astdef = STD_AST(stddef);
3068     int src, dest;
3069     assert(A_TYPEG(astdef) == A_ASN, "expecting ASN ast", astdef, ERR_Fatal);
3070     src = A_SRCG(astdef);
3071     dest = A_DESTG(astdef);
3072     ast_replace(dest, src);
3073   }
3074 
3075   /* change all shape*/
3076   rewrite_all_shape(shape_exceptions);
3077   ast_unvisit();
3078   freearea(FORALL_AREA);
3079 }
3080 
3081 /* Is this an assignment with F2003 allocatable semantics? */
3082 static LOGICAL
is_allocatable_assign(int ast)3083 is_allocatable_assign(int ast)
3084 {
3085   int dest, src;
3086   LOGICAL dest_is_mem = FALSE;
3087   if (A_TYPEG(ast) != A_ASN) return FALSE;
3088   dest = A_DESTG(ast);
3089   src = A_SRCG(ast);
3090   while (A_TYPEG(dest) == A_MEM) {
3091     dest = A_MEMG(dest);
3092     dest_is_mem = TRUE;
3093   }
3094   if (!dest_is_mem && !XBIT(54, 0x1))
3095     return FALSE;
3096   while (A_TYPEG(src) == A_MEM) {
3097     src = A_MEMG(src);
3098   }
3099   if (A_TYPEG(dest) == A_ID && A_TYPEG(src) == A_ID) {
3100     int dest_sym = sym_of_ast(dest);
3101     if (ALLOCATTRG(dest_sym)) {
3102       return TRUE;
3103     }
3104   }
3105   return FALSE;
3106 }
3107 
3108 /* If possible, propagate the assignment to bound, add it to defs_to_propagate
3109  * and return the new value. */
3110 static int
propagate_bound(LITEMF * defs_to_propagate,int bound)3111 propagate_bound(LITEMF *defs_to_propagate, int bound)
3112 {
3113   if (A_TYPEG(bound) == A_ID) {
3114     int nme = addnme(NT_VAR, A_SPTRG(bound), 0, (INT)0);
3115     int def = NME_DEF(nme);
3116     if (is_safe_copy(def)) {
3117       int std = DEF_STD(def);
3118       int ast = STD_AST(std);
3119       int sptr;
3120       assert(A_TYPEG(ast) == A_ASN, "expecting ASN ast", ast, ERR_Fatal);
3121       sptr = sym_of_ast(A_DESTG(ast));
3122       if (!GSCOPEG(sptr))
3123         plist(defs_to_propagate, def);
3124       return A_SRCG(ast);
3125     }
3126   }
3127   return bound;
3128 }
3129 
3130 /* this routine is to decide whether it is safe to
3131  * propagate the definition for an array bound variable to its use
3132  *  example, z_b_1=n*m
3133  * The list of definitions for the variable is passed as 'def'.
3134  * The requirements are:
3135  *  1-) All definitions are assignments, with the same RHS
3136  *  2-) All assignments to variables used in the RHS must
3137  *      be post-dominated by a dominator of these uses
3138  */
3139 static LOGICAL
is_safe_copy(int deflist)3140 is_safe_copy(int deflist)
3141 {
3142   int std, fg, ast, src, dest;
3143   int def, defstd, deffg, defast;
3144   int nvar, onlyvar, a[10];
3145   int v, defv;
3146   int stdv, fgv;
3147   int i;
3148 
3149   if (deflist == 0)
3150     return FALSE;
3151   std = DEF_STD(deflist);
3152   fg = DEF_FG(deflist);
3153 
3154   /* must be A_ASN */
3155   ast = STD_AST(std);
3156   if (A_TYPEG(ast) != A_ASN)
3157     return FALSE;
3158   src = A_SRCG(ast);
3159   dest = A_DESTG(ast);
3160 
3161   for (def = deflist; def; def = DEF_NEXT(def)) {
3162     defstd = DEF_STD(def);
3163     deffg = DEF_FG(def);
3164     defast = STD_AST(defstd);
3165     if (A_TYPEG(defast) != A_TYPEG(ast))
3166       return FALSE;
3167     if (A_SRCG(defast) != src)
3168       return FALSE;
3169     if (A_DESTG(defast) != dest)
3170       return FALSE;
3171   }
3172 
3173   /* at this point, all assignments have the same RHS */
3174 
3175   if (A_TYPEG(src) != A_CNST) {
3176     /* decompose the expression into the variables that comprise it */
3177     nvar = 0;
3178     onlyvar = 1;
3179 
3180     decompose_expression(src, a, 10, &nvar, &onlyvar);
3181 
3182     if (nvar > 10) /* too many variables in RHS? */
3183       return FALSE;
3184     if (!onlyvar) /* something complex in RHS? */
3185       return FALSE;
3186     for (i = 0; i < nvar; ++i) {
3187       v = a[i];
3188       v = addnme(NT_VAR, A_SPTRG(v), 0, (INT)0); /* find NME */
3189       /* go through defs of this variable;
3190        * all the uses here must be dominated by a postdominator
3191        * of the definition of the variable.
3192        * This allows:
3193        *    n = xxx
3194        *    if(..)then
3195        *      m = yyy
3196        *    endif
3197        *    z_b_1 = n * m
3198        *    allocate(foo(z_b_1))
3199        * but disallows:
3200        *    z_b_1 = n * m
3201        *    allocate(foo(z_b_1))
3202        *    n = xxx
3203        *    if(..)then
3204        *      m = yyy
3205        *    endif
3206        */
3207 
3208       for (defv = NME_DEF(v); defv; defv = DEF_NEXT(defv)) {
3209         stdv = DEF_STD(defv);
3210         fgv = DEF_FG(defv);
3211         for (def = deflist; def; def = DEF_NEXT(def)) {
3212           defstd = DEF_STD(def);
3213           deffg = DEF_FG(def);
3214           if (!must_follow(fgv, stdv, deffg, defstd))
3215             return FALSE;
3216         }
3217       }
3218     }
3219   }
3220   return TRUE;
3221 } /* is_safe_copy */
3222 
3223 static LOGICAL
is_same_def(int def,int def1)3224 is_same_def(int def, int def1)
3225 {
3226   int std, std1;
3227   int fg, fg1;
3228   int next, next1;
3229   int ast, ast1;
3230   int expr, expr1;
3231 
3232   if (def == 0 || def1 == 0)
3233     return FALSE;
3234   std = DEF_STD(def);
3235   std1 = DEF_STD(def1);
3236   fg = DEF_FG(def);
3237   fg1 = DEF_FG(def1);
3238 
3239   next = DEF_NEXT(def);
3240   next1 = DEF_NEXT(def1);
3241 
3242   /* there should be only one defs */
3243   if (next || next1)
3244     return FALSE;
3245 
3246   /* they have to be A_ASN and same A_SRC */
3247   ast = STD_AST(std);
3248   ast1 = STD_AST(std1);
3249   if (A_TYPEG(ast) != A_ASN || A_TYPEG(ast1) != A_ASN)
3250     return FALSE;
3251   expr = A_SRCG(ast);
3252   expr1 = A_SRCG(ast1);
3253   if (expr != expr1)
3254     return FALSE;
3255 
3256   /* value is not changed between them */
3257   if (!is_dominator(fg, fg1))
3258     return FALSE;
3259   if (!is_avail_expr(expr, std, fg, std1, fg1))
3260     return FALSE;
3261 
3262   return TRUE;
3263 }
3264 
3265 /* This routine is checks that def has only one definition and
3266  * src of that definition is a constant; if so, it returns
3267  * the ast of the difference, else it returns 'defaultval'
3268  */
3269 static int
diff_def_cnst(int cnstAst,int def,int defaultval)3270 diff_def_cnst(int cnstAst, int def, int defaultval)
3271 {
3272   int std;
3273   int fg;
3274   int next;
3275   int ast;
3276   int expr;
3277   int condef, conast;
3278 
3279   if (def == 0)
3280     return defaultval;
3281   std = DEF_STD(def);
3282   fg = DEF_FG(def);
3283   next = DEF_NEXT(def);
3284   /* there should be only one defs */
3285   if (next)
3286     return defaultval;
3287 
3288   /* they have to be A_ASN and same A_SRC */
3289   ast = STD_AST(std);
3290   if (A_TYPEG(ast) != A_ASN)
3291     return defaultval;
3292   expr = A_SRCG(ast);
3293   if (A_ALIASG(expr))
3294     expr = A_ALIASG(expr);
3295   if (A_TYPEG(expr) != A_CNST)
3296     return defaultval;
3297   if (A_DTYPEG(expr) != DT_INT)
3298     return defaultval;
3299   condef = A_SPTRG(expr);
3300   condef = CONVAL2G(condef);
3301   conast = A_SPTRG(cnstAst);
3302   conast = CONVAL2G(conast);
3303   condef = conast - condef;
3304   ast = mk_cval(condef, DT_INT);
3305 
3306   return ast;
3307 }
3308 
3309 static void
rewrite_all_shape(LITEMF * exceptions)3310 rewrite_all_shape(LITEMF *exceptions)
3311 {
3312   int shape;
3313   int ndim;
3314   int ii, i;
3315   int old_lwb, old_upb, old_st;
3316   int new_lwb, new_upb, new_st;
3317   int old_sptr, new_sptr;
3318 
3319   for (ii = 1; ii < MAXDIMS; ii++) {
3320     ndim = ii;
3321     /* search the existing SHDs with the same number of dimensions
3322      */
3323     for (shape = astb.shd.hash[ndim - 1]; shape; shape = SHD_NEXT(shape)) {
3324       if (inlist(exceptions, shape))
3325         continue;
3326       for (i = 0; i < ndim; i++) {
3327         old_lwb = SHD_LWB(shape, i);
3328         old_upb = SHD_UPB(shape, i);
3329         old_st = SHD_STRIDE(shape, i);
3330         new_lwb = ast_rewrite(SHD_LWB(shape, i));
3331         new_upb = ast_rewrite(SHD_UPB(shape, i));
3332         new_st = ast_rewrite(SHD_STRIDE(shape, i));
3333         SHD_LWB(shape, i) = new_lwb;
3334         SHD_UPB(shape, i) = new_upb;
3335         SHD_STRIDE(shape, i) = new_st;
3336         if (flg.smp) {
3337           if (A_TYPEG(old_lwb) == A_ID) {
3338             old_sptr = sym_of_ast(old_lwb);
3339             new_sptr = 0;
3340             if (ast_is_sym(new_lwb)) {
3341               new_sptr = sym_of_ast(new_lwb);
3342             }
3343             if (new_sptr && new_sptr != old_sptr &&
3344                 PARREFG(old_sptr) && STYPEG(new_sptr) != ST_CONST) {
3345               set_parref_flag2(new_sptr, old_sptr, 0);
3346             }
3347           }
3348           if (A_TYPEG(old_upb) == A_ID) {
3349             old_sptr = sym_of_ast(old_upb);
3350             new_sptr = 0;
3351             if (ast_is_sym(new_upb)) {
3352               new_sptr = sym_of_ast(new_upb);
3353             }
3354             if (new_sptr &&  new_sptr != old_sptr &&
3355                 PARREFG(old_sptr) && STYPEG(new_sptr) != ST_CONST) {
3356               set_parref_flag2(new_sptr, old_sptr, 0);
3357             }
3358           }
3359           if (A_TYPEG(old_st) == A_ID) {
3360             old_sptr = sym_of_ast(old_st);
3361             new_sptr = 0;
3362             if (ast_is_sym(new_st)) {
3363               new_sptr = sym_of_ast(new_st);
3364             }
3365             if (new_sptr && new_sptr != old_sptr &&
3366                 PARREFG(old_sptr) && STYPEG(new_sptr) != ST_CONST) {
3367               set_parref_flag2(new_sptr, old_sptr, 0);
3368             }
3369           }
3370         }
3371       }
3372     }
3373   }
3374 }
3375 
3376 static void
decompose_expression(int expr,int a[],int size,int * nvar,int * onlyvar)3377 decompose_expression(int expr, int a[], int size, int *nvar, int *onlyvar)
3378 {
3379   int i;
3380   int asd;
3381   int ndim, n;
3382   int arr;
3383   int argt;
3384 
3385   if (expr == 0)
3386     return;
3387   switch (A_TYPEG(expr)) {
3388   case A_ID:
3389     if (*nvar >= size) {
3390       *nvar = size + 1;
3391       return;
3392     }
3393     a[*nvar] = expr;
3394     (*nvar)++;
3395     return;
3396   case A_BINOP:
3397     decompose_expression(A_LOPG(expr), a, size, nvar, onlyvar);
3398     decompose_expression(A_ROPG(expr), a, size, nvar, onlyvar);
3399     return;
3400   case A_CONV:
3401   case A_UNOP:
3402   case A_PAREN:
3403     decompose_expression(A_LOPG(expr), a, size, nvar, onlyvar);
3404     return;
3405   case A_LABEL:
3406   case A_CMPLXC:
3407   case A_CNST:
3408     return;
3409   case A_MEM:
3410     decompose_expression((int)A_PARENTG(expr), a, size, nvar, onlyvar);
3411     return;
3412   case A_SUBSTR:
3413     if (onlyvar) {
3414       *onlyvar = 0;
3415       return;
3416     }
3417     decompose_expression((int)A_LOPG(expr), a, size, nvar, onlyvar);
3418     decompose_expression((int)A_LEFTG(expr), a, size, nvar, onlyvar);
3419     decompose_expression((int)A_RIGHTG(expr), a, size, nvar, onlyvar);
3420     return;
3421   case A_ICALL:
3422   case A_INTR:
3423   case A_FUNC:
3424     if (onlyvar) {
3425       *onlyvar = 0;
3426       return;
3427     }
3428     argt = A_ARGSG(expr);
3429     n = A_ARGCNTG(expr);
3430     for (i = 0; i < n; ++i)
3431       decompose_expression(ARGT_ARG(argt, i), a, size, nvar, onlyvar);
3432     return;
3433   case A_TRIPLE:
3434     if (onlyvar) {
3435       *onlyvar = 0;
3436       return;
3437     }
3438     decompose_expression(A_LBDG(expr), a, size, nvar, onlyvar);
3439     decompose_expression(A_UPBDG(expr), a, size, nvar, onlyvar);
3440     decompose_expression(A_STRIDEG(expr), a, size, nvar, onlyvar);
3441     return;
3442   case A_SUBSCR:
3443     if (onlyvar) {
3444       *onlyvar = 0;
3445       return;
3446     }
3447     arr = A_LOPG(expr);
3448     decompose_expression(A_LOPG(expr), a, size, nvar, onlyvar);
3449     asd = A_ASDG(expr);
3450     ndim = ASD_NDIM(asd);
3451     for (i = 0; i < ndim; i++)
3452       decompose_expression(ASD_SUBS(asd, i), a, size, nvar, onlyvar);
3453     return;
3454   default:
3455     interr("decompose_expression: unknown type", expr, 3);
3456     return;
3457   }
3458 }
3459 
3460 /** \brief Put forall calls into forall tables.
3461 
3462     FT_MCALL will have calls used in the mask.<br>
3463     FT_SCALL will have calls used in the statement.<br>
3464     These later will be used to parallel calls.
3465  */
3466 void
put_forall_pcalls(int fstd)3467 put_forall_pcalls(int fstd)
3468 {
3469   int forall;
3470   int topstd;
3471   int i, j;
3472   int mask, expr;
3473   int nd, nd1, nd2;
3474   int nargs, argt, arg;
3475   int pstd, past, psptr;
3476   int pstd1, past1, psptr1;
3477   int asn;
3478 
3479   forall = STD_AST(fstd);
3480   assert(A_TYPEG(forall) == A_FORALL, "put_forall_pcalls: must be forall",
3481          forall, 4);
3482 
3483   mask = A_IFEXPRG(forall);
3484   asn = A_IFSTMTG(forall);
3485   nd = A_OPT1G(forall);
3486   topstd = A_SRCG(forall);
3487   if (!topstd)
3488     return;
3489   if (A_ARRASNG(forall))
3490     return;
3491   /* put statements calls */
3492   for (i = topstd; i != fstd; i = STD_NEXT(i)) {
3493     if (is_pcalls(i, fstd)) {
3494       plist(FT_PCALL(nd), i);
3495       STD_PURE(i) = TRUE;
3496       FT_NPCALL(nd)++;
3497     }
3498   }
3499 
3500   for (i = 0; i < FT_NPCALL(nd); i++) {
3501     pstd = glist(FT_PCALL(nd), i);
3502     past = STD_AST(pstd);
3503     nargs = A_ARGCNTG(past);
3504     argt = A_ARGSG(past);
3505     arg = ARGT_ARG(argt, 0);
3506     switch (A_TYPEG(arg)) { /* FS#18714 - skip over non-symbol arg */
3507     case A_ID:
3508     case A_LABEL:
3509     case A_ENTRY:
3510     case A_SUBSCR:
3511     case A_SUBSTR:
3512     case A_MEM:
3513       break;
3514     default:
3515       continue;
3516     }
3517     psptr = sym_of_ast(arg);
3518     if (mask && expr_dependent(mask, arg, fstd, fstd)) {
3519       plist(FT_MCALL(nd), pstd);
3520       FT_NMCALL(nd)++;
3521     } else if (expr_dependent(A_SRCG(asn), arg, fstd, fstd) ||
3522                expr_dependent(A_DESTG(asn), arg, fstd, fstd)) {
3523       plist(FT_SCALL(nd), pstd);
3524       FT_NSCALL(nd)++;
3525     }
3526   }
3527 
3528   for (i = 0; i < FT_NPCALL(nd); i++) {
3529     pstd = glist(FT_PCALL(nd), i);
3530     past = STD_AST(pstd);
3531     nargs = A_ARGCNTG(past);
3532     argt = A_ARGSG(past);
3533     arg = ARGT_ARG(argt, 0);
3534     switch (A_TYPEG(arg)) { /* FS#18714 - skip over non-symbol arg */
3535     case A_ID:
3536     case A_LABEL:
3537     case A_ENTRY:
3538     case A_SUBSCR:
3539     case A_SUBSTR:
3540     case A_MEM:
3541       break;
3542     default:
3543       continue;
3544     }
3545     psptr = sym_of_ast(arg);
3546     nd1 = mk_ftb();
3547     FT_CALL_SPTR(nd1) = psptr;
3548     /* don't distribute PURE result */
3549     FT_CALL_NCALL(nd1) = 0;
3550     FT_CALL_CALL(nd1) = clist();
3551     FT_CALL_POS(nd1) = 0;
3552     A_OPT1P(past, nd1);
3553   }
3554 
3555   for (i = 0; i < FT_NPCALL(nd); i++) {
3556     pstd = glist(FT_PCALL(nd), i);
3557     past = STD_AST(pstd);
3558     nd1 = A_OPT1G(past);
3559     if (nd1 == 0)
3560       continue;
3561     psptr = FT_CALL_SPTR(nd1);
3562     for (j = 0; j < FT_NPCALL(nd); j++) {
3563       if (i == j)
3564         continue;
3565       pstd1 = glist(FT_PCALL(nd), j);
3566       past1 = STD_AST(pstd1);
3567       nd2 = A_OPT1G(past1);
3568       if (contains_ast(past1, mk_id(psptr))) {
3569         plist(FT_CALL_CALL(nd2), pstd);
3570         FT_CALL_NCALL(nd2)++;
3571       }
3572     }
3573   }
3574 }
3575 
3576 /* This checks:
3577  * Whether std is call which may be pure
3578  */
3579 static LOGICAL
is_pcalls(int std,int fstd)3580 is_pcalls(int std, int fstd)
3581 {
3582   int ast;
3583   int sptr;
3584   ast = STD_AST(std);
3585   if (A_TYPEG(ast) == A_CALL) {
3586     sptr = A_SPTRG(A_LOPG(ast));
3587     if (is_impure(sptr))
3588       error(488, 4, STD_LINENO(fstd), "subprogram call in FORALL",
3589             SYMNAME(sptr));
3590     else
3591       return TRUE;
3592   }
3593   if (A_TYPEG(ast) == A_ICALL)
3594     return TRUE;
3595   return FALSE;
3596 }
3597 
3598 static void
forall_make_same_idx(int std)3599 forall_make_same_idx(int std)
3600 {
3601 
3602   int idx[7];
3603   int list, list1, listp;
3604   int forall;
3605   int nidx;
3606   int isptr, isptr1;
3607   int dtype;
3608   int i;
3609   int nd, nd1;
3610   int oldast, newast;
3611   int newforall;
3612   int af, st, nc, bd;
3613   int cnt;
3614   int ip, pstd, past;
3615   LITEMF *plist;
3616 
3617   forall = STD_AST(std);
3618   assert(A_TYPEG(forall) == A_FORALL, "make_same_idx: not forall", 2, forall);
3619   list = A_LISTG(forall);
3620   nidx = 0;
3621   for (listp = list; listp != 0; listp = ASTLI_NEXT(listp)) {
3622     idx[nidx] = listp;
3623     nidx++;
3624   }
3625   assert(nidx <= 7, "make_same_idx: illegal forall", 2, forall);
3626 
3627   /* if it is already changed, don't do any thing */
3628   cnt = 0;
3629   for (i = 0; i < nidx; i++) {
3630     isptr1 = ASTLI_SPTR(idx[i]);
3631     dtype = DTYPEG(isptr1);
3632     isptr = get_init_idx(i, dtype);
3633     if (isptr == isptr1)
3634       cnt++;
3635     if (STD_TASK(std) && SCG(isptr) == SC_PRIVATE) {
3636       TASKP(isptr, 1);
3637     }
3638   }
3639   if (cnt == nidx)
3640     return;
3641   ast_visit(1, 1);
3642 
3643   /* change forall */
3644   for (i = 0; i < nidx; i++) {
3645     isptr = ASTLI_SPTR(idx[i]);
3646     dtype = DTYPEG(isptr);
3647     oldast = mk_id(isptr);
3648     isptr = get_init_idx(i, dtype);
3649     newast = mk_id(isptr);
3650     if (STD_TASK(std) && SCG(isptr) == SC_PRIVATE) {
3651       TASKP(isptr, 1);
3652     }
3653     ast_replace(oldast, newast);
3654   }
3655 
3656   nd = A_OPT1G(forall);
3657   af = A_ARRASNG(forall);
3658   st = A_STARTG(forall);
3659   nc = A_NCOUNTG(forall);
3660   bd = A_CONSTBNDG(forall);
3661   newforall = ast_rewrite(forall);
3662   A_OPT1P(newforall, nd);
3663   A_ARRASNP(newforall, af);
3664   A_STARTP(newforall, st);
3665   A_NCOUNTP(newforall, nc);
3666   A_CONSTBNDP(newforall, bd);
3667 
3668   A_STDP(newforall, std);
3669   STD_AST(std) = newforall;
3670 
3671   /* change also pcalls */
3672   plist = FT_PCALL(nd);
3673   for (ip = 0; ip < FT_NPCALL(nd); ip++) {
3674     pstd = plist->item;
3675     plist = plist->next;
3676     past = STD_AST(pstd);
3677     nd1 = A_OPT1G(past);
3678     past = ast_rewrite(past);
3679     A_OPT1P(past, nd1);
3680     STD_AST(pstd) = past;
3681     A_STDP(past, pstd);
3682   }
3683   ast_unvisit();
3684 }
3685 
3686 /* SHARED MEMORY OPTIMIZATION START HERE */
3687 
3688 /** \brief Put all get_scalar into table to easily process */
3689 void
find_get_scalar(void)3690 find_get_scalar(void)
3691 {
3692   int std, stdnext;
3693   int ast;
3694   int i;
3695   int type;
3696   int nd;
3697 
3698   for (std = STD_NEXT(0); std; std = stdnext) {
3699     stdnext = STD_NEXT(std);
3700     ast = STD_AST(std);
3701     type = A_TYPEG(ast);
3702     if (type != A_HGETSCLR)
3703       continue;
3704     nd = A_OPT1G(ast);
3705     if (nd == 0) {
3706       nd = mk_ftb();
3707       FT_STD(nd) = 0;
3708       FT_GETSCLR_SAME(nd) = 0;
3709       FT_GETSCLR_REUSE(nd) = 0;
3710       FT_GETSCLR_NOMEM(nd) = 0;
3711       FT_GETSCLR_OMEMED(nd) = 0;
3712     }
3713     A_OPT1P(ast, nd);
3714     i = get_gstbl();
3715     gstbl.base[i].f1 = std;
3716   }
3717 }
3718