1 /*
2  * Copyright (c) 1994-2018, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 /**
19    \file
20    \brief Fortran communications module
21  */
22 
23 #include "comm.h"
24 #include "gbldefs.h"
25 #include "global.h"
26 #include "error.h"
27 #include "symtab.h"
28 #include "symutl.h"
29 #include "dtypeutl.h"
30 #include "soc.h"
31 #include "semant.h"
32 #include "ast.h"
33 #include "gramtk.h"
34 #include "extern.h"
35 #include "hpfutl.h"
36 #include "commopt.h"
37 #include "ccffinfo.h"
38 #include "dinit.h"
39 #include "direct.h"
40 #include "rte.h"
41 #include "rtlRtns.h"
42 
43 struct cs_table {
44   LOGICAL is_used_lhs;
45 };
46 
47 static struct cs_table cs_table;
48 
49 static void comm_init(void);
50 static void transform_ptr(int std, int ast);
51 static int normalize_forall_triplet(int std, int forall);
52 static void emit_overlap(int a);
53 static int emit_permute_section(int a, int std);
54 static int eliminate_extra_idx(int lhs, int a, int forall);
55 static int emit_copy_section(int a, int std);
56 static int canonical_conversion(int ast);
57 static void forall_dependency_scalarize(int std, int *std1, int *std2);
58 static LOGICAL is_use_lhs(int a, LOGICAL, LOGICAL, int);
59 static int emit_gatherx(int a, int std, LOGICAL opt);
60 static void fix_guard_forall(int std);
61 static void emit_sum_scatterx(int);
62 static void emit_scatterx(int);
63 static void emit_scatterx_gatherx(int std, int result, int array, int mask,
64                                   int allocstd, int tempast0, int lhssec,
65                                   int comm_type);
66 static void compute_permute(int lhs, int rhs, int list, int order[7]);
67 static int put_data(int permute[7], int no);
68 static LOGICAL is_permuted(int array, int per[7], int per1[7], int *nper1);
69 static int scalar_communication(int ast, int std);
70 static int tag_call_comm(int std, int forall);
71 static void call_comm(int cstd, int fstd, int forall);
72 static void insert_call_comm(int std, int forall);
73 static void put_call_comm(int cstd, int fstd, int forall);
74 static void shape_communication(int std, int forall);
75 static void shape_comm(int cstd, int fstd, int forall);
76 static int sequentialize_mask_call(int forall, int stdnext);
77 static int sequentialize_stmt_call(int forall, int stdnext);
78 static int sequentialize_call(int cstd, int stdnext, int forall);
79 static int gen_shape_comm(int arg, int forall, int std, int nomask);
80 static int reference_for_pure_temp(int sptr, int lhs, int arg, int forall);
81 static void init_pertbl(void);
82 static void free_pertbl(void);
83 static int get_pertbl(void);
84 static int copy_section_temp_before(int sptr, int rhs, int forall);
85 static CTYPE *getcyclic(void);
86 static void init_opt_tables(void);
87 static LOGICAL is_scatter(int std);
88 static void opt_overlap(void);
89 static int insert_forall_comm(int ast);
90 static int construct_list_for_pure(int arg, int mask, int list);
91 static LOGICAL is_pure_temp_too_large(int list, int arg);
92 static int handle_pure_temp_too_large(int expr, int std);
93 static int forall_2_sec(int a, int forall);
94 static int make_sec_ast(int arr, int std, int allocstd, int sectflag);
95 static int temp_copy_section(int std, int forall, int lhs, int rhs, int dty,
96                              int *allocast);
97 static int temp_gatherx(int std, int forall, int lhs, int rhs, int dty,
98                         int *allocast);
99 static int gatherx_temp_before(int sptr, int rhs, int forall);
100 static int simple_reference_for_temp(int sptr, int a, int forall);
101 
102 /**
103    \brief Finalize the phase and free allocated memory.
104  */
105 void
comm_fini(void)106 comm_fini(void)
107 {
108   TRANS_FREE(trans.subb);
109   trans.subb.stg_base = NULL;
110   TRANS_FREE(trans.arrb);
111   trans.subb.stg_base = NULL;
112   TRANS_FREE(trans.tdescb);
113   trans.tdescb.stg_base = NULL;
114   FREE(finfot.base);
115   finfot.base = NULL;
116   free_pertbl();
117 }
118 
119 /**
120    \brief Communication analyzer entry point.
121  */
122 void
comm_analyze(void)123 comm_analyze(void)
124 {
125   int std, stdnext;
126   int ast;
127   int lhs, sptr;
128   int endmasterstd, endcriticalstd;
129   int parallel_depth;
130   int task_depth;
131   int type;
132 
133   comm_init();
134   init_region();
135   parallel_depth = 0;
136   task_depth = 0;
137   for (std = STD_NEXT(0); std; std = stdnext) {
138     stdnext = STD_NEXT(std);
139     gbl.lineno = STD_LINENO(std);
140     if (STD_PURE(std))
141       continue;
142     if (STD_LOCAL(std) || pure_gbl.end_master_region != 0)
143       pure_gbl.local_mode = 1; /* don't process for DO-INDEPENDENT */
144     else
145       pure_gbl.local_mode = 0;
146     ast = STD_AST(std);
147     switch (type = A_TYPEG(ast)) {
148     case A_MP_PARALLEL:
149       ++parallel_depth;
150       /*symutl.sc = SC_PRIVATE;*/
151       set_descriptor_sc(SC_PRIVATE);
152       break;
153     case A_MP_ENDPARALLEL:
154       --parallel_depth;
155       if (parallel_depth == 0 && task_depth == 0) {
156         /*symutl.sc = SC_LOCAL;*/
157         set_descriptor_sc(SC_LOCAL);
158       }
159       break;
160     case A_MP_TASKLOOPREG:
161     case A_MP_ETASKLOOPREG:
162       break;
163     case A_MP_TASK:
164     case A_MP_TASKLOOP:
165       ++task_depth;
166       set_descriptor_sc(SC_PRIVATE);
167       break;
168     case A_MP_ENDTASK:
169     case A_MP_ETASKLOOP:
170       --task_depth;
171       if (parallel_depth == 0 && task_depth == 0) {
172         set_descriptor_sc(SC_LOCAL);
173       }
174       break;
175     default:
176       break;
177     }
178     if (type == A_FORALL) {
179       if (STD_LOCAL(std))
180         continue; /* don't process for DO-INDEPENDENT */
181       transform_forall(std, ast);
182     } else if (type == A_ICALL && A_OPTYPEG(ast) == I_PTR2_ASSIGN)
183       transform_ptr(std, ast);
184     else
185       transform_ast(std, ast);
186     check_region(std);
187   }
188 }
189 
190 /**
191    \brief Keep track of STD of endcritical or endmaster statement
192  */
193 void
init_region(void)194 init_region(void)
195 {
196   pure_gbl.end_master_region = 0;
197   pure_gbl.end_critical_region = 0;
198 } /* init_region */
199 
200 /**
201    \brief Check a region is valid.
202  */
203 void
check_region(int std)204 check_region(int std)
205 {
206   int ast = STD_AST(std);
207   if (A_TYPEG(ast) == A_MASTER && pure_gbl.end_master_region == 0) {
208     /* get endmaster ast */
209     int endmasterast = A_LOPG(ast);
210     pure_gbl.end_master_region = A_STDG(endmasterast);
211     if (pure_gbl.end_critical_region == 0) {
212       pure_gbl.end_critical_region = pure_gbl.end_master_region;
213     }
214   } else if (A_TYPEG(ast) == A_CRITICAL && pure_gbl.end_critical_region == 0) {
215     /* get endcritical ast */
216     int endcriticalast = A_LOPG(ast);
217     pure_gbl.end_critical_region = A_STDG(endcriticalast);
218   }
219   if (pure_gbl.end_critical_region == std) {
220     pure_gbl.end_critical_region = 0;
221   }
222   if (pure_gbl.end_master_region == std) {
223     pure_gbl.end_master_region = 0;
224   }
225 } /* check_region */
226 
227 /**
228    \brief Create mask statements for conditional expression ast and insert them
229    after stdstart. Return the STD of the last statement added.
230  */
231 int
insert_mask(int ast,int stdstart)232 insert_mask(int ast, int stdstart)
233 {
234   int std;
235   int aststmt;
236 
237   if (A_TYPEG(ast) == A_BINOP && A_OPTYPEG(ast) == OP_SCAND) {
238     std = insert_mask(A_LOPG(ast), stdstart);
239     std = insert_mask(A_ROPG(ast), std);
240     return std;
241   }
242   aststmt = mk_stmt(A_IFTHEN, 0);
243   A_IFEXPRP(aststmt, ast);
244   std = add_stmt_after(aststmt, stdstart);
245   return std;
246 }
247 
248 /**
249    \brief Create ENDIF statements corresponding to conditional statements
250    emitted
251    for mask expression ast. Insert the ENDIFs after stdstart.
252    Return the STD of the last statement added.
253  */
254 int
insert_endmask(int ast,int stdstart)255 insert_endmask(int ast, int stdstart)
256 {
257   int std;
258   int aststmt;
259 
260   if (A_TYPEG(ast) == A_BINOP && A_OPTYPEG(ast) == OP_SCAND) {
261     std = insert_endmask(A_LOPG(ast), stdstart);
262     std = insert_endmask(A_ROPG(ast), std);
263     return std;
264   }
265   aststmt = mk_stmt(A_ENDIF, 0);
266   std = add_stmt_after(aststmt, stdstart);
267   return std;
268 }
269 
270 /**
271    \brief Dump compiler internal information for the communication analyzer.
272  */
273 void
report_comm(int std,int cause)274 report_comm(int std, int cause)
275 {
276   int ln;
277   int sptr;
278   static char msg8[] = "no parallelism: ";
279 
280   if (!XBIT(0, 2))
281     return;
282 
283   if (STD_MINFO(std))
284     return;
285 
286   STD_MINFO(std) = 1;
287 
288   ln = STD_LINENO(std);
289   switch (cause) {
290   case CANONICAL_CAUSE:
291     ccff_info(MSGFTN, "FTN001", 1, ln, "Forall scalarized", NULL);
292     break;
293   case INTRINSIC_CAUSE:
294     ccff_info(MSGFTN, "FTN002", 1, ln,
295               "Forall scalarized: transformational intrinsic call", NULL);
296     break;
297   case UGLYCOMM_CAUSE:
298     ccff_info(MSGFTN, "FTN003", 1, ln,
299               "Forall scalarized: complex communication", NULL);
300     break;
301   case DEPENDENCY_CAUSE:
302     ccff_info(MSGFTN, "FTN004", 1, ln, "Forall split in two: data dependence",
303               NULL);
304     break;
305   case GETSCALAR_CAUSE:
306     ccff_info(MSGFTN, "FTN005", 1, ln, "Expensive scalar communication", NULL);
307     break;
308   case COPYSCALAR_CAUSE:
309     ccff_info(MSGFTN, "FTN006", 1, ln, "Expensive scalar copy communication",
310               NULL);
311     break;
312   case COPYSECTION_CAUSE:
313     ccff_info(MSGFTN, "FTN007", 1, ln,
314               "Expensive all-to-all section copy communication", NULL);
315     break;
316   case PURECOMM_CAUSE:
317     ccff_info(MSGFTN, "FTN008", 1, ln,
318               "Communication generated: Forall pure arguments", NULL);
319     break;
320   case UGLYPURE_CAUSE:
321     ccff_info(MSGFTN, "FTN009", 1, ln,
322               "Forall scalarized: complex pure argument", NULL);
323     break;
324   case UGLYMASK_CAUSE:
325     ccff_info(MSGFTN, "FTN010", 1, ln,
326               "Forall scalarized: complex mask expression", NULL);
327     break;
328   case MANYRUNTIME_CAUSE:
329     assert(A_TYPEG(STD_AST(std)) == A_FORALL, "report_comm: forall is expected",
330            std, 2);
331     ccff_info(MSGFTN, "FTN011", 1, ln, "Too many runtime calls", NULL);
332     break;
333   }
334 }
335 
336 /**
337    \brief Construct an AST to add the lower bound of dimension dim
338    for array datatype dtyp to ast, and return the new AST.
339  */
340 int
add_lbnd(int dtyp,int dim,int ast,int astmember)341 add_lbnd(int dtyp, int dim, int ast, int astmember)
342 {
343   int astBnd = ADD_LWAST(dtyp, dim);
344   int ast1;
345 
346   if (!astBnd || astBnd == astb.bnd.one)
347     return ast;
348 
349   ast1 = mk_binop(OP_ADD, ast, check_member(astmember, astBnd), astb.bnd.dtype);
350   ast1 = mk_binop(OP_SUB, ast1, astb.bnd.one, astb.bnd.dtype);
351   return ast1;
352 }
353 
354 /**
355    \brief Construct an AST to subtract the lower bound of dimension dim
356    for array datatype dtyp to ast, and return the new AST.
357  */
358 int
sub_lbnd(int dtyp,int dim,int ast,int astmember)359 sub_lbnd(int dtyp, int dim, int ast, int astmember)
360 {
361   int astBnd = ADD_LWAST(dtyp, dim);
362   int ast1;
363 
364   if (!astBnd || astBnd == astb.bnd.one)
365     return ast;
366 
367   ast1 = mk_binop(OP_SUB, ast, check_member(astmember, astBnd), astb.bnd.dtype);
368   ast1 = mk_binop(OP_ADD, ast1, astb.bnd.one, astb.bnd.dtype);
369   return ast1;
370 }
371 
372 /**
373    \brief Return TRUE if the bounds of array sptr should be 1-based with
374    respect to the runtime.
375  */
376 LOGICAL
normalize_bounds(int sptr)377 normalize_bounds(int sptr)
378 {
379   int aln;
380   int sptr1;
381 
382   if (STYPEG(sptr) != ST_ARRAY)
383     return FALSE;
384   sptr1 = sptr;
385   return (XBIT(58, 0x22) && !POINTERG(sptr));
386 }
387 
388 LOGICAL
is_same_number_of_idx(int dest,int src,int list)389 is_same_number_of_idx(int dest, int src, int list)
390 {
391   int count, count1;
392   int asd;
393   int j, ndim;
394 
395   count = 0;
396   count1 = 0;
397 
398   /* dest */
399   while (dest) {
400     switch (A_TYPEG(dest)) {
401     case A_ID:
402       dest = 0;
403       break;
404     case A_SUBSTR:
405       dest = A_LOPG(dest);
406       break;
407     case A_MEM:
408       dest = A_PARENTG(dest);
409       break;
410     case A_SUBSCR:
411       asd = A_ASDG(dest);
412       ndim = ASD_NDIM(asd);
413 
414       for (j = 0; j < ndim; ++j) {
415         if (search_forall_var(ASD_SUBS(asd, j), list))
416           count++;
417       }
418       dest = A_LOPG(dest);
419       break;
420     default:
421       dest = 0;
422       break;
423     }
424   }
425   while (src) {
426     switch (A_TYPEG(src)) {
427     case A_ID:
428       src = 0;
429       break;
430     case A_SUBSTR:
431       src = A_LOPG(src);
432       break;
433     case A_MEM:
434       src = A_PARENTG(src);
435       break;
436     case A_SUBSCR:
437       /* src */
438       asd = A_ASDG(src);
439       ndim = ASD_NDIM(asd);
440       for (j = 0; j < ndim; ++j) {
441         if (search_forall_var(ASD_SUBS(asd, j), list))
442           count1++;
443       }
444       src = A_LOPG(src);
445       break;
446     default:
447       src = 0;
448       break;
449     }
450   }
451 
452   if (count1 == count)
453     return TRUE;
454   else
455     return FALSE;
456 }
457 
458 /**
459    \brief This routine finds the dimension of sptr.
460 
461    It takes subscript `a(f(i),5,f(j))`. It eliminates scalar dimension.
462    It makes an ast for reference sptr: `a(f(i),5,f(j)) --> sptr(f(i),f(j))`
463 
464    NOTE: This is always called after get_temp_forall(), which calls
465    mk_forall_sptr().  The subscripts are not always as simple
466    as `sptr(f(i),f(j))`, especially if the stride is not known.
467    if the stride is not +1 or -1, the subscript will be normalized.
468  */
469 int
reference_for_temp(int sptr,int a,int forall)470 reference_for_temp(int sptr, int a, int forall)
471 {
472   int subs[7];
473   int list;
474   int i, ndim, k;
475   int astnew, vector;
476 
477   list = A_LISTG(forall);
478   ndim = 0;
479   vector = 0;
480   do {
481     if (A_TYPEG(a) == A_MEM) {
482       a = A_PARENTG(a);
483     } else if (A_TYPEG(a) == A_SUBSCR) {
484       int asd, adim;
485       asd = A_ASDG(a);
486       adim = ASD_NDIM(asd);
487       /* array will be referenced after communication as follows  */
488       for (i = 0; i < adim; i++) {
489         int ast;
490         ast = ASD_SUBS(asd, i);
491         if (XBIT(58, 0x20000)) {
492           extern int constant_stride(int a, int *value);
493           int c, stride, lw, up;
494           if (A_TYPEG(ast) == A_TRIPLE) {
495             lw = check_member(a, A_LBDG(ast));
496             up = check_member(a, A_UPBDG(ast));
497             c = constant_stride(A_STRIDEG(ast), &stride);
498             if (flg.opt >= 2 && !XBIT(2, 0x400000)) {
499               stride = A_STRIDEG(ast);
500               if (stride == 0)
501                 stride = astb.i1;
502               up = mk_binop(OP_DIV, mk_binop(OP_ADD, mk_binop(OP_SUB, up, lw,
503                                                               stb.user.dt_int),
504                                              stride, stb.user.dt_int),
505                             stride, stb.user.dt_int);
506               lw = astb.i1;
507               subs[ndim] = mk_triple(lw, up, 0);
508             } else if (c && stride == 1) {
509               subs[ndim] = ast;
510             } else if (c && stride == -1) {
511               subs[ndim] = ast;
512             } else {
513               stride = A_STRIDEG(ast);
514               if (stride == 0)
515                 stride = astb.i1;
516               up = mk_binop(OP_DIV, mk_binop(OP_ADD, mk_binop(OP_SUB, up, lw,
517                                                               stb.user.dt_int),
518                                              stride, stb.user.dt_int),
519                             stride, stb.user.dt_int);
520               lw = astb.i1;
521               subs[ndim] = mk_triple(lw, up, 0);
522             }
523             ++ndim;
524             vector = 1;
525           } else if (A_SHAPEG(ast)) {
526             subs[ndim] = ast;
527             ++ndim;
528             vector = 1;
529           } else if ((k = search_forall_var(ast, list))) {
530             if (other_forall_var(ast, list, k))
531               /*f2731*/
532               subs[ndim] = ast;
533             else {
534               lw = A_LBDG(ASTLI_TRIPLE(k));
535               up = A_UPBDG(ASTLI_TRIPLE(k));
536               c = constant_stride(A_STRIDEG(ASTLI_TRIPLE(k)), &stride);
537               if (flg.opt >= 2 && !XBIT(2, 0x400000)) {
538                 stride = A_STRIDEG(ASTLI_TRIPLE(k));
539                 if (stride == 0)
540                   stride = astb.i1;
541                 subs[ndim] = mk_binop(
542                     OP_DIV,
543                     mk_binop(OP_ADD, mk_binop(OP_SUB, mk_id(ASTLI_SPTR(k)), lw,
544                                               stb.user.dt_int),
545                              stride, stb.user.dt_int),
546                     stride, stb.user.dt_int);
547               } else if (c && stride == 1) {
548                 subs[ndim] = mk_id(ASTLI_SPTR(k));
549               } else if (c && stride == -1) {
550                 subs[ndim] = mk_id(ASTLI_SPTR(k));
551               } else {
552                 stride = A_STRIDEG(ASTLI_TRIPLE(k));
553                 if (stride == 0)
554                   stride = astb.i1;
555                 subs[ndim] = mk_binop(
556                     OP_DIV,
557                     mk_binop(OP_ADD, mk_binop(OP_SUB, mk_id(ASTLI_SPTR(k)), lw,
558                                               stb.user.dt_int),
559                              stride, stb.user.dt_int),
560                     stride, stb.user.dt_int);
561               }
562             }
563             ++ndim;
564           }
565         } else if (A_TYPEG(ast) == A_TRIPLE || A_SHAPEG(ast)) {
566           /* include this dimension */
567           subs[ndim] = ast;
568           ++ndim;
569           vector = 1;
570         } else if (search_forall_var(ASD_SUBS(asd, i), list)) {
571           /* include this dimension */
572           subs[ndim] = ast;
573           ++ndim;
574         }
575       }
576       a = A_LOPG(a);
577     } else {
578       interr("reference_for_temp: not subscr or member", a, 3);
579     }
580   } while (A_TYPEG(a) != A_ID);
581   assert(ndim == rank_of_sym(sptr), "reference_for_temp: rank mismatched", sptr,
582          4);
583   if (vector) {
584     astnew = mk_subscr(mk_id(sptr), subs, ndim, DTYPEG(sptr));
585   } else {
586     astnew = mk_subscr(mk_id(sptr), subs, ndim, DTY(DTYPEG(sptr) + 1));
587   }
588   return astnew;
589 }
590 
591 /**
592    \brief This routine a barrier statement in the barrier table.
593  */
594 int
record_barrier(LOGICAL bBefore,int astStmt,int std)595 record_barrier(LOGICAL bBefore, int astStmt, int std)
596 {
597   int i;
598   int sptr;
599   LITEMF *pl;
600 
601   switch (A_TYPEG(astStmt)) {
602   case A_ASN:
603     sptr = sym_of_ast(A_DESTG(astStmt));
604     pl = clist();
605     pl->item = sptr;
606     break;
607   case A_FORALL:
608     sptr = sym_of_ast(A_DESTG(A_IFSTMTG(astStmt)));
609     pl = clist();
610     pl->item = sptr;
611     break;
612   default:
613     return 0;
614   }
615   i = get_brtbl();
616   brtbl.base[i].f1 = bBefore;
617   brtbl.base[i].f2 = std;
618   brtbl.base[i].f3 = pl;
619   return i;
620 }
621 
622 /**
623    \brief This routine is to read distributed array element at forall by
624    using get scalar primitive.
625  */
626 int
emit_get_scalar(int a,int std)627 emit_get_scalar(int a, int std)
628 {
629   int lsptr, ld;
630   int astnew;
631   int list;
632   int i, nargs, argt;
633   int asd;
634   int ndim;
635   int temp, tempast;
636   int ast;
637   int commstd;
638   int nd;
639 
640   if (STD_LOCAL(std))
641     return a; /* don't process for DO-INDEPENDENT */
642   asd = A_ASDG(a);
643   ndim = ASD_NDIM(asd);
644   ld = dist_ast(a);
645   if (ld == 0)
646     return a;
647   lsptr = memsym_of_ast(ld);
648   if (!DISTG(lsptr) && !ALIGNG(lsptr))
649     return a;
650 
651   /* It is distributed.  Create a temp to hold the value */
652   temp = sym_get_scalar(SYMNAME(lsptr), "s", DTY(DTYPEG(lsptr) + 1));
653   tempast = mk_id(temp);
654   ast = new_node(A_HGETSCLR);
655   A_SRCP(ast, a);
656   A_DESTP(ast, tempast);
657   if (DESCRG(lsptr)) {
658     int lop;
659     lop = check_member(a, mk_id(DESCRG(lsptr)));
660     A_LOPP(ast, lop);
661   }
662   commstd = add_stmt_before(ast, std);
663   A_STDP(ast, commstd);
664   return replace_ast_subtree(a, ld, tempast);
665 }
666 
667 /**
668    <pre>
669    Algorithm:
670    * gather information abouth lhs array.
671    * tag communications for rhs array.
672    * optimize overlap_shift if there is same array shift.
673    * optimize copy_section
674    * convert to forall into block forall since owner computes rule distribution
675      for cyclic require complicated statement insertion.
676    * forall_gbl.s0....
677    * forall_gbl.s1 forall(i=.
678    * forall_gbl.s2   A(i)=
679    * forall_gbl.s3 endforall
680    * forall_gbl.s4, forall_gbl.s5 ...
681 
682    These variables are globals.
683    * forall_gbl.s1 moves up.
684    * forall_gbl.s4 moves down.
685    </pre>
686  */
687 void
forall_opt1(int ast)688 forall_opt1(int ast)
689 {
690   int std;
691   int i, j;
692   int nd;
693 
694   std = A_STDG(ast);
695   if (A_OPT1G(ast))
696     return;
697   nd = mk_ftb();
698   FT_NRT(nd) = 0;
699   FT_RTL(nd) = clist();
700   FT_NMCALL(nd) = 0;
701   FT_MCALL(nd) = clist();
702   FT_NSCALL(nd) = 0;
703   FT_SCALL(nd) = clist();
704   FT_NMGET(nd) = 0;
705   FT_MGET(nd) = clist();
706   FT_NSGET(nd) = 0;
707   FT_SGET(nd) = clist();
708   FT_NPCALL(nd) = 0;
709   FT_PCALL(nd) = clist();
710   FT_IGNORE(nd) = 0;
711   FT_SECTL(nd) = 0;
712   FT_CYCLIC(nd) = getcyclic();
713   for (i = 0; i < 7; i++) {
714     FT_NFUSE(nd, i) = 0;
715     for (j = 0; j < MAXFUSE; j++)
716       FT_FUSELP(nd, i, j) = 0;
717   }
718   FT_FUSED(nd) = 0;
719   FT_HEADER(nd) = std;
720   FT_BARR1(nd) = 0;
721   FT_BARR2(nd) = 0;
722   FT_FG(nd) = 0;
723   A_OPT1P(ast, nd);
724 }
725 
726 void
transform_forall(int std,int ast)727 transform_forall(int std, int ast)
728 {
729   int asn;
730   int src, dest;
731   int asd;
732   int astnew;
733   int endforall;
734   int test1, test2;
735   int nd;
736   int lhs;
737 
738   comminfo.std = std;
739   comminfo.usedstd = std;
740   comminfo.forall = ast;
741   trans.rhsbase = 0;
742 
743   init_opt_tables();
744   forall_opt1(ast);
745 
746   if (pure_gbl.end_critical_region != 0) {
747     scalarize(std, ast, TRUE);
748     return;
749   }
750 
751   shape_communication(std, ast);
752 
753   comminfo.std = std;
754   comminfo.usedstd = std;
755   comminfo.forall = ast;
756 
757   asn = A_IFSTMTG(ast);
758   dest = scalar_communication(A_DESTG(asn), std);
759   src = scalar_communication(A_SRCG(asn), std);
760   A_DESTP(asn, dest);
761   A_SRCP(asn, src);
762 
763   /* if the lhs is distributed, adjust the forall bounds; insert the
764    * communication for the forall statement; adjust the rhs bounds
765    */
766   comminfo.mask_phase = 0;
767   if (normalize_forall_triplet(std, ast) == 0) {
768     report_comm(std, CANONICAL_CAUSE);
769     scalarize(std, ast, TRUE);
770     return;
771   }
772 
773   if (is_scatter(std))
774     return;
775 
776   if (canonical_conversion(ast) == 0) {
777     report_comm(std, CANONICAL_CAUSE);
778     scalarize(std, ast, TRUE);
779     return;
780   }
781 
782   asn = A_IFSTMTG(ast);
783   if (process_lhs_sub(std, ast) == 0) {
784     scalarize(std, ast, TRUE);
785     return;
786   }
787   test1 = tag_forall_comm(A_SRCG(A_IFSTMTG(ast)));
788   comminfo.mask_phase = 1;
789   if (!comminfo.unstruct && A_IFEXPRG(ast))
790     test2 = tag_forall_comm(A_IFEXPRG(ast));
791   if (!comminfo.unstruct)
792     test1 = tag_call_comm(std, ast);
793 
794   if (comminfo.unstruct) {
795     report_comm(std, UGLYCOMM_CAUSE);
796     scalarize(std, ast, TRUE);
797     return;
798   }
799   if (comminfo.ugly_mask) {
800     report_comm(std, UGLYMASK_CAUSE);
801     scalarize(std, ast, TRUE);
802     return;
803   }
804   comminfo.mask_phase = 0;
805   opt_overlap();
806   astnew = insert_forall_comm(A_SRCG(asn));
807   A_SRCP(asn, astnew);
808   comminfo.mask_phase = 1;
809   if (A_IFEXPRG(ast)) {
810     astnew = insert_forall_comm(A_IFEXPRG(ast));
811     A_IFEXPRP(ast, astnew);
812   }
813   insert_call_comm(std, ast);
814 
815   /* guard_forall(std); */
816   fix_guard_forall(std);
817 
818   /* give information if more than 40 run-time calls generated
819    * for this forall
820    */
821   if (FT_NRT(A_OPT1G(STD_AST(std))) > 40)
822     report_comm(std, MANYRUNTIME_CAUSE);
823 }
824 
825 /**
826    \brief The forall should be treated like a serial statement.
827 
828    Turn it into a block-forall so the IF stuff works OK.
829  */
830 void
scalarize(int std,int forall,LOGICAL after_transformer)831 scalarize(int std, int forall, LOGICAL after_transformer)
832 {
833   int std1;
834   int std2;
835 
836   std1 = 0;
837   std2 = 0;
838   forall_dependency_scalarize(std, &std1, &std2);
839   forall = STD_AST(std);
840   sequentialize(std, forall, after_transformer);
841   if (std1) {
842     forall = STD_AST(std1);
843     if (after_transformer)
844       transform_forall(std1, forall);
845   }
846 
847   if (std2) {
848     forall = STD_AST(std2);
849     if (after_transformer)
850       transform_forall(std2, forall);
851   }
852 }
853 
854 /**
855    \brief This is neccessary, if forall sequentialized.
856  */
857 void
un_fuse(int forall)858 un_fuse(int forall)
859 {
860   int nd, nd1;
861   int forall1;
862   int fusedstd;
863   int i;
864   int forallstd;
865 
866   nd = A_OPT1G(forall);
867   for (i = 0; i < FT_NFUSE(nd, 0); i++) {
868     fusedstd = FT_FUSEDSTD(nd, 0, i);
869     forall1 = STD_AST(fusedstd);
870     nd1 = A_OPT1G(forall1);
871     FT_HEADER(nd1) = fusedstd;
872   }
873   FT_NFUSE(nd, 0) = 0;
874   forallstd = A_STDG(forall);
875   assert(forallstd, "un_fuse: it must be forall", forall, 3);
876   assert(STD_AST(forallstd) == forall, "un_fuse: it must be forall", forall, 3);
877   FT_HEADER(nd) = forallstd;
878 }
879 
880 void
sequentialize(int std,int forall,LOGICAL after_transformer)881 sequentialize(int std, int forall, LOGICAL after_transformer)
882 {
883   int asn;
884   int newast;
885   int stdnext, stdnext1;
886   int n, i;
887   int triplet_list, index_var;
888   int triplet;
889   int expr;
890   int lineno;
891   LOGICAL craft_partion;
892 
893   if (after_transformer)
894     un_fuse(forall);
895 
896   ast_to_comment(forall);
897   asn = A_IFSTMTG(forall);
898   if (!asn) {
899     asn = mk_stmt(A_CONTINUE, 0);
900   }
901   lineno = STD_LINENO(std);
902   stdnext = STD_NEXT(std);
903   delete_stmt(A_STDG(forall));
904 
905   n = 0;
906   triplet_list = A_LISTG(forall);
907   for (; triplet_list; triplet_list = ASTLI_NEXT(triplet_list)) {
908     int dovar;
909     n++;
910     index_var = ASTLI_SPTR(triplet_list);
911     triplet = ASTLI_TRIPLE(triplet_list);
912     newast = mk_stmt(A_DO, 0);
913     dovar = mk_id(index_var);
914     A_DOVARP(newast, dovar);
915     A_M1P(newast, A_LBDG(triplet));
916     A_M2P(newast, A_UPBDG(triplet));
917     A_M3P(newast, A_STRIDEG(triplet));
918     A_M4P(newast, 0);
919     stdnext = add_stmt_before(newast, stdnext);
920     STD_LINENO(stdnext) = lineno;
921     if (after_transformer)
922       transform_ast(stdnext, newast);
923     stdnext = STD_NEXT(stdnext);
924   }
925 
926   if (after_transformer)
927     stdnext = sequentialize_mask_call(forall, stdnext);
928 
929   expr = A_IFEXPRG(forall);
930   if (expr) {
931     stdnext = STD_PREV(stdnext);
932     stdnext1 = insert_mask(expr, stdnext);
933     stdnext1 = STD_NEXT(stdnext1);
934     if (after_transformer) {
935       int nextnext;
936       stdnext = STD_NEXT(stdnext);
937       for (; stdnext != stdnext1; stdnext = nextnext) {
938         nextnext = STD_NEXT(stdnext);
939         transform_ast(stdnext, STD_AST(stdnext));
940       }
941     }
942     stdnext = stdnext1;
943   }
944 
945   if (after_transformer)
946     stdnext = sequentialize_stmt_call(forall, stdnext);
947 
948   stdnext = add_stmt_before(asn, stdnext);
949   stdnext1 = STD_NEXT(stdnext);
950   STD_LINENO(stdnext) = lineno;
951   if (after_transformer)
952     transform_ast(stdnext, asn);
953   stdnext = stdnext1;
954 
955   if (expr) {
956     stdnext = insert_endmask(expr, STD_PREV(stdnext));
957     stdnext = STD_NEXT(stdnext);
958   }
959 
960   for (i = 0; i < n; i++) {
961     newast = mk_stmt(A_ENDDO, 0);
962     stdnext = add_stmt_before(newast, stdnext);
963     STD_LINENO(stdnext) = lineno;
964     stdnext = STD_NEXT(stdnext);
965   }
966 }
967 
968 /**
969    \brief Initialize the communication analyzer phase.
970  */
971 static void
comm_init(void)972 comm_init(void)
973 {
974   TRANS_ALLOC(trans.subb, SUBINFO, 1000);
975   TRANS_ALLOC(trans.arrb, ARREF, 100);
976   TRANS_ALLOC(trans.tdescb, TDESC, 50);
977   init_pertbl();
978   init_brtbl();
979 }
980 
981 static LOGICAL
is_scatter(int std)982 is_scatter(int std)
983 {
984   if (!scatter_class(std))
985     return FALSE;
986   if (!comminfo.scat.base && !comminfo.scat.array_simple)
987     return FALSE;
988   emit_sum_scatterx(std);
989   emit_scatterx(std);
990   return TRUE;
991 }
992 
993 /**
994    \brief Like reference_for_temp(), this routine finds the dimension of sptr.
995 
996    It takes subscript `a(f(i),5,f(j))`. It eliminates scalar dimensions.
997    It makes an ast to reference sptr: `a(f(i),5,f(j)) --> sptr(i,j)`
998  */
999 static int
simple_reference_for_temp(int sptr,int a,int forall)1000 simple_reference_for_temp(int sptr, int a, int forall)
1001 {
1002   int subs[7];
1003   int list;
1004   int i, ndim, k;
1005   int astnew;
1006 
1007   list = A_LISTG(forall);
1008   ndim = 0;
1009   do {
1010     if (A_TYPEG(a) == A_MEM) {
1011       a = A_PARENTG(a);
1012     } else if (A_TYPEG(a) == A_SUBSCR) {
1013       int asd, adim;
1014       asd = A_ASDG(a);
1015       adim = ASD_NDIM(asd);
1016       /* array will be referenced after communication as follows  */
1017       for (i = 0; i < adim; i++) {
1018         int ast;
1019         ast = ASD_SUBS(asd, i);
1020         if (XBIT(58, 0x20000)) {
1021           if (A_TYPEG(ast) == A_TRIPLE) {
1022             subs[ndim] = ast;
1023             ++ndim;
1024           } else if ((k = search_forall_var(ast, list))) {
1025             subs[ndim] = mk_id(ASTLI_SPTR(k));
1026             ++ndim;
1027           } else if (A_SHAPEG(ast)) {
1028             subs[ndim] = ast;
1029             ++ndim;
1030           }
1031         } else if ((k = search_forall_var(ast, list))) {
1032           subs[ndim] = mk_id(ASTLI_SPTR(k));
1033           ++ndim;
1034         } else if (A_TYPEG(ast) == A_TRIPLE || A_SHAPEG(ast)) {
1035           /* include this dimension */
1036           subs[ndim] = ast;
1037           ++ndim;
1038         }
1039       }
1040       a = A_LOPG(a);
1041     } else {
1042       interr("simple_reference_for_temp: not subscr or member", a, 3);
1043     }
1044   } while (A_TYPEG(a) != A_ID);
1045   assert(ndim == rank_of_sym(sptr),
1046          "simple_reference_for_temp: rank mismatched", sptr, 4);
1047   astnew = mk_subscr(mk_id(sptr), subs, ndim, DTY(DTYPEG(sptr) + 1));
1048   return astnew;
1049 }
1050 
1051 static int
temp_gatherx(int std,int forall,int lhs,int rhs,int dty,int * allocast)1052 temp_gatherx(int std, int forall, int lhs, int rhs, int dty, int *allocast)
1053 {
1054   int sptr;
1055   int subscr[7];
1056   int ast;
1057   int nd;
1058   int astnew;
1059   int header;
1060 
1061   nd = A_OPT1G(forall);
1062   header = FT_HEADER(nd);
1063   sptr = mk_forall_sptr_gatherx(forall, lhs, rhs, subscr, dty);
1064 
1065   astnew =
1066       mk_subscr(mk_id(sptr), subscr, rank_of_sym(sptr), DTY(DTYPEG(sptr) + 1));
1067   ast = new_node(A_HALLOBNDS);
1068   A_LOPP(ast, astnew);
1069   nd = mk_ftb();
1070   FT_STD(nd) = std;
1071   FT_FORALL(nd) = forall;
1072   FT_ALLOC_SPTR(nd) = sptr;
1073   FT_ALLOC_FREE(nd) = header;
1074   FT_ALLOC_SAME(nd) = 0;
1075   FT_ALLOC_REUSE(nd) = 0;
1076   FT_ALLOC_USED(nd) = 0;
1077   FT_ALLOC_OUT(nd) = sptr;
1078   A_OPT1P(ast, nd);
1079   *allocast = ast;
1080   return sptr;
1081 }
1082 
1083 static int
temp_copy_section(int std,int forall,int lhs,int rhs,int dty,int * allocast)1084 temp_copy_section(int std, int forall, int lhs, int rhs, int dty, int *allocast)
1085 {
1086   int sptr;
1087   int subscr[7];
1088   int ast;
1089   int nd;
1090   int astnew;
1091   int header;
1092 
1093   nd = A_OPT1G(forall);
1094   header = FT_HEADER(nd);
1095   sptr = mk_forall_sptr_copy_section(forall, lhs, rhs, subscr, dty);
1096 
1097   astnew =
1098       mk_subscr(mk_id(sptr), subscr, rank_of_sym(sptr), DTY(DTYPEG(sptr) + 1));
1099   ast = new_node(A_HALLOBNDS);
1100   A_LOPP(ast, astnew);
1101   nd = mk_ftb();
1102   FT_STD(nd) = std;
1103   FT_FORALL(nd) = forall;
1104   FT_ALLOC_SPTR(nd) = sptr;
1105   FT_ALLOC_FREE(nd) = header;
1106   FT_ALLOC_SAME(nd) = 0;
1107   FT_ALLOC_REUSE(nd) = 0;
1108   FT_ALLOC_USED(nd) = 0;
1109   FT_ALLOC_OUT(nd) = sptr;
1110   A_OPT1P(ast, nd);
1111   *allocast = ast;
1112   return sptr;
1113 }
1114 
1115 /**
1116    \brief Just like copy_section_temp_before() except it does not eliminate
1117    scalar dimension.
1118 
1119    This means that it makes a new array with sptr by using subscript of rhs.
1120  */
1121 static int
gatherx_temp_before(int sptr,int rhs,int forall)1122 gatherx_temp_before(int sptr, int rhs, int forall)
1123 {
1124   int subs[7];
1125   int k, j;
1126   int asd;
1127   int ndim;
1128   int astnew;
1129   int astli;
1130   int nidx;
1131   int list;
1132 
1133   asd = A_ASDG(rhs);
1134   ndim = ASD_NDIM(asd);
1135   list = A_LISTG(forall);
1136 
1137   j = 0;
1138   /* array will be referenced after communication as follows  */
1139   for (k = 0; k < ndim; ++k) {
1140     astli = 0;
1141     nidx = 0;
1142     search_forall_idx(ASD_SUBS(asd, k), list, &astli, &nidx);
1143     if (nidx == 1 && astli) {
1144       /* include this dimension */
1145       subs[j] = mk_id(ASTLI_SPTR(astli));
1146       j++;
1147     } else if (nidx == 0 && astli == 0) {
1148       /* include scalar dimension too */
1149       subs[j] = ASD_SUBS(asd, k);
1150       j++;
1151     }
1152   }
1153   assert(j == rank_of_sym(sptr), "gatherx_temp_before: rank mismatched", sptr,
1154          4);
1155   astnew = mk_subscr(mk_id(sptr), subs, j, DTY(DTYPEG(sptr) + 1));
1156   return astnew;
1157 }
1158 
1159 static int
make_sec_ast(int arr,int std,int allocstd,int sectflag)1160 make_sec_ast(int arr, int std, int allocstd, int sectflag)
1161 {
1162   int asn;
1163   int ast;
1164   int nd;
1165   int sec, secast;
1166   int sectstd;
1167   int forall;
1168   int sptr;
1169   int header;
1170   int bogus;
1171   int shape;
1172   int rank;
1173 
1174   forall = STD_AST(std);
1175   nd = A_OPT1G(forall);
1176   header = FT_HEADER(nd);
1177 
1178   asn = mk_stmt(A_ASN, astb.bnd.dtype);
1179   ast = new_node(A_HSECT);
1180   sptr = sptr_of_subscript(arr);
1181   A_LOPP(ast, arr);
1182   nd = mk_ftb();
1183   FT_STD(nd) = std;
1184   FT_FORALL(nd) = forall;
1185   FT_SECT_ARR(nd) = arr;
1186   FT_SECT_SPTR(nd) = sptr;
1187   FT_SECT_ALLOC(nd) = allocstd;
1188   FT_SECT_FREE(nd) = header;
1189   FT_SECT_FLAG(nd) = sectflag;
1190   bogus = getbit(sectflag, 8);
1191   shape = A_SHAPEG(arr);
1192   assert(shape, "make_sec_ast: ast has no shape", arr, 4);
1193   rank = SHD_NDIM(shape);
1194   if (is_whole_array(arr) && !bogus) {
1195     DESCUSEDP(sptr, 1);
1196     sec = DESCRG(sptr);
1197     secast = check_member(arr, mk_id(sec));
1198   } else {
1199     sec = sym_get_sdescr(sptr, rank); /* ZB */
1200     secast = mk_id(sec);
1201   }
1202   FT_SECT_SAME(nd) = 0;
1203   FT_SECT_REUSE(nd) = 0;
1204   FT_SECT_OUT(nd) = sec;
1205   A_OPT1P(ast, nd);
1206 
1207   A_DESTP(asn, secast);
1208   A_SRCP(asn, ast);
1209 
1210   sectstd = add_stmt_before(asn, header);
1211   A_STDP(asn, sectstd);
1212   nd = A_OPT1G(forall);
1213   plist(FT_RTL(nd), sectstd);
1214   FT_NRT(nd)++;
1215 
1216   return sectstd;
1217 }
1218 
1219 /**
1220    \brief This routine takes an array in a forall statement with its subinfo
1221    and replaces all forall indexes.
1222 
1223    E.g., `forall(i=1:10:2) a(i+1)` will become `a(2:11:2)`.
1224 
1225    Note that this assumes each forall index appears in array subscripts.
1226    If not, something is wrong in the communication detection algorithm.
1227  */
1228 static int
forall_2_sec(int a,int forall)1229 forall_2_sec(int a, int forall)
1230 {
1231   int list;
1232   int ndim;
1233   int i;
1234   int j;
1235   int asd;
1236   int sub_expr;
1237   int triple;
1238   int l, u, s;
1239   int t1, t2, t3;
1240   int subs[7];
1241   int sptr;
1242   int astli;
1243   int base;
1244   int stride;
1245   int shape;
1246   int nd;
1247   int nidx;
1248   int changed;
1249 
1250   assert(A_TYPEG(a) == A_SUBSCR, "forall_2_sec: not SUBSCR", a, 4);
1251   list = A_LISTG(forall);
1252   asd = A_ASDG(a);
1253   sptr = sptr_of_subscript(a);
1254   ndim = ASD_NDIM(asd);
1255   shape = 0;
1256   if (A_ARRASNG(forall)) {
1257     nd = get_finfo(forall, a);
1258     if (nd)
1259       shape = FINFO_SHAPE(nd);
1260   }
1261 
1262   /* If it was an array assignment, use the original section info */
1263   if (A_ARRASNG(forall) && shape) {
1264     j = 0;
1265     for (i = 0; i < ndim; i++) {
1266       sub_expr = ASD_SUBS(asd, i);
1267       astli = 0;
1268       nidx = 0;
1269       search_forall_idx(sub_expr, list, &astli, &nidx);
1270       if (nidx == 1) {
1271         t1 = check_member(a, SHD_LWB(shape, j));
1272         t2 = check_member(a, SHD_UPB(shape, j));
1273         t3 = check_member(a, SHD_STRIDE(shape, j));
1274         j++;
1275         subs[i] = mk_triple(t1, t2, t3);
1276       } else
1277         subs[i] = ASD_SUBS(asd, i);
1278     }
1279     assert(j == SHD_NDIM(shape), "forall_2_sec: something is wrong", a, 4);
1280     return mk_subscr(A_LOPG(a), subs, ndim, DTYPEG(sptr));
1281   }
1282   /* If it was a forall, calculate the section info */
1283   changed = 0;
1284   for (i = 0; i < ndim; i++) {
1285     sub_expr = ASD_SUBS(asd, i);
1286     astli = 0;
1287     search_idx(sub_expr, list, &astli, &base, &stride);
1288     assert(base, "forall_2_sec: something is wrong", a, 4);
1289     if (astli) {
1290       triple = ASTLI_TRIPLE(astli);
1291       l = A_LBDG(triple);
1292       u = A_UPBDG(triple);
1293       s = A_STRIDEG(triple);
1294       t1 = replace_expr(sub_expr, ASTLI_SPTR(astli), l, 1);
1295       t2 = replace_expr(sub_expr, ASTLI_SPTR(astli), u, 1);
1296       if (s == 0)
1297         s = astb.bnd.one;
1298       t3 = opt_binop(OP_MUL, s, stride, astb.bnd.dtype);
1299       subs[i] = mk_triple(t1, t2, t3);
1300       changed = 1;
1301     } else
1302       subs[i] = ASD_SUBS(asd, i);
1303   }
1304   if (changed)
1305     return mk_subscr(A_LOPG(a), subs, ndim, DTYPEG(sptr));
1306   else
1307     return a;
1308 }
1309 
1310 /* give a%b(1:n)%c, return pointer to a%b%c in 'pnewast',
1311  * pointer to a%b(1:n) in 'psectast', pointer to b in 'psptr'. */
1312 static void
remove_section(int ast,int * pnewast,int * psectast,int * psptr,int * panydist,int * pnontrivial)1313 remove_section(int ast, int *pnewast, int *psectast, int *psptr, int *panydist,
1314                int *pnontrivial)
1315 {
1316   int lop, sptr = 0;
1317   switch (A_TYPEG(ast)) {
1318   case A_SUBSTR:
1319     remove_section(A_LOPG(ast), pnewast, psectast, psptr, panydist,
1320                    pnontrivial);
1321     *pnewast = mk_substr(*pnewast, A_LEFTG(ast), A_RIGHTG(ast), A_DTYPEG(ast));
1322     break;
1323   case A_INTR:
1324     *pnewast = ast;
1325     *psectast = 0;
1326     *psptr = 0;
1327     break;
1328   case A_ID:
1329     sptr = A_SPTRG(ast);
1330     *psptr = sptr;
1331     *psectast = ast;
1332     *pnewast = ast;
1333     break;
1334   case A_MEM:
1335     lop = A_PARENTG(ast);
1336     remove_section(lop, pnewast, psectast, psptr, panydist, pnontrivial);
1337     *pnewast = mk_member(*pnewast, A_MEMG(ast), A_DTYPEG(ast));
1338     sptr = A_SPTRG(A_MEMG(ast));
1339     if (A_SHAPEG(lop) != 0) {
1340       /* psectast, psptr already set by parent */
1341       *pnontrivial = 1;
1342     } else {
1343       *psectast = ast;
1344       *psptr = sptr;
1345     }
1346     break;
1347   case A_SUBSCR:
1348     lop = A_LOPG(ast);
1349     if (A_TYPEG(lop) == A_ID) {
1350       sptr = A_SPTRG(lop);
1351     } else if (A_TYPEG(lop) == A_MEM) {
1352       sptr = A_SPTRG(A_MEMG(lop));
1353     }
1354     remove_section(lop, pnewast, psectast, psptr, panydist, pnontrivial);
1355     if (A_SHAPEG(ast) == 0) {
1356       *pnewast = mk_subscr_copy(*pnewast, A_ASDG(ast), A_DTYPEG(ast));
1357       *psectast = ast;
1358       *psptr = sptr;
1359     } else if (A_TYPEG(lop) == A_ID ||
1360                (A_TYPEG(lop) == A_MEM && A_SHAPEG(A_PARENTG(lop)) == 0)) {
1361       /* if the 'lop' is an ID, or
1362        * if the 'lop' is an member whose parent has no shape,
1363        * shape comes from this subscript */
1364       *psectast = ast;
1365       *psptr = sptr;
1366     } else {
1367       /* section comes from A_MEM parent; psectast, psptr already set */
1368       *pnewast = mk_subscr_copy(*pnewast, A_ASDG(ast), A_DTYPEG(ast));
1369       *pnontrivial = 1;
1370     }
1371     break;
1372   default:
1373     *pnewast = 0;
1374     *psectast = 0;
1375     *psptr = 0;
1376     break;
1377   }
1378   if (sptr && ALIGNG(sptr))
1379     *panydist = 1;
1380 } /* remove_section */
1381 
1382 /*     pv => ar
1383  *     pv => ar(lower:upper:stride,...)
1384  *     call pghpf_ptr_assign(pv, pv$sdsc, ar, ar$d, sectflag)
1385  *  pv: base.
1386  *  pv$sdsc:            pv's (new) static descriptor
1387  *  ar:                 ar's base address (ar or ar(ar$o))
1388  *  ar$d:               ar's (old) descriptor
1389  *  sectflag:           integer, 0 if whole array, 1 if section
1390  */
1391 static void
transform_ptr(int std,int ast)1392 transform_ptr(int std, int ast)
1393 {
1394   int ast1;
1395   int argt, nargs;
1396   int newargt;
1397   int src, dest, newsrc, sectast, src_sptr, anydist;
1398   int dest_sptr, nontrivial;
1399   int array_desc;
1400   int func;
1401   LOGICAL is_cyclic;
1402   int align, section;
1403   int ndim;
1404   int i;
1405   int ptr_reshape_dest = 0;
1406   int dtype;
1407 
1408   assert(A_TYPEG(ast) == A_ICALL && A_OPTYPEG(ast) == I_PTR2_ASSIGN,
1409          "transform_ptr: something is wrong", 2, ast);
1410   NODESCP(find_pointer_variable(A_LOPG(ast)), 1);
1411   argt = A_ARGSG(ast);
1412   nargs = A_ARGCNTG(ast);
1413   assert(nargs == 2, "transform_ptr: something is wrong", 2, ast);
1414   src = ARGT_ARG(argt, 1);
1415   dest = ARGT_ARG(argt, 0);
1416 
1417   anydist = 0;
1418   nontrivial = 0;
1419   remove_section(src, &newsrc, &sectast, &src_sptr, &anydist, &nontrivial);
1420 
1421 /* sectast points to subtree with A_SHAPE() != 0.
1422  * src_sptr is the section sptr */
1423 again:
1424   if (A_TYPEG(dest) == A_ID) {
1425     dest_sptr = A_SPTRG(dest);
1426   } else if (A_TYPEG(dest) == A_MEM) {
1427     dest_sptr = A_SPTRG(A_MEMG(dest));
1428   } else if (A_TYPEG(dest) == A_SUBSCR) { /* ptr reshape */
1429     ptr_reshape_dest = dest;
1430     dest = A_LOPG(dest);
1431     goto again;
1432   } else
1433     assert(0, "transform_ptr: bad pointer assignment target", ast, 3);
1434 
1435   /* don't let scalar pointer point to distributed array */
1436   if (DTY(DTYPEG(dest_sptr)) != TY_ARRAY && DTY(DTYPEG(src_sptr)) == TY_ARRAY &&
1437       anydist)
1438     error(155, 4, STD_LINENO(std), SYMNAME(dest_sptr),
1439           "- scalar POINTER associated with distributed object is unsupported");
1440 
1441   DESCUSEDP(src_sptr, 1);
1442   DESCUSEDP(dest_sptr, 1);
1443   if (!POINTERG(dest_sptr))
1444     error(155, 3, STD_LINENO(std), "must be POINTER", SYMNAME(dest_sptr));
1445 
1446   array_desc = 0;
1447   section = 0;
1448   dtype = DDTG(DTYPEG(dest_sptr));
1449   if (DTY(dtype) == TY_PTR && DTY(DTY(dtype + 1)) == TY_PROC &&
1450       STYPEG(src_sptr) == ST_PROC) {
1451     /* No array descriptor for procedure name target in a
1452      * procedure pointer assignment.
1453      */
1454   } else if (ptr_reshape_dest && bnds_remap_list(ptr_reshape_dest) &&
1455              simply_contiguous(src)) {
1456     emit_alnd_secd(dest_sptr, dest, TRUE, std, ptr_reshape_dest);
1457   } else if (A_TYPEG(sectast) == A_SUBSCR && A_SHAPEG(sectast) != 0) {
1458     int d;
1459     array_desc = check_member(dest, mk_id(SDSCG(dest_sptr)));
1460     d = make_sec_from_ast(sectast, std, std, array_desc, 0);
1461     /* if this was the whole array, we use the descriptor
1462      * of the source, not target */
1463     if (d == DESCRG(src_sptr)) {
1464       array_desc = check_member(sectast, mk_id(d));
1465     }
1466     section = 1;
1467   } else if (A_TYPEG(src) == A_MEM && A_SHAPEG(A_PARENTG(src))) {
1468     section = 1;
1469     array_desc = DESCRG(src_sptr);
1470     array_desc = check_member(sectast, mk_id(array_desc));
1471   } else {
1472     if (POINTERG(src_sptr) && A_SHAPEG(sectast)) {
1473       array_desc = SDSCG(src_sptr); /* section descriptor */
1474       array_desc = check_member(sectast, mk_id(array_desc));
1475     } else if (DTY(DTYPEG(src_sptr)) == TY_ARRAY && A_SHAPEG(sectast)) {
1476       array_desc = DESCRG(src_sptr);
1477       array_desc = check_member(sectast, mk_id(array_desc));
1478     } else {
1479       array_desc = 0;
1480     }
1481   }
1482 
1483   nargs = nontrivial ? 7 : 5;
1484   if (A_TYPEG(ptr_reshape_dest) == A_SUBSCR) {
1485     /* ptr reshape
1486      * compute number of additional args
1487      */
1488     int shd, nd, asd, i, sub;
1489 
1490     if (ptr_reshape_dest && bnds_remap_list(ptr_reshape_dest) &&
1491         simply_contiguous(src)) {
1492       newsrc = first_element(src);
1493     }
1494     shd = A_SHAPEG(ptr_reshape_dest);
1495     nd = SHD_NDIM(shd);
1496     nargs = 8; /* num dimensions */
1497     asd = A_ASDG(ptr_reshape_dest);
1498     for (i = 0; i < nd; ++i) {
1499       sub = ASD_SUBS(asd, i);
1500       if (A_LBDG(sub))
1501         ++nargs; /* lowerbound */
1502       if (A_UPBDG(sub))
1503         ++nargs; /* upperbound */
1504     }
1505   }
1506   newargt = mk_argt(nargs);
1507   ARGT_ARG(newargt, 0) = ARGT_ARG(argt, 0);
1508   /* this will need some changes when dest_sptr is a derived type member */
1509   if ((STYPEG(dest_sptr) == ST_VAR || STYPEG(dest_sptr) == ST_ARRAY) &&
1510       DSCASTG(dest_sptr)) {
1511     ARGT_ARG(newargt, 1) = DSCASTG(dest_sptr);
1512   } else {
1513     SPTR sdsc = SDSCG(dest_sptr);
1514     if (sdsc) {
1515       ARGT_ARG(newargt, 1) = check_member(dest, mk_id(sdsc));
1516     } else {
1517       ARGT_ARG(newargt, 1) = astb.bnd.zero;
1518     }
1519   }
1520   ARGT_ARG(newargt, 2) = newsrc;
1521   if (array_desc)
1522     ARGT_ARG(newargt, 3) = array_desc;
1523   else
1524     ARGT_ARG(newargt, 3) =
1525         mk_isz_cval(dtype_to_arg(DTYPEG(dest_sptr)), astb.bnd.dtype);
1526 
1527   /* section flag argument */
1528   if (!section)
1529     ARGT_ARG(newargt, 4) = astb.bnd.zero;
1530   else
1531     ARGT_ARG(newargt, 4) = astb.bnd.one;
1532 
1533   if (nontrivial) {
1534     /* add datatype argument */
1535     ARGT_ARG(newargt, 5) =
1536         mk_isz_cval(size_of(DDTG(DTYPEG(dest_sptr))), astb.bnd.dtype);
1537     ARGT_ARG(newargt, 6) =
1538         mk_isz_cval(ty_to_lib[DTYG(DTYPEG(dest_sptr))], astb.bnd.dtype);
1539   }
1540 
1541   if (A_TYPEG(ptr_reshape_dest) == A_SUBSCR) {
1542     /* ptr reshape
1543      * generate additional args
1544      */
1545     int shd, nd, asd, i, sub, val[4] = {0, 0, 0, 0}, tmp, ast, flag;
1546     int lbast, ubast, argcnt = 7;
1547 
1548     if (!nontrivial) {
1549       ARGT_ARG(newargt, 5) = astb.bnd.zero;
1550       ARGT_ARG(newargt, 6) = astb.bnd.zero;
1551     }
1552     shd = A_SHAPEG(ptr_reshape_dest);
1553     nd = SHD_NDIM(shd);
1554     val[1] = nd;
1555     tmp = getcon(val, DT_INT4);
1556     ARGT_ARG(newargt, argcnt++) = mk_cnst(tmp); /* num dimensions */
1557     asd = A_ASDG(ptr_reshape_dest);
1558     for (i = 0; i < nd; ++i) {
1559       sub = ASD_SUBS(asd, i);
1560       lbast = A_LBDG(sub);
1561       ubast = A_UPBDG(sub);
1562       if (lbast) {
1563         ARGT_ARG(newargt, argcnt++) = lbast; /* lowerbound */
1564       }
1565       if (ubast) {
1566         ARGT_ARG(newargt, argcnt++) = ubast; /* upperbound */
1567       }
1568     }
1569   }
1570   A_ARGCNTP(ast, nargs);
1571   A_ARGSP(ast, newargt);
1572 }
1573 
1574 static int
insert_forall_comm(int ast)1575 insert_forall_comm(int ast)
1576 {
1577   /* go through and add the communication & rewrite the AST */
1578   int std;
1579   int l, r, d, o;
1580   int l1, l2, l3;
1581   int a, a1;
1582   int i, nargs, argt, j;
1583   int arref;
1584   int header;
1585   int forall;
1586   int rhs_is_dist;
1587   int sptr;
1588   int asd, ndim;
1589   int subs[7];
1590   int nd, nd1, nd2;
1591   int src;
1592   int cnt;
1593   int commstd, commasn, comm;
1594   int lhs;
1595   int newast;
1596 
1597   a = ast;
1598   if (!a)
1599     return a;
1600   std = comminfo.std;
1601   forall = STD_AST(std);
1602   switch (A_TYPEG(ast)) {
1603   /* expressions */
1604   case A_BINOP:
1605     o = A_OPTYPEG(a);
1606     d = A_DTYPEG(a);
1607     l = insert_forall_comm(A_LOPG(a));
1608     r = insert_forall_comm(A_ROPG(a));
1609     return mk_binop(o, l, r, d);
1610   case A_UNOP:
1611     o = A_OPTYPEG(a);
1612     d = A_DTYPEG(a);
1613     l = insert_forall_comm(A_LOPG(a));
1614     return mk_unop(o, l, d);
1615   case A_CONV:
1616     d = A_DTYPEG(a);
1617     l = insert_forall_comm(A_LOPG(a));
1618     return mk_convert(l, d);
1619   case A_PAREN:
1620     d = A_DTYPEG(a);
1621     l = insert_forall_comm(A_LOPG(a));
1622     return mk_paren(l, d);
1623   case A_MEM:
1624     r = A_MEMG(a);
1625     d = A_DTYPEG(r);
1626     l = insert_forall_comm(A_PARENTG(a) /*, forall, std*/);
1627     return mk_member(l, r, d);
1628   case A_SUBSTR:
1629     return a;
1630   case A_INTR:
1631   case A_FUNC:
1632     nargs = A_ARGCNTG(a);
1633     argt = A_ARGSG(a);
1634     for (i = 0; i < nargs; ++i) {
1635       ARGT_ARG(argt, i) = insert_forall_comm(ARGT_ARG(argt, i));
1636     }
1637     /* remove cshift and eoshift, since they become overlap comm */
1638     if (A_OPTYPEG(a) == I_CSHIFT || A_OPTYPEG(a) == I_EOSHIFT) {
1639       src = ARGT_ARG(argt, 0);
1640       nd = A_OPT1G(comminfo.forall);
1641       cnt = FT_NRT(nd) - 2;
1642       commstd = glist(FT_RTL(nd), cnt);
1643       commasn = STD_AST(commstd);
1644       comm = A_SRCG(commasn);
1645       assert(A_TYPEG(comm) == A_HOVLPSHIFT,
1646              "insert_forall_comm: CSHIFT/EOSHIFT must be overlap", a, 2);
1647       nd2 = A_OPT1G(comm);
1648       FT_SHIFT_TYPE(nd2) = A_OPTYPEG(a);
1649       if (A_OPTYPEG(a) == I_EOSHIFT)
1650         FT_SHIFT_BOUNDARY(nd2) = ARGT_ARG(argt, 2);
1651       return src;
1652     }
1653     return a;
1654   case A_CNST:
1655   case A_CMPLXC:
1656     return a;
1657   case A_ID:
1658     return a;
1659   case A_SUBSCR:
1660     if (A_SHAPEG(a))
1661       return a;
1662     sptr = sptr_of_subscript(a);
1663     if (!ALIGNG(sptr)) {
1664       int parent;
1665       parent = A_LOPG(a);
1666       asd = A_ASDG(a);
1667       ndim = ASD_NDIM(asd);
1668       for (i = 0; i < ndim; i++) {
1669         subs[i] = insert_forall_comm(ASD_SUBS(asd, i));
1670       }
1671       parent = insert_forall_comm(parent);
1672       return mk_subscr(parent, subs, ndim, A_DTYPEG(a));
1673     }
1674 
1675     if (!A_SHAPEG(a) && is_array_element_in_forall(a, std)) {
1676       nd = A_OPT1G(forall);
1677       header = FT_HEADER(nd);
1678       /*             a = emit_get_scalar(a, header); */
1679       rhs_is_dist = FALSE;
1680       a = insert_comm_before(header, a, &rhs_is_dist, FALSE);
1681       return a;
1682     }
1683     /* don't generate communication iff lhs == rhs */
1684     lhs = A_DESTG(A_IFSTMTG(forall));
1685     if (lhs == a)
1686       return a;
1687 
1688     arref = A_RFPTRG(a);
1689 
1690     switch (ARREF_CLASS(arref)) {
1691     case NO_COMM:
1692       break;
1693     case OVERLAP:
1694       emit_overlap(a);
1695       break;
1696     case COPY_SECTION:
1697       a = emit_copy_section(a, std);
1698       break;
1699     case GATHER:
1700       a = emit_gatherx(a, std, FALSE);
1701       break;
1702     case IRREGULAR:
1703       /*		a = emit_irregular(a, std);*/
1704       break;
1705     default:
1706       interr("insert_forall_comm: unknown comm tag", std, 2);
1707       return 0;
1708     }
1709 
1710     return a;
1711 
1712   default:
1713     interr("insert_forall_comm: unknown expression", std, 2);
1714     return 0;
1715   }
1716 }
1717 
1718 static void
init_opt_tables(void)1719 init_opt_tables(void)
1720 {
1721   cs_table.is_used_lhs = FALSE;
1722 }
1723 
1724 /* return TRUE if the LHS variable can be used for this RHS communication
1725  * target */
1726 static LOGICAL
is_use_lhs(int a,LOGICAL sameidx,LOGICAL independent,int std)1727 is_use_lhs(int a, LOGICAL sameidx, LOGICAL independent, int std)
1728 {
1729   int lhs;
1730   int sptr, sptr_lhs;
1731   int list;
1732   int src;
1733   int aa, nextaa, alhs, nextalhs;
1734 
1735   if (cs_table.is_used_lhs)
1736     return FALSE;
1737   if (A_IFEXPRG(comminfo.forall))
1738     return FALSE;
1739   lhs = comminfo.sub;
1740   list = A_LISTG(comminfo.forall);
1741   src = A_SRCG(A_IFSTMTG(comminfo.forall));
1742   if (DTY(A_DTYPEG(a)) != DTY(A_DTYPEG(lhs)))
1743     return FALSE;
1744   if (sameidx && !is_same_number_of_idx(lhs, a, list))
1745     return FALSE;
1746   if (!independent && expr_dependent(a, lhs, std, std))
1747     return FALSE;
1748 
1749   cs_table.is_used_lhs = TRUE;
1750   return TRUE;
1751 } /* is_use_lhs */
1752 
1753 /* this is used to decide if section created for forall
1754  * to check whether index is out of bounds .
1755  * This does not occur iff:
1756  *     1-) forall from array-assignment or where statement
1757  *     2-) forall without mask
1758  */
1759 static LOGICAL
is_bogus_forall(int forall)1760 is_bogus_forall(int forall)
1761 {
1762   int mask;
1763 
1764   if (A_ARRASNG(forall))
1765     return FALSE;
1766   mask = A_IFEXPRG(forall);
1767   if (!mask)
1768     return FALSE;
1769   return TRUE;
1770 }
1771 
1772 static int
emit_copy_section(int a,int std)1773 emit_copy_section(int a, int std)
1774 {
1775   int ast;
1776   int astnew;
1777   int asn;
1778   int tempast;
1779   int tempast0;
1780   int i, j;
1781   int src, dest, lop;
1782   int forall;
1783   int list;
1784   int lhs;
1785   int allocstd;
1786   int startstd;
1787   int commstd;
1788   int sectlstd;
1789   int sectrstd;
1790   int cp, xfer;
1791   int nd;
1792   int sptr;
1793   int allocast;
1794   int order2[7];
1795   int no;
1796   int header;
1797   int lhssec;
1798   int sectflag;
1799   LOGICAL independent;
1800 
1801   forall = STD_AST(std);
1802   lhs = comminfo.sub;
1803   list = A_LISTG(forall);
1804   nd = A_OPT1G(forall);
1805   header = FT_HEADER(nd);
1806 
1807   sectflag = 0;
1808   if (is_bogus_forall(forall))
1809     sectflag |= BOGUSFLAG;
1810 
1811   if (!is_ordered(lhs, a, list, order2, &no)) {
1812     tempast = emit_permute_section(a, std);
1813     return tempast;
1814   }
1815 
1816   open_dynpragma(std, STD_LINENO(std));
1817   independent = (flg.x[19] & 0x100) != 0;
1818   close_pragma();
1819   sectlstd = 0;
1820   lhssec = 0;
1821   if (is_use_lhs(a, TRUE, independent, std)) {
1822     sptr = sptr_of_subscript(comminfo.sub);
1823     tempast = lhs;
1824     lhssec = tempast = forall_2_sec(tempast, forall);
1825     sectlstd = make_sec_ast(tempast, std, 0, sectflag);
1826     nd = A_OPT1G(forall);
1827     FT_SECTL(nd) = sectlstd;
1828   }
1829 
1830   sptr = temp_copy_section(std, forall, lhs, a,
1831                            DTY(DTYPEG(sptr_of_subscript(a)) + 1), &allocast);
1832   tempast0 = tempast = copy_section_temp_before(sptr, a, forall);
1833 
1834   allocstd = add_stmt_before(allocast, header);
1835   A_STDP(allocast, allocstd);
1836   nd = A_OPT1G(forall);
1837   plist(FT_RTL(nd), allocstd);
1838   FT_NRT(nd)++;
1839 
1840   tempast = forall_2_sec(tempast, forall);
1841   sectlstd = make_sec_ast(tempast, std, allocstd, sectflag);
1842 
1843   astnew = forall_2_sec(a, forall);
1844   sectrstd = make_sec_ast(astnew, std, 0, sectflag);
1845 
1846   asn = mk_stmt(A_ASN, astb.bnd.dtype);
1847   ast = new_node(A_HCOPYSECT);
1848   A_SRCP(ast, astnew);
1849   A_SDESCP(ast, 0);
1850   A_DESTP(ast, tempast);
1851   A_DDESCP(ast, 0);
1852   nd = mk_ftb();
1853   FT_STD(nd) = std;
1854   FT_FORALL(nd) = forall;
1855   FT_CCOPY_LHS(nd) = lhs;
1856   FT_CCOPY_RHS(nd) = a;
1857   FT_CCOPY_TSPTR(nd) = sptr;
1858   FT_CCOPY_SECTR(nd) = sectrstd;
1859   FT_CCOPY_SECTL(nd) = sectlstd;
1860   FT_CCOPY_ALLOC(nd) = allocstd;
1861   FT_CCOPY_FREE(nd) = header;
1862   FT_CCOPY_REUSE(nd) = 0;
1863   FT_CCOPY_USELHS(nd) = 0;
1864   FT_CCOPY_SAME(nd) = 0;
1865   FT_CCOPY_LHSSEC(nd) = lhssec;
1866   FT_CCOPY_NOTLHS(nd) = (lhssec) ? 0 : 1;
1867   A_OPT1P(ast, nd);
1868   cp = sym_get_cp();
1869   FT_CCOPY_OUT(nd) = cp;
1870   dest = mk_id(cp);
1871   A_DESTP(asn, dest);
1872   A_SRCP(asn, ast);
1873 
1874   commstd = add_stmt_before(asn, header);
1875   A_STDP(asn, commstd);
1876   nd = A_OPT1G(forall);
1877   plist(FT_RTL(nd), commstd);
1878   FT_NRT(nd)++;
1879 
1880   asn = mk_stmt(A_ASN, astb.bnd.dtype);
1881   ast = new_node(A_HCSTART);
1882   lop = mk_id(cp);
1883   A_LOPP(ast, lop);
1884   A_SRCP(ast, astnew);
1885   A_DESTP(ast, tempast);
1886   nd = mk_ftb();
1887   FT_STD(nd) = std;
1888   FT_FORALL(nd) = forall;
1889   FT_CSTART_COMM(nd) = commstd;
1890   FT_CSTART_RHS(nd) = a;
1891   FT_CSTART_USEDSTD(nd) = comminfo.usedstd;
1892   xfer = sym_get_xfer();
1893   FT_CSTART_OUT(nd) = xfer;
1894   FT_CSTART_SECTR(nd) = sectrstd;
1895   FT_CSTART_SECTL(nd) = sectlstd;
1896   FT_CSTART_ALLOC(nd) = allocstd;
1897   FT_CSTART_FREE(nd) = header;
1898   FT_CSTART_REF(nd) = tempast0;
1899   FT_CSTART_TYPE(nd) = A_HCOPYSECT;
1900   FT_CSTART_REUSE(nd) = 0;
1901   FT_CSTART_INVMVD(nd) = 0;
1902   FT_CSTART_USELHS(nd) = 0;
1903   FT_CSTART_SAME(nd) = 0;
1904   A_OPT1P(ast, nd);
1905   dest = mk_id(xfer);
1906   A_DESTP(asn, dest);
1907   A_SRCP(asn, ast);
1908 
1909   startstd = add_stmt_before(asn, header);
1910   A_STDP(asn, startstd);
1911   nd = A_OPT1G(forall);
1912   plist(FT_RTL(nd), startstd);
1913   FT_NRT(nd)++;
1914 
1915   return a;
1916 }
1917 
1918 /*
1919  * pghpf_permute_section(void *rb, void *sb, section *rs, section *ss, ...)
1920  *
1921  *chdr *
1922  * pghpf_comm_permute(void *rb, void *sb, section *rs, section *ss, ...)
1923  * ... = int x1, .., int xN,  where N = section rank
1924  * The axis arguments (x1, .., xN) is a permutation of the integers 1..N.
1925  * The permutation applies to the dimensions on the right hand side (like
1926  * a gather operation).
1927  * For example:
1928  *	forall (i=1:2, j=1:4, k=1:5) a(i,3,j,k) = b(k,i,j)
1929  *	pghpf_permute_section(a, b, a$s, b$s, 2, 3, 1)
1930  */
1931 static int
emit_permute_section(int a,int std)1932 emit_permute_section(int a, int std)
1933 {
1934   int sptr, sptrast;
1935   int asd;
1936   int ndim;
1937   int ast1;
1938   int subs[7];
1939   int astnew;
1940   int tempast, tempast0;
1941   int argt, nargs;
1942   int i, j;
1943   int src, dest;
1944   int forall;
1945   int list;
1946   int arref;
1947   int lhs;
1948   LOGICAL use_lhs;
1949   int order2[7];
1950   int no;
1951   int func;
1952   int new_a;
1953   int nd, header;
1954   int sectflag;
1955 
1956   forall = STD_AST(std);
1957   nd = A_OPT1G(forall);
1958   header = FT_HEADER(nd);
1959   lhs = comminfo.sub;
1960   list = A_LISTG(forall);
1961   asd = A_ASDG(comminfo.sub);
1962   ndim = ASD_NDIM(asd);
1963 
1964   sectflag = 0;
1965   if (is_bogus_forall(forall))
1966     sectflag |= BOGUSFLAG;
1967 
1968   if (cs_table.is_used_lhs) {
1969     use_lhs = FALSE;
1970   } else {
1971     use_lhs = is_use_lhs_final(a, forall, TRUE, FALSE, std);
1972   }
1973   if (use_lhs) {
1974     sptr = sptr_of_subscript(comminfo.sub);
1975     sptrast = A_LOPG(comminfo.sub);
1976     tempast = lhs;
1977     cs_table.is_used_lhs = TRUE;
1978   } else {
1979     new_a = eliminate_extra_idx(lhs, a, forall);
1980     sptr = get_temp_copy_section(forall, lhs, new_a, header, header, a);
1981     sptrast = mk_id(sptr);
1982     tempast0 = tempast = copy_section_temp_before(sptr, new_a, forall);
1983   }
1984 
1985   if (is_ordered(tempast, a, list, order2, &no)) {
1986     assert(0, "emit_permute_section: something is wrong", 3, a);
1987   }
1988 
1989   tempast = forall_2_sec(tempast, forall);
1990   dest = make_sec_from_ast(tempast, header, header, 0, sectflag);
1991 
1992   astnew = forall_2_sec(a, forall);
1993   src = make_sec_from_ast(astnew, header, header, 0, sectflag);
1994 
1995   nargs = 4 + no;
1996   func = mk_id(sym_mkfunc(mkRteRtnNm(RTE_permute_section), DT_NONE));
1997   NODESCP(A_SPTRG(func), 1);
1998 
1999   argt = mk_argt(nargs);
2000   ARGT_ARG(argt, 0) = sptrast;
2001   ARGT_ARG(argt, 1) = A_LOPG(a);
2002 
2003   ARGT_ARG(argt, 2) = check_member(sptrast, mk_id(dest));
2004   ARGT_ARG(argt, 3) = check_member(A_LOPG(a), mk_id(src));
2005 
2006   for (i = 0; i < no; i++)
2007     ARGT_ARG(argt, 4 + i) = mk_isz_cval(order2[i] + 1, astb.bnd.dtype);
2008 
2009   ast1 = mk_stmt(A_CALL, 0);
2010   A_LOPP(ast1, func);
2011   A_ARGCNTP(ast1, nargs);
2012   A_ARGSP(ast1, argt);
2013   add_stmt_before(ast1, header);
2014 
2015   /* temp will be referenced after communication as follows  */
2016   if (use_lhs)
2017     return lhs; /* forall is totally removed no need to access */
2018   else {
2019     process_rhs_sub(tempast0);
2020     return tempast0;
2021   }
2022 }
2023 
2024 /* This routine finds out the dimension of sptr.
2025  * It takes subscript a(f(i),5,f(j)). It eliminates scalar dimension.
2026  * It makes an ast for reference sptr.
2027  *  a(f(i),5,f(j)) --> sptr(i,j)
2028  */
2029 static int
copy_section_temp_before(int sptr,int rhs,int forall)2030 copy_section_temp_before(int sptr, int rhs, int forall)
2031 {
2032   int subs[7];
2033   int k, j;
2034   int asd;
2035   int ndim;
2036   int astnew;
2037   int astli;
2038   int nidx;
2039   int list;
2040 
2041   asd = A_ASDG(rhs);
2042   ndim = ASD_NDIM(asd);
2043   list = A_LISTG(forall);
2044 
2045   j = 0;
2046   /* array will be referenced after communication as follows  */
2047   for (k = 0; k < ndim; ++k) {
2048     astli = 0;
2049     nidx = 0;
2050     search_forall_idx(ASD_SUBS(asd, k), list, &astli, &nidx);
2051     if (nidx == 1 && astli) {
2052       /* include this dimension */
2053       subs[j] = mk_id(ASTLI_SPTR(astli));
2054       j++;
2055     }
2056   }
2057   assert(j == rank_of_sym(sptr), "copy_section_temp_before: rank mismatched",
2058          sptr, 4);
2059   astnew = mk_subscr(mk_id(sptr), subs, j, DTY(DTYPEG(sptr) + 1));
2060   return astnew;
2061 }
2062 
2063 /* It takes  forall(i=,j=,k=) a(i,j,k) =  b(j,i) , return a(i,j,1) */
2064 static int
eliminate_extra_idx(int lhs,int a,int forall)2065 eliminate_extra_idx(int lhs, int a, int forall)
2066 {
2067   int subs[7];
2068   int k, i;
2069   int asd;
2070   int ndim;
2071   int asd1;
2072   int ndim1;
2073   int astnew;
2074   int astli;
2075   int nidx;
2076   int list;
2077   LOGICAL found;
2078   int sptr;
2079 
2080   sptr = sptr_of_subscript(lhs);
2081   asd = A_ASDG(lhs);
2082   ndim = ASD_NDIM(asd);
2083   list = A_LISTG(forall);
2084 
2085   asd1 = A_ASDG(a);
2086   ndim1 = ASD_NDIM(asd1);
2087 
2088   for (k = 0; k < ndim; ++k) {
2089     subs[k] = ASD_SUBS(asd, k);
2090     astli = 0;
2091     nidx = 0;
2092     search_forall_idx(ASD_SUBS(asd, k), list, &astli, &nidx);
2093     if (nidx == 1 && astli) {
2094       found = FALSE;
2095       for (i = 0; i < ndim1; ++i)
2096         if (is_name_in_expr(ASD_SUBS(asd1, i), ASTLI_SPTR(astli)))
2097           found = TRUE;
2098       if (!found)
2099         subs[k] = astb.i1;
2100     }
2101   }
2102   astnew = mk_subscr(mk_id(sptr), subs, ndim, DTY(DTYPEG(sptr) + 1));
2103   return astnew;
2104 }
2105 
2106 /* This  routine is to find out how index is permuted at result
2107  * based on array. used by scatterx/gatherx to perform axis ordering.
2108  * It creates axis array for indirection subscripts.
2109  *
2110  * For an indirectly indexed dimension, the axis vector indicates which
2111  * combination of the index variables is used to subscript the index
2112  * vector.  The size of the axis vector is equal to the rank of the index
2113  * vector.  If the order of the index variables is not permuted, i.e. the
2114  * axis vector is (/1, 2, 3, .. N/), then the corresponding permuted bit
2115  * can be zeroed and the axis argument omitted.
2116 
2117  * For a directly indexed dimension, the axis argument indicates which
2118  * index variable is used to subscript that dimension.  If the axis
2119  * number matches the dimension number, then the corresponding permuted
2120  * bit can be zeroed and the axis argument omitted.
2121  */
2122 static void
permute_axis(int result,int array,int list,int permute[7])2123 permute_axis(int result, int array, int list, int permute[7])
2124 {
2125 
2126   int order2[7];
2127   int no;
2128   int subs[7];
2129   int newresult;
2130   int astli, nidx;
2131   int asd, ndim;
2132   int i, j;
2133   int per[7], per1[7];
2134   int nper1;
2135 
2136   for (i = 0; i < 7; i++)
2137     permute[i] = 0;
2138 
2139   /* find out for indirection array */
2140   asd = A_ASDG(result);
2141   ndim = ASD_NDIM(asd);
2142   for (i = 0; i < ndim; i++) {
2143     subs[i] = ASD_SUBS(asd, i);
2144     if (is_vector_subscript(subs[i], list)) {
2145       compute_permute(array, subs[i], list, per);
2146       if (is_permuted(subs[i], per, per1, &nper1))
2147         permute[i] = put_data(per1, nper1);
2148       subs[i] = mk_isz_cval(1, astb.bnd.dtype);
2149     }
2150   }
2151 
2152   /* find out after eliminating indirections */
2153 
2154   newresult = mk_subscr(A_LOPG(result), subs, ndim, A_DTYPEG(result));
2155   compute_permute(array, newresult, list, per);
2156 
2157   for (i = 0; i < ndim; i++) {
2158     subs[i] = ASD_SUBS(asd, i);
2159     if (per[i] == 0)
2160       continue;
2161     if (is_vector_subscript(subs[i], list))
2162       continue;
2163     permute[i] = mk_isz_cval(per[i], astb.bnd.dtype);
2164   }
2165 }
2166 
2167 static void
init_pertbl(void)2168 init_pertbl(void)
2169 {
2170   pertbl.size = 200;
2171   NEW(pertbl.base, TABLE, pertbl.size);
2172   pertbl.avl = 0;
2173 }
2174 
2175 static void
free_pertbl(void)2176 free_pertbl(void)
2177 {
2178   FREE(pertbl.base);
2179   pertbl.base = NULL;
2180 }
2181 
2182 static int
get_pertbl(void)2183 get_pertbl(void)
2184 {
2185   int nd;
2186 
2187   nd = pertbl.avl++;
2188   NEED(pertbl.avl, pertbl.base, TABLE, pertbl.size, pertbl.size + 100);
2189   if (nd > SPTR_MAX || pertbl.base == NULL)
2190     errfatal(7);
2191   return nd;
2192 }
2193 
2194 static int
put_data(int permute[7],int no)2195 put_data(int permute[7], int no)
2196 {
2197   ADSC *ad;
2198   int dtype;
2199   int i, j;
2200   int arr;
2201   LOGICAL found;
2202 
2203   assert(no, "put_data: something is wrong", no, 2);
2204 
2205   /* find about whether same axis array created before */
2206   for (i = 0; i < pertbl.avl; i++) {
2207     if (pertbl.base[i].f2 == no) {
2208       found = TRUE;
2209       for (j = 0; j < no; j++) {
2210         if (permute[j] != pertbl.base[i].f4[j])
2211           found = FALSE;
2212       }
2213       if (found)
2214         return mk_id(pertbl.base[i].f1);
2215     }
2216   }
2217 
2218   arr = sym_get_array("axis", 0, DT_INT, 1);
2219 
2220   i = get_pertbl();
2221   pertbl.base[i].f1 = arr;
2222   pertbl.base[i].f2 = no;
2223   for (j = 0; j < no; j++)
2224     pertbl.base[i].f4[j] = permute[j];
2225 
2226   ALLOCP(arr, 0);
2227   dtype = DTYPEG(arr);
2228   ad = AD_DPTR(dtype);
2229   AD_LWAST(ad, 0) = AD_LWBD(ad, 0) = 0;
2230   AD_NUMELM(ad) = AD_UPBD(ad, 0) = AD_UPAST(ad, 0) = AD_EXTNTAST(ad, 0) =
2231       mk_isz_cval(no, astb.bnd.dtype);
2232   AD_DEFER(ad) = 0;
2233   AD_NOBOUNDS(ad) = 0;
2234 
2235   dinit_put(DINIT_LOC, (INT)arr);
2236 
2237   dtype = DDTG(DTYPEG(arr));
2238 
2239   for (i = 0; i < no; i++) {
2240     if (DTY(DT_INT) == TY_INT8) {
2241       INT val[2];
2242       val[0] = 0;
2243       val[1] = permute[i];
2244       dinit_put(dtype, getcon(val, DT_INT8));
2245     } else
2246       dinit_put(dtype, permute[i]);
2247   }
2248   dinit_put(DINIT_END, 0);
2249   DINITP(arr, 1);
2250   sym_is_refd(arr);
2251 
2252   return mk_id(arr);
2253 }
2254 
2255 /*This routine calculates permute of rhs based on lhs
2256  * for example, lhs(i,2, j,k) rhs(3,k,i,j) then
2257  * permute will be /0,3,1,2/
2258  */
2259 static void
compute_permute(int lhs,int rhs,int list,int order[7])2260 compute_permute(int lhs, int rhs, int list, int order[7])
2261 {
2262   int asd, ndim;
2263   int i, j;
2264   int count, count1;
2265   int order1[7];
2266   LOGICAL found;
2267   int astli, nidx;
2268   int iloc;
2269 
2270   for (j = 0; j < 7; j++)
2271     order[j] = 0;
2272 
2273   assert(!is_duplicate(lhs, list), "compute_permute:something is wrong", lhs,
2274          3);
2275 
2276   /* rhs */
2277   asd = A_ASDG(rhs);
2278   ndim = ASD_NDIM(asd);
2279   count = 0;
2280   for (j = 0; j < ndim; ++j) {
2281     order[j] = 0;
2282     astli = 0;
2283     nidx = 0;
2284     search_forall_idx(ASD_SUBS(asd, j), list, &astli, &nidx);
2285     if (nidx == 1 && astli) {
2286       order[j] = ASTLI_SPTR(astli);
2287       count++;
2288     }
2289   }
2290 
2291   /* lhs */
2292   asd = A_ASDG(lhs);
2293   ndim = ASD_NDIM(asd);
2294   count1 = 0;
2295   for (j = 0; j < ndim; ++j) {
2296     astli = 0;
2297     nidx = 0;
2298     search_forall_idx(ASD_SUBS(asd, j), list, &astli, &nidx);
2299     if (nidx == 1 && astli) {
2300       order1[count1] = ASTLI_SPTR(astli);
2301       count1++;
2302     }
2303   }
2304 
2305   asd = A_ASDG(rhs);
2306   ndim = ASD_NDIM(asd);
2307   for (j = 0; j < ndim; j++) {
2308     if (order[j] == 0)
2309       continue;
2310     found = FALSE;
2311     for (i = 0; i < count1; i++) {
2312       if (order1[i] == order[j]) {
2313         found = TRUE;
2314         iloc = i + 1;
2315       }
2316     }
2317     assert(found, "compute_permute:something is wrong", lhs, 3);
2318     order[j] = iloc;
2319   }
2320 }
2321 
2322 static LOGICAL
is_permuted(int array,int per[7],int per1[7],int * nper1)2323 is_permuted(int array, int per[7], int per1[7], int *nper1)
2324 {
2325   int asd;
2326   int ndim;
2327   int count;
2328   int i;
2329   LOGICAL permuted;
2330 
2331   assert(A_TYPEG(array) == A_SUBSCR, "is_permuted: something is wrong", array,
2332          2);
2333 
2334   asd = A_ASDG(array);
2335   ndim = ASD_NDIM(asd);
2336   count = 0;
2337   for (i = 0; i < ndim; i++) {
2338     if (per[i]) {
2339       per1[count] = per[i];
2340       count++;
2341     }
2342   }
2343 
2344   permuted = FALSE;
2345   for (i = 0; i < count; i++) {
2346     if (per1[i] != (i + 1))
2347       permuted = TRUE;
2348   }
2349 
2350   *nper1 = count;
2351   return permuted;
2352 }
2353 
2354 static void
emit_sum_scatterx(int std)2355 emit_sum_scatterx(int std)
2356 {
2357   int sptr;
2358   int asd1;
2359   int ndim1;
2360   int ast1;
2361   int subs[7];
2362   int astnew;
2363   int tempast, tempast0;
2364   int argt, nargs;
2365   int i, j;
2366   int forall;
2367   int list;
2368   int vflag, pflag;
2369   int vdim, pdim;
2370   int nvec;
2371   int secv;
2372   ADSC *ad;
2373   int glb, gub;
2374   int asn;
2375   int mask;
2376   int result_sec, base_sec, array_sec, mask_sec;
2377   int result, newresult;
2378   int base;
2379   int array;
2380   int func;
2381   int permute[7];
2382   int npermute;
2383   int ndim, asd;
2384   int nv;
2385   int newbase;
2386   char name[40];
2387   int function, operator;
2388   int sectflag;
2389 
2390   forall = STD_AST(std);
2391   asn = A_IFSTMTG(forall);
2392 
2393   sectflag = 0;
2394 
2395   mask = comminfo.scat.mask;
2396   result = comminfo.scat.result;
2397   base = comminfo.scat.base;
2398   array = comminfo.scat.array;
2399   operator= comminfo.scat.operator;
2400   function = comminfo.scat.function;
2401   if (!base)
2402     return;
2403 
2404   if (!comminfo.scat.array_simple) {
2405     int sptrtemp, newforall, newlist, asn, newstd, newarray;
2406     struct comminfo savecomminfo;
2407     sptrtemp = get_temp_forall(forall, base, std, std, 0, array);
2408     newarray = simple_reference_for_temp(sptrtemp, base, forall);
2409     /* assign temp from nonsimple array */
2410     newforall = mk_stmt(A_FORALL, 0);
2411     A_LISTP(newforall, A_LISTG(forall));
2412     A_SRCP(newforall, A_SRCG(forall));
2413     asn = mk_stmt(A_ASN, 0);
2414     A_DESTP(asn, newarray);
2415     A_SRCP(asn, array);
2416     A_IFSTMTP(newforall, asn);
2417     newstd = add_stmt_before(newforall, std);
2418     array = newarray;
2419     savecomminfo = comminfo;
2420     process_forall(newstd);
2421     transform_forall(newstd, newforall);
2422     comminfo = savecomminfo;
2423   }
2424 
2425   sptr = sptr_of_subscript(result);
2426   list = A_LISTG(forall);
2427   asd1 = A_ASDG(result);
2428   ndim1 = ASD_NDIM(asd1);
2429 
2430   vflag = 0;
2431   vdim = 0;
2432   nvec = 0;
2433   j = 0;
2434   for (i = 0; i < ndim1; i++) {
2435     subs[i] = ASD_SUBS(asd1, i);
2436     if (is_scalar(ASD_SUBS(asd1, i), list))
2437       continue;
2438     if (is_vector_subscript(ASD_SUBS(asd1, i), list)) {
2439       ad = AD_DPTR(DTYPEG(sptr));
2440       glb = AD_LWAST(ad, i);
2441       gub = AD_UPAST(ad, i);
2442       subs[i] = mk_isz_cval(1, astb.bnd.dtype);
2443       vflag |= 1 << j;
2444       vdim |= 1 << i;
2445       nvec++;
2446     }
2447     j++;
2448   }
2449 
2450   permute_axis(result, array, list, permute);
2451 
2452   npermute = 0;
2453   pflag = 0;
2454   pdim = 0;
2455   j = 0;
2456   for (i = 0; i < ndim1; i++) {
2457     if (is_scalar(ASD_SUBS(asd1, i), list))
2458       continue;
2459     if (permute[i]) {
2460       pflag |= 1 << j;
2461       pdim |= 1 << i;
2462       npermute++;
2463     }
2464     j++;
2465   }
2466 
2467   if (nvec == ndim1)
2468     result_sec = DESCRG(sptr);
2469   else {
2470     newresult = mk_subscr(A_LOPG(result), subs, ndim1, A_DTYPEG(result));
2471     astnew = forall_2_sec(newresult, forall);
2472     /* change astnew for vector dimension */
2473     ad = AD_DPTR(DTYPEG(sptr_of_subscript(astnew)));
2474     asd1 = A_ASDG(astnew);
2475     ndim1 = ASD_NDIM(asd1);
2476     for (i = 0; i < ndim1; i++) {
2477       subs[i] = ASD_SUBS(asd1, i);
2478       if (getbit(vdim, i)) {
2479         glb = AD_LWAST(ad, i);
2480         gub = AD_UPAST(ad, i);
2481         subs[i] = mk_triple(glb, gub, 0);
2482       }
2483     }
2484     astnew = mk_subscr(A_LOPG(astnew), subs, ndim1, A_DTYPEG(astnew));
2485     result_sec = make_sec_from_ast(astnew, std, std, 0, sectflag | NOTSECTFLAG);
2486   }
2487 
2488   base_sec = result_sec;
2489 
2490   tempast = forall_2_sec(array, forall);
2491   array_sec = make_sec_from_ast(tempast, std, std, 0, sectflag);
2492 
2493   if (mask) {
2494     mask = forall_2_sec(mask, forall);
2495     mask_sec = make_sec_from_ast(mask, std, std, 0, sectflag);
2496     mask = A_LOPG(mask);
2497     mask_sec = mk_id(mask_sec);
2498   } else {
2499     mask = mk_cval(1, DT_LOG);
2500     mask_sec = mk_cval(dtype_to_arg(A_DTYPEG(mask)), DT_INT);
2501   }
2502 
2503   nargs = 2 * 4 + 1 + 1 + 2 * nvec + npermute;
2504   argt = mk_argt(nargs);
2505 
2506   ARGT_ARG(argt, 0) = A_LOPG(result);
2507   DESCUSEDP(sptr, 1);
2508   ARGT_ARG(argt, 1) = A_LOPG(array);
2509   ARGT_ARG(argt, 2) = A_LOPG(base);
2510   ARGT_ARG(argt, 3) = mask;
2511 
2512   /* sections */
2513   ARGT_ARG(argt, 4) = check_member(result, mk_id(result_sec));
2514   ARGT_ARG(argt, 5) = check_member(array, mk_id(array_sec));
2515   ARGT_ARG(argt, 6) = check_member(base, mk_id(base_sec));
2516   ARGT_ARG(argt, 7) = mask_sec;
2517 
2518   ARGT_ARG(argt, 8) = mk_cval(vflag, DT_INT);
2519   ARGT_ARG(argt, 9) = mk_cval(pflag, DT_INT);
2520   j = 10;
2521   asd1 = A_ASDG(result);
2522   ndim1 = ASD_NDIM(asd1);
2523   for (i = 0; i < ndim1; i++) {
2524     if (!is_scalar(ASD_SUBS(asd1, i), list) &&
2525         is_vector_subscript(ASD_SUBS(asd1, i), list)) {
2526       astnew = forall_2_sec(ASD_SUBS(asd1, i), forall);
2527       secv = make_sec_from_ast(astnew, std, std, 0, sectflag);
2528       ARGT_ARG(argt, j) = A_LOPG(ASD_SUBS(asd1, i));
2529       j++;
2530       ARGT_ARG(argt, j) = mk_id(secv);
2531       j++;
2532     }
2533     if (permute[i]) {
2534       ARGT_ARG(argt, j) = permute[i];
2535       j++;
2536     }
2537   }
2538   ast1 = mk_stmt(A_CALL, 0);
2539 
2540   func = 0;
2541   strcpy(name, "");
2542   if (operator) {
2543     switch (operator) {
2544     case OP_ADD:
2545       strcpy(name, mkRteRtnNm(RTE_sum_scatterx));
2546       break;
2547     case OP_MUL:
2548       strcpy(name, mkRteRtnNm(RTE_product_scatterx));
2549       break;
2550     case OP_LOR:
2551       strcpy(name, mkRteRtnNm(RTE_any_scatterx));
2552       break;
2553     case OP_LAND:
2554       strcpy(name, mkRteRtnNm(RTE_all_scatterx));
2555       break;
2556     case OP_LNEQV:
2557       strcpy(name, mkRteRtnNm(RTE_parity_scatterx));
2558       break;
2559     }
2560   }
2561   if (function) {
2562     switch (function) {
2563     case I_MAX:
2564       strcpy(name, mkRteRtnNm(RTE_maxval_scatterx));
2565       break;
2566     case I_MIN:
2567       strcpy(name, mkRteRtnNm(RTE_minval_scatterx));
2568       break;
2569     case I_IAND:
2570       strcpy(name, mkRteRtnNm(RTE_iall_scatterx));
2571       break;
2572     case I_IOR:
2573       strcpy(name, mkRteRtnNm(RTE_iany_scatterx));
2574       break;
2575     case I_IEOR:
2576       strcpy(name, mkRteRtnNm(RTE_iparity_scatterx));
2577       break;
2578     }
2579   }
2580 
2581   assert(strcmp(name, ""), "emit_sum_scatterx: something is wrong", std, 2);
2582   func = mk_id(sym_mkfunc(name, DT_NONE));
2583   A_LOPP(ast1, func);
2584   A_ARGCNTP(ast1, nargs);
2585   A_ARGSP(ast1, argt);
2586   add_stmt_before(ast1, std);
2587   NODESCP(memsym_of_ast(A_LOPG(ast1)), 1);
2588   STD_DELETE(std) = 1;
2589 }
2590 
2591 static void
emit_scatterx(int std)2592 emit_scatterx(int std)
2593 {
2594   int mask;
2595   int result;
2596   int array;
2597   int base;
2598 
2599   mask = comminfo.scat.mask;
2600   result = comminfo.scat.result;
2601   array = comminfo.scat.array;
2602   base = comminfo.scat.base;
2603 
2604   if (base)
2605     return;
2606 
2607   emit_scatterx_gatherx(std, result, array, mask, 0, 0, 0, A_HSCATTER);
2608 
2609   STD_DELETE(std) = 1;
2610 }
2611 
2612 static void
emit_scatterx_gatherx(int std,int result,int array,int mask,int allocstd,int tempast0,int lhssec,int comm_type)2613 emit_scatterx_gatherx(int std, int result, int array, int mask, int allocstd,
2614                       int tempast0, int lhssec, int comm_type)
2615 {
2616   int sptr, dest, lop;
2617   int asd1;
2618   int ndim1;
2619   int ast1;
2620   int subs[7];
2621   int astnew;
2622   int tempast;
2623   int argt, nargs;
2624   int i, j;
2625   int forall;
2626   int list;
2627   int vflag, pflag;
2628   int pdim, vdim;
2629   int nvec;
2630   int secv;
2631   ADSC *ad;
2632   int glb, gub;
2633   int asn;
2634   int result_sec, base_sec, array_sec, mask_sec;
2635   int newresult;
2636   int func;
2637   int permute[7];
2638   int npermute;
2639   int ndim, asd;
2640   int newbase;
2641   int nd;
2642   int header;
2643   int vsub, nvsub, newvsub;
2644   int vsub_sec, nvsub_sec;
2645   int commstd;
2646   int cp, xfer;
2647   int startstd;
2648   int ast;
2649   int v, sectvstd;
2650   int sectvsub, sectnvsub;
2651   int vsubstd, nvsubstd, maskstd;
2652   int lhs;
2653   int mask_id;
2654   int sectflag;
2655   INDEX_REUSE *irp;
2656   NEWVAR *nv;
2657   LOGICAL index_reuse;
2658   int index_reuse_condvar;
2659   int ifstd;
2660 
2661   forall = STD_AST(std);
2662   asn = A_IFSTMTG(forall);
2663   lhs = A_DESTG(asn);
2664   nd = A_OPT1G(forall);
2665   header = FT_HEADER(nd);
2666 
2667   sectflag = 0;
2668 
2669   if (comm_type == A_HGATHER) {
2670     vsub = array;
2671     nvsub = result;
2672     func = mk_id(sym_mkfunc(mkRteRtnNm(RTE_comm_gatherx), DT_ADDR));
2673   } else if (comm_type == A_HSCATTER) {
2674     vsub = result;
2675     nvsub = array;
2676     func = mk_id(sym_mkfunc(mkRteRtnNm(RTE_comm_scatterx), DT_ADDR));
2677   }
2678 
2679   sptr = memsym_of_ast(vsub);
2680   list = A_LISTG(forall);
2681   asd1 = A_ASDG(vsub);
2682   ndim1 = ASD_NDIM(asd1);
2683 
2684   index_reuse = FALSE;
2685   open_dynpragma(std, STD_LINENO(std));
2686   for (irp = direct.index_reuse_list; irp; irp = irp->next) {
2687     for (nv = irp->reuse_list; nv; nv = nv->next) {
2688       if (sptr == nv->var) {
2689         index_reuse = TRUE;
2690         index_reuse_condvar = irp->condvar;
2691         goto found_index_reuse;
2692       }
2693     }
2694   }
2695 found_index_reuse:
2696   close_pragma();
2697 
2698   vflag = 0;
2699   vdim = 0;
2700   nvec = 0;
2701   j = 0;
2702   for (i = 0; i < ndim1; i++) {
2703     subs[i] = ASD_SUBS(asd1, i);
2704     if (is_scalar(ASD_SUBS(asd1, i), list))
2705       continue;
2706     if (is_vector_subscript(ASD_SUBS(asd1, i), list)) {
2707       ad = AD_DPTR(DTYPEG(sptr));
2708       glb = AD_LWAST(ad, i);
2709       gub = AD_UPAST(ad, i);
2710       subs[i] = mk_isz_cval(1, astb.bnd.dtype);
2711       vflag |= 1 << j;
2712       vdim |= 1 << i;
2713       nvec++;
2714     }
2715     j++;
2716   }
2717 
2718   permute_axis(vsub, nvsub, list, permute);
2719 
2720   npermute = 0;
2721   pflag = 0;
2722   pdim = 0;
2723   j = 0;
2724   for (i = 0; i < ndim1; i++) {
2725     if (is_scalar(ASD_SUBS(asd1, i), list))
2726       continue;
2727     if (permute[i]) {
2728       pflag |= 1 << j;
2729       pdim |= 1 << i;
2730       npermute++;
2731     }
2732     j++;
2733   }
2734 
2735   newvsub = mk_subscr(A_LOPG(vsub), subs, ndim1, A_DTYPEG(vsub));
2736   astnew = forall_2_sec(newvsub, forall);
2737   /* change astnew for vector dimension */
2738   ad = AD_DPTR(DTYPEG(memsym_of_ast(astnew)));
2739   asd1 = A_ASDG(astnew);
2740   ndim1 = ASD_NDIM(asd1);
2741   for (i = 0; i < ndim1; i++) {
2742     subs[i] = ASD_SUBS(asd1, i);
2743     if (getbit(vdim, i)) {
2744       glb = AD_LWAST(ad, i);
2745       gub = AD_UPAST(ad, i);
2746       subs[i] = mk_triple(glb, gub, 0);
2747     }
2748   }
2749   newvsub = mk_subscr(A_LOPG(astnew), subs, ndim1, DTYPEG(sptr));
2750   vsubstd = make_sec_ast(newvsub, std, 0, sectflag | NOREINDEX);
2751 
2752   nvsub = forall_2_sec(nvsub, forall);
2753   nvsubstd = make_sec_ast(nvsub, std, allocstd, sectflag);
2754 
2755   if (mask && !comminfo.mask_phase) {
2756     mask = forall_2_sec(mask, forall);
2757     maskstd = make_sec_ast(mask, std, 0, sectflag);
2758     mask_id = mk_id(memsym_of_ast(mask));
2759   } else {
2760     mask = 0;
2761     mask_id = 0;
2762     maskstd = 0;
2763   }
2764 
2765   asn = mk_stmt(A_ASN, astb.bnd.dtype);
2766   ast = new_node(A_HGATHER);
2767   A_SRCP(ast, A_LOPG(result));
2768   A_SDESCP(ast, 0);
2769   A_DESTP(ast, A_LOPG(array));
2770   A_DDESCP(ast, 0);
2771   A_MASKP(ast, mask_id);
2772   A_MDESCP(ast, 0);
2773   A_BVECTP(ast, 0);
2774   nd = mk_ftb();
2775   FT_STD(nd) = std;
2776   FT_FORALL(nd) = forall;
2777   FT_CGATHER_VSUB(nd) = newvsub;
2778   FT_CGATHER_NVSUB(nd) = nvsub;
2779   FT_CGATHER_MASK(nd) = mask;
2780   FT_CGATHER_SECTVSUB(nd) = vsubstd;
2781   FT_CGATHER_SECTNVSUB(nd) = nvsubstd;
2782   FT_CGATHER_SECTM(nd) = maskstd;
2783   FT_CGATHER_ALLOC(nd) = allocstd;
2784   FT_CGATHER_FREE(nd) = header;
2785   FT_CGATHER_REUSE(nd) = 0;
2786   FT_CGATHER_INDEXREUSE(nd) = index_reuse;
2787   FT_CGATHER_USELHS(nd) = 0;
2788   FT_CGATHER_LHS(nd) = lhs;
2789   FT_CGATHER_RHS(nd) = array;
2790   FT_CGATHER_SAME(nd) = 0;
2791   FT_CGATHER_VFLAG(nd) = vflag;
2792   FT_CGATHER_PFLAG(nd) = pflag;
2793   FT_CGATHER_VDIM(nd) = vdim;
2794   FT_CGATHER_PDIM(nd) = pdim;
2795   FT_CGATHER_NVEC(nd) = nvec;
2796   FT_CGATHER_NPER(nd) = npermute;
2797   FT_CGATHER_TYPE(nd) = comm_type;
2798   FT_CGATHER_LHSSEC(nd) = lhssec;
2799   FT_CGATHER_NOTLHS(nd) = (lhssec) ? 0 : 1;
2800   j = 8;
2801   asd1 = A_ASDG(vsub);
2802   ndim1 = ASD_NDIM(asd1);
2803   for (i = 0; i < ndim1; i++) {
2804     FT_CGATHER_SECTV(nd, i) = 0;
2805     FT_CGATHER_V(nd, i) = 0;
2806     FT_CGATHER_PERMUTE(nd, i) = 0;
2807     if (!is_scalar(ASD_SUBS(asd1, i), list) &&
2808         is_vector_subscript(ASD_SUBS(asd1, i), list)) {
2809       v = forall_2_sec(ASD_SUBS(asd1, i), forall);
2810       sectvstd = make_sec_ast(v, std, 0, sectflag);
2811       v = ASD_SUBS(asd1, i);
2812       FT_CGATHER_SECTV(nd, i) = sectvstd;
2813       assert(A_TYPEG(v) == A_SUBSCR,
2814              "emit_scatterx_gatherx: non-subscript in gather", A_TYPEG(v), 4);
2815       FT_CGATHER_V(nd, i) = A_LOPG(v);
2816     }
2817     if (permute[i]) {
2818       FT_CGATHER_PERMUTE(nd, i) = permute[i];
2819     }
2820   }
2821 
2822   A_OPT1P(ast, nd);
2823   cp = sym_get_cp();
2824   FT_CGATHER_OUT(nd) = cp;
2825   dest = mk_id(cp);
2826   A_DESTP(asn, dest);
2827   A_SRCP(asn, ast);
2828 
2829   if (index_reuse) {
2830     /*
2831      * 'vsub appeared in a JAHPF INDEX_REUSE directive:
2832      * 	!hpfj index_reuse [(<condition>)] vsub...
2833      *
2834      * Enclose the 'pghpf_comm_gatherx/scatterx' call in a
2835      * conditional as follows:
2836      * (i) if no <condition> is specified:
2837      *
2838      * 	if (cp == 0) then
2839      * 	  cp = pghpf_comm_gatherx/scatterx(...)
2840      * 	endif
2841      *
2842      * (ii) if <condition> is specified:
2843      *
2844      * 	if (cp == 0 .or. .not. <condition>) then
2845      * 	  if (cp /= 0) then
2846      * 	    call pghpf_comm_free(1,cp)
2847      * 	  endif
2848      * 	  cp = pghpf_comm_gatherx/scatterx(...)
2849      * 	endif
2850      */
2851     SAVEP(cp, 1);
2852     ast = mk_stmt(A_IFTHEN, 0);
2853     ast1 = mk_binop(OP_EQ, mk_id(cp), mk_convert(astb.i0, DT_ADDR), DT_LOG);
2854     if (index_reuse_condvar) {
2855       ast1 = mk_binop(OP_LOR, ast1,
2856                       mk_unop(OP_LNOT, mk_id(index_reuse_condvar), DT_LOG),
2857                       DT_LOG);
2858     }
2859     A_IFEXPRP(ast, ast1);
2860     ifstd = add_stmt_before(ast, header);
2861     A_STDP(ast, ifstd);
2862 
2863     if (index_reuse_condvar) {
2864       int predicate = mk_binop(OP_NE, mk_id(cp), mk_convert(astb.i0, DT_ADDR),
2865                                DT_LOG);
2866       int func = mk_id(sym_mkfunc(mkRteRtnNm(RTE_comm_free), DT_NONE));
2867       ast = mk_stmt(A_IFTHEN, 0);
2868       A_IFEXPRP(ast, predicate);
2869       ifstd = add_stmt_before(ast, header);
2870       A_STDP(ast, ifstd);
2871 
2872       argt = mk_argt(2);
2873       ARGT_ARG(argt, 0) = astb.i1;
2874       ARGT_ARG(argt, 1) = mk_id(cp);
2875       ast = mk_stmt(A_CALL, 0);
2876       A_LOPP(ast, func);
2877       NODESCP(A_SPTRG(A_LOPG(ast)), 1);
2878       A_ARGCNTP(ast, 2);
2879       A_ARGSP(ast, argt);
2880       ifstd = add_stmt_before(ast, header);
2881       A_STDP(ast, ifstd);
2882 
2883       ast = mk_stmt(A_ENDIF, 0);
2884       ifstd = add_stmt_before(ast, header);
2885       A_STDP(ast, ifstd);
2886     }
2887   }
2888 
2889   commstd = add_stmt_before(asn, header);
2890   A_STDP(asn, commstd);
2891   nd = A_OPT1G(forall);
2892   plist(FT_RTL(nd), commstd);
2893   FT_NRT(nd)++;
2894 
2895   if (index_reuse) {
2896     ast = mk_stmt(A_ENDIF, 0);
2897     ifstd = add_stmt_before(ast, header);
2898     A_STDP(ast, ifstd);
2899   }
2900 
2901   asn = mk_stmt(A_ASN, astb.bnd.dtype);
2902   ast = new_node(A_HCSTART);
2903   lop = mk_id(cp);
2904   A_LOPP(ast, lop);
2905   A_SRCP(ast, array);
2906   A_DESTP(ast, result);
2907   nd = mk_ftb();
2908   FT_STD(nd) = std;
2909   FT_FORALL(nd) = forall;
2910   FT_CSTART_COMM(nd) = commstd;
2911   FT_CSTART_RHS(nd) = array;
2912   FT_CSTART_USEDSTD(nd) = comminfo.usedstd;
2913   xfer = sym_get_xfer();
2914   FT_CSTART_OUT(nd) = xfer;
2915   FT_CSTART_SECTL(nd) = vsubstd;
2916   FT_CSTART_SECTR(nd) = nvsubstd;
2917   FT_CSTART_ALLOC(nd) = allocstd;
2918   FT_CSTART_FREE(nd) = header;
2919   FT_CSTART_REF(nd) = tempast0;
2920   FT_CSTART_TYPE(nd) = comm_type;
2921   FT_CSTART_REUSE(nd) = 0;
2922   FT_CSTART_INVMVD(nd) = 0;
2923   FT_CSTART_USELHS(nd) = 0;
2924   FT_CSTART_SAME(nd) = 0;
2925   A_OPT1P(ast, nd);
2926   dest = mk_id(xfer);
2927   A_DESTP(asn, dest);
2928   A_SRCP(asn, ast);
2929 
2930   startstd = add_stmt_before(asn, header);
2931   A_STDP(asn, startstd);
2932   nd = A_OPT1G(forall);
2933   plist(FT_RTL(nd), startstd);
2934   FT_NRT(nd)++;
2935 }
2936 
2937 static int
emit_gatherx(int a,int std,LOGICAL opt)2938 emit_gatherx(int a, int std, LOGICAL opt)
2939 {
2940   int sptr;
2941   int asd1;
2942   int ndim1;
2943   int ast1;
2944   int astnew;
2945   int tempast, tempast0;
2946   int src, dest;
2947   int forall;
2948   int list;
2949   int lhs;
2950   LOGICAL use_lhs;
2951   int mask;
2952   int nd, header;
2953   int allocast, allocstd;
2954   int sectlstd;
2955   int lhssec;
2956   int sectflag;
2957   LOGICAL independent;
2958 
2959   forall = STD_AST(std);
2960   nd = A_OPT1G(forall);
2961   header = FT_HEADER(nd);
2962   lhs = comminfo.sub;
2963   mask = A_IFEXPRG(forall);
2964   list = A_LISTG(forall);
2965   asd1 = A_ASDG(a);
2966   ndim1 = ASD_NDIM(asd1);
2967 
2968   sectflag = 0;
2969 
2970   open_dynpragma(std, STD_LINENO(std));
2971   independent = (flg.x[19] & 0x100) != 0;
2972   close_pragma();
2973 
2974   sectlstd = 0;
2975   lhssec = 0;
2976   if (is_use_lhs(a, FALSE, independent, std)) {
2977     sptr = memsym_of_ast(comminfo.sub);
2978     tempast = lhs;
2979     lhssec = tempast = forall_2_sec(tempast, forall);
2980     sectlstd = make_sec_ast(tempast, std, 0, sectflag | NOREINDEX);
2981     nd = A_OPT1G(forall);
2982     FT_SECTL(nd) = sectlstd;
2983   }
2984 
2985   sptr = temp_gatherx(std, forall, lhs, lhs, DTY(DTYPEG(memsym_of_ast(a)) + 1),
2986                       &allocast);
2987   tempast0 = tempast = gatherx_temp_before(sptr, lhs, forall);
2988 
2989   allocstd = add_stmt_before(allocast, header);
2990   A_STDP(allocast, allocstd);
2991   nd = A_OPT1G(forall);
2992   plist(FT_RTL(nd), allocstd);
2993   FT_NRT(nd)++;
2994 
2995   emit_scatterx_gatherx(std, tempast, a, mask, allocstd, tempast0, lhssec,
2996                         A_HGATHER);
2997   return a;
2998 }
2999 
3000 /* Algorithm:
3001  * This will choice the bigest overlap shift at each dimension
3002  * among the same array in the set.
3003  * Store overlap_shift value in array symbol table.
3004  * mark the all OVERLAP as  NO_COMM but the first one.
3005  */
3006 static void
opt_overlap(void)3007 opt_overlap(void)
3008 {
3009   int i;
3010   int arr, arr1;
3011   int subinfo1, ndim;
3012   int subinfo;
3013   int align;
3014   int nargs, ast1, argt;
3015   int first;
3016   int nd, nd1;
3017   int sptr, sptr1;
3018 
3019   /* Now compute the total overlap-shift for each separate array symbol */
3020   for (arr = trans.rhsbase; arr != 0; arr = ARREF_NEXT(arr)) {
3021     if (ARREF_CLASS(arr) != OVERLAP)
3022       continue;
3023     align = ALIGNG(ARREF_ARRSYM(arr));
3024     for (arr1 = arr; arr1 != 0; arr1 = ARREF_NEXT(arr1)) {
3025       sptr = ARREF_ARRSYM(arr);
3026       sptr1 = ARREF_ARRSYM(arr1);
3027       if (ARREF_ARRSYM(arr1) != ARREF_ARRSYM(arr))
3028         continue;
3029       /* find out shift values and store union of them into subinfo */
3030       subinfo = ARREF_SUB(arr);
3031       subinfo1 = ARREF_SUB(arr1);
3032       ndim = ARREF_NDIM(arr1);
3033       for (i = 0; i < ndim; ++i) {
3034         int v;
3035         if (SUBI_COMMT(subinfo1 + i) != COMMT_SHIFTC)
3036           continue;
3037         if ((v = SUBI_COMMV(subinfo1 + i)) < 0) {
3038           v = -v;
3039           if (v > SUBI_NOP(subinfo + i)) {
3040             SUBI_NOP(subinfo + i) = v;
3041             SUBI_NOP(subinfo1 + i) = v;
3042           }
3043 
3044         } else {
3045           if (v > SUBI_POP(subinfo + i)) {
3046             SUBI_POP(subinfo + i) = v;
3047             SUBI_POP(subinfo1 + i) = v;
3048           }
3049         }
3050       }
3051 
3052       if (flg.ipa) {
3053         /* allow common block overlap increase */
3054         if ((ARGG(sptr1) && SCG(sptr) != SC_CMBLK) || SCG(sptr1) == SC_DUMMY)
3055           continue;
3056       } else {
3057         if (ARGG(sptr1) || SCG(sptr1) == SC_DUMMY || SCG(sptr) == SC_CMBLK)
3058           continue;
3059       }
3060 
3061       ARREF_FLAG(arr1) = 2;
3062       subinfo = ARREF_SUB(arr);
3063       subinfo1 = ARREF_SUB(arr1);
3064       ndim = ARREF_NDIM(arr1);
3065     }
3066   }
3067 }
3068 
3069 static void
emit_overlap(int a)3070 emit_overlap(int a)
3071 {
3072   int align, sdesc, dest, lop;
3073   int arr;
3074   int asd, ndim;
3075   int astnew;
3076   int asn;
3077   int i;
3078   int startstd;
3079   int commstd;
3080   int cp, xfer;
3081   int nd;
3082   int sptr;
3083   int subs[7];
3084   int forall;
3085   int std;
3086   int ns, ps;
3087   int ast;
3088   int header;
3089   int subinfo;
3090   int arref;
3091 
3092   std = comminfo.std;
3093   forall = STD_AST(std);
3094   nd = A_OPT1G(forall);
3095   header = FT_HEADER(nd);
3096   /* put out the shift call for this symbol */
3097   arr = A_LOPG(a);
3098   sptr = memsym_of_ast(arr);
3099   align = ALIGNG(sptr);
3100   asd = A_ASDG(a);
3101   ndim = ASD_NDIM(asd);
3102   arref = A_RFPTRG(a);
3103   subinfo = ARREF_SUB(arref);
3104 
3105   DESCUSEDP(sptr, 1);
3106   for (i = 0; i < ndim; ++i) {
3107     ns = mk_isz_cval(SUBI_NOP(subinfo + i), astb.bnd.dtype);
3108     ps = mk_isz_cval(SUBI_POP(subinfo + i), astb.bnd.dtype);
3109     subs[i] = mk_triple(ps, ns, 0);
3110   }
3111   astnew = mk_subscr(arr, subs, ndim, DTYPEG(sptr));
3112 
3113   asn = mk_stmt(A_ASN, astb.bnd.dtype);
3114   ast = new_node(A_HOVLPSHIFT);
3115   A_SRCP(ast, astnew);
3116   sdesc = check_member(arr, mk_id(DESCRG(sptr)));
3117   A_SDESCP(ast, sdesc);
3118   nd = mk_ftb();
3119   FT_STD(nd) = std;
3120   FT_FORALL(nd) = forall;
3121   FT_SHIFT_RHS(nd) = a;
3122   FT_SHIFT_FREE(nd) = header;
3123   FT_SHIFT_REUSE(nd) = 0;
3124   FT_SHIFT_SAME(nd) = 0;
3125   FT_SHIFT_TYPE(nd) = 0;
3126   FT_SHIFT_BOUNDARY(nd) = 0;
3127   A_OPT1P(ast, nd);
3128   cp = sym_get_cp();
3129   FT_SHIFT_OUT(nd) = cp;
3130   dest = mk_id(cp);
3131   A_DESTP(asn, dest);
3132   A_SRCP(asn, ast);
3133 
3134   commstd = add_stmt_before(asn, header);
3135   A_STDP(asn, commstd);
3136   nd = A_OPT1G(forall);
3137   plist(FT_RTL(nd), commstd);
3138   FT_NRT(nd)++;
3139 
3140   asn = mk_stmt(A_ASN, astb.bnd.dtype);
3141   ast = new_node(A_HCSTART);
3142   lop = mk_id(cp);
3143   A_LOPP(ast, lop);
3144   A_SRCP(ast, astnew);
3145   A_DESTP(ast, astnew);
3146   nd = mk_ftb();
3147   FT_STD(nd) = std;
3148   FT_FORALL(nd) = forall;
3149   FT_CSTART_COMM(nd) = commstd;
3150   FT_CSTART_RHS(nd) = a;
3151   FT_CSTART_USEDSTD(nd) = comminfo.usedstd;
3152   xfer = sym_get_xfer();
3153   FT_CSTART_OUT(nd) = xfer;
3154   FT_CSTART_SECTL(nd) = 0;
3155   FT_CSTART_SECTR(nd) = 0;
3156   FT_CSTART_ALLOC(nd) = 0;
3157 
3158   FT_CSTART_FREE(nd) = header;
3159   FT_CSTART_REF(nd) = 0;
3160   FT_CSTART_TYPE(nd) = A_HOVLPSHIFT;
3161   FT_CSTART_REUSE(nd) = 0;
3162   FT_CSTART_INVMVD(nd) = 0;
3163   FT_CSTART_USELHS(nd) = 0;
3164   FT_CSTART_SAME(nd) = 0;
3165   A_OPT1P(ast, nd);
3166   dest = mk_id(xfer);
3167   A_DESTP(asn, dest);
3168   A_SRCP(asn, ast);
3169 
3170   startstd = add_stmt_before(asn, header);
3171   A_STDP(asn, startstd);
3172   nd = A_OPT1G(forall);
3173   plist(FT_RTL(nd), startstd);
3174   FT_NRT(nd)++;
3175 }
3176 
3177 static CTYPE *
getcyclic(void)3178 getcyclic(void)
3179 {
3180   int i;
3181   CTYPE *ct;
3182   ct = (CTYPE *)getitem(FORALL_AREA, sizeof(CTYPE));
3183   ct->lhs = 0;
3184   ct->ifast = 0;
3185   ct->endifast = 0;
3186   ct->inner_cyclic = clist();
3187   for (i = 0; i < 7; i++) {
3188     ct->c_lof[i] = 0;
3189     ct->c_dupl[i] = 0;
3190     ct->idx[i] = 0;
3191     ct->cb_init[i] = 0;
3192     ct->cb_do[i] = 0;
3193     ct->cb_block[i] = 0;
3194     ct->cb_inc[i] = 0;
3195     ct->cb_enddo[i] = 0;
3196     ct->c_init[i] = 0;
3197     ct->c_inc[i] = 0;
3198   }
3199   return ct;
3200 }
3201 
3202 static int
shape_comm_in_expr(int expr,int forall,int std,int nomask)3203 shape_comm_in_expr(int expr, int forall, int std, int nomask)
3204 {
3205   int l, r, d, o;
3206   int l1, l2, l3;
3207   int i, nargs, argt, j;
3208   int lhs, sptr;
3209 
3210   if (expr == 0)
3211     return expr;
3212   switch (A_TYPEG(expr)) {
3213   /* expressions */
3214   case A_BINOP:
3215     o = A_OPTYPEG(expr);
3216     d = A_DTYPEG(expr);
3217     l = shape_comm_in_expr(A_LOPG(expr), forall, std, nomask);
3218     r = shape_comm_in_expr(A_ROPG(expr), forall, std, nomask);
3219     if (l == A_LOPG(expr) && r == A_ROPG(expr))
3220       return expr;
3221     return mk_binop(o, l, r, d);
3222   case A_UNOP:
3223     o = A_OPTYPEG(expr);
3224     d = A_DTYPEG(expr);
3225     l = shape_comm_in_expr(A_LOPG(expr), forall, std, nomask);
3226     if (l == A_LOPG(expr))
3227       return expr;
3228     return mk_unop(o, l, d);
3229   case A_CONV:
3230     d = A_DTYPEG(expr);
3231     l = shape_comm_in_expr(A_LOPG(expr), forall, std, nomask);
3232     if (l == A_LOPG(expr))
3233       return expr;
3234     return mk_convert(l, d);
3235   case A_PAREN:
3236     d = A_DTYPEG(expr);
3237     l = shape_comm_in_expr(A_LOPG(expr), forall, std, nomask);
3238     if (l == A_LOPG(expr))
3239       return expr;
3240     return mk_paren(l, d);
3241   case A_SUBSTR:
3242     return expr;
3243   case A_INTR:
3244   case A_FUNC:
3245     /* size & present intrinsics do not need the array content,
3246      * no need to communicate
3247      */
3248     o = A_OPTYPEG(expr);
3249     if (o == I_SIZE || o == I_PRESENT)
3250       return expr;
3251     nargs = A_ARGCNTG(expr);
3252     argt = A_ARGSG(expr);
3253     for (i = 0; i < nargs; ++i) {
3254       ARGT_ARG(argt, i) =
3255           shape_comm_in_expr(ARGT_ARG(argt, i), forall, std, nomask);
3256     }
3257     return expr;
3258   case A_CNST:
3259   case A_CMPLXC:
3260     return expr;
3261   case A_MEM:
3262     if (!A_SHAPEG(expr))
3263       return expr;
3264     sptr = A_SPTRG(A_MEMG(expr));
3265     r = A_MEMG(expr);
3266     d = A_DTYPEG(r);
3267     l = shape_comm_in_expr(A_PARENTG(expr), forall, std, nomask);
3268     if (l == A_PARENTG(expr))
3269       return expr;
3270     return mk_member(l, r, d);
3271   case A_ID:
3272   case A_SUBSCR:
3273     if (!A_SHAPEG(expr))
3274       return expr;
3275     lhs = A_DESTG(A_IFSTMTG(forall));
3276     expr = convert_subscript(expr);
3277     return expr;
3278   default:
3279     interr("shape_comm_in_expr: unknown expression", expr, 2);
3280     return expr;
3281   }
3282 }
3283 
3284 static void
shape_communication(int std,int forall)3285 shape_communication(int std, int forall)
3286 {
3287   int nd;
3288   int i;
3289   int cstd;
3290   int expr;
3291   int asn;
3292   int rhs;
3293 
3294   /* handle shape communication at forall first a(i) = pure_func(b) */
3295   expr = A_IFEXPRG(forall);
3296   asn = A_IFSTMTG(forall);
3297   rhs = A_SRCG(asn);
3298   rhs = shape_comm_in_expr(rhs, forall, std, 1);
3299   expr = shape_comm_in_expr(expr, forall, std, 1);
3300   A_SRCP(asn, rhs);
3301   A_IFEXPRP(forall, expr);
3302 
3303   /* handle shape communication at calls second */
3304   nd = A_OPT1G(forall);
3305   for (i = 0; i < FT_NMCALL(nd); i++) {
3306     cstd = glist(FT_MCALL(nd), i);
3307     shape_comm(cstd, std, forall);
3308   }
3309 
3310   for (i = 0; i < FT_NSCALL(nd); i++) {
3311     cstd = glist(FT_SCALL(nd), i);
3312     shape_comm(cstd, std, forall);
3313   }
3314 }
3315 
3316 static void
shape_comm(int cstd,int fstd,int forall)3317 shape_comm(int cstd, int fstd, int forall)
3318 {
3319   int ast, ast1;
3320   int cstd1;
3321   int nd, nd1;
3322   int i;
3323   int nargs, argt;
3324   int lhs;
3325   int arg;
3326 
3327   ast = STD_AST(cstd);
3328   nd = A_OPT1G(ast);
3329   assert(nd, "call_comm: something is wrong", ast, 3);
3330   for (i = 0; i < FT_CALL_NCALL(nd); i++) {
3331     cstd1 = glist(FT_CALL_CALL(nd), i);
3332     ast1 = STD_AST(cstd1);
3333     nd1 = A_OPT1G(ast1);
3334     assert(nd1, "put_calls: something is wrong", ast1, 3);
3335     shape_comm(cstd1, fstd, forall);
3336   }
3337   nargs = A_ARGCNTG(ast);
3338   argt = A_ARGSG(ast);
3339   for (i = 0; i < nargs; ++i) {
3340     arg = ARGT_ARG(argt, i);
3341     if (!A_SHAPEG(arg))
3342       continue;
3343     lhs = A_DESTG(A_IFSTMTG(forall));
3344     assert(A_TYPEG(arg) == A_SUBSCR || A_TYPEG(arg) == A_ID ||
3345                A_TYPEG(arg) == A_MEM,
3346            "shape_comm: array expression is not supported", arg, 3);
3347     arg = convert_subscript(arg);
3348   }
3349 }
3350 
3351 /* The function of this routine is to handle communication of arg.
3352  * This arg is from PURE function and it has shape. It will try to
3353  * bring to lhs of forall. Distribution of TMP will be based on LHS.
3354  * However, the size and shape of TMP will be based on both LHS and arg.
3355  * There are three rules for TMP:
3356  *        1-) heading dimensions size and distribution from LHS
3357  *        2-) tailling dimensions size from shape of arg with no distribution
3358  *        3-) remove idx from forall list if it does not appear at arg or mask
3359  * For example: (assume that a, b have different distributions.
3360  *      forall(i=1:n) a(i)= sum(b(i,iloc(i),:))
3361  * will be
3362  *      forall(i=1:n) tmp(i,:) =b(i,iloc(i),:)
3363  *      forall(i=1:n) a(i) = sum(tmp(i,:))
3364  * There will be no communication for tmp which becames new arg of PURE.
3365  * is_pure_temp_too_large() decides whether tmp will have more dimension than
3366  * arg. if it is, tmp will be replication of arg.
3367  */
3368 
3369 static int
gen_shape_comm(int arg,int forall,int std,int nomask)3370 gen_shape_comm(int arg, int forall, int std, int nomask)
3371 {
3372   int newforall;
3373   int newstd;
3374   int sptr;
3375   int asn;
3376   int newast;
3377   int tmpast;
3378   int lhs;
3379   int mask;
3380   int list;
3381   int olist;
3382   int ast;
3383   int shape;
3384   int nd;
3385   int header;
3386 
3387   if (!A_SHAPEG(arg))
3388     return arg;
3389   lhs = A_DESTG(A_IFSTMTG(forall));
3390   olist = A_LISTG(forall);
3391   mask = A_IFEXPRG(forall);
3392   if (nomask)
3393     mask = 0;
3394   nd = A_OPT1G(forall);
3395   assert(nd, "gen_shape_comm: something is wrong", forall, 3);
3396   header = FT_HEADER(nd);
3397   list = construct_list_for_pure(arg, mask, olist);
3398   if (is_pure_temp_too_large(list, arg)) {
3399     tmpast = handle_pure_temp_too_large(arg, header);
3400     return tmpast;
3401   }
3402   /* put new list to forall for short time to trick
3403    * get_temp_pure() and reference_for_pure_temp()
3404    */
3405   A_LISTP(forall, list);
3406   /* create a pure temp */
3407   sptr = get_temp_pure(forall, lhs, arg, header, header, arg);
3408   tmpast = reference_for_pure_temp(sptr, lhs, arg, forall);
3409   /* put original list back to forall */
3410   A_LISTP(forall, olist);
3411 
3412   asn = mk_stmt(A_ASN, DTYPEG(sptr));
3413   A_DESTP(asn, tmpast);
3414   A_SRCP(asn, arg);
3415 
3416   if (list) {
3417     newforall = mk_stmt(A_FORALL, 0);
3418     A_LISTP(newforall, list);
3419     A_IFSTMTP(newforall, asn);
3420     A_IFEXPRP(newforall, mask);
3421   } else {
3422     shape = A_SHAPEG(tmpast);
3423     newforall = make_forall(shape, tmpast, 0, 0);
3424     ast = normalize_forall(newforall, asn, 0);
3425     A_IFSTMTP(newforall, ast);
3426     A_IFEXPRP(newforall, 0);
3427   }
3428   newforall = rename_forall_list(newforall);
3429   newstd = add_stmt_before(newforall, header);
3430   process_forall(newstd);
3431 
3432   newforall = STD_AST(newstd);
3433   transform_forall(newstd, newforall);
3434   return tmpast;
3435 }
3436 
3437 /* construct a new list based on old list
3438  * which must appear arg or mask expression
3439  */
3440 static int
construct_list_for_pure(int arg,int mask,int list)3441 construct_list_for_pure(int arg, int mask, int list)
3442 {
3443   int newlist;
3444   int isptr;
3445   int j;
3446 
3447   start_astli();
3448   for (j = list; j != 0; j = ASTLI_NEXT(j)) {
3449     isptr = ASTLI_SPTR(j);
3450     if (is_name_in_expr(arg, isptr) || is_name_in_expr(mask, isptr)) {
3451       /* include this one */
3452       newlist = add_astli();
3453       ASTLI_SPTR(newlist) = ASTLI_SPTR(j);
3454       ASTLI_TRIPLE(newlist) = ASTLI_TRIPLE(j);
3455     }
3456   }
3457   return ASTLI_HEAD;
3458 }
3459 
3460 /* This will find temp_reference for pure communication.
3461  *  lhs=a(i,j,2), arg=b(2,i,:) will be tmp=tmp(i,j,:)
3462  * heading dimension from lhs, talling from arg.
3463  */
3464 static int
reference_for_pure_temp(int sptr,int lhs,int arg,int forall)3465 reference_for_pure_temp(int sptr, int lhs, int arg, int forall)
3466 {
3467   int subs[7];
3468   int list;
3469   int i, j;
3470   int asd;
3471   int ndim;
3472   int astnew;
3473   int shape;
3474   int sdim;
3475 
3476   list = A_LISTG(forall);
3477   asd = A_ASDG(lhs);
3478   ndim = ASD_NDIM(asd);
3479   j = 0;
3480   for (i = 0; i < ndim; i++) {
3481     if (search_forall_var(ASD_SUBS(asd, i), list)) {
3482       /* include this dimension */
3483       subs[j] = ASD_SUBS(asd, i);
3484       j++;
3485     }
3486   }
3487 
3488   shape = A_SHAPEG(arg);
3489   asd = A_ASDG(arg);
3490   ndim = ASD_NDIM(asd);
3491   sdim = 0;
3492   for (i = 0; i < ndim; i++) {
3493     if (A_TYPEG(ASD_SUBS(asd, i)) == A_TRIPLE || A_SHAPEG(ASD_SUBS(asd, i))) {
3494       /* include this dimension */
3495       subs[j] = ASD_SUBS(asd, i);
3496       j++;
3497       sdim++;
3498     }
3499   }
3500   assert(j == rank_of_sym(sptr), "reference_for_pure_temp: rank mismatched",
3501          sptr, 4);
3502   assert(shape, "reference_for_pure_temp: shape mismatched", sptr, 4);
3503   assert(SHD_NDIM(shape) == sdim, "reference_for_pure_temp: shape mismatched",
3504          sptr, 4);
3505 
3506   astnew = mk_subscr(mk_id(sptr), subs, j, DTYPEG(sptr));
3507   return astnew;
3508 }
3509 
3510 /* this will decide whether pure tmp will be larger than arg
3511  * if gen_shape_comm() choose to have distributed temp.
3512  * if it is, it will not choose the distributed temp.
3513  * it will choose to have replicated temp.
3514  */
3515 static LOGICAL
is_pure_temp_too_large(int list,int arg)3516 is_pure_temp_too_large(int list, int arg)
3517 {
3518   int count;
3519   int ndim;
3520   int asd;
3521   int i;
3522   int j;
3523 
3524   count = 0;
3525   for (j = list; j != 0; j = ASTLI_NEXT(j))
3526     count++;
3527   assert(A_TYPEG(arg) == A_SUBSCR, "is_pure_temp_too_large: not SUBSCR", arg,
3528          4);
3529   asd = A_ASDG(arg);
3530   ndim = ASD_NDIM(asd);
3531   for (i = 0; i < ndim; i++) {
3532     if (A_TYPEG(ASD_SUBS(asd, i)) == A_TRIPLE || A_SHAPEG(ASD_SUBS(asd, i)))
3533       count++;
3534   }
3535   if (count > ndim)
3536     return TRUE;
3537   return FALSE;
3538 }
3539 
3540 /* this routine is to find distributed array in expr.
3541  * assign those array to the same size replicated temp
3542  * For example:   a(inx(i))
3543  * indx$temp = indx
3544  * a$temp = a
3545  * return a$temp(indx$temp(i))
3546  */
3547 static int
handle_pure_temp_too_large(int expr,int std)3548 handle_pure_temp_too_large(int expr, int std)
3549 {
3550   int l, r, d, o;
3551   int l1, l2, l3;
3552   int i, nargs, argt, j;
3553   int tmp_sptr, tmp_ast;
3554   int forall, ast;
3555   int asd, ndim;
3556   int shape, std1;
3557   int sptr;
3558   int eledtype;
3559   int subs[7];
3560   int asn;
3561 
3562   if (expr == 0)
3563     return expr;
3564   switch (A_TYPEG(expr)) {
3565   /* expressions */
3566   case A_BINOP:
3567     o = A_OPTYPEG(expr);
3568     d = A_DTYPEG(expr);
3569     l = handle_pure_temp_too_large(A_LOPG(expr), std);
3570     r = handle_pure_temp_too_large(A_ROPG(expr), std);
3571     return mk_binop(o, l, r, d);
3572   case A_UNOP:
3573     o = A_OPTYPEG(expr);
3574     d = A_DTYPEG(expr);
3575     l = handle_pure_temp_too_large(A_LOPG(expr), std);
3576     return mk_unop(o, l, d);
3577   case A_CONV:
3578     d = A_DTYPEG(expr);
3579     l = handle_pure_temp_too_large(A_LOPG(expr), std);
3580     return mk_convert(l, d);
3581   case A_PAREN:
3582     d = A_DTYPEG(expr);
3583     l = handle_pure_temp_too_large(A_LOPG(expr), std);
3584     return mk_paren(l, d);
3585   case A_SUBSTR:
3586     return expr;
3587   case A_INTR:
3588   case A_FUNC:
3589     nargs = A_ARGCNTG(expr);
3590     argt = A_ARGSG(expr);
3591     for (i = 0; i < nargs; ++i) {
3592       ARGT_ARG(argt, i) = handle_pure_temp_too_large(ARGT_ARG(argt, i), std);
3593     }
3594     return expr;
3595   case A_CNST:
3596   case A_CMPLXC:
3597     return expr;
3598   case A_MEM:
3599     sptr = A_SPTRG(A_MEMG(expr));
3600     if (DTY(DTYPEG(sptr)) != TY_ARRAY || !ALIGNG(sptr)) {
3601       r = A_MEMG(expr);
3602       d = A_DTYPEG(r);
3603       l = handle_pure_temp_too_large(A_PARENTG(expr), std);
3604       return mk_member(l, r, d);
3605     }
3606     goto replicate_temp;
3607 
3608   case A_ID:
3609     sptr = A_SPTRG(expr);
3610     if (STYPEG(sptr) != ST_ARRAY || !ALIGNG(sptr))
3611       return expr;
3612     eledtype = DTY(DTYPEG(sptr) + 1);
3613 
3614   replicate_temp:
3615     /* copy to replicate temp */
3616     tmp_sptr = get_temp_pure_replicated(sptr, std, std, expr);
3617     tmp_ast = mk_id(tmp_sptr);
3618     asn = mk_assn_stmt(tmp_ast, expr, eledtype);
3619     shape = A_SHAPEG(tmp_ast);
3620     forall = make_forall(shape, tmp_ast, 0, 0);
3621     A_ARRASNP(forall, TRUE);
3622     forall = rename_forall_list(forall);
3623     ast = normalize_forall(forall, asn, 0);
3624     A_IFSTMTP(forall, ast);
3625     A_IFEXPRP(forall, 0);
3626     std1 = add_stmt_before(forall, std);
3627     process_forall(std1);
3628     transform_forall(std1, forall);
3629     return mk_id(tmp_sptr);
3630   case A_SUBSCR:
3631     asd = A_ASDG(expr);
3632     ndim = ASD_NDIM(asd);
3633     for (i = 0; i < ndim; i++) {
3634       subs[i] = handle_pure_temp_too_large(ASD_SUBS(asd, i), std);
3635     }
3636     l1 = handle_pure_temp_too_large(A_LOPG(expr), std);
3637     expr = mk_subscr(l1, subs, ndim, A_DTYPEG(expr));
3638     return expr;
3639   case A_TRIPLE:
3640     l1 = handle_pure_temp_too_large(A_LBDG(expr), std);
3641     l2 = handle_pure_temp_too_large(A_UPBDG(expr), std);
3642     l3 = handle_pure_temp_too_large(A_STRIDEG(expr), std);
3643     return mk_triple(l1, l2, l3);
3644   default:
3645     interr("handle_pure_temp_too_large: unknown expression", expr, 2);
3646     return expr;
3647   }
3648 }
3649 
3650 static void
insert_call_comm(int std,int forall)3651 insert_call_comm(int std, int forall)
3652 {
3653   int nd;
3654   int i;
3655   int cstd;
3656 
3657   nd = A_OPT1G(forall);
3658   comminfo.mask_phase = 1;
3659   for (i = 0; i < FT_NMCALL(nd); i++) {
3660     cstd = glist(FT_MCALL(nd), i);
3661     put_call_comm(cstd, std, forall);
3662   }
3663   comminfo.mask_phase = 0;
3664   for (i = 0; i < FT_NSCALL(nd); i++) {
3665     cstd = glist(FT_SCALL(nd), i);
3666     put_call_comm(cstd, std, forall);
3667   }
3668 }
3669 
3670 static void
put_call_comm(int cstd,int fstd,int forall)3671 put_call_comm(int cstd, int fstd, int forall)
3672 {
3673   int ast, ast1;
3674   int cstd1;
3675   int nd, nd1;
3676   int i;
3677   int test;
3678   int nargs, argt;
3679 
3680   comminfo.usedstd = cstd;
3681   ast = STD_AST(cstd);
3682   nd = A_OPT1G(ast);
3683   assert(nd, "call_comm: something is wrong", ast, 3);
3684   for (i = 0; i < FT_CALL_NCALL(nd); i++) {
3685     cstd1 = glist(FT_CALL_CALL(nd), i);
3686     ast1 = STD_AST(cstd1);
3687     nd1 = A_OPT1G(ast1);
3688     assert(nd1, "put_calls: something is wrong", ast1, 3);
3689     put_call_comm(cstd1, fstd, forall);
3690   }
3691   nargs = A_ARGCNTG(ast);
3692   argt = A_ARGSG(ast);
3693   for (i = 0; i < nargs; ++i) {
3694     ARGT_ARG(argt, i) = insert_forall_comm(ARGT_ARG(argt, i));
3695   }
3696 }
3697 
3698 static int
tag_call_comm(int std,int forall)3699 tag_call_comm(int std, int forall)
3700 {
3701   int nd;
3702   int i;
3703   int cstd;
3704 
3705   nd = A_OPT1G(forall);
3706   comminfo.mask_phase = 1;
3707   for (i = 0; i < FT_NMCALL(nd); i++) {
3708     cstd = glist(FT_MCALL(nd), i);
3709     call_comm(cstd, std, forall);
3710   }
3711   comminfo.mask_phase = 0;
3712   for (i = 0; i < FT_NSCALL(nd); i++) {
3713     cstd = glist(FT_SCALL(nd), i);
3714     call_comm(cstd, std, forall);
3715   }
3716   return 1;
3717 }
3718 
3719 static void
call_comm(int cstd,int fstd,int forall)3720 call_comm(int cstd, int fstd, int forall)
3721 {
3722   int ast, ast1;
3723   int cstd1;
3724   int nd, nd1;
3725   int i;
3726   int test;
3727 
3728   ast = STD_AST(cstd);
3729   nd = A_OPT1G(ast);
3730   assert(nd, "call_comm: something is wrong", ast, 3);
3731   for (i = 0; i < FT_CALL_NCALL(nd); i++) {
3732     cstd1 = glist(FT_CALL_CALL(nd), i);
3733     ast1 = STD_AST(cstd1);
3734     nd1 = A_OPT1G(ast1);
3735     assert(nd1, "put_calls: something is wrong", ast1, 3);
3736     call_comm(cstd1, fstd, forall);
3737   }
3738   test = tag_forall_comm(ast);
3739 }
3740 
3741 static int
sequentialize_mask_call(int forall,int stdnext)3742 sequentialize_mask_call(int forall, int stdnext)
3743 {
3744   int nd;
3745   int i;
3746   int cstd;
3747 
3748   nd = A_OPT1G(forall);
3749   for (i = 0; i < FT_NMCALL(nd); i++) {
3750     cstd = glist(FT_MCALL(nd), i);
3751     stdnext = sequentialize_call(cstd, stdnext, forall);
3752   }
3753   return stdnext;
3754 }
3755 
3756 static int
sequentialize_stmt_call(int forall,int stdnext)3757 sequentialize_stmt_call(int forall, int stdnext)
3758 {
3759   int nd;
3760   int i;
3761   int cstd;
3762 
3763   nd = A_OPT1G(forall);
3764   for (i = 0; i < FT_NSCALL(nd); i++) {
3765     cstd = glist(FT_SCALL(nd), i);
3766     stdnext = sequentialize_call(cstd, stdnext, forall);
3767   }
3768   return stdnext;
3769 }
3770 
3771 static int
sequentialize_call(int cstd,int stdnext,int forall)3772 sequentialize_call(int cstd, int stdnext, int forall)
3773 {
3774   int ast, ast1;
3775   int cstd1;
3776   int nd, nd1;
3777   int i, lineno;
3778   int stdnext1;
3779 
3780   ast = STD_AST(cstd);
3781   nd = A_OPT1G(ast);
3782   assert(nd, "call_comm: something is wrong", ast, 3);
3783   for (i = 0; i < FT_CALL_NCALL(nd); i++) {
3784     cstd1 = glist(FT_CALL_CALL(nd), i);
3785     ast1 = STD_AST(cstd1);
3786     nd1 = A_OPT1G(ast1);
3787     assert(nd1, "put_calls: something is wrong", ast1, 3);
3788     stdnext = sequentialize_call(cstd1, stdnext, forall);
3789   }
3790   lineno = STD_LINENO(cstd);
3791   delete_stmt(cstd);
3792   stdnext = add_stmt_before(ast, stdnext);
3793   stdnext1 = STD_NEXT(stdnext);
3794   STD_LINENO(stdnext) = lineno;
3795   transform_ast(stdnext, ast);
3796   stdnext = stdnext1;
3797   return stdnext;
3798 }
3799 
3800 /* this routine will normalize forall triplet list,
3801  * It makes triple integer and
3802  * It eliminates distributed array from triplet.
3803  */
3804 static int
normalize_forall_triplet(int std,int forall)3805 normalize_forall_triplet(int std, int forall)
3806 {
3807   int lb, ub, st;
3808   int triplet_list;
3809   int triplet;
3810   int list;
3811   int rhs_is_dist;
3812   int tmp_sptr;
3813   int newlist;
3814   int ast, dest;
3815   int triplet_list1, triplet1;
3816   int isptr;
3817 
3818   /* don't allow forall(i=1:n,j=istart(i):istop(i) */
3819   triplet_list = A_LISTG(forall);
3820   if (is_multiple_idx_in_list(triplet_list))
3821     return 0;
3822 
3823   /* It eliminates distributed array from triplet */
3824   triplet_list = A_LISTG(forall);
3825   start_astli();
3826   for (; triplet_list; triplet_list = ASTLI_NEXT(triplet_list)) {
3827     triplet = ASTLI_TRIPLE(triplet_list);
3828     /* case forall(i=idx(1):n) */
3829     rhs_is_dist = FALSE;
3830     triplet = insert_comm_before(std, triplet, &rhs_is_dist, FALSE);
3831     newlist = add_astli();
3832     ASTLI_SPTR(newlist) = ASTLI_SPTR(triplet_list);
3833     ASTLI_TRIPLE(newlist) = triplet;
3834   }
3835   list = ASTLI_HEAD;
3836   A_LISTP(forall, list);
3837 
3838   /* make forall triple DT_INT if not */
3839   triplet_list = A_LISTG(forall);
3840   start_astli();
3841   for (; triplet_list; triplet_list = ASTLI_NEXT(triplet_list)) {
3842     triplet = ASTLI_TRIPLE(triplet_list);
3843     lb = A_LBDG(triplet);
3844     assert(lb, "normalize_forall_triplet: no lower bound at forall triplet",
3845            forall, 3);
3846     if (A_TYPEG(lb) == A_CONV)
3847       lb = A_LOPG(lb);
3848     if (!DT_ISINT(A_DTYPEG(lb))) {
3849       tmp_sptr = sym_get_scalar("lb", 0, astb.bnd.dtype);
3850       ast = mk_stmt(A_ASN, astb.bnd.dtype);
3851       dest = mk_id(tmp_sptr);
3852       A_DESTP(ast, dest);
3853       A_SRCP(ast, lb);
3854       add_stmt_before(ast, std);
3855       lb = mk_id(tmp_sptr);
3856     }
3857     ub = A_UPBDG(triplet);
3858     assert(ub, "normalize_forall_triplet: no lower bound at forall triplet",
3859            forall, 3);
3860     if (A_TYPEG(ub) == A_CONV)
3861       ub = A_LOPG(ub);
3862     if (!DT_ISINT(A_DTYPEG(ub))) {
3863       tmp_sptr = sym_get_scalar("ub", 0, astb.bnd.dtype);
3864       ast = mk_stmt(A_ASN, astb.bnd.dtype);
3865       dest = mk_id(tmp_sptr);
3866       A_DESTP(ast, dest);
3867       A_SRCP(ast, ub);
3868       add_stmt_before(ast, std);
3869       ub = mk_id(tmp_sptr);
3870     }
3871     st = A_STRIDEG(triplet);
3872     if (A_TYPEG(st) == A_CONV)
3873       st = A_LOPG(st);
3874     if (st)
3875       if (!DT_ISINT(A_DTYPEG(st))) {
3876         tmp_sptr = sym_get_scalar("st", 0, astb.bnd.dtype);
3877         ast = mk_stmt(A_ASN, astb.bnd.dtype);
3878         dest = mk_id(tmp_sptr);
3879         A_DESTP(ast, dest);
3880         A_SRCP(ast, st);
3881         add_stmt_before(ast, std);
3882         st = mk_id(tmp_sptr);
3883       }
3884     triplet = mk_triple(lb, ub, st);
3885     newlist = add_astli();
3886     ASTLI_SPTR(newlist) = ASTLI_SPTR(triplet_list);
3887     ASTLI_TRIPLE(newlist) = triplet;
3888   }
3889   list = ASTLI_HEAD;
3890   A_LISTP(forall, list);
3891   return 1;
3892 }
3893 
3894 /* This is a quick fix to move guard_forall after optimization.
3895  * guard_forall was inserting IF-THEN which was reducing
3896  * the optimization chance. guard_forall can be written
3897  * such that it will not need this fix. */
3898 static void
fix_guard_forall(int std)3899 fix_guard_forall(int std)
3900 {
3901   CTYPE *ct;
3902   int ast;
3903   int asn;
3904   int subinfo;
3905   int lhs, lhsd;
3906   int ndim, asd;
3907   int i;
3908   int nd;
3909 
3910   ast = STD_AST(std);
3911   asn = A_IFSTMTG(ast);
3912   nd = A_OPT1G(ast);
3913   ct = FT_CYCLIC(nd);
3914   lhs = A_DESTG(asn);
3915   lhsd = left_subscript_ast(lhs);
3916   asd = A_ASDG(lhsd);
3917   ndim = ASD_NDIM(asd);
3918   subinfo = comminfo.subinfo;
3919   for (i = 0; i < ndim; ++i) {
3920     ct->c_dstt[i] = SUBI_DSTT(subinfo + i);
3921     ct->c_dupl[i] = SUBI_DUPL(subinfo + i);
3922     ct->c_idx[i] = SUBI_IDX(subinfo + i);
3923   }
3924   A_OPT1P(ast, nd);
3925 }
3926 
3927 /* This routine  is to check whether forall has dependency.
3928  * If it has, it creates temp which is shape array with lhs.
3929  * For example,
3930  *              forall(i=1:N) a(i) = a(i-1)+.....
3931  * will be rewritten
3932  *              forall(i=1:N) temp(i) = a(i-1)+.....
3933  *              forall(i=1:N) a(i) = temp(i)
3934  */
3935 static void
forall_dependency_scalarize(int std,int * std1,int * std2)3936 forall_dependency_scalarize(int std, int *std1, int *std2)
3937 {
3938   int lhs, rhs;
3939   int ast, ast1, ast2;
3940   int asn;
3941   int asd;
3942   int subs[7];
3943   int i;
3944   int ndim;
3945   int sptr;
3946   int temp_ast;
3947   int newforall, newasn;
3948   int expr;
3949   int lineno;
3950   LOGICAL bIndep;
3951 
3952   ast = STD_AST(std);
3953   asn = A_IFSTMTG(ast);
3954   if (A_TYPEG(asn) != A_ASN)
3955     return;
3956   lhs = A_DESTG(asn);
3957   rhs = A_SRCG(asn);
3958   expr = A_IFEXPRG(ast);
3959 
3960   /* forall-independent */
3961   lineno = STD_LINENO(std);
3962   open_pragma(lineno);
3963   bIndep = XBIT(19, 0x100) != 0;
3964   close_pragma();
3965   if (bIndep)
3966     return;
3967 
3968   /* take conditional expr, if there is dependency */
3969   if (is_dependent(lhs, expr, ast, std, std) && A_TYPEG(lhs) != A_SUBSTR) {
3970     sptr = get_temp_forall(ast, lhs, std, std, DT_LOG, 0);
3971     temp_ast = reference_for_temp(sptr, lhs, ast);
3972     A_IFEXPRP(ast, temp_ast);
3973     newforall = mk_stmt(A_FORALL, 0);
3974     A_LISTP(newforall, A_LISTG(ast));
3975     A_IFEXPRP(newforall, 0);
3976     newasn = mk_stmt(A_ASN, 0);
3977     A_DESTP(newasn, temp_ast);
3978     A_SRCP(newasn, expr);
3979     A_IFSTMTP(newforall, newasn);
3980     *std1 = add_stmt_before(newforall, std);
3981   }
3982 
3983   if (is_dependent(lhs, rhs, ast, std, std) && A_TYPEG(lhs) != A_SUBSTR) {
3984     sptr = get_temp_forall(ast, lhs, std, std, 0, lhs);
3985     temp_ast = reference_for_temp(sptr, lhs, ast);
3986     A_DESTP(asn, temp_ast);
3987     newforall = mk_stmt(A_FORALL, 0);
3988     A_LISTP(newforall, A_LISTG(ast));
3989     A_IFEXPRP(newforall, A_IFEXPRG(ast));
3990     newasn = mk_stmt(A_ASN, 0);
3991     A_DESTP(newasn, lhs);
3992     A_SRCP(newasn, temp_ast);
3993     A_IFSTMTP(newforall, newasn);
3994     *std2 = add_stmt_after(newforall, std);
3995   }
3996 }
3997 
3998 static int
fix_mem_ast(int astmem,int ast)3999 fix_mem_ast(int astmem, int ast)
4000 {
4001 
4002   int rslt;
4003 
4004   switch (A_TYPEG(ast)) {
4005 
4006   case A_BINOP:
4007     rslt = fix_mem_ast(astmem, A_LOPG(ast));
4008     if (rslt && rslt != A_LOPG(ast))
4009       A_LOPP(ast, rslt);
4010     rslt = fix_mem_ast(astmem, A_ROPG(ast));
4011     if (rslt && rslt != A_ROPG(ast))
4012       A_ROPP(ast, rslt);
4013     break;
4014   case A_UNOP:
4015     rslt = fix_mem_ast(astmem, A_LOPG(ast));
4016     if (rslt && rslt != A_LOPG(ast))
4017       A_LOPP(ast, rslt);
4018     break;
4019   case A_LABEL:
4020   case A_ENTRY:
4021   case A_ID:
4022     return check_member(astmem, ast);
4023   case A_SUBSCR:
4024   case A_SUBSTR:
4025     rslt = fix_mem_ast(astmem, A_LOPG(ast));
4026     if (rslt && rslt != A_LOPG(ast))
4027       A_LOPP(ast, rslt);
4028     break;
4029   case A_MEM:
4030     rslt = fix_mem_ast(astmem, A_PARENTG(ast));
4031     if (rslt && rslt != A_PARENTG(ast))
4032       A_PARENTP(ast, rslt);
4033     break;
4034   }
4035   return 0;
4036 }
4037 
4038 /* This routine will perform the following canonical conversion
4039  *
4040  * forall(i=l:u:s)  a(m*i+k) = ...i...
4041  *
4042  * will be converted into
4043  *
4044  * forall(i=m*l+k:m*u+k:m*s)  a(i) = ...(i-k)/m...
4045  */
4046 /* ### rewrite this routine to handle members */
4047 static int
canonical_conversion(int ast)4048 canonical_conversion(int ast)
4049 {
4050   int list;
4051   int asn;
4052   int astli;
4053   int base, stride;
4054   int expr;
4055   int newexpr;
4056   int l, u, s;
4057   int ll, uu, ss;
4058   int triple;
4059   int asd;
4060   int ndim;
4061   int isptr;
4062   int i, k;
4063   int zero = astb.bnd.zero;
4064   int ifexpr;
4065   int subs[7];
4066   int newdest;
4067   int nd, nd1;
4068   int ip, pstd, past;
4069   LITEMF *plist;
4070   int glb, gub, st;
4071   ADSC *ad;
4072   int lhs, lhsd, sptr, dim;
4073   int align;
4074 
4075 
4076   /* Don't replace the subscript if we intend it that way */
4077   if (!XBIT(58,0x1000000) && A_CONSTBNDG(ast))
4078     return 0;
4079 
4080   list = A_LISTG(ast);
4081   ifexpr = A_IFEXPRG(ast);
4082   asn = A_IFSTMTG(ast);
4083   expr = A_SRCG(asn);
4084   lhs = A_DESTG(asn);
4085 
4086   for (lhsd = lhs; A_TYPEG(lhsd) != A_ID;) {
4087     switch (A_TYPEG(lhsd)) {
4088     case A_SUBSCR:
4089       asd = A_ASDG(lhsd);
4090       ndim = ASD_NDIM(asd);
4091 
4092       /* don't let A(V(I)), where V is distributed, that is solved earlier */
4093       for (i = 0; i < ndim; ++i) {
4094         ss = ASD_SUBS(asd, i);
4095         if (is_dist_array_in_expr(ss)) {
4096           return 0;
4097         }
4098       }
4099       lhsd = A_LOPG(lhsd);
4100       break;
4101     case A_MEM:
4102       lhsd = A_PARENTG(lhsd);
4103       break;
4104     default:
4105       interr("canonical_conversion unexpected AST type on LHS", A_TYPEG(lhsd),
4106              3);
4107       break;
4108     }
4109   }
4110   lhsd = left_subscript_ast(lhs);
4111   asd = A_ASDG(lhsd);
4112   ndim = ASD_NDIM(asd);
4113   sptr = left_array_symbol(lhs);
4114   align = ALIGNG(sptr);
4115 
4116   /* don't let A(I+J), don't let A(I,I+1), let A(I,I) */
4117   for (i = 0; i < ndim; i++) {
4118     astli = 0;
4119     search_idx(ASD_SUBS(asd, i), list, &astli, &base, &stride);
4120     if (base == 0)
4121       return 0; /* i+j */
4122     if (astli == 0 && stride == zero)
4123       continue; /* only base */
4124     if (base == zero && stride == astb.bnd.one)
4125       continue; /* a(i) */
4126     isptr = ASTLI_SPTR(astli);
4127     for (k = 0; k < ndim; ++k) {
4128       if (k != i) {
4129         if (is_name_in_expr(ASD_SUBS(asd, k), isptr)) {
4130           return 0; /* A(i+1,i)  */
4131         }
4132       }
4133     }
4134   }
4135 
4136 
4137   for (i = 0; i < ndim; i++) {
4138     subs[i] = ASD_SUBS(asd, i);
4139     astli = 0;
4140     search_idx(ASD_SUBS(asd, i), list, &astli, &base, &stride);
4141     if (base == 0)
4142       return 0; /* i+j */
4143     if (astli == 0 && stride == zero)
4144       continue; /* only base */
4145     if (base == zero && stride == astb.bnd.one)
4146       continue; /* a(i) */
4147     ast_visit(1, 1);
4148     isptr = ASTLI_SPTR(astli);
4149     /* change the lhs subscript*/
4150     subs[i] = mk_id(isptr);
4151 
4152     /* calculate (i-k)/m   */
4153     newexpr = opt_binop(OP_SUB, mk_id(isptr), base, astb.bnd.dtype);
4154     newexpr = opt_binop(OP_DIV, newexpr, stride, astb.bnd.dtype);
4155 
4156     ast_replace(mk_id(isptr), newexpr);
4157 
4158     /* change the rhs expression*/
4159     expr = ast_rewrite(expr);
4160 
4161     /* change the ifexpr expression*/
4162     ifexpr = ast_rewrite(ifexpr);
4163 
4164     /* change also pcalls */
4165     nd = A_OPT1G(ast);
4166     plist = FT_PCALL(nd);
4167     for (ip = 0; ip < FT_NPCALL(nd); ip++) {
4168       pstd = plist->item;
4169       plist = plist->next;
4170       past = STD_AST(pstd);
4171       nd1 = A_OPT1G(past);
4172       past = ast_rewrite(past);
4173       A_OPT1P(past, nd1);
4174       STD_AST(pstd) = past;
4175       A_STDP(past, pstd);
4176     }
4177 
4178     ast_unvisit();
4179 
4180     /* change the forall list */
4181     triple = ASTLI_TRIPLE(astli);
4182     l = A_LBDG(triple);
4183     fix_mem_ast(l, base);
4184     u = A_UPBDG(triple);
4185     s = A_STRIDEG(triple);
4186 
4187     ll = opt_binop(OP_MUL, stride, l, astb.bnd.dtype);
4188     ll = opt_binop(OP_ADD, ll, base, astb.bnd.dtype);
4189     uu = opt_binop(OP_MUL, stride, u, astb.bnd.dtype);
4190     uu = opt_binop(OP_ADD, uu, base, astb.bnd.dtype);
4191     if (s == 0)
4192       ss = stride;
4193     else
4194       ss = opt_binop(OP_MUL, stride, s, astb.bnd.dtype);
4195     ASTLI_TRIPLE(astli) = mk_triple(ll, uu, ss);
4196   }
4197   newdest = mk_subscr(A_LOPG(lhsd), subs, ndim, A_DTYPEG(lhsd));
4198   newdest = replace_ast_subtree(lhs, lhsd, newdest);
4199   A_DESTP(asn, newdest);
4200   A_SRCP(asn, expr);
4201   A_IFEXPRP(ast, ifexpr);
4202   A_IFSTMTP(ast, asn);
4203   return 1;
4204 }
4205 
4206 /* this will find scalar communication at ast,
4207  * It expect that std is forall std.
4208  * It does not disturb other forall communication:
4209  * For example, forall(i=1:n) a(b(1),c(i)) = 1
4210  * Here, only perform communication for b(1).
4211  */
4212 static int
scalar_communication(int ast,int std)4213 scalar_communication(int ast, int std)
4214 {
4215   int l, r, d, o;
4216   int l1, l2, l3;
4217   int a, a1;
4218   int i, nargs, argt, j;
4219   int arref;
4220   int header;
4221   int forall;
4222   int rhs_is_dist;
4223   int sptr;
4224   int asd, ndim;
4225   int subs[7];
4226   int nd, nd1, nd2;
4227   int src;
4228   int cnt;
4229 
4230   a = ast;
4231   if (!a)
4232     return a;
4233   forall = STD_AST(std);
4234   switch (A_TYPEG(ast)) {
4235   /* expressions */
4236   case A_BINOP:
4237     o = A_OPTYPEG(a);
4238     d = A_DTYPEG(a);
4239     l = scalar_communication(A_LOPG(a), std);
4240     r = scalar_communication(A_ROPG(a), std);
4241     return mk_binop(o, l, r, d);
4242   case A_UNOP:
4243     o = A_OPTYPEG(a);
4244     d = A_DTYPEG(a);
4245     l = scalar_communication(A_LOPG(a), std);
4246     return mk_unop(o, l, d);
4247   case A_CONV:
4248     d = A_DTYPEG(a);
4249     l = scalar_communication(A_LOPG(a), std);
4250     return mk_convert(l, d);
4251   case A_PAREN:
4252     d = A_DTYPEG(a);
4253     l = scalar_communication(A_LOPG(a), std);
4254     return mk_paren(l, d);
4255   case A_MEM:
4256     r = A_MEMG(a);
4257     d = A_DTYPEG(r);
4258     l = scalar_communication(A_PARENTG(a), std);
4259     return mk_member(l, r, d);
4260   case A_SUBSTR:
4261     return a;
4262   case A_INTR:
4263   case A_FUNC:
4264     nargs = A_ARGCNTG(a);
4265     argt = A_ARGSG(a);
4266     for (i = 0; i < nargs; ++i) {
4267       ARGT_ARG(argt, i) = scalar_communication(ARGT_ARG(argt, i), std);
4268     }
4269     return a;
4270   case A_CNST:
4271   case A_CMPLXC:
4272     return a;
4273   case A_ID:
4274     return a;
4275   case A_SUBSCR:
4276     if (!A_SHAPEG(a) && is_array_element_in_forall(a, std)) {
4277       nd = A_OPT1G(forall);
4278       header = FT_HEADER(nd);
4279       rhs_is_dist = FALSE;
4280       a = insert_comm_before(header, a, &rhs_is_dist, FALSE);
4281       return a;
4282     }
4283 
4284     asd = A_ASDG(a);
4285     ndim = ASD_NDIM(asd);
4286     for (i = 0; i < ndim; i++) {
4287       subs[i] = scalar_communication(ASD_SUBS(asd, i), std);
4288     }
4289     return mk_subscr(A_LOPG(a), subs, ndim, A_DTYPEG(a));
4290   case A_TRIPLE:
4291     l1 = scalar_communication(A_LBDG(a), std);
4292     l2 = scalar_communication(A_UPBDG(a), std);
4293     l3 = scalar_communication(A_STRIDEG(a), std);
4294     return mk_triple(l1, l2, l3);
4295   default:
4296     interr("scalar_communication: unknown expression", std, 2);
4297     return 0;
4298   }
4299 }
4300