1 /*
2 * Copyright (c) 1994-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18 /**
19 \file
20 \brief Fortran communications module
21 */
22
23 #include "comm.h"
24 #include "gbldefs.h"
25 #include "global.h"
26 #include "error.h"
27 #include "symtab.h"
28 #include "symutl.h"
29 #include "dtypeutl.h"
30 #include "soc.h"
31 #include "semant.h"
32 #include "ast.h"
33 #include "gramtk.h"
34 #include "extern.h"
35 #include "hpfutl.h"
36 #include "commopt.h"
37 #include "ccffinfo.h"
38 #include "dinit.h"
39 #include "direct.h"
40 #include "rte.h"
41 #include "rtlRtns.h"
42
43 struct cs_table {
44 LOGICAL is_used_lhs;
45 };
46
47 static struct cs_table cs_table;
48
49 static void comm_init(void);
50 static void transform_ptr(int std, int ast);
51 static int normalize_forall_triplet(int std, int forall);
52 static void emit_overlap(int a);
53 static int emit_permute_section(int a, int std);
54 static int eliminate_extra_idx(int lhs, int a, int forall);
55 static int emit_copy_section(int a, int std);
56 static int canonical_conversion(int ast);
57 static void forall_dependency_scalarize(int std, int *std1, int *std2);
58 static LOGICAL is_use_lhs(int a, LOGICAL, LOGICAL, int);
59 static int emit_gatherx(int a, int std, LOGICAL opt);
60 static void fix_guard_forall(int std);
61 static void emit_sum_scatterx(int);
62 static void emit_scatterx(int);
63 static void emit_scatterx_gatherx(int std, int result, int array, int mask,
64 int allocstd, int tempast0, int lhssec,
65 int comm_type);
66 static void compute_permute(int lhs, int rhs, int list, int order[7]);
67 static int put_data(int permute[7], int no);
68 static LOGICAL is_permuted(int array, int per[7], int per1[7], int *nper1);
69 static int scalar_communication(int ast, int std);
70 static int tag_call_comm(int std, int forall);
71 static void call_comm(int cstd, int fstd, int forall);
72 static void insert_call_comm(int std, int forall);
73 static void put_call_comm(int cstd, int fstd, int forall);
74 static void shape_communication(int std, int forall);
75 static void shape_comm(int cstd, int fstd, int forall);
76 static int sequentialize_mask_call(int forall, int stdnext);
77 static int sequentialize_stmt_call(int forall, int stdnext);
78 static int sequentialize_call(int cstd, int stdnext, int forall);
79 static int gen_shape_comm(int arg, int forall, int std, int nomask);
80 static int reference_for_pure_temp(int sptr, int lhs, int arg, int forall);
81 static void init_pertbl(void);
82 static void free_pertbl(void);
83 static int get_pertbl(void);
84 static int copy_section_temp_before(int sptr, int rhs, int forall);
85 static CTYPE *getcyclic(void);
86 static void init_opt_tables(void);
87 static LOGICAL is_scatter(int std);
88 static void opt_overlap(void);
89 static int insert_forall_comm(int ast);
90 static int construct_list_for_pure(int arg, int mask, int list);
91 static LOGICAL is_pure_temp_too_large(int list, int arg);
92 static int handle_pure_temp_too_large(int expr, int std);
93 static int forall_2_sec(int a, int forall);
94 static int make_sec_ast(int arr, int std, int allocstd, int sectflag);
95 static int temp_copy_section(int std, int forall, int lhs, int rhs, int dty,
96 int *allocast);
97 static int temp_gatherx(int std, int forall, int lhs, int rhs, int dty,
98 int *allocast);
99 static int gatherx_temp_before(int sptr, int rhs, int forall);
100 static int simple_reference_for_temp(int sptr, int a, int forall);
101
102 /**
103 \brief Finalize the phase and free allocated memory.
104 */
105 void
comm_fini(void)106 comm_fini(void)
107 {
108 TRANS_FREE(trans.subb);
109 trans.subb.stg_base = NULL;
110 TRANS_FREE(trans.arrb);
111 trans.subb.stg_base = NULL;
112 TRANS_FREE(trans.tdescb);
113 trans.tdescb.stg_base = NULL;
114 FREE(finfot.base);
115 finfot.base = NULL;
116 free_pertbl();
117 }
118
119 /**
120 \brief Communication analyzer entry point.
121 */
122 void
comm_analyze(void)123 comm_analyze(void)
124 {
125 int std, stdnext;
126 int ast;
127 int lhs, sptr;
128 int endmasterstd, endcriticalstd;
129 int parallel_depth;
130 int task_depth;
131 int type;
132
133 comm_init();
134 init_region();
135 parallel_depth = 0;
136 task_depth = 0;
137 for (std = STD_NEXT(0); std; std = stdnext) {
138 stdnext = STD_NEXT(std);
139 gbl.lineno = STD_LINENO(std);
140 if (STD_PURE(std))
141 continue;
142 if (STD_LOCAL(std) || pure_gbl.end_master_region != 0)
143 pure_gbl.local_mode = 1; /* don't process for DO-INDEPENDENT */
144 else
145 pure_gbl.local_mode = 0;
146 ast = STD_AST(std);
147 switch (type = A_TYPEG(ast)) {
148 case A_MP_PARALLEL:
149 ++parallel_depth;
150 /*symutl.sc = SC_PRIVATE;*/
151 set_descriptor_sc(SC_PRIVATE);
152 break;
153 case A_MP_ENDPARALLEL:
154 --parallel_depth;
155 if (parallel_depth == 0 && task_depth == 0) {
156 /*symutl.sc = SC_LOCAL;*/
157 set_descriptor_sc(SC_LOCAL);
158 }
159 break;
160 case A_MP_TASKLOOPREG:
161 case A_MP_ETASKLOOPREG:
162 break;
163 case A_MP_TASK:
164 case A_MP_TASKLOOP:
165 ++task_depth;
166 set_descriptor_sc(SC_PRIVATE);
167 break;
168 case A_MP_ENDTASK:
169 case A_MP_ETASKLOOP:
170 --task_depth;
171 if (parallel_depth == 0 && task_depth == 0) {
172 set_descriptor_sc(SC_LOCAL);
173 }
174 break;
175 default:
176 break;
177 }
178 if (type == A_FORALL) {
179 if (STD_LOCAL(std))
180 continue; /* don't process for DO-INDEPENDENT */
181 transform_forall(std, ast);
182 } else if (type == A_ICALL && A_OPTYPEG(ast) == I_PTR2_ASSIGN)
183 transform_ptr(std, ast);
184 else
185 transform_ast(std, ast);
186 check_region(std);
187 }
188 }
189
190 /**
191 \brief Keep track of STD of endcritical or endmaster statement
192 */
193 void
init_region(void)194 init_region(void)
195 {
196 pure_gbl.end_master_region = 0;
197 pure_gbl.end_critical_region = 0;
198 } /* init_region */
199
200 /**
201 \brief Check a region is valid.
202 */
203 void
check_region(int std)204 check_region(int std)
205 {
206 int ast = STD_AST(std);
207 if (A_TYPEG(ast) == A_MASTER && pure_gbl.end_master_region == 0) {
208 /* get endmaster ast */
209 int endmasterast = A_LOPG(ast);
210 pure_gbl.end_master_region = A_STDG(endmasterast);
211 if (pure_gbl.end_critical_region == 0) {
212 pure_gbl.end_critical_region = pure_gbl.end_master_region;
213 }
214 } else if (A_TYPEG(ast) == A_CRITICAL && pure_gbl.end_critical_region == 0) {
215 /* get endcritical ast */
216 int endcriticalast = A_LOPG(ast);
217 pure_gbl.end_critical_region = A_STDG(endcriticalast);
218 }
219 if (pure_gbl.end_critical_region == std) {
220 pure_gbl.end_critical_region = 0;
221 }
222 if (pure_gbl.end_master_region == std) {
223 pure_gbl.end_master_region = 0;
224 }
225 } /* check_region */
226
227 /**
228 \brief Create mask statements for conditional expression ast and insert them
229 after stdstart. Return the STD of the last statement added.
230 */
231 int
insert_mask(int ast,int stdstart)232 insert_mask(int ast, int stdstart)
233 {
234 int std;
235 int aststmt;
236
237 if (A_TYPEG(ast) == A_BINOP && A_OPTYPEG(ast) == OP_SCAND) {
238 std = insert_mask(A_LOPG(ast), stdstart);
239 std = insert_mask(A_ROPG(ast), std);
240 return std;
241 }
242 aststmt = mk_stmt(A_IFTHEN, 0);
243 A_IFEXPRP(aststmt, ast);
244 std = add_stmt_after(aststmt, stdstart);
245 return std;
246 }
247
248 /**
249 \brief Create ENDIF statements corresponding to conditional statements
250 emitted
251 for mask expression ast. Insert the ENDIFs after stdstart.
252 Return the STD of the last statement added.
253 */
254 int
insert_endmask(int ast,int stdstart)255 insert_endmask(int ast, int stdstart)
256 {
257 int std;
258 int aststmt;
259
260 if (A_TYPEG(ast) == A_BINOP && A_OPTYPEG(ast) == OP_SCAND) {
261 std = insert_endmask(A_LOPG(ast), stdstart);
262 std = insert_endmask(A_ROPG(ast), std);
263 return std;
264 }
265 aststmt = mk_stmt(A_ENDIF, 0);
266 std = add_stmt_after(aststmt, stdstart);
267 return std;
268 }
269
270 /**
271 \brief Dump compiler internal information for the communication analyzer.
272 */
273 void
report_comm(int std,int cause)274 report_comm(int std, int cause)
275 {
276 int ln;
277 int sptr;
278 static char msg8[] = "no parallelism: ";
279
280 if (!XBIT(0, 2))
281 return;
282
283 if (STD_MINFO(std))
284 return;
285
286 STD_MINFO(std) = 1;
287
288 ln = STD_LINENO(std);
289 switch (cause) {
290 case CANONICAL_CAUSE:
291 ccff_info(MSGFTN, "FTN001", 1, ln, "Forall scalarized", NULL);
292 break;
293 case INTRINSIC_CAUSE:
294 ccff_info(MSGFTN, "FTN002", 1, ln,
295 "Forall scalarized: transformational intrinsic call", NULL);
296 break;
297 case UGLYCOMM_CAUSE:
298 ccff_info(MSGFTN, "FTN003", 1, ln,
299 "Forall scalarized: complex communication", NULL);
300 break;
301 case DEPENDENCY_CAUSE:
302 ccff_info(MSGFTN, "FTN004", 1, ln, "Forall split in two: data dependence",
303 NULL);
304 break;
305 case GETSCALAR_CAUSE:
306 ccff_info(MSGFTN, "FTN005", 1, ln, "Expensive scalar communication", NULL);
307 break;
308 case COPYSCALAR_CAUSE:
309 ccff_info(MSGFTN, "FTN006", 1, ln, "Expensive scalar copy communication",
310 NULL);
311 break;
312 case COPYSECTION_CAUSE:
313 ccff_info(MSGFTN, "FTN007", 1, ln,
314 "Expensive all-to-all section copy communication", NULL);
315 break;
316 case PURECOMM_CAUSE:
317 ccff_info(MSGFTN, "FTN008", 1, ln,
318 "Communication generated: Forall pure arguments", NULL);
319 break;
320 case UGLYPURE_CAUSE:
321 ccff_info(MSGFTN, "FTN009", 1, ln,
322 "Forall scalarized: complex pure argument", NULL);
323 break;
324 case UGLYMASK_CAUSE:
325 ccff_info(MSGFTN, "FTN010", 1, ln,
326 "Forall scalarized: complex mask expression", NULL);
327 break;
328 case MANYRUNTIME_CAUSE:
329 assert(A_TYPEG(STD_AST(std)) == A_FORALL, "report_comm: forall is expected",
330 std, 2);
331 ccff_info(MSGFTN, "FTN011", 1, ln, "Too many runtime calls", NULL);
332 break;
333 }
334 }
335
336 /**
337 \brief Construct an AST to add the lower bound of dimension dim
338 for array datatype dtyp to ast, and return the new AST.
339 */
340 int
add_lbnd(int dtyp,int dim,int ast,int astmember)341 add_lbnd(int dtyp, int dim, int ast, int astmember)
342 {
343 int astBnd = ADD_LWAST(dtyp, dim);
344 int ast1;
345
346 if (!astBnd || astBnd == astb.bnd.one)
347 return ast;
348
349 ast1 = mk_binop(OP_ADD, ast, check_member(astmember, astBnd), astb.bnd.dtype);
350 ast1 = mk_binop(OP_SUB, ast1, astb.bnd.one, astb.bnd.dtype);
351 return ast1;
352 }
353
354 /**
355 \brief Construct an AST to subtract the lower bound of dimension dim
356 for array datatype dtyp to ast, and return the new AST.
357 */
358 int
sub_lbnd(int dtyp,int dim,int ast,int astmember)359 sub_lbnd(int dtyp, int dim, int ast, int astmember)
360 {
361 int astBnd = ADD_LWAST(dtyp, dim);
362 int ast1;
363
364 if (!astBnd || astBnd == astb.bnd.one)
365 return ast;
366
367 ast1 = mk_binop(OP_SUB, ast, check_member(astmember, astBnd), astb.bnd.dtype);
368 ast1 = mk_binop(OP_ADD, ast1, astb.bnd.one, astb.bnd.dtype);
369 return ast1;
370 }
371
372 /**
373 \brief Return TRUE if the bounds of array sptr should be 1-based with
374 respect to the runtime.
375 */
376 LOGICAL
normalize_bounds(int sptr)377 normalize_bounds(int sptr)
378 {
379 int aln;
380 int sptr1;
381
382 if (STYPEG(sptr) != ST_ARRAY)
383 return FALSE;
384 sptr1 = sptr;
385 return (XBIT(58, 0x22) && !POINTERG(sptr));
386 }
387
388 LOGICAL
is_same_number_of_idx(int dest,int src,int list)389 is_same_number_of_idx(int dest, int src, int list)
390 {
391 int count, count1;
392 int asd;
393 int j, ndim;
394
395 count = 0;
396 count1 = 0;
397
398 /* dest */
399 while (dest) {
400 switch (A_TYPEG(dest)) {
401 case A_ID:
402 dest = 0;
403 break;
404 case A_SUBSTR:
405 dest = A_LOPG(dest);
406 break;
407 case A_MEM:
408 dest = A_PARENTG(dest);
409 break;
410 case A_SUBSCR:
411 asd = A_ASDG(dest);
412 ndim = ASD_NDIM(asd);
413
414 for (j = 0; j < ndim; ++j) {
415 if (search_forall_var(ASD_SUBS(asd, j), list))
416 count++;
417 }
418 dest = A_LOPG(dest);
419 break;
420 default:
421 dest = 0;
422 break;
423 }
424 }
425 while (src) {
426 switch (A_TYPEG(src)) {
427 case A_ID:
428 src = 0;
429 break;
430 case A_SUBSTR:
431 src = A_LOPG(src);
432 break;
433 case A_MEM:
434 src = A_PARENTG(src);
435 break;
436 case A_SUBSCR:
437 /* src */
438 asd = A_ASDG(src);
439 ndim = ASD_NDIM(asd);
440 for (j = 0; j < ndim; ++j) {
441 if (search_forall_var(ASD_SUBS(asd, j), list))
442 count1++;
443 }
444 src = A_LOPG(src);
445 break;
446 default:
447 src = 0;
448 break;
449 }
450 }
451
452 if (count1 == count)
453 return TRUE;
454 else
455 return FALSE;
456 }
457
458 /**
459 \brief This routine finds the dimension of sptr.
460
461 It takes subscript `a(f(i),5,f(j))`. It eliminates scalar dimension.
462 It makes an ast for reference sptr: `a(f(i),5,f(j)) --> sptr(f(i),f(j))`
463
464 NOTE: This is always called after get_temp_forall(), which calls
465 mk_forall_sptr(). The subscripts are not always as simple
466 as `sptr(f(i),f(j))`, especially if the stride is not known.
467 if the stride is not +1 or -1, the subscript will be normalized.
468 */
469 int
reference_for_temp(int sptr,int a,int forall)470 reference_for_temp(int sptr, int a, int forall)
471 {
472 int subs[7];
473 int list;
474 int i, ndim, k;
475 int astnew, vector;
476
477 list = A_LISTG(forall);
478 ndim = 0;
479 vector = 0;
480 do {
481 if (A_TYPEG(a) == A_MEM) {
482 a = A_PARENTG(a);
483 } else if (A_TYPEG(a) == A_SUBSCR) {
484 int asd, adim;
485 asd = A_ASDG(a);
486 adim = ASD_NDIM(asd);
487 /* array will be referenced after communication as follows */
488 for (i = 0; i < adim; i++) {
489 int ast;
490 ast = ASD_SUBS(asd, i);
491 if (XBIT(58, 0x20000)) {
492 extern int constant_stride(int a, int *value);
493 int c, stride, lw, up;
494 if (A_TYPEG(ast) == A_TRIPLE) {
495 lw = check_member(a, A_LBDG(ast));
496 up = check_member(a, A_UPBDG(ast));
497 c = constant_stride(A_STRIDEG(ast), &stride);
498 if (flg.opt >= 2 && !XBIT(2, 0x400000)) {
499 stride = A_STRIDEG(ast);
500 if (stride == 0)
501 stride = astb.i1;
502 up = mk_binop(OP_DIV, mk_binop(OP_ADD, mk_binop(OP_SUB, up, lw,
503 stb.user.dt_int),
504 stride, stb.user.dt_int),
505 stride, stb.user.dt_int);
506 lw = astb.i1;
507 subs[ndim] = mk_triple(lw, up, 0);
508 } else if (c && stride == 1) {
509 subs[ndim] = ast;
510 } else if (c && stride == -1) {
511 subs[ndim] = ast;
512 } else {
513 stride = A_STRIDEG(ast);
514 if (stride == 0)
515 stride = astb.i1;
516 up = mk_binop(OP_DIV, mk_binop(OP_ADD, mk_binop(OP_SUB, up, lw,
517 stb.user.dt_int),
518 stride, stb.user.dt_int),
519 stride, stb.user.dt_int);
520 lw = astb.i1;
521 subs[ndim] = mk_triple(lw, up, 0);
522 }
523 ++ndim;
524 vector = 1;
525 } else if (A_SHAPEG(ast)) {
526 subs[ndim] = ast;
527 ++ndim;
528 vector = 1;
529 } else if ((k = search_forall_var(ast, list))) {
530 if (other_forall_var(ast, list, k))
531 /*f2731*/
532 subs[ndim] = ast;
533 else {
534 lw = A_LBDG(ASTLI_TRIPLE(k));
535 up = A_UPBDG(ASTLI_TRIPLE(k));
536 c = constant_stride(A_STRIDEG(ASTLI_TRIPLE(k)), &stride);
537 if (flg.opt >= 2 && !XBIT(2, 0x400000)) {
538 stride = A_STRIDEG(ASTLI_TRIPLE(k));
539 if (stride == 0)
540 stride = astb.i1;
541 subs[ndim] = mk_binop(
542 OP_DIV,
543 mk_binop(OP_ADD, mk_binop(OP_SUB, mk_id(ASTLI_SPTR(k)), lw,
544 stb.user.dt_int),
545 stride, stb.user.dt_int),
546 stride, stb.user.dt_int);
547 } else if (c && stride == 1) {
548 subs[ndim] = mk_id(ASTLI_SPTR(k));
549 } else if (c && stride == -1) {
550 subs[ndim] = mk_id(ASTLI_SPTR(k));
551 } else {
552 stride = A_STRIDEG(ASTLI_TRIPLE(k));
553 if (stride == 0)
554 stride = astb.i1;
555 subs[ndim] = mk_binop(
556 OP_DIV,
557 mk_binop(OP_ADD, mk_binop(OP_SUB, mk_id(ASTLI_SPTR(k)), lw,
558 stb.user.dt_int),
559 stride, stb.user.dt_int),
560 stride, stb.user.dt_int);
561 }
562 }
563 ++ndim;
564 }
565 } else if (A_TYPEG(ast) == A_TRIPLE || A_SHAPEG(ast)) {
566 /* include this dimension */
567 subs[ndim] = ast;
568 ++ndim;
569 vector = 1;
570 } else if (search_forall_var(ASD_SUBS(asd, i), list)) {
571 /* include this dimension */
572 subs[ndim] = ast;
573 ++ndim;
574 }
575 }
576 a = A_LOPG(a);
577 } else {
578 interr("reference_for_temp: not subscr or member", a, 3);
579 }
580 } while (A_TYPEG(a) != A_ID);
581 assert(ndim == rank_of_sym(sptr), "reference_for_temp: rank mismatched", sptr,
582 4);
583 if (vector) {
584 astnew = mk_subscr(mk_id(sptr), subs, ndim, DTYPEG(sptr));
585 } else {
586 astnew = mk_subscr(mk_id(sptr), subs, ndim, DTY(DTYPEG(sptr) + 1));
587 }
588 return astnew;
589 }
590
591 /**
592 \brief This routine a barrier statement in the barrier table.
593 */
594 int
record_barrier(LOGICAL bBefore,int astStmt,int std)595 record_barrier(LOGICAL bBefore, int astStmt, int std)
596 {
597 int i;
598 int sptr;
599 LITEMF *pl;
600
601 switch (A_TYPEG(astStmt)) {
602 case A_ASN:
603 sptr = sym_of_ast(A_DESTG(astStmt));
604 pl = clist();
605 pl->item = sptr;
606 break;
607 case A_FORALL:
608 sptr = sym_of_ast(A_DESTG(A_IFSTMTG(astStmt)));
609 pl = clist();
610 pl->item = sptr;
611 break;
612 default:
613 return 0;
614 }
615 i = get_brtbl();
616 brtbl.base[i].f1 = bBefore;
617 brtbl.base[i].f2 = std;
618 brtbl.base[i].f3 = pl;
619 return i;
620 }
621
622 /**
623 \brief This routine is to read distributed array element at forall by
624 using get scalar primitive.
625 */
626 int
emit_get_scalar(int a,int std)627 emit_get_scalar(int a, int std)
628 {
629 int lsptr, ld;
630 int astnew;
631 int list;
632 int i, nargs, argt;
633 int asd;
634 int ndim;
635 int temp, tempast;
636 int ast;
637 int commstd;
638 int nd;
639
640 if (STD_LOCAL(std))
641 return a; /* don't process for DO-INDEPENDENT */
642 asd = A_ASDG(a);
643 ndim = ASD_NDIM(asd);
644 ld = dist_ast(a);
645 if (ld == 0)
646 return a;
647 lsptr = memsym_of_ast(ld);
648 if (!DISTG(lsptr) && !ALIGNG(lsptr))
649 return a;
650
651 /* It is distributed. Create a temp to hold the value */
652 temp = sym_get_scalar(SYMNAME(lsptr), "s", DTY(DTYPEG(lsptr) + 1));
653 tempast = mk_id(temp);
654 ast = new_node(A_HGETSCLR);
655 A_SRCP(ast, a);
656 A_DESTP(ast, tempast);
657 if (DESCRG(lsptr)) {
658 int lop;
659 lop = check_member(a, mk_id(DESCRG(lsptr)));
660 A_LOPP(ast, lop);
661 }
662 commstd = add_stmt_before(ast, std);
663 A_STDP(ast, commstd);
664 return replace_ast_subtree(a, ld, tempast);
665 }
666
667 /**
668 <pre>
669 Algorithm:
670 * gather information abouth lhs array.
671 * tag communications for rhs array.
672 * optimize overlap_shift if there is same array shift.
673 * optimize copy_section
674 * convert to forall into block forall since owner computes rule distribution
675 for cyclic require complicated statement insertion.
676 * forall_gbl.s0....
677 * forall_gbl.s1 forall(i=.
678 * forall_gbl.s2 A(i)=
679 * forall_gbl.s3 endforall
680 * forall_gbl.s4, forall_gbl.s5 ...
681
682 These variables are globals.
683 * forall_gbl.s1 moves up.
684 * forall_gbl.s4 moves down.
685 </pre>
686 */
687 void
forall_opt1(int ast)688 forall_opt1(int ast)
689 {
690 int std;
691 int i, j;
692 int nd;
693
694 std = A_STDG(ast);
695 if (A_OPT1G(ast))
696 return;
697 nd = mk_ftb();
698 FT_NRT(nd) = 0;
699 FT_RTL(nd) = clist();
700 FT_NMCALL(nd) = 0;
701 FT_MCALL(nd) = clist();
702 FT_NSCALL(nd) = 0;
703 FT_SCALL(nd) = clist();
704 FT_NMGET(nd) = 0;
705 FT_MGET(nd) = clist();
706 FT_NSGET(nd) = 0;
707 FT_SGET(nd) = clist();
708 FT_NPCALL(nd) = 0;
709 FT_PCALL(nd) = clist();
710 FT_IGNORE(nd) = 0;
711 FT_SECTL(nd) = 0;
712 FT_CYCLIC(nd) = getcyclic();
713 for (i = 0; i < 7; i++) {
714 FT_NFUSE(nd, i) = 0;
715 for (j = 0; j < MAXFUSE; j++)
716 FT_FUSELP(nd, i, j) = 0;
717 }
718 FT_FUSED(nd) = 0;
719 FT_HEADER(nd) = std;
720 FT_BARR1(nd) = 0;
721 FT_BARR2(nd) = 0;
722 FT_FG(nd) = 0;
723 A_OPT1P(ast, nd);
724 }
725
726 void
transform_forall(int std,int ast)727 transform_forall(int std, int ast)
728 {
729 int asn;
730 int src, dest;
731 int asd;
732 int astnew;
733 int endforall;
734 int test1, test2;
735 int nd;
736 int lhs;
737
738 comminfo.std = std;
739 comminfo.usedstd = std;
740 comminfo.forall = ast;
741 trans.rhsbase = 0;
742
743 init_opt_tables();
744 forall_opt1(ast);
745
746 if (pure_gbl.end_critical_region != 0) {
747 scalarize(std, ast, TRUE);
748 return;
749 }
750
751 shape_communication(std, ast);
752
753 comminfo.std = std;
754 comminfo.usedstd = std;
755 comminfo.forall = ast;
756
757 asn = A_IFSTMTG(ast);
758 dest = scalar_communication(A_DESTG(asn), std);
759 src = scalar_communication(A_SRCG(asn), std);
760 A_DESTP(asn, dest);
761 A_SRCP(asn, src);
762
763 /* if the lhs is distributed, adjust the forall bounds; insert the
764 * communication for the forall statement; adjust the rhs bounds
765 */
766 comminfo.mask_phase = 0;
767 if (normalize_forall_triplet(std, ast) == 0) {
768 report_comm(std, CANONICAL_CAUSE);
769 scalarize(std, ast, TRUE);
770 return;
771 }
772
773 if (is_scatter(std))
774 return;
775
776 if (canonical_conversion(ast) == 0) {
777 report_comm(std, CANONICAL_CAUSE);
778 scalarize(std, ast, TRUE);
779 return;
780 }
781
782 asn = A_IFSTMTG(ast);
783 if (process_lhs_sub(std, ast) == 0) {
784 scalarize(std, ast, TRUE);
785 return;
786 }
787 test1 = tag_forall_comm(A_SRCG(A_IFSTMTG(ast)));
788 comminfo.mask_phase = 1;
789 if (!comminfo.unstruct && A_IFEXPRG(ast))
790 test2 = tag_forall_comm(A_IFEXPRG(ast));
791 if (!comminfo.unstruct)
792 test1 = tag_call_comm(std, ast);
793
794 if (comminfo.unstruct) {
795 report_comm(std, UGLYCOMM_CAUSE);
796 scalarize(std, ast, TRUE);
797 return;
798 }
799 if (comminfo.ugly_mask) {
800 report_comm(std, UGLYMASK_CAUSE);
801 scalarize(std, ast, TRUE);
802 return;
803 }
804 comminfo.mask_phase = 0;
805 opt_overlap();
806 astnew = insert_forall_comm(A_SRCG(asn));
807 A_SRCP(asn, astnew);
808 comminfo.mask_phase = 1;
809 if (A_IFEXPRG(ast)) {
810 astnew = insert_forall_comm(A_IFEXPRG(ast));
811 A_IFEXPRP(ast, astnew);
812 }
813 insert_call_comm(std, ast);
814
815 /* guard_forall(std); */
816 fix_guard_forall(std);
817
818 /* give information if more than 40 run-time calls generated
819 * for this forall
820 */
821 if (FT_NRT(A_OPT1G(STD_AST(std))) > 40)
822 report_comm(std, MANYRUNTIME_CAUSE);
823 }
824
825 /**
826 \brief The forall should be treated like a serial statement.
827
828 Turn it into a block-forall so the IF stuff works OK.
829 */
830 void
scalarize(int std,int forall,LOGICAL after_transformer)831 scalarize(int std, int forall, LOGICAL after_transformer)
832 {
833 int std1;
834 int std2;
835
836 std1 = 0;
837 std2 = 0;
838 forall_dependency_scalarize(std, &std1, &std2);
839 forall = STD_AST(std);
840 sequentialize(std, forall, after_transformer);
841 if (std1) {
842 forall = STD_AST(std1);
843 if (after_transformer)
844 transform_forall(std1, forall);
845 }
846
847 if (std2) {
848 forall = STD_AST(std2);
849 if (after_transformer)
850 transform_forall(std2, forall);
851 }
852 }
853
854 /**
855 \brief This is neccessary, if forall sequentialized.
856 */
857 void
un_fuse(int forall)858 un_fuse(int forall)
859 {
860 int nd, nd1;
861 int forall1;
862 int fusedstd;
863 int i;
864 int forallstd;
865
866 nd = A_OPT1G(forall);
867 for (i = 0; i < FT_NFUSE(nd, 0); i++) {
868 fusedstd = FT_FUSEDSTD(nd, 0, i);
869 forall1 = STD_AST(fusedstd);
870 nd1 = A_OPT1G(forall1);
871 FT_HEADER(nd1) = fusedstd;
872 }
873 FT_NFUSE(nd, 0) = 0;
874 forallstd = A_STDG(forall);
875 assert(forallstd, "un_fuse: it must be forall", forall, 3);
876 assert(STD_AST(forallstd) == forall, "un_fuse: it must be forall", forall, 3);
877 FT_HEADER(nd) = forallstd;
878 }
879
880 void
sequentialize(int std,int forall,LOGICAL after_transformer)881 sequentialize(int std, int forall, LOGICAL after_transformer)
882 {
883 int asn;
884 int newast;
885 int stdnext, stdnext1;
886 int n, i;
887 int triplet_list, index_var;
888 int triplet;
889 int expr;
890 int lineno;
891 LOGICAL craft_partion;
892
893 if (after_transformer)
894 un_fuse(forall);
895
896 ast_to_comment(forall);
897 asn = A_IFSTMTG(forall);
898 if (!asn) {
899 asn = mk_stmt(A_CONTINUE, 0);
900 }
901 lineno = STD_LINENO(std);
902 stdnext = STD_NEXT(std);
903 delete_stmt(A_STDG(forall));
904
905 n = 0;
906 triplet_list = A_LISTG(forall);
907 for (; triplet_list; triplet_list = ASTLI_NEXT(triplet_list)) {
908 int dovar;
909 n++;
910 index_var = ASTLI_SPTR(triplet_list);
911 triplet = ASTLI_TRIPLE(triplet_list);
912 newast = mk_stmt(A_DO, 0);
913 dovar = mk_id(index_var);
914 A_DOVARP(newast, dovar);
915 A_M1P(newast, A_LBDG(triplet));
916 A_M2P(newast, A_UPBDG(triplet));
917 A_M3P(newast, A_STRIDEG(triplet));
918 A_M4P(newast, 0);
919 stdnext = add_stmt_before(newast, stdnext);
920 STD_LINENO(stdnext) = lineno;
921 if (after_transformer)
922 transform_ast(stdnext, newast);
923 stdnext = STD_NEXT(stdnext);
924 }
925
926 if (after_transformer)
927 stdnext = sequentialize_mask_call(forall, stdnext);
928
929 expr = A_IFEXPRG(forall);
930 if (expr) {
931 stdnext = STD_PREV(stdnext);
932 stdnext1 = insert_mask(expr, stdnext);
933 stdnext1 = STD_NEXT(stdnext1);
934 if (after_transformer) {
935 int nextnext;
936 stdnext = STD_NEXT(stdnext);
937 for (; stdnext != stdnext1; stdnext = nextnext) {
938 nextnext = STD_NEXT(stdnext);
939 transform_ast(stdnext, STD_AST(stdnext));
940 }
941 }
942 stdnext = stdnext1;
943 }
944
945 if (after_transformer)
946 stdnext = sequentialize_stmt_call(forall, stdnext);
947
948 stdnext = add_stmt_before(asn, stdnext);
949 stdnext1 = STD_NEXT(stdnext);
950 STD_LINENO(stdnext) = lineno;
951 if (after_transformer)
952 transform_ast(stdnext, asn);
953 stdnext = stdnext1;
954
955 if (expr) {
956 stdnext = insert_endmask(expr, STD_PREV(stdnext));
957 stdnext = STD_NEXT(stdnext);
958 }
959
960 for (i = 0; i < n; i++) {
961 newast = mk_stmt(A_ENDDO, 0);
962 stdnext = add_stmt_before(newast, stdnext);
963 STD_LINENO(stdnext) = lineno;
964 stdnext = STD_NEXT(stdnext);
965 }
966 }
967
968 /**
969 \brief Initialize the communication analyzer phase.
970 */
971 static void
comm_init(void)972 comm_init(void)
973 {
974 TRANS_ALLOC(trans.subb, SUBINFO, 1000);
975 TRANS_ALLOC(trans.arrb, ARREF, 100);
976 TRANS_ALLOC(trans.tdescb, TDESC, 50);
977 init_pertbl();
978 init_brtbl();
979 }
980
981 static LOGICAL
is_scatter(int std)982 is_scatter(int std)
983 {
984 if (!scatter_class(std))
985 return FALSE;
986 if (!comminfo.scat.base && !comminfo.scat.array_simple)
987 return FALSE;
988 emit_sum_scatterx(std);
989 emit_scatterx(std);
990 return TRUE;
991 }
992
993 /**
994 \brief Like reference_for_temp(), this routine finds the dimension of sptr.
995
996 It takes subscript `a(f(i),5,f(j))`. It eliminates scalar dimensions.
997 It makes an ast to reference sptr: `a(f(i),5,f(j)) --> sptr(i,j)`
998 */
999 static int
simple_reference_for_temp(int sptr,int a,int forall)1000 simple_reference_for_temp(int sptr, int a, int forall)
1001 {
1002 int subs[7];
1003 int list;
1004 int i, ndim, k;
1005 int astnew;
1006
1007 list = A_LISTG(forall);
1008 ndim = 0;
1009 do {
1010 if (A_TYPEG(a) == A_MEM) {
1011 a = A_PARENTG(a);
1012 } else if (A_TYPEG(a) == A_SUBSCR) {
1013 int asd, adim;
1014 asd = A_ASDG(a);
1015 adim = ASD_NDIM(asd);
1016 /* array will be referenced after communication as follows */
1017 for (i = 0; i < adim; i++) {
1018 int ast;
1019 ast = ASD_SUBS(asd, i);
1020 if (XBIT(58, 0x20000)) {
1021 if (A_TYPEG(ast) == A_TRIPLE) {
1022 subs[ndim] = ast;
1023 ++ndim;
1024 } else if ((k = search_forall_var(ast, list))) {
1025 subs[ndim] = mk_id(ASTLI_SPTR(k));
1026 ++ndim;
1027 } else if (A_SHAPEG(ast)) {
1028 subs[ndim] = ast;
1029 ++ndim;
1030 }
1031 } else if ((k = search_forall_var(ast, list))) {
1032 subs[ndim] = mk_id(ASTLI_SPTR(k));
1033 ++ndim;
1034 } else if (A_TYPEG(ast) == A_TRIPLE || A_SHAPEG(ast)) {
1035 /* include this dimension */
1036 subs[ndim] = ast;
1037 ++ndim;
1038 }
1039 }
1040 a = A_LOPG(a);
1041 } else {
1042 interr("simple_reference_for_temp: not subscr or member", a, 3);
1043 }
1044 } while (A_TYPEG(a) != A_ID);
1045 assert(ndim == rank_of_sym(sptr),
1046 "simple_reference_for_temp: rank mismatched", sptr, 4);
1047 astnew = mk_subscr(mk_id(sptr), subs, ndim, DTY(DTYPEG(sptr) + 1));
1048 return astnew;
1049 }
1050
1051 static int
temp_gatherx(int std,int forall,int lhs,int rhs,int dty,int * allocast)1052 temp_gatherx(int std, int forall, int lhs, int rhs, int dty, int *allocast)
1053 {
1054 int sptr;
1055 int subscr[7];
1056 int ast;
1057 int nd;
1058 int astnew;
1059 int header;
1060
1061 nd = A_OPT1G(forall);
1062 header = FT_HEADER(nd);
1063 sptr = mk_forall_sptr_gatherx(forall, lhs, rhs, subscr, dty);
1064
1065 astnew =
1066 mk_subscr(mk_id(sptr), subscr, rank_of_sym(sptr), DTY(DTYPEG(sptr) + 1));
1067 ast = new_node(A_HALLOBNDS);
1068 A_LOPP(ast, astnew);
1069 nd = mk_ftb();
1070 FT_STD(nd) = std;
1071 FT_FORALL(nd) = forall;
1072 FT_ALLOC_SPTR(nd) = sptr;
1073 FT_ALLOC_FREE(nd) = header;
1074 FT_ALLOC_SAME(nd) = 0;
1075 FT_ALLOC_REUSE(nd) = 0;
1076 FT_ALLOC_USED(nd) = 0;
1077 FT_ALLOC_OUT(nd) = sptr;
1078 A_OPT1P(ast, nd);
1079 *allocast = ast;
1080 return sptr;
1081 }
1082
1083 static int
temp_copy_section(int std,int forall,int lhs,int rhs,int dty,int * allocast)1084 temp_copy_section(int std, int forall, int lhs, int rhs, int dty, int *allocast)
1085 {
1086 int sptr;
1087 int subscr[7];
1088 int ast;
1089 int nd;
1090 int astnew;
1091 int header;
1092
1093 nd = A_OPT1G(forall);
1094 header = FT_HEADER(nd);
1095 sptr = mk_forall_sptr_copy_section(forall, lhs, rhs, subscr, dty);
1096
1097 astnew =
1098 mk_subscr(mk_id(sptr), subscr, rank_of_sym(sptr), DTY(DTYPEG(sptr) + 1));
1099 ast = new_node(A_HALLOBNDS);
1100 A_LOPP(ast, astnew);
1101 nd = mk_ftb();
1102 FT_STD(nd) = std;
1103 FT_FORALL(nd) = forall;
1104 FT_ALLOC_SPTR(nd) = sptr;
1105 FT_ALLOC_FREE(nd) = header;
1106 FT_ALLOC_SAME(nd) = 0;
1107 FT_ALLOC_REUSE(nd) = 0;
1108 FT_ALLOC_USED(nd) = 0;
1109 FT_ALLOC_OUT(nd) = sptr;
1110 A_OPT1P(ast, nd);
1111 *allocast = ast;
1112 return sptr;
1113 }
1114
1115 /**
1116 \brief Just like copy_section_temp_before() except it does not eliminate
1117 scalar dimension.
1118
1119 This means that it makes a new array with sptr by using subscript of rhs.
1120 */
1121 static int
gatherx_temp_before(int sptr,int rhs,int forall)1122 gatherx_temp_before(int sptr, int rhs, int forall)
1123 {
1124 int subs[7];
1125 int k, j;
1126 int asd;
1127 int ndim;
1128 int astnew;
1129 int astli;
1130 int nidx;
1131 int list;
1132
1133 asd = A_ASDG(rhs);
1134 ndim = ASD_NDIM(asd);
1135 list = A_LISTG(forall);
1136
1137 j = 0;
1138 /* array will be referenced after communication as follows */
1139 for (k = 0; k < ndim; ++k) {
1140 astli = 0;
1141 nidx = 0;
1142 search_forall_idx(ASD_SUBS(asd, k), list, &astli, &nidx);
1143 if (nidx == 1 && astli) {
1144 /* include this dimension */
1145 subs[j] = mk_id(ASTLI_SPTR(astli));
1146 j++;
1147 } else if (nidx == 0 && astli == 0) {
1148 /* include scalar dimension too */
1149 subs[j] = ASD_SUBS(asd, k);
1150 j++;
1151 }
1152 }
1153 assert(j == rank_of_sym(sptr), "gatherx_temp_before: rank mismatched", sptr,
1154 4);
1155 astnew = mk_subscr(mk_id(sptr), subs, j, DTY(DTYPEG(sptr) + 1));
1156 return astnew;
1157 }
1158
1159 static int
make_sec_ast(int arr,int std,int allocstd,int sectflag)1160 make_sec_ast(int arr, int std, int allocstd, int sectflag)
1161 {
1162 int asn;
1163 int ast;
1164 int nd;
1165 int sec, secast;
1166 int sectstd;
1167 int forall;
1168 int sptr;
1169 int header;
1170 int bogus;
1171 int shape;
1172 int rank;
1173
1174 forall = STD_AST(std);
1175 nd = A_OPT1G(forall);
1176 header = FT_HEADER(nd);
1177
1178 asn = mk_stmt(A_ASN, astb.bnd.dtype);
1179 ast = new_node(A_HSECT);
1180 sptr = sptr_of_subscript(arr);
1181 A_LOPP(ast, arr);
1182 nd = mk_ftb();
1183 FT_STD(nd) = std;
1184 FT_FORALL(nd) = forall;
1185 FT_SECT_ARR(nd) = arr;
1186 FT_SECT_SPTR(nd) = sptr;
1187 FT_SECT_ALLOC(nd) = allocstd;
1188 FT_SECT_FREE(nd) = header;
1189 FT_SECT_FLAG(nd) = sectflag;
1190 bogus = getbit(sectflag, 8);
1191 shape = A_SHAPEG(arr);
1192 assert(shape, "make_sec_ast: ast has no shape", arr, 4);
1193 rank = SHD_NDIM(shape);
1194 if (is_whole_array(arr) && !bogus) {
1195 DESCUSEDP(sptr, 1);
1196 sec = DESCRG(sptr);
1197 secast = check_member(arr, mk_id(sec));
1198 } else {
1199 sec = sym_get_sdescr(sptr, rank); /* ZB */
1200 secast = mk_id(sec);
1201 }
1202 FT_SECT_SAME(nd) = 0;
1203 FT_SECT_REUSE(nd) = 0;
1204 FT_SECT_OUT(nd) = sec;
1205 A_OPT1P(ast, nd);
1206
1207 A_DESTP(asn, secast);
1208 A_SRCP(asn, ast);
1209
1210 sectstd = add_stmt_before(asn, header);
1211 A_STDP(asn, sectstd);
1212 nd = A_OPT1G(forall);
1213 plist(FT_RTL(nd), sectstd);
1214 FT_NRT(nd)++;
1215
1216 return sectstd;
1217 }
1218
1219 /**
1220 \brief This routine takes an array in a forall statement with its subinfo
1221 and replaces all forall indexes.
1222
1223 E.g., `forall(i=1:10:2) a(i+1)` will become `a(2:11:2)`.
1224
1225 Note that this assumes each forall index appears in array subscripts.
1226 If not, something is wrong in the communication detection algorithm.
1227 */
1228 static int
forall_2_sec(int a,int forall)1229 forall_2_sec(int a, int forall)
1230 {
1231 int list;
1232 int ndim;
1233 int i;
1234 int j;
1235 int asd;
1236 int sub_expr;
1237 int triple;
1238 int l, u, s;
1239 int t1, t2, t3;
1240 int subs[7];
1241 int sptr;
1242 int astli;
1243 int base;
1244 int stride;
1245 int shape;
1246 int nd;
1247 int nidx;
1248 int changed;
1249
1250 assert(A_TYPEG(a) == A_SUBSCR, "forall_2_sec: not SUBSCR", a, 4);
1251 list = A_LISTG(forall);
1252 asd = A_ASDG(a);
1253 sptr = sptr_of_subscript(a);
1254 ndim = ASD_NDIM(asd);
1255 shape = 0;
1256 if (A_ARRASNG(forall)) {
1257 nd = get_finfo(forall, a);
1258 if (nd)
1259 shape = FINFO_SHAPE(nd);
1260 }
1261
1262 /* If it was an array assignment, use the original section info */
1263 if (A_ARRASNG(forall) && shape) {
1264 j = 0;
1265 for (i = 0; i < ndim; i++) {
1266 sub_expr = ASD_SUBS(asd, i);
1267 astli = 0;
1268 nidx = 0;
1269 search_forall_idx(sub_expr, list, &astli, &nidx);
1270 if (nidx == 1) {
1271 t1 = check_member(a, SHD_LWB(shape, j));
1272 t2 = check_member(a, SHD_UPB(shape, j));
1273 t3 = check_member(a, SHD_STRIDE(shape, j));
1274 j++;
1275 subs[i] = mk_triple(t1, t2, t3);
1276 } else
1277 subs[i] = ASD_SUBS(asd, i);
1278 }
1279 assert(j == SHD_NDIM(shape), "forall_2_sec: something is wrong", a, 4);
1280 return mk_subscr(A_LOPG(a), subs, ndim, DTYPEG(sptr));
1281 }
1282 /* If it was a forall, calculate the section info */
1283 changed = 0;
1284 for (i = 0; i < ndim; i++) {
1285 sub_expr = ASD_SUBS(asd, i);
1286 astli = 0;
1287 search_idx(sub_expr, list, &astli, &base, &stride);
1288 assert(base, "forall_2_sec: something is wrong", a, 4);
1289 if (astli) {
1290 triple = ASTLI_TRIPLE(astli);
1291 l = A_LBDG(triple);
1292 u = A_UPBDG(triple);
1293 s = A_STRIDEG(triple);
1294 t1 = replace_expr(sub_expr, ASTLI_SPTR(astli), l, 1);
1295 t2 = replace_expr(sub_expr, ASTLI_SPTR(astli), u, 1);
1296 if (s == 0)
1297 s = astb.bnd.one;
1298 t3 = opt_binop(OP_MUL, s, stride, astb.bnd.dtype);
1299 subs[i] = mk_triple(t1, t2, t3);
1300 changed = 1;
1301 } else
1302 subs[i] = ASD_SUBS(asd, i);
1303 }
1304 if (changed)
1305 return mk_subscr(A_LOPG(a), subs, ndim, DTYPEG(sptr));
1306 else
1307 return a;
1308 }
1309
1310 /* give a%b(1:n)%c, return pointer to a%b%c in 'pnewast',
1311 * pointer to a%b(1:n) in 'psectast', pointer to b in 'psptr'. */
1312 static void
remove_section(int ast,int * pnewast,int * psectast,int * psptr,int * panydist,int * pnontrivial)1313 remove_section(int ast, int *pnewast, int *psectast, int *psptr, int *panydist,
1314 int *pnontrivial)
1315 {
1316 int lop, sptr = 0;
1317 switch (A_TYPEG(ast)) {
1318 case A_SUBSTR:
1319 remove_section(A_LOPG(ast), pnewast, psectast, psptr, panydist,
1320 pnontrivial);
1321 *pnewast = mk_substr(*pnewast, A_LEFTG(ast), A_RIGHTG(ast), A_DTYPEG(ast));
1322 break;
1323 case A_INTR:
1324 *pnewast = ast;
1325 *psectast = 0;
1326 *psptr = 0;
1327 break;
1328 case A_ID:
1329 sptr = A_SPTRG(ast);
1330 *psptr = sptr;
1331 *psectast = ast;
1332 *pnewast = ast;
1333 break;
1334 case A_MEM:
1335 lop = A_PARENTG(ast);
1336 remove_section(lop, pnewast, psectast, psptr, panydist, pnontrivial);
1337 *pnewast = mk_member(*pnewast, A_MEMG(ast), A_DTYPEG(ast));
1338 sptr = A_SPTRG(A_MEMG(ast));
1339 if (A_SHAPEG(lop) != 0) {
1340 /* psectast, psptr already set by parent */
1341 *pnontrivial = 1;
1342 } else {
1343 *psectast = ast;
1344 *psptr = sptr;
1345 }
1346 break;
1347 case A_SUBSCR:
1348 lop = A_LOPG(ast);
1349 if (A_TYPEG(lop) == A_ID) {
1350 sptr = A_SPTRG(lop);
1351 } else if (A_TYPEG(lop) == A_MEM) {
1352 sptr = A_SPTRG(A_MEMG(lop));
1353 }
1354 remove_section(lop, pnewast, psectast, psptr, panydist, pnontrivial);
1355 if (A_SHAPEG(ast) == 0) {
1356 *pnewast = mk_subscr_copy(*pnewast, A_ASDG(ast), A_DTYPEG(ast));
1357 *psectast = ast;
1358 *psptr = sptr;
1359 } else if (A_TYPEG(lop) == A_ID ||
1360 (A_TYPEG(lop) == A_MEM && A_SHAPEG(A_PARENTG(lop)) == 0)) {
1361 /* if the 'lop' is an ID, or
1362 * if the 'lop' is an member whose parent has no shape,
1363 * shape comes from this subscript */
1364 *psectast = ast;
1365 *psptr = sptr;
1366 } else {
1367 /* section comes from A_MEM parent; psectast, psptr already set */
1368 *pnewast = mk_subscr_copy(*pnewast, A_ASDG(ast), A_DTYPEG(ast));
1369 *pnontrivial = 1;
1370 }
1371 break;
1372 default:
1373 *pnewast = 0;
1374 *psectast = 0;
1375 *psptr = 0;
1376 break;
1377 }
1378 if (sptr && ALIGNG(sptr))
1379 *panydist = 1;
1380 } /* remove_section */
1381
1382 /* pv => ar
1383 * pv => ar(lower:upper:stride,...)
1384 * call pghpf_ptr_assign(pv, pv$sdsc, ar, ar$d, sectflag)
1385 * pv: base.
1386 * pv$sdsc: pv's (new) static descriptor
1387 * ar: ar's base address (ar or ar(ar$o))
1388 * ar$d: ar's (old) descriptor
1389 * sectflag: integer, 0 if whole array, 1 if section
1390 */
1391 static void
transform_ptr(int std,int ast)1392 transform_ptr(int std, int ast)
1393 {
1394 int ast1;
1395 int argt, nargs;
1396 int newargt;
1397 int src, dest, newsrc, sectast, src_sptr, anydist;
1398 int dest_sptr, nontrivial;
1399 int array_desc;
1400 int func;
1401 LOGICAL is_cyclic;
1402 int align, section;
1403 int ndim;
1404 int i;
1405 int ptr_reshape_dest = 0;
1406 int dtype;
1407
1408 assert(A_TYPEG(ast) == A_ICALL && A_OPTYPEG(ast) == I_PTR2_ASSIGN,
1409 "transform_ptr: something is wrong", 2, ast);
1410 NODESCP(find_pointer_variable(A_LOPG(ast)), 1);
1411 argt = A_ARGSG(ast);
1412 nargs = A_ARGCNTG(ast);
1413 assert(nargs == 2, "transform_ptr: something is wrong", 2, ast);
1414 src = ARGT_ARG(argt, 1);
1415 dest = ARGT_ARG(argt, 0);
1416
1417 anydist = 0;
1418 nontrivial = 0;
1419 remove_section(src, &newsrc, §ast, &src_sptr, &anydist, &nontrivial);
1420
1421 /* sectast points to subtree with A_SHAPE() != 0.
1422 * src_sptr is the section sptr */
1423 again:
1424 if (A_TYPEG(dest) == A_ID) {
1425 dest_sptr = A_SPTRG(dest);
1426 } else if (A_TYPEG(dest) == A_MEM) {
1427 dest_sptr = A_SPTRG(A_MEMG(dest));
1428 } else if (A_TYPEG(dest) == A_SUBSCR) { /* ptr reshape */
1429 ptr_reshape_dest = dest;
1430 dest = A_LOPG(dest);
1431 goto again;
1432 } else
1433 assert(0, "transform_ptr: bad pointer assignment target", ast, 3);
1434
1435 /* don't let scalar pointer point to distributed array */
1436 if (DTY(DTYPEG(dest_sptr)) != TY_ARRAY && DTY(DTYPEG(src_sptr)) == TY_ARRAY &&
1437 anydist)
1438 error(155, 4, STD_LINENO(std), SYMNAME(dest_sptr),
1439 "- scalar POINTER associated with distributed object is unsupported");
1440
1441 DESCUSEDP(src_sptr, 1);
1442 DESCUSEDP(dest_sptr, 1);
1443 if (!POINTERG(dest_sptr))
1444 error(155, 3, STD_LINENO(std), "must be POINTER", SYMNAME(dest_sptr));
1445
1446 array_desc = 0;
1447 section = 0;
1448 dtype = DDTG(DTYPEG(dest_sptr));
1449 if (DTY(dtype) == TY_PTR && DTY(DTY(dtype + 1)) == TY_PROC &&
1450 STYPEG(src_sptr) == ST_PROC) {
1451 /* No array descriptor for procedure name target in a
1452 * procedure pointer assignment.
1453 */
1454 } else if (ptr_reshape_dest && bnds_remap_list(ptr_reshape_dest) &&
1455 simply_contiguous(src)) {
1456 emit_alnd_secd(dest_sptr, dest, TRUE, std, ptr_reshape_dest);
1457 } else if (A_TYPEG(sectast) == A_SUBSCR && A_SHAPEG(sectast) != 0) {
1458 int d;
1459 array_desc = check_member(dest, mk_id(SDSCG(dest_sptr)));
1460 d = make_sec_from_ast(sectast, std, std, array_desc, 0);
1461 /* if this was the whole array, we use the descriptor
1462 * of the source, not target */
1463 if (d == DESCRG(src_sptr)) {
1464 array_desc = check_member(sectast, mk_id(d));
1465 }
1466 section = 1;
1467 } else if (A_TYPEG(src) == A_MEM && A_SHAPEG(A_PARENTG(src))) {
1468 section = 1;
1469 array_desc = DESCRG(src_sptr);
1470 array_desc = check_member(sectast, mk_id(array_desc));
1471 } else {
1472 if (POINTERG(src_sptr) && A_SHAPEG(sectast)) {
1473 array_desc = SDSCG(src_sptr); /* section descriptor */
1474 array_desc = check_member(sectast, mk_id(array_desc));
1475 } else if (DTY(DTYPEG(src_sptr)) == TY_ARRAY && A_SHAPEG(sectast)) {
1476 array_desc = DESCRG(src_sptr);
1477 array_desc = check_member(sectast, mk_id(array_desc));
1478 } else {
1479 array_desc = 0;
1480 }
1481 }
1482
1483 nargs = nontrivial ? 7 : 5;
1484 if (A_TYPEG(ptr_reshape_dest) == A_SUBSCR) {
1485 /* ptr reshape
1486 * compute number of additional args
1487 */
1488 int shd, nd, asd, i, sub;
1489
1490 if (ptr_reshape_dest && bnds_remap_list(ptr_reshape_dest) &&
1491 simply_contiguous(src)) {
1492 newsrc = first_element(src);
1493 }
1494 shd = A_SHAPEG(ptr_reshape_dest);
1495 nd = SHD_NDIM(shd);
1496 nargs = 8; /* num dimensions */
1497 asd = A_ASDG(ptr_reshape_dest);
1498 for (i = 0; i < nd; ++i) {
1499 sub = ASD_SUBS(asd, i);
1500 if (A_LBDG(sub))
1501 ++nargs; /* lowerbound */
1502 if (A_UPBDG(sub))
1503 ++nargs; /* upperbound */
1504 }
1505 }
1506 newargt = mk_argt(nargs);
1507 ARGT_ARG(newargt, 0) = ARGT_ARG(argt, 0);
1508 /* this will need some changes when dest_sptr is a derived type member */
1509 if ((STYPEG(dest_sptr) == ST_VAR || STYPEG(dest_sptr) == ST_ARRAY) &&
1510 DSCASTG(dest_sptr)) {
1511 ARGT_ARG(newargt, 1) = DSCASTG(dest_sptr);
1512 } else {
1513 SPTR sdsc = SDSCG(dest_sptr);
1514 if (sdsc) {
1515 ARGT_ARG(newargt, 1) = check_member(dest, mk_id(sdsc));
1516 } else {
1517 ARGT_ARG(newargt, 1) = astb.bnd.zero;
1518 }
1519 }
1520 ARGT_ARG(newargt, 2) = newsrc;
1521 if (array_desc)
1522 ARGT_ARG(newargt, 3) = array_desc;
1523 else
1524 ARGT_ARG(newargt, 3) =
1525 mk_isz_cval(dtype_to_arg(DTYPEG(dest_sptr)), astb.bnd.dtype);
1526
1527 /* section flag argument */
1528 if (!section)
1529 ARGT_ARG(newargt, 4) = astb.bnd.zero;
1530 else
1531 ARGT_ARG(newargt, 4) = astb.bnd.one;
1532
1533 if (nontrivial) {
1534 /* add datatype argument */
1535 ARGT_ARG(newargt, 5) =
1536 mk_isz_cval(size_of(DDTG(DTYPEG(dest_sptr))), astb.bnd.dtype);
1537 ARGT_ARG(newargt, 6) =
1538 mk_isz_cval(ty_to_lib[DTYG(DTYPEG(dest_sptr))], astb.bnd.dtype);
1539 }
1540
1541 if (A_TYPEG(ptr_reshape_dest) == A_SUBSCR) {
1542 /* ptr reshape
1543 * generate additional args
1544 */
1545 int shd, nd, asd, i, sub, val[4] = {0, 0, 0, 0}, tmp, ast, flag;
1546 int lbast, ubast, argcnt = 7;
1547
1548 if (!nontrivial) {
1549 ARGT_ARG(newargt, 5) = astb.bnd.zero;
1550 ARGT_ARG(newargt, 6) = astb.bnd.zero;
1551 }
1552 shd = A_SHAPEG(ptr_reshape_dest);
1553 nd = SHD_NDIM(shd);
1554 val[1] = nd;
1555 tmp = getcon(val, DT_INT4);
1556 ARGT_ARG(newargt, argcnt++) = mk_cnst(tmp); /* num dimensions */
1557 asd = A_ASDG(ptr_reshape_dest);
1558 for (i = 0; i < nd; ++i) {
1559 sub = ASD_SUBS(asd, i);
1560 lbast = A_LBDG(sub);
1561 ubast = A_UPBDG(sub);
1562 if (lbast) {
1563 ARGT_ARG(newargt, argcnt++) = lbast; /* lowerbound */
1564 }
1565 if (ubast) {
1566 ARGT_ARG(newargt, argcnt++) = ubast; /* upperbound */
1567 }
1568 }
1569 }
1570 A_ARGCNTP(ast, nargs);
1571 A_ARGSP(ast, newargt);
1572 }
1573
1574 static int
insert_forall_comm(int ast)1575 insert_forall_comm(int ast)
1576 {
1577 /* go through and add the communication & rewrite the AST */
1578 int std;
1579 int l, r, d, o;
1580 int l1, l2, l3;
1581 int a, a1;
1582 int i, nargs, argt, j;
1583 int arref;
1584 int header;
1585 int forall;
1586 int rhs_is_dist;
1587 int sptr;
1588 int asd, ndim;
1589 int subs[7];
1590 int nd, nd1, nd2;
1591 int src;
1592 int cnt;
1593 int commstd, commasn, comm;
1594 int lhs;
1595 int newast;
1596
1597 a = ast;
1598 if (!a)
1599 return a;
1600 std = comminfo.std;
1601 forall = STD_AST(std);
1602 switch (A_TYPEG(ast)) {
1603 /* expressions */
1604 case A_BINOP:
1605 o = A_OPTYPEG(a);
1606 d = A_DTYPEG(a);
1607 l = insert_forall_comm(A_LOPG(a));
1608 r = insert_forall_comm(A_ROPG(a));
1609 return mk_binop(o, l, r, d);
1610 case A_UNOP:
1611 o = A_OPTYPEG(a);
1612 d = A_DTYPEG(a);
1613 l = insert_forall_comm(A_LOPG(a));
1614 return mk_unop(o, l, d);
1615 case A_CONV:
1616 d = A_DTYPEG(a);
1617 l = insert_forall_comm(A_LOPG(a));
1618 return mk_convert(l, d);
1619 case A_PAREN:
1620 d = A_DTYPEG(a);
1621 l = insert_forall_comm(A_LOPG(a));
1622 return mk_paren(l, d);
1623 case A_MEM:
1624 r = A_MEMG(a);
1625 d = A_DTYPEG(r);
1626 l = insert_forall_comm(A_PARENTG(a) /*, forall, std*/);
1627 return mk_member(l, r, d);
1628 case A_SUBSTR:
1629 return a;
1630 case A_INTR:
1631 case A_FUNC:
1632 nargs = A_ARGCNTG(a);
1633 argt = A_ARGSG(a);
1634 for (i = 0; i < nargs; ++i) {
1635 ARGT_ARG(argt, i) = insert_forall_comm(ARGT_ARG(argt, i));
1636 }
1637 /* remove cshift and eoshift, since they become overlap comm */
1638 if (A_OPTYPEG(a) == I_CSHIFT || A_OPTYPEG(a) == I_EOSHIFT) {
1639 src = ARGT_ARG(argt, 0);
1640 nd = A_OPT1G(comminfo.forall);
1641 cnt = FT_NRT(nd) - 2;
1642 commstd = glist(FT_RTL(nd), cnt);
1643 commasn = STD_AST(commstd);
1644 comm = A_SRCG(commasn);
1645 assert(A_TYPEG(comm) == A_HOVLPSHIFT,
1646 "insert_forall_comm: CSHIFT/EOSHIFT must be overlap", a, 2);
1647 nd2 = A_OPT1G(comm);
1648 FT_SHIFT_TYPE(nd2) = A_OPTYPEG(a);
1649 if (A_OPTYPEG(a) == I_EOSHIFT)
1650 FT_SHIFT_BOUNDARY(nd2) = ARGT_ARG(argt, 2);
1651 return src;
1652 }
1653 return a;
1654 case A_CNST:
1655 case A_CMPLXC:
1656 return a;
1657 case A_ID:
1658 return a;
1659 case A_SUBSCR:
1660 if (A_SHAPEG(a))
1661 return a;
1662 sptr = sptr_of_subscript(a);
1663 if (!ALIGNG(sptr)) {
1664 int parent;
1665 parent = A_LOPG(a);
1666 asd = A_ASDG(a);
1667 ndim = ASD_NDIM(asd);
1668 for (i = 0; i < ndim; i++) {
1669 subs[i] = insert_forall_comm(ASD_SUBS(asd, i));
1670 }
1671 parent = insert_forall_comm(parent);
1672 return mk_subscr(parent, subs, ndim, A_DTYPEG(a));
1673 }
1674
1675 if (!A_SHAPEG(a) && is_array_element_in_forall(a, std)) {
1676 nd = A_OPT1G(forall);
1677 header = FT_HEADER(nd);
1678 /* a = emit_get_scalar(a, header); */
1679 rhs_is_dist = FALSE;
1680 a = insert_comm_before(header, a, &rhs_is_dist, FALSE);
1681 return a;
1682 }
1683 /* don't generate communication iff lhs == rhs */
1684 lhs = A_DESTG(A_IFSTMTG(forall));
1685 if (lhs == a)
1686 return a;
1687
1688 arref = A_RFPTRG(a);
1689
1690 switch (ARREF_CLASS(arref)) {
1691 case NO_COMM:
1692 break;
1693 case OVERLAP:
1694 emit_overlap(a);
1695 break;
1696 case COPY_SECTION:
1697 a = emit_copy_section(a, std);
1698 break;
1699 case GATHER:
1700 a = emit_gatherx(a, std, FALSE);
1701 break;
1702 case IRREGULAR:
1703 /* a = emit_irregular(a, std);*/
1704 break;
1705 default:
1706 interr("insert_forall_comm: unknown comm tag", std, 2);
1707 return 0;
1708 }
1709
1710 return a;
1711
1712 default:
1713 interr("insert_forall_comm: unknown expression", std, 2);
1714 return 0;
1715 }
1716 }
1717
1718 static void
init_opt_tables(void)1719 init_opt_tables(void)
1720 {
1721 cs_table.is_used_lhs = FALSE;
1722 }
1723
1724 /* return TRUE if the LHS variable can be used for this RHS communication
1725 * target */
1726 static LOGICAL
is_use_lhs(int a,LOGICAL sameidx,LOGICAL independent,int std)1727 is_use_lhs(int a, LOGICAL sameidx, LOGICAL independent, int std)
1728 {
1729 int lhs;
1730 int sptr, sptr_lhs;
1731 int list;
1732 int src;
1733 int aa, nextaa, alhs, nextalhs;
1734
1735 if (cs_table.is_used_lhs)
1736 return FALSE;
1737 if (A_IFEXPRG(comminfo.forall))
1738 return FALSE;
1739 lhs = comminfo.sub;
1740 list = A_LISTG(comminfo.forall);
1741 src = A_SRCG(A_IFSTMTG(comminfo.forall));
1742 if (DTY(A_DTYPEG(a)) != DTY(A_DTYPEG(lhs)))
1743 return FALSE;
1744 if (sameidx && !is_same_number_of_idx(lhs, a, list))
1745 return FALSE;
1746 if (!independent && expr_dependent(a, lhs, std, std))
1747 return FALSE;
1748
1749 cs_table.is_used_lhs = TRUE;
1750 return TRUE;
1751 } /* is_use_lhs */
1752
1753 /* this is used to decide if section created for forall
1754 * to check whether index is out of bounds .
1755 * This does not occur iff:
1756 * 1-) forall from array-assignment or where statement
1757 * 2-) forall without mask
1758 */
1759 static LOGICAL
is_bogus_forall(int forall)1760 is_bogus_forall(int forall)
1761 {
1762 int mask;
1763
1764 if (A_ARRASNG(forall))
1765 return FALSE;
1766 mask = A_IFEXPRG(forall);
1767 if (!mask)
1768 return FALSE;
1769 return TRUE;
1770 }
1771
1772 static int
emit_copy_section(int a,int std)1773 emit_copy_section(int a, int std)
1774 {
1775 int ast;
1776 int astnew;
1777 int asn;
1778 int tempast;
1779 int tempast0;
1780 int i, j;
1781 int src, dest, lop;
1782 int forall;
1783 int list;
1784 int lhs;
1785 int allocstd;
1786 int startstd;
1787 int commstd;
1788 int sectlstd;
1789 int sectrstd;
1790 int cp, xfer;
1791 int nd;
1792 int sptr;
1793 int allocast;
1794 int order2[7];
1795 int no;
1796 int header;
1797 int lhssec;
1798 int sectflag;
1799 LOGICAL independent;
1800
1801 forall = STD_AST(std);
1802 lhs = comminfo.sub;
1803 list = A_LISTG(forall);
1804 nd = A_OPT1G(forall);
1805 header = FT_HEADER(nd);
1806
1807 sectflag = 0;
1808 if (is_bogus_forall(forall))
1809 sectflag |= BOGUSFLAG;
1810
1811 if (!is_ordered(lhs, a, list, order2, &no)) {
1812 tempast = emit_permute_section(a, std);
1813 return tempast;
1814 }
1815
1816 open_dynpragma(std, STD_LINENO(std));
1817 independent = (flg.x[19] & 0x100) != 0;
1818 close_pragma();
1819 sectlstd = 0;
1820 lhssec = 0;
1821 if (is_use_lhs(a, TRUE, independent, std)) {
1822 sptr = sptr_of_subscript(comminfo.sub);
1823 tempast = lhs;
1824 lhssec = tempast = forall_2_sec(tempast, forall);
1825 sectlstd = make_sec_ast(tempast, std, 0, sectflag);
1826 nd = A_OPT1G(forall);
1827 FT_SECTL(nd) = sectlstd;
1828 }
1829
1830 sptr = temp_copy_section(std, forall, lhs, a,
1831 DTY(DTYPEG(sptr_of_subscript(a)) + 1), &allocast);
1832 tempast0 = tempast = copy_section_temp_before(sptr, a, forall);
1833
1834 allocstd = add_stmt_before(allocast, header);
1835 A_STDP(allocast, allocstd);
1836 nd = A_OPT1G(forall);
1837 plist(FT_RTL(nd), allocstd);
1838 FT_NRT(nd)++;
1839
1840 tempast = forall_2_sec(tempast, forall);
1841 sectlstd = make_sec_ast(tempast, std, allocstd, sectflag);
1842
1843 astnew = forall_2_sec(a, forall);
1844 sectrstd = make_sec_ast(astnew, std, 0, sectflag);
1845
1846 asn = mk_stmt(A_ASN, astb.bnd.dtype);
1847 ast = new_node(A_HCOPYSECT);
1848 A_SRCP(ast, astnew);
1849 A_SDESCP(ast, 0);
1850 A_DESTP(ast, tempast);
1851 A_DDESCP(ast, 0);
1852 nd = mk_ftb();
1853 FT_STD(nd) = std;
1854 FT_FORALL(nd) = forall;
1855 FT_CCOPY_LHS(nd) = lhs;
1856 FT_CCOPY_RHS(nd) = a;
1857 FT_CCOPY_TSPTR(nd) = sptr;
1858 FT_CCOPY_SECTR(nd) = sectrstd;
1859 FT_CCOPY_SECTL(nd) = sectlstd;
1860 FT_CCOPY_ALLOC(nd) = allocstd;
1861 FT_CCOPY_FREE(nd) = header;
1862 FT_CCOPY_REUSE(nd) = 0;
1863 FT_CCOPY_USELHS(nd) = 0;
1864 FT_CCOPY_SAME(nd) = 0;
1865 FT_CCOPY_LHSSEC(nd) = lhssec;
1866 FT_CCOPY_NOTLHS(nd) = (lhssec) ? 0 : 1;
1867 A_OPT1P(ast, nd);
1868 cp = sym_get_cp();
1869 FT_CCOPY_OUT(nd) = cp;
1870 dest = mk_id(cp);
1871 A_DESTP(asn, dest);
1872 A_SRCP(asn, ast);
1873
1874 commstd = add_stmt_before(asn, header);
1875 A_STDP(asn, commstd);
1876 nd = A_OPT1G(forall);
1877 plist(FT_RTL(nd), commstd);
1878 FT_NRT(nd)++;
1879
1880 asn = mk_stmt(A_ASN, astb.bnd.dtype);
1881 ast = new_node(A_HCSTART);
1882 lop = mk_id(cp);
1883 A_LOPP(ast, lop);
1884 A_SRCP(ast, astnew);
1885 A_DESTP(ast, tempast);
1886 nd = mk_ftb();
1887 FT_STD(nd) = std;
1888 FT_FORALL(nd) = forall;
1889 FT_CSTART_COMM(nd) = commstd;
1890 FT_CSTART_RHS(nd) = a;
1891 FT_CSTART_USEDSTD(nd) = comminfo.usedstd;
1892 xfer = sym_get_xfer();
1893 FT_CSTART_OUT(nd) = xfer;
1894 FT_CSTART_SECTR(nd) = sectrstd;
1895 FT_CSTART_SECTL(nd) = sectlstd;
1896 FT_CSTART_ALLOC(nd) = allocstd;
1897 FT_CSTART_FREE(nd) = header;
1898 FT_CSTART_REF(nd) = tempast0;
1899 FT_CSTART_TYPE(nd) = A_HCOPYSECT;
1900 FT_CSTART_REUSE(nd) = 0;
1901 FT_CSTART_INVMVD(nd) = 0;
1902 FT_CSTART_USELHS(nd) = 0;
1903 FT_CSTART_SAME(nd) = 0;
1904 A_OPT1P(ast, nd);
1905 dest = mk_id(xfer);
1906 A_DESTP(asn, dest);
1907 A_SRCP(asn, ast);
1908
1909 startstd = add_stmt_before(asn, header);
1910 A_STDP(asn, startstd);
1911 nd = A_OPT1G(forall);
1912 plist(FT_RTL(nd), startstd);
1913 FT_NRT(nd)++;
1914
1915 return a;
1916 }
1917
1918 /*
1919 * pghpf_permute_section(void *rb, void *sb, section *rs, section *ss, ...)
1920 *
1921 *chdr *
1922 * pghpf_comm_permute(void *rb, void *sb, section *rs, section *ss, ...)
1923 * ... = int x1, .., int xN, where N = section rank
1924 * The axis arguments (x1, .., xN) is a permutation of the integers 1..N.
1925 * The permutation applies to the dimensions on the right hand side (like
1926 * a gather operation).
1927 * For example:
1928 * forall (i=1:2, j=1:4, k=1:5) a(i,3,j,k) = b(k,i,j)
1929 * pghpf_permute_section(a, b, a$s, b$s, 2, 3, 1)
1930 */
1931 static int
emit_permute_section(int a,int std)1932 emit_permute_section(int a, int std)
1933 {
1934 int sptr, sptrast;
1935 int asd;
1936 int ndim;
1937 int ast1;
1938 int subs[7];
1939 int astnew;
1940 int tempast, tempast0;
1941 int argt, nargs;
1942 int i, j;
1943 int src, dest;
1944 int forall;
1945 int list;
1946 int arref;
1947 int lhs;
1948 LOGICAL use_lhs;
1949 int order2[7];
1950 int no;
1951 int func;
1952 int new_a;
1953 int nd, header;
1954 int sectflag;
1955
1956 forall = STD_AST(std);
1957 nd = A_OPT1G(forall);
1958 header = FT_HEADER(nd);
1959 lhs = comminfo.sub;
1960 list = A_LISTG(forall);
1961 asd = A_ASDG(comminfo.sub);
1962 ndim = ASD_NDIM(asd);
1963
1964 sectflag = 0;
1965 if (is_bogus_forall(forall))
1966 sectflag |= BOGUSFLAG;
1967
1968 if (cs_table.is_used_lhs) {
1969 use_lhs = FALSE;
1970 } else {
1971 use_lhs = is_use_lhs_final(a, forall, TRUE, FALSE, std);
1972 }
1973 if (use_lhs) {
1974 sptr = sptr_of_subscript(comminfo.sub);
1975 sptrast = A_LOPG(comminfo.sub);
1976 tempast = lhs;
1977 cs_table.is_used_lhs = TRUE;
1978 } else {
1979 new_a = eliminate_extra_idx(lhs, a, forall);
1980 sptr = get_temp_copy_section(forall, lhs, new_a, header, header, a);
1981 sptrast = mk_id(sptr);
1982 tempast0 = tempast = copy_section_temp_before(sptr, new_a, forall);
1983 }
1984
1985 if (is_ordered(tempast, a, list, order2, &no)) {
1986 assert(0, "emit_permute_section: something is wrong", 3, a);
1987 }
1988
1989 tempast = forall_2_sec(tempast, forall);
1990 dest = make_sec_from_ast(tempast, header, header, 0, sectflag);
1991
1992 astnew = forall_2_sec(a, forall);
1993 src = make_sec_from_ast(astnew, header, header, 0, sectflag);
1994
1995 nargs = 4 + no;
1996 func = mk_id(sym_mkfunc(mkRteRtnNm(RTE_permute_section), DT_NONE));
1997 NODESCP(A_SPTRG(func), 1);
1998
1999 argt = mk_argt(nargs);
2000 ARGT_ARG(argt, 0) = sptrast;
2001 ARGT_ARG(argt, 1) = A_LOPG(a);
2002
2003 ARGT_ARG(argt, 2) = check_member(sptrast, mk_id(dest));
2004 ARGT_ARG(argt, 3) = check_member(A_LOPG(a), mk_id(src));
2005
2006 for (i = 0; i < no; i++)
2007 ARGT_ARG(argt, 4 + i) = mk_isz_cval(order2[i] + 1, astb.bnd.dtype);
2008
2009 ast1 = mk_stmt(A_CALL, 0);
2010 A_LOPP(ast1, func);
2011 A_ARGCNTP(ast1, nargs);
2012 A_ARGSP(ast1, argt);
2013 add_stmt_before(ast1, header);
2014
2015 /* temp will be referenced after communication as follows */
2016 if (use_lhs)
2017 return lhs; /* forall is totally removed no need to access */
2018 else {
2019 process_rhs_sub(tempast0);
2020 return tempast0;
2021 }
2022 }
2023
2024 /* This routine finds out the dimension of sptr.
2025 * It takes subscript a(f(i),5,f(j)). It eliminates scalar dimension.
2026 * It makes an ast for reference sptr.
2027 * a(f(i),5,f(j)) --> sptr(i,j)
2028 */
2029 static int
copy_section_temp_before(int sptr,int rhs,int forall)2030 copy_section_temp_before(int sptr, int rhs, int forall)
2031 {
2032 int subs[7];
2033 int k, j;
2034 int asd;
2035 int ndim;
2036 int astnew;
2037 int astli;
2038 int nidx;
2039 int list;
2040
2041 asd = A_ASDG(rhs);
2042 ndim = ASD_NDIM(asd);
2043 list = A_LISTG(forall);
2044
2045 j = 0;
2046 /* array will be referenced after communication as follows */
2047 for (k = 0; k < ndim; ++k) {
2048 astli = 0;
2049 nidx = 0;
2050 search_forall_idx(ASD_SUBS(asd, k), list, &astli, &nidx);
2051 if (nidx == 1 && astli) {
2052 /* include this dimension */
2053 subs[j] = mk_id(ASTLI_SPTR(astli));
2054 j++;
2055 }
2056 }
2057 assert(j == rank_of_sym(sptr), "copy_section_temp_before: rank mismatched",
2058 sptr, 4);
2059 astnew = mk_subscr(mk_id(sptr), subs, j, DTY(DTYPEG(sptr) + 1));
2060 return astnew;
2061 }
2062
2063 /* It takes forall(i=,j=,k=) a(i,j,k) = b(j,i) , return a(i,j,1) */
2064 static int
eliminate_extra_idx(int lhs,int a,int forall)2065 eliminate_extra_idx(int lhs, int a, int forall)
2066 {
2067 int subs[7];
2068 int k, i;
2069 int asd;
2070 int ndim;
2071 int asd1;
2072 int ndim1;
2073 int astnew;
2074 int astli;
2075 int nidx;
2076 int list;
2077 LOGICAL found;
2078 int sptr;
2079
2080 sptr = sptr_of_subscript(lhs);
2081 asd = A_ASDG(lhs);
2082 ndim = ASD_NDIM(asd);
2083 list = A_LISTG(forall);
2084
2085 asd1 = A_ASDG(a);
2086 ndim1 = ASD_NDIM(asd1);
2087
2088 for (k = 0; k < ndim; ++k) {
2089 subs[k] = ASD_SUBS(asd, k);
2090 astli = 0;
2091 nidx = 0;
2092 search_forall_idx(ASD_SUBS(asd, k), list, &astli, &nidx);
2093 if (nidx == 1 && astli) {
2094 found = FALSE;
2095 for (i = 0; i < ndim1; ++i)
2096 if (is_name_in_expr(ASD_SUBS(asd1, i), ASTLI_SPTR(astli)))
2097 found = TRUE;
2098 if (!found)
2099 subs[k] = astb.i1;
2100 }
2101 }
2102 astnew = mk_subscr(mk_id(sptr), subs, ndim, DTY(DTYPEG(sptr) + 1));
2103 return astnew;
2104 }
2105
2106 /* This routine is to find out how index is permuted at result
2107 * based on array. used by scatterx/gatherx to perform axis ordering.
2108 * It creates axis array for indirection subscripts.
2109 *
2110 * For an indirectly indexed dimension, the axis vector indicates which
2111 * combination of the index variables is used to subscript the index
2112 * vector. The size of the axis vector is equal to the rank of the index
2113 * vector. If the order of the index variables is not permuted, i.e. the
2114 * axis vector is (/1, 2, 3, .. N/), then the corresponding permuted bit
2115 * can be zeroed and the axis argument omitted.
2116
2117 * For a directly indexed dimension, the axis argument indicates which
2118 * index variable is used to subscript that dimension. If the axis
2119 * number matches the dimension number, then the corresponding permuted
2120 * bit can be zeroed and the axis argument omitted.
2121 */
2122 static void
permute_axis(int result,int array,int list,int permute[7])2123 permute_axis(int result, int array, int list, int permute[7])
2124 {
2125
2126 int order2[7];
2127 int no;
2128 int subs[7];
2129 int newresult;
2130 int astli, nidx;
2131 int asd, ndim;
2132 int i, j;
2133 int per[7], per1[7];
2134 int nper1;
2135
2136 for (i = 0; i < 7; i++)
2137 permute[i] = 0;
2138
2139 /* find out for indirection array */
2140 asd = A_ASDG(result);
2141 ndim = ASD_NDIM(asd);
2142 for (i = 0; i < ndim; i++) {
2143 subs[i] = ASD_SUBS(asd, i);
2144 if (is_vector_subscript(subs[i], list)) {
2145 compute_permute(array, subs[i], list, per);
2146 if (is_permuted(subs[i], per, per1, &nper1))
2147 permute[i] = put_data(per1, nper1);
2148 subs[i] = mk_isz_cval(1, astb.bnd.dtype);
2149 }
2150 }
2151
2152 /* find out after eliminating indirections */
2153
2154 newresult = mk_subscr(A_LOPG(result), subs, ndim, A_DTYPEG(result));
2155 compute_permute(array, newresult, list, per);
2156
2157 for (i = 0; i < ndim; i++) {
2158 subs[i] = ASD_SUBS(asd, i);
2159 if (per[i] == 0)
2160 continue;
2161 if (is_vector_subscript(subs[i], list))
2162 continue;
2163 permute[i] = mk_isz_cval(per[i], astb.bnd.dtype);
2164 }
2165 }
2166
2167 static void
init_pertbl(void)2168 init_pertbl(void)
2169 {
2170 pertbl.size = 200;
2171 NEW(pertbl.base, TABLE, pertbl.size);
2172 pertbl.avl = 0;
2173 }
2174
2175 static void
free_pertbl(void)2176 free_pertbl(void)
2177 {
2178 FREE(pertbl.base);
2179 pertbl.base = NULL;
2180 }
2181
2182 static int
get_pertbl(void)2183 get_pertbl(void)
2184 {
2185 int nd;
2186
2187 nd = pertbl.avl++;
2188 NEED(pertbl.avl, pertbl.base, TABLE, pertbl.size, pertbl.size + 100);
2189 if (nd > SPTR_MAX || pertbl.base == NULL)
2190 errfatal(7);
2191 return nd;
2192 }
2193
2194 static int
put_data(int permute[7],int no)2195 put_data(int permute[7], int no)
2196 {
2197 ADSC *ad;
2198 int dtype;
2199 int i, j;
2200 int arr;
2201 LOGICAL found;
2202
2203 assert(no, "put_data: something is wrong", no, 2);
2204
2205 /* find about whether same axis array created before */
2206 for (i = 0; i < pertbl.avl; i++) {
2207 if (pertbl.base[i].f2 == no) {
2208 found = TRUE;
2209 for (j = 0; j < no; j++) {
2210 if (permute[j] != pertbl.base[i].f4[j])
2211 found = FALSE;
2212 }
2213 if (found)
2214 return mk_id(pertbl.base[i].f1);
2215 }
2216 }
2217
2218 arr = sym_get_array("axis", 0, DT_INT, 1);
2219
2220 i = get_pertbl();
2221 pertbl.base[i].f1 = arr;
2222 pertbl.base[i].f2 = no;
2223 for (j = 0; j < no; j++)
2224 pertbl.base[i].f4[j] = permute[j];
2225
2226 ALLOCP(arr, 0);
2227 dtype = DTYPEG(arr);
2228 ad = AD_DPTR(dtype);
2229 AD_LWAST(ad, 0) = AD_LWBD(ad, 0) = 0;
2230 AD_NUMELM(ad) = AD_UPBD(ad, 0) = AD_UPAST(ad, 0) = AD_EXTNTAST(ad, 0) =
2231 mk_isz_cval(no, astb.bnd.dtype);
2232 AD_DEFER(ad) = 0;
2233 AD_NOBOUNDS(ad) = 0;
2234
2235 dinit_put(DINIT_LOC, (INT)arr);
2236
2237 dtype = DDTG(DTYPEG(arr));
2238
2239 for (i = 0; i < no; i++) {
2240 if (DTY(DT_INT) == TY_INT8) {
2241 INT val[2];
2242 val[0] = 0;
2243 val[1] = permute[i];
2244 dinit_put(dtype, getcon(val, DT_INT8));
2245 } else
2246 dinit_put(dtype, permute[i]);
2247 }
2248 dinit_put(DINIT_END, 0);
2249 DINITP(arr, 1);
2250 sym_is_refd(arr);
2251
2252 return mk_id(arr);
2253 }
2254
2255 /*This routine calculates permute of rhs based on lhs
2256 * for example, lhs(i,2, j,k) rhs(3,k,i,j) then
2257 * permute will be /0,3,1,2/
2258 */
2259 static void
compute_permute(int lhs,int rhs,int list,int order[7])2260 compute_permute(int lhs, int rhs, int list, int order[7])
2261 {
2262 int asd, ndim;
2263 int i, j;
2264 int count, count1;
2265 int order1[7];
2266 LOGICAL found;
2267 int astli, nidx;
2268 int iloc;
2269
2270 for (j = 0; j < 7; j++)
2271 order[j] = 0;
2272
2273 assert(!is_duplicate(lhs, list), "compute_permute:something is wrong", lhs,
2274 3);
2275
2276 /* rhs */
2277 asd = A_ASDG(rhs);
2278 ndim = ASD_NDIM(asd);
2279 count = 0;
2280 for (j = 0; j < ndim; ++j) {
2281 order[j] = 0;
2282 astli = 0;
2283 nidx = 0;
2284 search_forall_idx(ASD_SUBS(asd, j), list, &astli, &nidx);
2285 if (nidx == 1 && astli) {
2286 order[j] = ASTLI_SPTR(astli);
2287 count++;
2288 }
2289 }
2290
2291 /* lhs */
2292 asd = A_ASDG(lhs);
2293 ndim = ASD_NDIM(asd);
2294 count1 = 0;
2295 for (j = 0; j < ndim; ++j) {
2296 astli = 0;
2297 nidx = 0;
2298 search_forall_idx(ASD_SUBS(asd, j), list, &astli, &nidx);
2299 if (nidx == 1 && astli) {
2300 order1[count1] = ASTLI_SPTR(astli);
2301 count1++;
2302 }
2303 }
2304
2305 asd = A_ASDG(rhs);
2306 ndim = ASD_NDIM(asd);
2307 for (j = 0; j < ndim; j++) {
2308 if (order[j] == 0)
2309 continue;
2310 found = FALSE;
2311 for (i = 0; i < count1; i++) {
2312 if (order1[i] == order[j]) {
2313 found = TRUE;
2314 iloc = i + 1;
2315 }
2316 }
2317 assert(found, "compute_permute:something is wrong", lhs, 3);
2318 order[j] = iloc;
2319 }
2320 }
2321
2322 static LOGICAL
is_permuted(int array,int per[7],int per1[7],int * nper1)2323 is_permuted(int array, int per[7], int per1[7], int *nper1)
2324 {
2325 int asd;
2326 int ndim;
2327 int count;
2328 int i;
2329 LOGICAL permuted;
2330
2331 assert(A_TYPEG(array) == A_SUBSCR, "is_permuted: something is wrong", array,
2332 2);
2333
2334 asd = A_ASDG(array);
2335 ndim = ASD_NDIM(asd);
2336 count = 0;
2337 for (i = 0; i < ndim; i++) {
2338 if (per[i]) {
2339 per1[count] = per[i];
2340 count++;
2341 }
2342 }
2343
2344 permuted = FALSE;
2345 for (i = 0; i < count; i++) {
2346 if (per1[i] != (i + 1))
2347 permuted = TRUE;
2348 }
2349
2350 *nper1 = count;
2351 return permuted;
2352 }
2353
2354 static void
emit_sum_scatterx(int std)2355 emit_sum_scatterx(int std)
2356 {
2357 int sptr;
2358 int asd1;
2359 int ndim1;
2360 int ast1;
2361 int subs[7];
2362 int astnew;
2363 int tempast, tempast0;
2364 int argt, nargs;
2365 int i, j;
2366 int forall;
2367 int list;
2368 int vflag, pflag;
2369 int vdim, pdim;
2370 int nvec;
2371 int secv;
2372 ADSC *ad;
2373 int glb, gub;
2374 int asn;
2375 int mask;
2376 int result_sec, base_sec, array_sec, mask_sec;
2377 int result, newresult;
2378 int base;
2379 int array;
2380 int func;
2381 int permute[7];
2382 int npermute;
2383 int ndim, asd;
2384 int nv;
2385 int newbase;
2386 char name[40];
2387 int function, operator;
2388 int sectflag;
2389
2390 forall = STD_AST(std);
2391 asn = A_IFSTMTG(forall);
2392
2393 sectflag = 0;
2394
2395 mask = comminfo.scat.mask;
2396 result = comminfo.scat.result;
2397 base = comminfo.scat.base;
2398 array = comminfo.scat.array;
2399 operator= comminfo.scat.operator;
2400 function = comminfo.scat.function;
2401 if (!base)
2402 return;
2403
2404 if (!comminfo.scat.array_simple) {
2405 int sptrtemp, newforall, newlist, asn, newstd, newarray;
2406 struct comminfo savecomminfo;
2407 sptrtemp = get_temp_forall(forall, base, std, std, 0, array);
2408 newarray = simple_reference_for_temp(sptrtemp, base, forall);
2409 /* assign temp from nonsimple array */
2410 newforall = mk_stmt(A_FORALL, 0);
2411 A_LISTP(newforall, A_LISTG(forall));
2412 A_SRCP(newforall, A_SRCG(forall));
2413 asn = mk_stmt(A_ASN, 0);
2414 A_DESTP(asn, newarray);
2415 A_SRCP(asn, array);
2416 A_IFSTMTP(newforall, asn);
2417 newstd = add_stmt_before(newforall, std);
2418 array = newarray;
2419 savecomminfo = comminfo;
2420 process_forall(newstd);
2421 transform_forall(newstd, newforall);
2422 comminfo = savecomminfo;
2423 }
2424
2425 sptr = sptr_of_subscript(result);
2426 list = A_LISTG(forall);
2427 asd1 = A_ASDG(result);
2428 ndim1 = ASD_NDIM(asd1);
2429
2430 vflag = 0;
2431 vdim = 0;
2432 nvec = 0;
2433 j = 0;
2434 for (i = 0; i < ndim1; i++) {
2435 subs[i] = ASD_SUBS(asd1, i);
2436 if (is_scalar(ASD_SUBS(asd1, i), list))
2437 continue;
2438 if (is_vector_subscript(ASD_SUBS(asd1, i), list)) {
2439 ad = AD_DPTR(DTYPEG(sptr));
2440 glb = AD_LWAST(ad, i);
2441 gub = AD_UPAST(ad, i);
2442 subs[i] = mk_isz_cval(1, astb.bnd.dtype);
2443 vflag |= 1 << j;
2444 vdim |= 1 << i;
2445 nvec++;
2446 }
2447 j++;
2448 }
2449
2450 permute_axis(result, array, list, permute);
2451
2452 npermute = 0;
2453 pflag = 0;
2454 pdim = 0;
2455 j = 0;
2456 for (i = 0; i < ndim1; i++) {
2457 if (is_scalar(ASD_SUBS(asd1, i), list))
2458 continue;
2459 if (permute[i]) {
2460 pflag |= 1 << j;
2461 pdim |= 1 << i;
2462 npermute++;
2463 }
2464 j++;
2465 }
2466
2467 if (nvec == ndim1)
2468 result_sec = DESCRG(sptr);
2469 else {
2470 newresult = mk_subscr(A_LOPG(result), subs, ndim1, A_DTYPEG(result));
2471 astnew = forall_2_sec(newresult, forall);
2472 /* change astnew for vector dimension */
2473 ad = AD_DPTR(DTYPEG(sptr_of_subscript(astnew)));
2474 asd1 = A_ASDG(astnew);
2475 ndim1 = ASD_NDIM(asd1);
2476 for (i = 0; i < ndim1; i++) {
2477 subs[i] = ASD_SUBS(asd1, i);
2478 if (getbit(vdim, i)) {
2479 glb = AD_LWAST(ad, i);
2480 gub = AD_UPAST(ad, i);
2481 subs[i] = mk_triple(glb, gub, 0);
2482 }
2483 }
2484 astnew = mk_subscr(A_LOPG(astnew), subs, ndim1, A_DTYPEG(astnew));
2485 result_sec = make_sec_from_ast(astnew, std, std, 0, sectflag | NOTSECTFLAG);
2486 }
2487
2488 base_sec = result_sec;
2489
2490 tempast = forall_2_sec(array, forall);
2491 array_sec = make_sec_from_ast(tempast, std, std, 0, sectflag);
2492
2493 if (mask) {
2494 mask = forall_2_sec(mask, forall);
2495 mask_sec = make_sec_from_ast(mask, std, std, 0, sectflag);
2496 mask = A_LOPG(mask);
2497 mask_sec = mk_id(mask_sec);
2498 } else {
2499 mask = mk_cval(1, DT_LOG);
2500 mask_sec = mk_cval(dtype_to_arg(A_DTYPEG(mask)), DT_INT);
2501 }
2502
2503 nargs = 2 * 4 + 1 + 1 + 2 * nvec + npermute;
2504 argt = mk_argt(nargs);
2505
2506 ARGT_ARG(argt, 0) = A_LOPG(result);
2507 DESCUSEDP(sptr, 1);
2508 ARGT_ARG(argt, 1) = A_LOPG(array);
2509 ARGT_ARG(argt, 2) = A_LOPG(base);
2510 ARGT_ARG(argt, 3) = mask;
2511
2512 /* sections */
2513 ARGT_ARG(argt, 4) = check_member(result, mk_id(result_sec));
2514 ARGT_ARG(argt, 5) = check_member(array, mk_id(array_sec));
2515 ARGT_ARG(argt, 6) = check_member(base, mk_id(base_sec));
2516 ARGT_ARG(argt, 7) = mask_sec;
2517
2518 ARGT_ARG(argt, 8) = mk_cval(vflag, DT_INT);
2519 ARGT_ARG(argt, 9) = mk_cval(pflag, DT_INT);
2520 j = 10;
2521 asd1 = A_ASDG(result);
2522 ndim1 = ASD_NDIM(asd1);
2523 for (i = 0; i < ndim1; i++) {
2524 if (!is_scalar(ASD_SUBS(asd1, i), list) &&
2525 is_vector_subscript(ASD_SUBS(asd1, i), list)) {
2526 astnew = forall_2_sec(ASD_SUBS(asd1, i), forall);
2527 secv = make_sec_from_ast(astnew, std, std, 0, sectflag);
2528 ARGT_ARG(argt, j) = A_LOPG(ASD_SUBS(asd1, i));
2529 j++;
2530 ARGT_ARG(argt, j) = mk_id(secv);
2531 j++;
2532 }
2533 if (permute[i]) {
2534 ARGT_ARG(argt, j) = permute[i];
2535 j++;
2536 }
2537 }
2538 ast1 = mk_stmt(A_CALL, 0);
2539
2540 func = 0;
2541 strcpy(name, "");
2542 if (operator) {
2543 switch (operator) {
2544 case OP_ADD:
2545 strcpy(name, mkRteRtnNm(RTE_sum_scatterx));
2546 break;
2547 case OP_MUL:
2548 strcpy(name, mkRteRtnNm(RTE_product_scatterx));
2549 break;
2550 case OP_LOR:
2551 strcpy(name, mkRteRtnNm(RTE_any_scatterx));
2552 break;
2553 case OP_LAND:
2554 strcpy(name, mkRteRtnNm(RTE_all_scatterx));
2555 break;
2556 case OP_LNEQV:
2557 strcpy(name, mkRteRtnNm(RTE_parity_scatterx));
2558 break;
2559 }
2560 }
2561 if (function) {
2562 switch (function) {
2563 case I_MAX:
2564 strcpy(name, mkRteRtnNm(RTE_maxval_scatterx));
2565 break;
2566 case I_MIN:
2567 strcpy(name, mkRteRtnNm(RTE_minval_scatterx));
2568 break;
2569 case I_IAND:
2570 strcpy(name, mkRteRtnNm(RTE_iall_scatterx));
2571 break;
2572 case I_IOR:
2573 strcpy(name, mkRteRtnNm(RTE_iany_scatterx));
2574 break;
2575 case I_IEOR:
2576 strcpy(name, mkRteRtnNm(RTE_iparity_scatterx));
2577 break;
2578 }
2579 }
2580
2581 assert(strcmp(name, ""), "emit_sum_scatterx: something is wrong", std, 2);
2582 func = mk_id(sym_mkfunc(name, DT_NONE));
2583 A_LOPP(ast1, func);
2584 A_ARGCNTP(ast1, nargs);
2585 A_ARGSP(ast1, argt);
2586 add_stmt_before(ast1, std);
2587 NODESCP(memsym_of_ast(A_LOPG(ast1)), 1);
2588 STD_DELETE(std) = 1;
2589 }
2590
2591 static void
emit_scatterx(int std)2592 emit_scatterx(int std)
2593 {
2594 int mask;
2595 int result;
2596 int array;
2597 int base;
2598
2599 mask = comminfo.scat.mask;
2600 result = comminfo.scat.result;
2601 array = comminfo.scat.array;
2602 base = comminfo.scat.base;
2603
2604 if (base)
2605 return;
2606
2607 emit_scatterx_gatherx(std, result, array, mask, 0, 0, 0, A_HSCATTER);
2608
2609 STD_DELETE(std) = 1;
2610 }
2611
2612 static void
emit_scatterx_gatherx(int std,int result,int array,int mask,int allocstd,int tempast0,int lhssec,int comm_type)2613 emit_scatterx_gatherx(int std, int result, int array, int mask, int allocstd,
2614 int tempast0, int lhssec, int comm_type)
2615 {
2616 int sptr, dest, lop;
2617 int asd1;
2618 int ndim1;
2619 int ast1;
2620 int subs[7];
2621 int astnew;
2622 int tempast;
2623 int argt, nargs;
2624 int i, j;
2625 int forall;
2626 int list;
2627 int vflag, pflag;
2628 int pdim, vdim;
2629 int nvec;
2630 int secv;
2631 ADSC *ad;
2632 int glb, gub;
2633 int asn;
2634 int result_sec, base_sec, array_sec, mask_sec;
2635 int newresult;
2636 int func;
2637 int permute[7];
2638 int npermute;
2639 int ndim, asd;
2640 int newbase;
2641 int nd;
2642 int header;
2643 int vsub, nvsub, newvsub;
2644 int vsub_sec, nvsub_sec;
2645 int commstd;
2646 int cp, xfer;
2647 int startstd;
2648 int ast;
2649 int v, sectvstd;
2650 int sectvsub, sectnvsub;
2651 int vsubstd, nvsubstd, maskstd;
2652 int lhs;
2653 int mask_id;
2654 int sectflag;
2655 INDEX_REUSE *irp;
2656 NEWVAR *nv;
2657 LOGICAL index_reuse;
2658 int index_reuse_condvar;
2659 int ifstd;
2660
2661 forall = STD_AST(std);
2662 asn = A_IFSTMTG(forall);
2663 lhs = A_DESTG(asn);
2664 nd = A_OPT1G(forall);
2665 header = FT_HEADER(nd);
2666
2667 sectflag = 0;
2668
2669 if (comm_type == A_HGATHER) {
2670 vsub = array;
2671 nvsub = result;
2672 func = mk_id(sym_mkfunc(mkRteRtnNm(RTE_comm_gatherx), DT_ADDR));
2673 } else if (comm_type == A_HSCATTER) {
2674 vsub = result;
2675 nvsub = array;
2676 func = mk_id(sym_mkfunc(mkRteRtnNm(RTE_comm_scatterx), DT_ADDR));
2677 }
2678
2679 sptr = memsym_of_ast(vsub);
2680 list = A_LISTG(forall);
2681 asd1 = A_ASDG(vsub);
2682 ndim1 = ASD_NDIM(asd1);
2683
2684 index_reuse = FALSE;
2685 open_dynpragma(std, STD_LINENO(std));
2686 for (irp = direct.index_reuse_list; irp; irp = irp->next) {
2687 for (nv = irp->reuse_list; nv; nv = nv->next) {
2688 if (sptr == nv->var) {
2689 index_reuse = TRUE;
2690 index_reuse_condvar = irp->condvar;
2691 goto found_index_reuse;
2692 }
2693 }
2694 }
2695 found_index_reuse:
2696 close_pragma();
2697
2698 vflag = 0;
2699 vdim = 0;
2700 nvec = 0;
2701 j = 0;
2702 for (i = 0; i < ndim1; i++) {
2703 subs[i] = ASD_SUBS(asd1, i);
2704 if (is_scalar(ASD_SUBS(asd1, i), list))
2705 continue;
2706 if (is_vector_subscript(ASD_SUBS(asd1, i), list)) {
2707 ad = AD_DPTR(DTYPEG(sptr));
2708 glb = AD_LWAST(ad, i);
2709 gub = AD_UPAST(ad, i);
2710 subs[i] = mk_isz_cval(1, astb.bnd.dtype);
2711 vflag |= 1 << j;
2712 vdim |= 1 << i;
2713 nvec++;
2714 }
2715 j++;
2716 }
2717
2718 permute_axis(vsub, nvsub, list, permute);
2719
2720 npermute = 0;
2721 pflag = 0;
2722 pdim = 0;
2723 j = 0;
2724 for (i = 0; i < ndim1; i++) {
2725 if (is_scalar(ASD_SUBS(asd1, i), list))
2726 continue;
2727 if (permute[i]) {
2728 pflag |= 1 << j;
2729 pdim |= 1 << i;
2730 npermute++;
2731 }
2732 j++;
2733 }
2734
2735 newvsub = mk_subscr(A_LOPG(vsub), subs, ndim1, A_DTYPEG(vsub));
2736 astnew = forall_2_sec(newvsub, forall);
2737 /* change astnew for vector dimension */
2738 ad = AD_DPTR(DTYPEG(memsym_of_ast(astnew)));
2739 asd1 = A_ASDG(astnew);
2740 ndim1 = ASD_NDIM(asd1);
2741 for (i = 0; i < ndim1; i++) {
2742 subs[i] = ASD_SUBS(asd1, i);
2743 if (getbit(vdim, i)) {
2744 glb = AD_LWAST(ad, i);
2745 gub = AD_UPAST(ad, i);
2746 subs[i] = mk_triple(glb, gub, 0);
2747 }
2748 }
2749 newvsub = mk_subscr(A_LOPG(astnew), subs, ndim1, DTYPEG(sptr));
2750 vsubstd = make_sec_ast(newvsub, std, 0, sectflag | NOREINDEX);
2751
2752 nvsub = forall_2_sec(nvsub, forall);
2753 nvsubstd = make_sec_ast(nvsub, std, allocstd, sectflag);
2754
2755 if (mask && !comminfo.mask_phase) {
2756 mask = forall_2_sec(mask, forall);
2757 maskstd = make_sec_ast(mask, std, 0, sectflag);
2758 mask_id = mk_id(memsym_of_ast(mask));
2759 } else {
2760 mask = 0;
2761 mask_id = 0;
2762 maskstd = 0;
2763 }
2764
2765 asn = mk_stmt(A_ASN, astb.bnd.dtype);
2766 ast = new_node(A_HGATHER);
2767 A_SRCP(ast, A_LOPG(result));
2768 A_SDESCP(ast, 0);
2769 A_DESTP(ast, A_LOPG(array));
2770 A_DDESCP(ast, 0);
2771 A_MASKP(ast, mask_id);
2772 A_MDESCP(ast, 0);
2773 A_BVECTP(ast, 0);
2774 nd = mk_ftb();
2775 FT_STD(nd) = std;
2776 FT_FORALL(nd) = forall;
2777 FT_CGATHER_VSUB(nd) = newvsub;
2778 FT_CGATHER_NVSUB(nd) = nvsub;
2779 FT_CGATHER_MASK(nd) = mask;
2780 FT_CGATHER_SECTVSUB(nd) = vsubstd;
2781 FT_CGATHER_SECTNVSUB(nd) = nvsubstd;
2782 FT_CGATHER_SECTM(nd) = maskstd;
2783 FT_CGATHER_ALLOC(nd) = allocstd;
2784 FT_CGATHER_FREE(nd) = header;
2785 FT_CGATHER_REUSE(nd) = 0;
2786 FT_CGATHER_INDEXREUSE(nd) = index_reuse;
2787 FT_CGATHER_USELHS(nd) = 0;
2788 FT_CGATHER_LHS(nd) = lhs;
2789 FT_CGATHER_RHS(nd) = array;
2790 FT_CGATHER_SAME(nd) = 0;
2791 FT_CGATHER_VFLAG(nd) = vflag;
2792 FT_CGATHER_PFLAG(nd) = pflag;
2793 FT_CGATHER_VDIM(nd) = vdim;
2794 FT_CGATHER_PDIM(nd) = pdim;
2795 FT_CGATHER_NVEC(nd) = nvec;
2796 FT_CGATHER_NPER(nd) = npermute;
2797 FT_CGATHER_TYPE(nd) = comm_type;
2798 FT_CGATHER_LHSSEC(nd) = lhssec;
2799 FT_CGATHER_NOTLHS(nd) = (lhssec) ? 0 : 1;
2800 j = 8;
2801 asd1 = A_ASDG(vsub);
2802 ndim1 = ASD_NDIM(asd1);
2803 for (i = 0; i < ndim1; i++) {
2804 FT_CGATHER_SECTV(nd, i) = 0;
2805 FT_CGATHER_V(nd, i) = 0;
2806 FT_CGATHER_PERMUTE(nd, i) = 0;
2807 if (!is_scalar(ASD_SUBS(asd1, i), list) &&
2808 is_vector_subscript(ASD_SUBS(asd1, i), list)) {
2809 v = forall_2_sec(ASD_SUBS(asd1, i), forall);
2810 sectvstd = make_sec_ast(v, std, 0, sectflag);
2811 v = ASD_SUBS(asd1, i);
2812 FT_CGATHER_SECTV(nd, i) = sectvstd;
2813 assert(A_TYPEG(v) == A_SUBSCR,
2814 "emit_scatterx_gatherx: non-subscript in gather", A_TYPEG(v), 4);
2815 FT_CGATHER_V(nd, i) = A_LOPG(v);
2816 }
2817 if (permute[i]) {
2818 FT_CGATHER_PERMUTE(nd, i) = permute[i];
2819 }
2820 }
2821
2822 A_OPT1P(ast, nd);
2823 cp = sym_get_cp();
2824 FT_CGATHER_OUT(nd) = cp;
2825 dest = mk_id(cp);
2826 A_DESTP(asn, dest);
2827 A_SRCP(asn, ast);
2828
2829 if (index_reuse) {
2830 /*
2831 * 'vsub appeared in a JAHPF INDEX_REUSE directive:
2832 * !hpfj index_reuse [(<condition>)] vsub...
2833 *
2834 * Enclose the 'pghpf_comm_gatherx/scatterx' call in a
2835 * conditional as follows:
2836 * (i) if no <condition> is specified:
2837 *
2838 * if (cp == 0) then
2839 * cp = pghpf_comm_gatherx/scatterx(...)
2840 * endif
2841 *
2842 * (ii) if <condition> is specified:
2843 *
2844 * if (cp == 0 .or. .not. <condition>) then
2845 * if (cp /= 0) then
2846 * call pghpf_comm_free(1,cp)
2847 * endif
2848 * cp = pghpf_comm_gatherx/scatterx(...)
2849 * endif
2850 */
2851 SAVEP(cp, 1);
2852 ast = mk_stmt(A_IFTHEN, 0);
2853 ast1 = mk_binop(OP_EQ, mk_id(cp), mk_convert(astb.i0, DT_ADDR), DT_LOG);
2854 if (index_reuse_condvar) {
2855 ast1 = mk_binop(OP_LOR, ast1,
2856 mk_unop(OP_LNOT, mk_id(index_reuse_condvar), DT_LOG),
2857 DT_LOG);
2858 }
2859 A_IFEXPRP(ast, ast1);
2860 ifstd = add_stmt_before(ast, header);
2861 A_STDP(ast, ifstd);
2862
2863 if (index_reuse_condvar) {
2864 int predicate = mk_binop(OP_NE, mk_id(cp), mk_convert(astb.i0, DT_ADDR),
2865 DT_LOG);
2866 int func = mk_id(sym_mkfunc(mkRteRtnNm(RTE_comm_free), DT_NONE));
2867 ast = mk_stmt(A_IFTHEN, 0);
2868 A_IFEXPRP(ast, predicate);
2869 ifstd = add_stmt_before(ast, header);
2870 A_STDP(ast, ifstd);
2871
2872 argt = mk_argt(2);
2873 ARGT_ARG(argt, 0) = astb.i1;
2874 ARGT_ARG(argt, 1) = mk_id(cp);
2875 ast = mk_stmt(A_CALL, 0);
2876 A_LOPP(ast, func);
2877 NODESCP(A_SPTRG(A_LOPG(ast)), 1);
2878 A_ARGCNTP(ast, 2);
2879 A_ARGSP(ast, argt);
2880 ifstd = add_stmt_before(ast, header);
2881 A_STDP(ast, ifstd);
2882
2883 ast = mk_stmt(A_ENDIF, 0);
2884 ifstd = add_stmt_before(ast, header);
2885 A_STDP(ast, ifstd);
2886 }
2887 }
2888
2889 commstd = add_stmt_before(asn, header);
2890 A_STDP(asn, commstd);
2891 nd = A_OPT1G(forall);
2892 plist(FT_RTL(nd), commstd);
2893 FT_NRT(nd)++;
2894
2895 if (index_reuse) {
2896 ast = mk_stmt(A_ENDIF, 0);
2897 ifstd = add_stmt_before(ast, header);
2898 A_STDP(ast, ifstd);
2899 }
2900
2901 asn = mk_stmt(A_ASN, astb.bnd.dtype);
2902 ast = new_node(A_HCSTART);
2903 lop = mk_id(cp);
2904 A_LOPP(ast, lop);
2905 A_SRCP(ast, array);
2906 A_DESTP(ast, result);
2907 nd = mk_ftb();
2908 FT_STD(nd) = std;
2909 FT_FORALL(nd) = forall;
2910 FT_CSTART_COMM(nd) = commstd;
2911 FT_CSTART_RHS(nd) = array;
2912 FT_CSTART_USEDSTD(nd) = comminfo.usedstd;
2913 xfer = sym_get_xfer();
2914 FT_CSTART_OUT(nd) = xfer;
2915 FT_CSTART_SECTL(nd) = vsubstd;
2916 FT_CSTART_SECTR(nd) = nvsubstd;
2917 FT_CSTART_ALLOC(nd) = allocstd;
2918 FT_CSTART_FREE(nd) = header;
2919 FT_CSTART_REF(nd) = tempast0;
2920 FT_CSTART_TYPE(nd) = comm_type;
2921 FT_CSTART_REUSE(nd) = 0;
2922 FT_CSTART_INVMVD(nd) = 0;
2923 FT_CSTART_USELHS(nd) = 0;
2924 FT_CSTART_SAME(nd) = 0;
2925 A_OPT1P(ast, nd);
2926 dest = mk_id(xfer);
2927 A_DESTP(asn, dest);
2928 A_SRCP(asn, ast);
2929
2930 startstd = add_stmt_before(asn, header);
2931 A_STDP(asn, startstd);
2932 nd = A_OPT1G(forall);
2933 plist(FT_RTL(nd), startstd);
2934 FT_NRT(nd)++;
2935 }
2936
2937 static int
emit_gatherx(int a,int std,LOGICAL opt)2938 emit_gatherx(int a, int std, LOGICAL opt)
2939 {
2940 int sptr;
2941 int asd1;
2942 int ndim1;
2943 int ast1;
2944 int astnew;
2945 int tempast, tempast0;
2946 int src, dest;
2947 int forall;
2948 int list;
2949 int lhs;
2950 LOGICAL use_lhs;
2951 int mask;
2952 int nd, header;
2953 int allocast, allocstd;
2954 int sectlstd;
2955 int lhssec;
2956 int sectflag;
2957 LOGICAL independent;
2958
2959 forall = STD_AST(std);
2960 nd = A_OPT1G(forall);
2961 header = FT_HEADER(nd);
2962 lhs = comminfo.sub;
2963 mask = A_IFEXPRG(forall);
2964 list = A_LISTG(forall);
2965 asd1 = A_ASDG(a);
2966 ndim1 = ASD_NDIM(asd1);
2967
2968 sectflag = 0;
2969
2970 open_dynpragma(std, STD_LINENO(std));
2971 independent = (flg.x[19] & 0x100) != 0;
2972 close_pragma();
2973
2974 sectlstd = 0;
2975 lhssec = 0;
2976 if (is_use_lhs(a, FALSE, independent, std)) {
2977 sptr = memsym_of_ast(comminfo.sub);
2978 tempast = lhs;
2979 lhssec = tempast = forall_2_sec(tempast, forall);
2980 sectlstd = make_sec_ast(tempast, std, 0, sectflag | NOREINDEX);
2981 nd = A_OPT1G(forall);
2982 FT_SECTL(nd) = sectlstd;
2983 }
2984
2985 sptr = temp_gatherx(std, forall, lhs, lhs, DTY(DTYPEG(memsym_of_ast(a)) + 1),
2986 &allocast);
2987 tempast0 = tempast = gatherx_temp_before(sptr, lhs, forall);
2988
2989 allocstd = add_stmt_before(allocast, header);
2990 A_STDP(allocast, allocstd);
2991 nd = A_OPT1G(forall);
2992 plist(FT_RTL(nd), allocstd);
2993 FT_NRT(nd)++;
2994
2995 emit_scatterx_gatherx(std, tempast, a, mask, allocstd, tempast0, lhssec,
2996 A_HGATHER);
2997 return a;
2998 }
2999
3000 /* Algorithm:
3001 * This will choice the bigest overlap shift at each dimension
3002 * among the same array in the set.
3003 * Store overlap_shift value in array symbol table.
3004 * mark the all OVERLAP as NO_COMM but the first one.
3005 */
3006 static void
opt_overlap(void)3007 opt_overlap(void)
3008 {
3009 int i;
3010 int arr, arr1;
3011 int subinfo1, ndim;
3012 int subinfo;
3013 int align;
3014 int nargs, ast1, argt;
3015 int first;
3016 int nd, nd1;
3017 int sptr, sptr1;
3018
3019 /* Now compute the total overlap-shift for each separate array symbol */
3020 for (arr = trans.rhsbase; arr != 0; arr = ARREF_NEXT(arr)) {
3021 if (ARREF_CLASS(arr) != OVERLAP)
3022 continue;
3023 align = ALIGNG(ARREF_ARRSYM(arr));
3024 for (arr1 = arr; arr1 != 0; arr1 = ARREF_NEXT(arr1)) {
3025 sptr = ARREF_ARRSYM(arr);
3026 sptr1 = ARREF_ARRSYM(arr1);
3027 if (ARREF_ARRSYM(arr1) != ARREF_ARRSYM(arr))
3028 continue;
3029 /* find out shift values and store union of them into subinfo */
3030 subinfo = ARREF_SUB(arr);
3031 subinfo1 = ARREF_SUB(arr1);
3032 ndim = ARREF_NDIM(arr1);
3033 for (i = 0; i < ndim; ++i) {
3034 int v;
3035 if (SUBI_COMMT(subinfo1 + i) != COMMT_SHIFTC)
3036 continue;
3037 if ((v = SUBI_COMMV(subinfo1 + i)) < 0) {
3038 v = -v;
3039 if (v > SUBI_NOP(subinfo + i)) {
3040 SUBI_NOP(subinfo + i) = v;
3041 SUBI_NOP(subinfo1 + i) = v;
3042 }
3043
3044 } else {
3045 if (v > SUBI_POP(subinfo + i)) {
3046 SUBI_POP(subinfo + i) = v;
3047 SUBI_POP(subinfo1 + i) = v;
3048 }
3049 }
3050 }
3051
3052 if (flg.ipa) {
3053 /* allow common block overlap increase */
3054 if ((ARGG(sptr1) && SCG(sptr) != SC_CMBLK) || SCG(sptr1) == SC_DUMMY)
3055 continue;
3056 } else {
3057 if (ARGG(sptr1) || SCG(sptr1) == SC_DUMMY || SCG(sptr) == SC_CMBLK)
3058 continue;
3059 }
3060
3061 ARREF_FLAG(arr1) = 2;
3062 subinfo = ARREF_SUB(arr);
3063 subinfo1 = ARREF_SUB(arr1);
3064 ndim = ARREF_NDIM(arr1);
3065 }
3066 }
3067 }
3068
3069 static void
emit_overlap(int a)3070 emit_overlap(int a)
3071 {
3072 int align, sdesc, dest, lop;
3073 int arr;
3074 int asd, ndim;
3075 int astnew;
3076 int asn;
3077 int i;
3078 int startstd;
3079 int commstd;
3080 int cp, xfer;
3081 int nd;
3082 int sptr;
3083 int subs[7];
3084 int forall;
3085 int std;
3086 int ns, ps;
3087 int ast;
3088 int header;
3089 int subinfo;
3090 int arref;
3091
3092 std = comminfo.std;
3093 forall = STD_AST(std);
3094 nd = A_OPT1G(forall);
3095 header = FT_HEADER(nd);
3096 /* put out the shift call for this symbol */
3097 arr = A_LOPG(a);
3098 sptr = memsym_of_ast(arr);
3099 align = ALIGNG(sptr);
3100 asd = A_ASDG(a);
3101 ndim = ASD_NDIM(asd);
3102 arref = A_RFPTRG(a);
3103 subinfo = ARREF_SUB(arref);
3104
3105 DESCUSEDP(sptr, 1);
3106 for (i = 0; i < ndim; ++i) {
3107 ns = mk_isz_cval(SUBI_NOP(subinfo + i), astb.bnd.dtype);
3108 ps = mk_isz_cval(SUBI_POP(subinfo + i), astb.bnd.dtype);
3109 subs[i] = mk_triple(ps, ns, 0);
3110 }
3111 astnew = mk_subscr(arr, subs, ndim, DTYPEG(sptr));
3112
3113 asn = mk_stmt(A_ASN, astb.bnd.dtype);
3114 ast = new_node(A_HOVLPSHIFT);
3115 A_SRCP(ast, astnew);
3116 sdesc = check_member(arr, mk_id(DESCRG(sptr)));
3117 A_SDESCP(ast, sdesc);
3118 nd = mk_ftb();
3119 FT_STD(nd) = std;
3120 FT_FORALL(nd) = forall;
3121 FT_SHIFT_RHS(nd) = a;
3122 FT_SHIFT_FREE(nd) = header;
3123 FT_SHIFT_REUSE(nd) = 0;
3124 FT_SHIFT_SAME(nd) = 0;
3125 FT_SHIFT_TYPE(nd) = 0;
3126 FT_SHIFT_BOUNDARY(nd) = 0;
3127 A_OPT1P(ast, nd);
3128 cp = sym_get_cp();
3129 FT_SHIFT_OUT(nd) = cp;
3130 dest = mk_id(cp);
3131 A_DESTP(asn, dest);
3132 A_SRCP(asn, ast);
3133
3134 commstd = add_stmt_before(asn, header);
3135 A_STDP(asn, commstd);
3136 nd = A_OPT1G(forall);
3137 plist(FT_RTL(nd), commstd);
3138 FT_NRT(nd)++;
3139
3140 asn = mk_stmt(A_ASN, astb.bnd.dtype);
3141 ast = new_node(A_HCSTART);
3142 lop = mk_id(cp);
3143 A_LOPP(ast, lop);
3144 A_SRCP(ast, astnew);
3145 A_DESTP(ast, astnew);
3146 nd = mk_ftb();
3147 FT_STD(nd) = std;
3148 FT_FORALL(nd) = forall;
3149 FT_CSTART_COMM(nd) = commstd;
3150 FT_CSTART_RHS(nd) = a;
3151 FT_CSTART_USEDSTD(nd) = comminfo.usedstd;
3152 xfer = sym_get_xfer();
3153 FT_CSTART_OUT(nd) = xfer;
3154 FT_CSTART_SECTL(nd) = 0;
3155 FT_CSTART_SECTR(nd) = 0;
3156 FT_CSTART_ALLOC(nd) = 0;
3157
3158 FT_CSTART_FREE(nd) = header;
3159 FT_CSTART_REF(nd) = 0;
3160 FT_CSTART_TYPE(nd) = A_HOVLPSHIFT;
3161 FT_CSTART_REUSE(nd) = 0;
3162 FT_CSTART_INVMVD(nd) = 0;
3163 FT_CSTART_USELHS(nd) = 0;
3164 FT_CSTART_SAME(nd) = 0;
3165 A_OPT1P(ast, nd);
3166 dest = mk_id(xfer);
3167 A_DESTP(asn, dest);
3168 A_SRCP(asn, ast);
3169
3170 startstd = add_stmt_before(asn, header);
3171 A_STDP(asn, startstd);
3172 nd = A_OPT1G(forall);
3173 plist(FT_RTL(nd), startstd);
3174 FT_NRT(nd)++;
3175 }
3176
3177 static CTYPE *
getcyclic(void)3178 getcyclic(void)
3179 {
3180 int i;
3181 CTYPE *ct;
3182 ct = (CTYPE *)getitem(FORALL_AREA, sizeof(CTYPE));
3183 ct->lhs = 0;
3184 ct->ifast = 0;
3185 ct->endifast = 0;
3186 ct->inner_cyclic = clist();
3187 for (i = 0; i < 7; i++) {
3188 ct->c_lof[i] = 0;
3189 ct->c_dupl[i] = 0;
3190 ct->idx[i] = 0;
3191 ct->cb_init[i] = 0;
3192 ct->cb_do[i] = 0;
3193 ct->cb_block[i] = 0;
3194 ct->cb_inc[i] = 0;
3195 ct->cb_enddo[i] = 0;
3196 ct->c_init[i] = 0;
3197 ct->c_inc[i] = 0;
3198 }
3199 return ct;
3200 }
3201
3202 static int
shape_comm_in_expr(int expr,int forall,int std,int nomask)3203 shape_comm_in_expr(int expr, int forall, int std, int nomask)
3204 {
3205 int l, r, d, o;
3206 int l1, l2, l3;
3207 int i, nargs, argt, j;
3208 int lhs, sptr;
3209
3210 if (expr == 0)
3211 return expr;
3212 switch (A_TYPEG(expr)) {
3213 /* expressions */
3214 case A_BINOP:
3215 o = A_OPTYPEG(expr);
3216 d = A_DTYPEG(expr);
3217 l = shape_comm_in_expr(A_LOPG(expr), forall, std, nomask);
3218 r = shape_comm_in_expr(A_ROPG(expr), forall, std, nomask);
3219 if (l == A_LOPG(expr) && r == A_ROPG(expr))
3220 return expr;
3221 return mk_binop(o, l, r, d);
3222 case A_UNOP:
3223 o = A_OPTYPEG(expr);
3224 d = A_DTYPEG(expr);
3225 l = shape_comm_in_expr(A_LOPG(expr), forall, std, nomask);
3226 if (l == A_LOPG(expr))
3227 return expr;
3228 return mk_unop(o, l, d);
3229 case A_CONV:
3230 d = A_DTYPEG(expr);
3231 l = shape_comm_in_expr(A_LOPG(expr), forall, std, nomask);
3232 if (l == A_LOPG(expr))
3233 return expr;
3234 return mk_convert(l, d);
3235 case A_PAREN:
3236 d = A_DTYPEG(expr);
3237 l = shape_comm_in_expr(A_LOPG(expr), forall, std, nomask);
3238 if (l == A_LOPG(expr))
3239 return expr;
3240 return mk_paren(l, d);
3241 case A_SUBSTR:
3242 return expr;
3243 case A_INTR:
3244 case A_FUNC:
3245 /* size & present intrinsics do not need the array content,
3246 * no need to communicate
3247 */
3248 o = A_OPTYPEG(expr);
3249 if (o == I_SIZE || o == I_PRESENT)
3250 return expr;
3251 nargs = A_ARGCNTG(expr);
3252 argt = A_ARGSG(expr);
3253 for (i = 0; i < nargs; ++i) {
3254 ARGT_ARG(argt, i) =
3255 shape_comm_in_expr(ARGT_ARG(argt, i), forall, std, nomask);
3256 }
3257 return expr;
3258 case A_CNST:
3259 case A_CMPLXC:
3260 return expr;
3261 case A_MEM:
3262 if (!A_SHAPEG(expr))
3263 return expr;
3264 sptr = A_SPTRG(A_MEMG(expr));
3265 r = A_MEMG(expr);
3266 d = A_DTYPEG(r);
3267 l = shape_comm_in_expr(A_PARENTG(expr), forall, std, nomask);
3268 if (l == A_PARENTG(expr))
3269 return expr;
3270 return mk_member(l, r, d);
3271 case A_ID:
3272 case A_SUBSCR:
3273 if (!A_SHAPEG(expr))
3274 return expr;
3275 lhs = A_DESTG(A_IFSTMTG(forall));
3276 expr = convert_subscript(expr);
3277 return expr;
3278 default:
3279 interr("shape_comm_in_expr: unknown expression", expr, 2);
3280 return expr;
3281 }
3282 }
3283
3284 static void
shape_communication(int std,int forall)3285 shape_communication(int std, int forall)
3286 {
3287 int nd;
3288 int i;
3289 int cstd;
3290 int expr;
3291 int asn;
3292 int rhs;
3293
3294 /* handle shape communication at forall first a(i) = pure_func(b) */
3295 expr = A_IFEXPRG(forall);
3296 asn = A_IFSTMTG(forall);
3297 rhs = A_SRCG(asn);
3298 rhs = shape_comm_in_expr(rhs, forall, std, 1);
3299 expr = shape_comm_in_expr(expr, forall, std, 1);
3300 A_SRCP(asn, rhs);
3301 A_IFEXPRP(forall, expr);
3302
3303 /* handle shape communication at calls second */
3304 nd = A_OPT1G(forall);
3305 for (i = 0; i < FT_NMCALL(nd); i++) {
3306 cstd = glist(FT_MCALL(nd), i);
3307 shape_comm(cstd, std, forall);
3308 }
3309
3310 for (i = 0; i < FT_NSCALL(nd); i++) {
3311 cstd = glist(FT_SCALL(nd), i);
3312 shape_comm(cstd, std, forall);
3313 }
3314 }
3315
3316 static void
shape_comm(int cstd,int fstd,int forall)3317 shape_comm(int cstd, int fstd, int forall)
3318 {
3319 int ast, ast1;
3320 int cstd1;
3321 int nd, nd1;
3322 int i;
3323 int nargs, argt;
3324 int lhs;
3325 int arg;
3326
3327 ast = STD_AST(cstd);
3328 nd = A_OPT1G(ast);
3329 assert(nd, "call_comm: something is wrong", ast, 3);
3330 for (i = 0; i < FT_CALL_NCALL(nd); i++) {
3331 cstd1 = glist(FT_CALL_CALL(nd), i);
3332 ast1 = STD_AST(cstd1);
3333 nd1 = A_OPT1G(ast1);
3334 assert(nd1, "put_calls: something is wrong", ast1, 3);
3335 shape_comm(cstd1, fstd, forall);
3336 }
3337 nargs = A_ARGCNTG(ast);
3338 argt = A_ARGSG(ast);
3339 for (i = 0; i < nargs; ++i) {
3340 arg = ARGT_ARG(argt, i);
3341 if (!A_SHAPEG(arg))
3342 continue;
3343 lhs = A_DESTG(A_IFSTMTG(forall));
3344 assert(A_TYPEG(arg) == A_SUBSCR || A_TYPEG(arg) == A_ID ||
3345 A_TYPEG(arg) == A_MEM,
3346 "shape_comm: array expression is not supported", arg, 3);
3347 arg = convert_subscript(arg);
3348 }
3349 }
3350
3351 /* The function of this routine is to handle communication of arg.
3352 * This arg is from PURE function and it has shape. It will try to
3353 * bring to lhs of forall. Distribution of TMP will be based on LHS.
3354 * However, the size and shape of TMP will be based on both LHS and arg.
3355 * There are three rules for TMP:
3356 * 1-) heading dimensions size and distribution from LHS
3357 * 2-) tailling dimensions size from shape of arg with no distribution
3358 * 3-) remove idx from forall list if it does not appear at arg or mask
3359 * For example: (assume that a, b have different distributions.
3360 * forall(i=1:n) a(i)= sum(b(i,iloc(i),:))
3361 * will be
3362 * forall(i=1:n) tmp(i,:) =b(i,iloc(i),:)
3363 * forall(i=1:n) a(i) = sum(tmp(i,:))
3364 * There will be no communication for tmp which becames new arg of PURE.
3365 * is_pure_temp_too_large() decides whether tmp will have more dimension than
3366 * arg. if it is, tmp will be replication of arg.
3367 */
3368
3369 static int
gen_shape_comm(int arg,int forall,int std,int nomask)3370 gen_shape_comm(int arg, int forall, int std, int nomask)
3371 {
3372 int newforall;
3373 int newstd;
3374 int sptr;
3375 int asn;
3376 int newast;
3377 int tmpast;
3378 int lhs;
3379 int mask;
3380 int list;
3381 int olist;
3382 int ast;
3383 int shape;
3384 int nd;
3385 int header;
3386
3387 if (!A_SHAPEG(arg))
3388 return arg;
3389 lhs = A_DESTG(A_IFSTMTG(forall));
3390 olist = A_LISTG(forall);
3391 mask = A_IFEXPRG(forall);
3392 if (nomask)
3393 mask = 0;
3394 nd = A_OPT1G(forall);
3395 assert(nd, "gen_shape_comm: something is wrong", forall, 3);
3396 header = FT_HEADER(nd);
3397 list = construct_list_for_pure(arg, mask, olist);
3398 if (is_pure_temp_too_large(list, arg)) {
3399 tmpast = handle_pure_temp_too_large(arg, header);
3400 return tmpast;
3401 }
3402 /* put new list to forall for short time to trick
3403 * get_temp_pure() and reference_for_pure_temp()
3404 */
3405 A_LISTP(forall, list);
3406 /* create a pure temp */
3407 sptr = get_temp_pure(forall, lhs, arg, header, header, arg);
3408 tmpast = reference_for_pure_temp(sptr, lhs, arg, forall);
3409 /* put original list back to forall */
3410 A_LISTP(forall, olist);
3411
3412 asn = mk_stmt(A_ASN, DTYPEG(sptr));
3413 A_DESTP(asn, tmpast);
3414 A_SRCP(asn, arg);
3415
3416 if (list) {
3417 newforall = mk_stmt(A_FORALL, 0);
3418 A_LISTP(newforall, list);
3419 A_IFSTMTP(newforall, asn);
3420 A_IFEXPRP(newforall, mask);
3421 } else {
3422 shape = A_SHAPEG(tmpast);
3423 newforall = make_forall(shape, tmpast, 0, 0);
3424 ast = normalize_forall(newforall, asn, 0);
3425 A_IFSTMTP(newforall, ast);
3426 A_IFEXPRP(newforall, 0);
3427 }
3428 newforall = rename_forall_list(newforall);
3429 newstd = add_stmt_before(newforall, header);
3430 process_forall(newstd);
3431
3432 newforall = STD_AST(newstd);
3433 transform_forall(newstd, newforall);
3434 return tmpast;
3435 }
3436
3437 /* construct a new list based on old list
3438 * which must appear arg or mask expression
3439 */
3440 static int
construct_list_for_pure(int arg,int mask,int list)3441 construct_list_for_pure(int arg, int mask, int list)
3442 {
3443 int newlist;
3444 int isptr;
3445 int j;
3446
3447 start_astli();
3448 for (j = list; j != 0; j = ASTLI_NEXT(j)) {
3449 isptr = ASTLI_SPTR(j);
3450 if (is_name_in_expr(arg, isptr) || is_name_in_expr(mask, isptr)) {
3451 /* include this one */
3452 newlist = add_astli();
3453 ASTLI_SPTR(newlist) = ASTLI_SPTR(j);
3454 ASTLI_TRIPLE(newlist) = ASTLI_TRIPLE(j);
3455 }
3456 }
3457 return ASTLI_HEAD;
3458 }
3459
3460 /* This will find temp_reference for pure communication.
3461 * lhs=a(i,j,2), arg=b(2,i,:) will be tmp=tmp(i,j,:)
3462 * heading dimension from lhs, talling from arg.
3463 */
3464 static int
reference_for_pure_temp(int sptr,int lhs,int arg,int forall)3465 reference_for_pure_temp(int sptr, int lhs, int arg, int forall)
3466 {
3467 int subs[7];
3468 int list;
3469 int i, j;
3470 int asd;
3471 int ndim;
3472 int astnew;
3473 int shape;
3474 int sdim;
3475
3476 list = A_LISTG(forall);
3477 asd = A_ASDG(lhs);
3478 ndim = ASD_NDIM(asd);
3479 j = 0;
3480 for (i = 0; i < ndim; i++) {
3481 if (search_forall_var(ASD_SUBS(asd, i), list)) {
3482 /* include this dimension */
3483 subs[j] = ASD_SUBS(asd, i);
3484 j++;
3485 }
3486 }
3487
3488 shape = A_SHAPEG(arg);
3489 asd = A_ASDG(arg);
3490 ndim = ASD_NDIM(asd);
3491 sdim = 0;
3492 for (i = 0; i < ndim; i++) {
3493 if (A_TYPEG(ASD_SUBS(asd, i)) == A_TRIPLE || A_SHAPEG(ASD_SUBS(asd, i))) {
3494 /* include this dimension */
3495 subs[j] = ASD_SUBS(asd, i);
3496 j++;
3497 sdim++;
3498 }
3499 }
3500 assert(j == rank_of_sym(sptr), "reference_for_pure_temp: rank mismatched",
3501 sptr, 4);
3502 assert(shape, "reference_for_pure_temp: shape mismatched", sptr, 4);
3503 assert(SHD_NDIM(shape) == sdim, "reference_for_pure_temp: shape mismatched",
3504 sptr, 4);
3505
3506 astnew = mk_subscr(mk_id(sptr), subs, j, DTYPEG(sptr));
3507 return astnew;
3508 }
3509
3510 /* this will decide whether pure tmp will be larger than arg
3511 * if gen_shape_comm() choose to have distributed temp.
3512 * if it is, it will not choose the distributed temp.
3513 * it will choose to have replicated temp.
3514 */
3515 static LOGICAL
is_pure_temp_too_large(int list,int arg)3516 is_pure_temp_too_large(int list, int arg)
3517 {
3518 int count;
3519 int ndim;
3520 int asd;
3521 int i;
3522 int j;
3523
3524 count = 0;
3525 for (j = list; j != 0; j = ASTLI_NEXT(j))
3526 count++;
3527 assert(A_TYPEG(arg) == A_SUBSCR, "is_pure_temp_too_large: not SUBSCR", arg,
3528 4);
3529 asd = A_ASDG(arg);
3530 ndim = ASD_NDIM(asd);
3531 for (i = 0; i < ndim; i++) {
3532 if (A_TYPEG(ASD_SUBS(asd, i)) == A_TRIPLE || A_SHAPEG(ASD_SUBS(asd, i)))
3533 count++;
3534 }
3535 if (count > ndim)
3536 return TRUE;
3537 return FALSE;
3538 }
3539
3540 /* this routine is to find distributed array in expr.
3541 * assign those array to the same size replicated temp
3542 * For example: a(inx(i))
3543 * indx$temp = indx
3544 * a$temp = a
3545 * return a$temp(indx$temp(i))
3546 */
3547 static int
handle_pure_temp_too_large(int expr,int std)3548 handle_pure_temp_too_large(int expr, int std)
3549 {
3550 int l, r, d, o;
3551 int l1, l2, l3;
3552 int i, nargs, argt, j;
3553 int tmp_sptr, tmp_ast;
3554 int forall, ast;
3555 int asd, ndim;
3556 int shape, std1;
3557 int sptr;
3558 int eledtype;
3559 int subs[7];
3560 int asn;
3561
3562 if (expr == 0)
3563 return expr;
3564 switch (A_TYPEG(expr)) {
3565 /* expressions */
3566 case A_BINOP:
3567 o = A_OPTYPEG(expr);
3568 d = A_DTYPEG(expr);
3569 l = handle_pure_temp_too_large(A_LOPG(expr), std);
3570 r = handle_pure_temp_too_large(A_ROPG(expr), std);
3571 return mk_binop(o, l, r, d);
3572 case A_UNOP:
3573 o = A_OPTYPEG(expr);
3574 d = A_DTYPEG(expr);
3575 l = handle_pure_temp_too_large(A_LOPG(expr), std);
3576 return mk_unop(o, l, d);
3577 case A_CONV:
3578 d = A_DTYPEG(expr);
3579 l = handle_pure_temp_too_large(A_LOPG(expr), std);
3580 return mk_convert(l, d);
3581 case A_PAREN:
3582 d = A_DTYPEG(expr);
3583 l = handle_pure_temp_too_large(A_LOPG(expr), std);
3584 return mk_paren(l, d);
3585 case A_SUBSTR:
3586 return expr;
3587 case A_INTR:
3588 case A_FUNC:
3589 nargs = A_ARGCNTG(expr);
3590 argt = A_ARGSG(expr);
3591 for (i = 0; i < nargs; ++i) {
3592 ARGT_ARG(argt, i) = handle_pure_temp_too_large(ARGT_ARG(argt, i), std);
3593 }
3594 return expr;
3595 case A_CNST:
3596 case A_CMPLXC:
3597 return expr;
3598 case A_MEM:
3599 sptr = A_SPTRG(A_MEMG(expr));
3600 if (DTY(DTYPEG(sptr)) != TY_ARRAY || !ALIGNG(sptr)) {
3601 r = A_MEMG(expr);
3602 d = A_DTYPEG(r);
3603 l = handle_pure_temp_too_large(A_PARENTG(expr), std);
3604 return mk_member(l, r, d);
3605 }
3606 goto replicate_temp;
3607
3608 case A_ID:
3609 sptr = A_SPTRG(expr);
3610 if (STYPEG(sptr) != ST_ARRAY || !ALIGNG(sptr))
3611 return expr;
3612 eledtype = DTY(DTYPEG(sptr) + 1);
3613
3614 replicate_temp:
3615 /* copy to replicate temp */
3616 tmp_sptr = get_temp_pure_replicated(sptr, std, std, expr);
3617 tmp_ast = mk_id(tmp_sptr);
3618 asn = mk_assn_stmt(tmp_ast, expr, eledtype);
3619 shape = A_SHAPEG(tmp_ast);
3620 forall = make_forall(shape, tmp_ast, 0, 0);
3621 A_ARRASNP(forall, TRUE);
3622 forall = rename_forall_list(forall);
3623 ast = normalize_forall(forall, asn, 0);
3624 A_IFSTMTP(forall, ast);
3625 A_IFEXPRP(forall, 0);
3626 std1 = add_stmt_before(forall, std);
3627 process_forall(std1);
3628 transform_forall(std1, forall);
3629 return mk_id(tmp_sptr);
3630 case A_SUBSCR:
3631 asd = A_ASDG(expr);
3632 ndim = ASD_NDIM(asd);
3633 for (i = 0; i < ndim; i++) {
3634 subs[i] = handle_pure_temp_too_large(ASD_SUBS(asd, i), std);
3635 }
3636 l1 = handle_pure_temp_too_large(A_LOPG(expr), std);
3637 expr = mk_subscr(l1, subs, ndim, A_DTYPEG(expr));
3638 return expr;
3639 case A_TRIPLE:
3640 l1 = handle_pure_temp_too_large(A_LBDG(expr), std);
3641 l2 = handle_pure_temp_too_large(A_UPBDG(expr), std);
3642 l3 = handle_pure_temp_too_large(A_STRIDEG(expr), std);
3643 return mk_triple(l1, l2, l3);
3644 default:
3645 interr("handle_pure_temp_too_large: unknown expression", expr, 2);
3646 return expr;
3647 }
3648 }
3649
3650 static void
insert_call_comm(int std,int forall)3651 insert_call_comm(int std, int forall)
3652 {
3653 int nd;
3654 int i;
3655 int cstd;
3656
3657 nd = A_OPT1G(forall);
3658 comminfo.mask_phase = 1;
3659 for (i = 0; i < FT_NMCALL(nd); i++) {
3660 cstd = glist(FT_MCALL(nd), i);
3661 put_call_comm(cstd, std, forall);
3662 }
3663 comminfo.mask_phase = 0;
3664 for (i = 0; i < FT_NSCALL(nd); i++) {
3665 cstd = glist(FT_SCALL(nd), i);
3666 put_call_comm(cstd, std, forall);
3667 }
3668 }
3669
3670 static void
put_call_comm(int cstd,int fstd,int forall)3671 put_call_comm(int cstd, int fstd, int forall)
3672 {
3673 int ast, ast1;
3674 int cstd1;
3675 int nd, nd1;
3676 int i;
3677 int test;
3678 int nargs, argt;
3679
3680 comminfo.usedstd = cstd;
3681 ast = STD_AST(cstd);
3682 nd = A_OPT1G(ast);
3683 assert(nd, "call_comm: something is wrong", ast, 3);
3684 for (i = 0; i < FT_CALL_NCALL(nd); i++) {
3685 cstd1 = glist(FT_CALL_CALL(nd), i);
3686 ast1 = STD_AST(cstd1);
3687 nd1 = A_OPT1G(ast1);
3688 assert(nd1, "put_calls: something is wrong", ast1, 3);
3689 put_call_comm(cstd1, fstd, forall);
3690 }
3691 nargs = A_ARGCNTG(ast);
3692 argt = A_ARGSG(ast);
3693 for (i = 0; i < nargs; ++i) {
3694 ARGT_ARG(argt, i) = insert_forall_comm(ARGT_ARG(argt, i));
3695 }
3696 }
3697
3698 static int
tag_call_comm(int std,int forall)3699 tag_call_comm(int std, int forall)
3700 {
3701 int nd;
3702 int i;
3703 int cstd;
3704
3705 nd = A_OPT1G(forall);
3706 comminfo.mask_phase = 1;
3707 for (i = 0; i < FT_NMCALL(nd); i++) {
3708 cstd = glist(FT_MCALL(nd), i);
3709 call_comm(cstd, std, forall);
3710 }
3711 comminfo.mask_phase = 0;
3712 for (i = 0; i < FT_NSCALL(nd); i++) {
3713 cstd = glist(FT_SCALL(nd), i);
3714 call_comm(cstd, std, forall);
3715 }
3716 return 1;
3717 }
3718
3719 static void
call_comm(int cstd,int fstd,int forall)3720 call_comm(int cstd, int fstd, int forall)
3721 {
3722 int ast, ast1;
3723 int cstd1;
3724 int nd, nd1;
3725 int i;
3726 int test;
3727
3728 ast = STD_AST(cstd);
3729 nd = A_OPT1G(ast);
3730 assert(nd, "call_comm: something is wrong", ast, 3);
3731 for (i = 0; i < FT_CALL_NCALL(nd); i++) {
3732 cstd1 = glist(FT_CALL_CALL(nd), i);
3733 ast1 = STD_AST(cstd1);
3734 nd1 = A_OPT1G(ast1);
3735 assert(nd1, "put_calls: something is wrong", ast1, 3);
3736 call_comm(cstd1, fstd, forall);
3737 }
3738 test = tag_forall_comm(ast);
3739 }
3740
3741 static int
sequentialize_mask_call(int forall,int stdnext)3742 sequentialize_mask_call(int forall, int stdnext)
3743 {
3744 int nd;
3745 int i;
3746 int cstd;
3747
3748 nd = A_OPT1G(forall);
3749 for (i = 0; i < FT_NMCALL(nd); i++) {
3750 cstd = glist(FT_MCALL(nd), i);
3751 stdnext = sequentialize_call(cstd, stdnext, forall);
3752 }
3753 return stdnext;
3754 }
3755
3756 static int
sequentialize_stmt_call(int forall,int stdnext)3757 sequentialize_stmt_call(int forall, int stdnext)
3758 {
3759 int nd;
3760 int i;
3761 int cstd;
3762
3763 nd = A_OPT1G(forall);
3764 for (i = 0; i < FT_NSCALL(nd); i++) {
3765 cstd = glist(FT_SCALL(nd), i);
3766 stdnext = sequentialize_call(cstd, stdnext, forall);
3767 }
3768 return stdnext;
3769 }
3770
3771 static int
sequentialize_call(int cstd,int stdnext,int forall)3772 sequentialize_call(int cstd, int stdnext, int forall)
3773 {
3774 int ast, ast1;
3775 int cstd1;
3776 int nd, nd1;
3777 int i, lineno;
3778 int stdnext1;
3779
3780 ast = STD_AST(cstd);
3781 nd = A_OPT1G(ast);
3782 assert(nd, "call_comm: something is wrong", ast, 3);
3783 for (i = 0; i < FT_CALL_NCALL(nd); i++) {
3784 cstd1 = glist(FT_CALL_CALL(nd), i);
3785 ast1 = STD_AST(cstd1);
3786 nd1 = A_OPT1G(ast1);
3787 assert(nd1, "put_calls: something is wrong", ast1, 3);
3788 stdnext = sequentialize_call(cstd1, stdnext, forall);
3789 }
3790 lineno = STD_LINENO(cstd);
3791 delete_stmt(cstd);
3792 stdnext = add_stmt_before(ast, stdnext);
3793 stdnext1 = STD_NEXT(stdnext);
3794 STD_LINENO(stdnext) = lineno;
3795 transform_ast(stdnext, ast);
3796 stdnext = stdnext1;
3797 return stdnext;
3798 }
3799
3800 /* this routine will normalize forall triplet list,
3801 * It makes triple integer and
3802 * It eliminates distributed array from triplet.
3803 */
3804 static int
normalize_forall_triplet(int std,int forall)3805 normalize_forall_triplet(int std, int forall)
3806 {
3807 int lb, ub, st;
3808 int triplet_list;
3809 int triplet;
3810 int list;
3811 int rhs_is_dist;
3812 int tmp_sptr;
3813 int newlist;
3814 int ast, dest;
3815 int triplet_list1, triplet1;
3816 int isptr;
3817
3818 /* don't allow forall(i=1:n,j=istart(i):istop(i) */
3819 triplet_list = A_LISTG(forall);
3820 if (is_multiple_idx_in_list(triplet_list))
3821 return 0;
3822
3823 /* It eliminates distributed array from triplet */
3824 triplet_list = A_LISTG(forall);
3825 start_astli();
3826 for (; triplet_list; triplet_list = ASTLI_NEXT(triplet_list)) {
3827 triplet = ASTLI_TRIPLE(triplet_list);
3828 /* case forall(i=idx(1):n) */
3829 rhs_is_dist = FALSE;
3830 triplet = insert_comm_before(std, triplet, &rhs_is_dist, FALSE);
3831 newlist = add_astli();
3832 ASTLI_SPTR(newlist) = ASTLI_SPTR(triplet_list);
3833 ASTLI_TRIPLE(newlist) = triplet;
3834 }
3835 list = ASTLI_HEAD;
3836 A_LISTP(forall, list);
3837
3838 /* make forall triple DT_INT if not */
3839 triplet_list = A_LISTG(forall);
3840 start_astli();
3841 for (; triplet_list; triplet_list = ASTLI_NEXT(triplet_list)) {
3842 triplet = ASTLI_TRIPLE(triplet_list);
3843 lb = A_LBDG(triplet);
3844 assert(lb, "normalize_forall_triplet: no lower bound at forall triplet",
3845 forall, 3);
3846 if (A_TYPEG(lb) == A_CONV)
3847 lb = A_LOPG(lb);
3848 if (!DT_ISINT(A_DTYPEG(lb))) {
3849 tmp_sptr = sym_get_scalar("lb", 0, astb.bnd.dtype);
3850 ast = mk_stmt(A_ASN, astb.bnd.dtype);
3851 dest = mk_id(tmp_sptr);
3852 A_DESTP(ast, dest);
3853 A_SRCP(ast, lb);
3854 add_stmt_before(ast, std);
3855 lb = mk_id(tmp_sptr);
3856 }
3857 ub = A_UPBDG(triplet);
3858 assert(ub, "normalize_forall_triplet: no lower bound at forall triplet",
3859 forall, 3);
3860 if (A_TYPEG(ub) == A_CONV)
3861 ub = A_LOPG(ub);
3862 if (!DT_ISINT(A_DTYPEG(ub))) {
3863 tmp_sptr = sym_get_scalar("ub", 0, astb.bnd.dtype);
3864 ast = mk_stmt(A_ASN, astb.bnd.dtype);
3865 dest = mk_id(tmp_sptr);
3866 A_DESTP(ast, dest);
3867 A_SRCP(ast, ub);
3868 add_stmt_before(ast, std);
3869 ub = mk_id(tmp_sptr);
3870 }
3871 st = A_STRIDEG(triplet);
3872 if (A_TYPEG(st) == A_CONV)
3873 st = A_LOPG(st);
3874 if (st)
3875 if (!DT_ISINT(A_DTYPEG(st))) {
3876 tmp_sptr = sym_get_scalar("st", 0, astb.bnd.dtype);
3877 ast = mk_stmt(A_ASN, astb.bnd.dtype);
3878 dest = mk_id(tmp_sptr);
3879 A_DESTP(ast, dest);
3880 A_SRCP(ast, st);
3881 add_stmt_before(ast, std);
3882 st = mk_id(tmp_sptr);
3883 }
3884 triplet = mk_triple(lb, ub, st);
3885 newlist = add_astli();
3886 ASTLI_SPTR(newlist) = ASTLI_SPTR(triplet_list);
3887 ASTLI_TRIPLE(newlist) = triplet;
3888 }
3889 list = ASTLI_HEAD;
3890 A_LISTP(forall, list);
3891 return 1;
3892 }
3893
3894 /* This is a quick fix to move guard_forall after optimization.
3895 * guard_forall was inserting IF-THEN which was reducing
3896 * the optimization chance. guard_forall can be written
3897 * such that it will not need this fix. */
3898 static void
fix_guard_forall(int std)3899 fix_guard_forall(int std)
3900 {
3901 CTYPE *ct;
3902 int ast;
3903 int asn;
3904 int subinfo;
3905 int lhs, lhsd;
3906 int ndim, asd;
3907 int i;
3908 int nd;
3909
3910 ast = STD_AST(std);
3911 asn = A_IFSTMTG(ast);
3912 nd = A_OPT1G(ast);
3913 ct = FT_CYCLIC(nd);
3914 lhs = A_DESTG(asn);
3915 lhsd = left_subscript_ast(lhs);
3916 asd = A_ASDG(lhsd);
3917 ndim = ASD_NDIM(asd);
3918 subinfo = comminfo.subinfo;
3919 for (i = 0; i < ndim; ++i) {
3920 ct->c_dstt[i] = SUBI_DSTT(subinfo + i);
3921 ct->c_dupl[i] = SUBI_DUPL(subinfo + i);
3922 ct->c_idx[i] = SUBI_IDX(subinfo + i);
3923 }
3924 A_OPT1P(ast, nd);
3925 }
3926
3927 /* This routine is to check whether forall has dependency.
3928 * If it has, it creates temp which is shape array with lhs.
3929 * For example,
3930 * forall(i=1:N) a(i) = a(i-1)+.....
3931 * will be rewritten
3932 * forall(i=1:N) temp(i) = a(i-1)+.....
3933 * forall(i=1:N) a(i) = temp(i)
3934 */
3935 static void
forall_dependency_scalarize(int std,int * std1,int * std2)3936 forall_dependency_scalarize(int std, int *std1, int *std2)
3937 {
3938 int lhs, rhs;
3939 int ast, ast1, ast2;
3940 int asn;
3941 int asd;
3942 int subs[7];
3943 int i;
3944 int ndim;
3945 int sptr;
3946 int temp_ast;
3947 int newforall, newasn;
3948 int expr;
3949 int lineno;
3950 LOGICAL bIndep;
3951
3952 ast = STD_AST(std);
3953 asn = A_IFSTMTG(ast);
3954 if (A_TYPEG(asn) != A_ASN)
3955 return;
3956 lhs = A_DESTG(asn);
3957 rhs = A_SRCG(asn);
3958 expr = A_IFEXPRG(ast);
3959
3960 /* forall-independent */
3961 lineno = STD_LINENO(std);
3962 open_pragma(lineno);
3963 bIndep = XBIT(19, 0x100) != 0;
3964 close_pragma();
3965 if (bIndep)
3966 return;
3967
3968 /* take conditional expr, if there is dependency */
3969 if (is_dependent(lhs, expr, ast, std, std) && A_TYPEG(lhs) != A_SUBSTR) {
3970 sptr = get_temp_forall(ast, lhs, std, std, DT_LOG, 0);
3971 temp_ast = reference_for_temp(sptr, lhs, ast);
3972 A_IFEXPRP(ast, temp_ast);
3973 newforall = mk_stmt(A_FORALL, 0);
3974 A_LISTP(newforall, A_LISTG(ast));
3975 A_IFEXPRP(newforall, 0);
3976 newasn = mk_stmt(A_ASN, 0);
3977 A_DESTP(newasn, temp_ast);
3978 A_SRCP(newasn, expr);
3979 A_IFSTMTP(newforall, newasn);
3980 *std1 = add_stmt_before(newforall, std);
3981 }
3982
3983 if (is_dependent(lhs, rhs, ast, std, std) && A_TYPEG(lhs) != A_SUBSTR) {
3984 sptr = get_temp_forall(ast, lhs, std, std, 0, lhs);
3985 temp_ast = reference_for_temp(sptr, lhs, ast);
3986 A_DESTP(asn, temp_ast);
3987 newforall = mk_stmt(A_FORALL, 0);
3988 A_LISTP(newforall, A_LISTG(ast));
3989 A_IFEXPRP(newforall, A_IFEXPRG(ast));
3990 newasn = mk_stmt(A_ASN, 0);
3991 A_DESTP(newasn, lhs);
3992 A_SRCP(newasn, temp_ast);
3993 A_IFSTMTP(newforall, newasn);
3994 *std2 = add_stmt_after(newforall, std);
3995 }
3996 }
3997
3998 static int
fix_mem_ast(int astmem,int ast)3999 fix_mem_ast(int astmem, int ast)
4000 {
4001
4002 int rslt;
4003
4004 switch (A_TYPEG(ast)) {
4005
4006 case A_BINOP:
4007 rslt = fix_mem_ast(astmem, A_LOPG(ast));
4008 if (rslt && rslt != A_LOPG(ast))
4009 A_LOPP(ast, rslt);
4010 rslt = fix_mem_ast(astmem, A_ROPG(ast));
4011 if (rslt && rslt != A_ROPG(ast))
4012 A_ROPP(ast, rslt);
4013 break;
4014 case A_UNOP:
4015 rslt = fix_mem_ast(astmem, A_LOPG(ast));
4016 if (rslt && rslt != A_LOPG(ast))
4017 A_LOPP(ast, rslt);
4018 break;
4019 case A_LABEL:
4020 case A_ENTRY:
4021 case A_ID:
4022 return check_member(astmem, ast);
4023 case A_SUBSCR:
4024 case A_SUBSTR:
4025 rslt = fix_mem_ast(astmem, A_LOPG(ast));
4026 if (rslt && rslt != A_LOPG(ast))
4027 A_LOPP(ast, rslt);
4028 break;
4029 case A_MEM:
4030 rslt = fix_mem_ast(astmem, A_PARENTG(ast));
4031 if (rslt && rslt != A_PARENTG(ast))
4032 A_PARENTP(ast, rslt);
4033 break;
4034 }
4035 return 0;
4036 }
4037
4038 /* This routine will perform the following canonical conversion
4039 *
4040 * forall(i=l:u:s) a(m*i+k) = ...i...
4041 *
4042 * will be converted into
4043 *
4044 * forall(i=m*l+k:m*u+k:m*s) a(i) = ...(i-k)/m...
4045 */
4046 /* ### rewrite this routine to handle members */
4047 static int
canonical_conversion(int ast)4048 canonical_conversion(int ast)
4049 {
4050 int list;
4051 int asn;
4052 int astli;
4053 int base, stride;
4054 int expr;
4055 int newexpr;
4056 int l, u, s;
4057 int ll, uu, ss;
4058 int triple;
4059 int asd;
4060 int ndim;
4061 int isptr;
4062 int i, k;
4063 int zero = astb.bnd.zero;
4064 int ifexpr;
4065 int subs[7];
4066 int newdest;
4067 int nd, nd1;
4068 int ip, pstd, past;
4069 LITEMF *plist;
4070 int glb, gub, st;
4071 ADSC *ad;
4072 int lhs, lhsd, sptr, dim;
4073 int align;
4074
4075
4076 /* Don't replace the subscript if we intend it that way */
4077 if (!XBIT(58,0x1000000) && A_CONSTBNDG(ast))
4078 return 0;
4079
4080 list = A_LISTG(ast);
4081 ifexpr = A_IFEXPRG(ast);
4082 asn = A_IFSTMTG(ast);
4083 expr = A_SRCG(asn);
4084 lhs = A_DESTG(asn);
4085
4086 for (lhsd = lhs; A_TYPEG(lhsd) != A_ID;) {
4087 switch (A_TYPEG(lhsd)) {
4088 case A_SUBSCR:
4089 asd = A_ASDG(lhsd);
4090 ndim = ASD_NDIM(asd);
4091
4092 /* don't let A(V(I)), where V is distributed, that is solved earlier */
4093 for (i = 0; i < ndim; ++i) {
4094 ss = ASD_SUBS(asd, i);
4095 if (is_dist_array_in_expr(ss)) {
4096 return 0;
4097 }
4098 }
4099 lhsd = A_LOPG(lhsd);
4100 break;
4101 case A_MEM:
4102 lhsd = A_PARENTG(lhsd);
4103 break;
4104 default:
4105 interr("canonical_conversion unexpected AST type on LHS", A_TYPEG(lhsd),
4106 3);
4107 break;
4108 }
4109 }
4110 lhsd = left_subscript_ast(lhs);
4111 asd = A_ASDG(lhsd);
4112 ndim = ASD_NDIM(asd);
4113 sptr = left_array_symbol(lhs);
4114 align = ALIGNG(sptr);
4115
4116 /* don't let A(I+J), don't let A(I,I+1), let A(I,I) */
4117 for (i = 0; i < ndim; i++) {
4118 astli = 0;
4119 search_idx(ASD_SUBS(asd, i), list, &astli, &base, &stride);
4120 if (base == 0)
4121 return 0; /* i+j */
4122 if (astli == 0 && stride == zero)
4123 continue; /* only base */
4124 if (base == zero && stride == astb.bnd.one)
4125 continue; /* a(i) */
4126 isptr = ASTLI_SPTR(astli);
4127 for (k = 0; k < ndim; ++k) {
4128 if (k != i) {
4129 if (is_name_in_expr(ASD_SUBS(asd, k), isptr)) {
4130 return 0; /* A(i+1,i) */
4131 }
4132 }
4133 }
4134 }
4135
4136
4137 for (i = 0; i < ndim; i++) {
4138 subs[i] = ASD_SUBS(asd, i);
4139 astli = 0;
4140 search_idx(ASD_SUBS(asd, i), list, &astli, &base, &stride);
4141 if (base == 0)
4142 return 0; /* i+j */
4143 if (astli == 0 && stride == zero)
4144 continue; /* only base */
4145 if (base == zero && stride == astb.bnd.one)
4146 continue; /* a(i) */
4147 ast_visit(1, 1);
4148 isptr = ASTLI_SPTR(astli);
4149 /* change the lhs subscript*/
4150 subs[i] = mk_id(isptr);
4151
4152 /* calculate (i-k)/m */
4153 newexpr = opt_binop(OP_SUB, mk_id(isptr), base, astb.bnd.dtype);
4154 newexpr = opt_binop(OP_DIV, newexpr, stride, astb.bnd.dtype);
4155
4156 ast_replace(mk_id(isptr), newexpr);
4157
4158 /* change the rhs expression*/
4159 expr = ast_rewrite(expr);
4160
4161 /* change the ifexpr expression*/
4162 ifexpr = ast_rewrite(ifexpr);
4163
4164 /* change also pcalls */
4165 nd = A_OPT1G(ast);
4166 plist = FT_PCALL(nd);
4167 for (ip = 0; ip < FT_NPCALL(nd); ip++) {
4168 pstd = plist->item;
4169 plist = plist->next;
4170 past = STD_AST(pstd);
4171 nd1 = A_OPT1G(past);
4172 past = ast_rewrite(past);
4173 A_OPT1P(past, nd1);
4174 STD_AST(pstd) = past;
4175 A_STDP(past, pstd);
4176 }
4177
4178 ast_unvisit();
4179
4180 /* change the forall list */
4181 triple = ASTLI_TRIPLE(astli);
4182 l = A_LBDG(triple);
4183 fix_mem_ast(l, base);
4184 u = A_UPBDG(triple);
4185 s = A_STRIDEG(triple);
4186
4187 ll = opt_binop(OP_MUL, stride, l, astb.bnd.dtype);
4188 ll = opt_binop(OP_ADD, ll, base, astb.bnd.dtype);
4189 uu = opt_binop(OP_MUL, stride, u, astb.bnd.dtype);
4190 uu = opt_binop(OP_ADD, uu, base, astb.bnd.dtype);
4191 if (s == 0)
4192 ss = stride;
4193 else
4194 ss = opt_binop(OP_MUL, stride, s, astb.bnd.dtype);
4195 ASTLI_TRIPLE(astli) = mk_triple(ll, uu, ss);
4196 }
4197 newdest = mk_subscr(A_LOPG(lhsd), subs, ndim, A_DTYPEG(lhsd));
4198 newdest = replace_ast_subtree(lhs, lhsd, newdest);
4199 A_DESTP(asn, newdest);
4200 A_SRCP(asn, expr);
4201 A_IFEXPRP(ast, ifexpr);
4202 A_IFSTMTP(ast, asn);
4203 return 1;
4204 }
4205
4206 /* this will find scalar communication at ast,
4207 * It expect that std is forall std.
4208 * It does not disturb other forall communication:
4209 * For example, forall(i=1:n) a(b(1),c(i)) = 1
4210 * Here, only perform communication for b(1).
4211 */
4212 static int
scalar_communication(int ast,int std)4213 scalar_communication(int ast, int std)
4214 {
4215 int l, r, d, o;
4216 int l1, l2, l3;
4217 int a, a1;
4218 int i, nargs, argt, j;
4219 int arref;
4220 int header;
4221 int forall;
4222 int rhs_is_dist;
4223 int sptr;
4224 int asd, ndim;
4225 int subs[7];
4226 int nd, nd1, nd2;
4227 int src;
4228 int cnt;
4229
4230 a = ast;
4231 if (!a)
4232 return a;
4233 forall = STD_AST(std);
4234 switch (A_TYPEG(ast)) {
4235 /* expressions */
4236 case A_BINOP:
4237 o = A_OPTYPEG(a);
4238 d = A_DTYPEG(a);
4239 l = scalar_communication(A_LOPG(a), std);
4240 r = scalar_communication(A_ROPG(a), std);
4241 return mk_binop(o, l, r, d);
4242 case A_UNOP:
4243 o = A_OPTYPEG(a);
4244 d = A_DTYPEG(a);
4245 l = scalar_communication(A_LOPG(a), std);
4246 return mk_unop(o, l, d);
4247 case A_CONV:
4248 d = A_DTYPEG(a);
4249 l = scalar_communication(A_LOPG(a), std);
4250 return mk_convert(l, d);
4251 case A_PAREN:
4252 d = A_DTYPEG(a);
4253 l = scalar_communication(A_LOPG(a), std);
4254 return mk_paren(l, d);
4255 case A_MEM:
4256 r = A_MEMG(a);
4257 d = A_DTYPEG(r);
4258 l = scalar_communication(A_PARENTG(a), std);
4259 return mk_member(l, r, d);
4260 case A_SUBSTR:
4261 return a;
4262 case A_INTR:
4263 case A_FUNC:
4264 nargs = A_ARGCNTG(a);
4265 argt = A_ARGSG(a);
4266 for (i = 0; i < nargs; ++i) {
4267 ARGT_ARG(argt, i) = scalar_communication(ARGT_ARG(argt, i), std);
4268 }
4269 return a;
4270 case A_CNST:
4271 case A_CMPLXC:
4272 return a;
4273 case A_ID:
4274 return a;
4275 case A_SUBSCR:
4276 if (!A_SHAPEG(a) && is_array_element_in_forall(a, std)) {
4277 nd = A_OPT1G(forall);
4278 header = FT_HEADER(nd);
4279 rhs_is_dist = FALSE;
4280 a = insert_comm_before(header, a, &rhs_is_dist, FALSE);
4281 return a;
4282 }
4283
4284 asd = A_ASDG(a);
4285 ndim = ASD_NDIM(asd);
4286 for (i = 0; i < ndim; i++) {
4287 subs[i] = scalar_communication(ASD_SUBS(asd, i), std);
4288 }
4289 return mk_subscr(A_LOPG(a), subs, ndim, A_DTYPEG(a));
4290 case A_TRIPLE:
4291 l1 = scalar_communication(A_LBDG(a), std);
4292 l2 = scalar_communication(A_UPBDG(a), std);
4293 l3 = scalar_communication(A_STRIDEG(a), std);
4294 return mk_triple(l1, l2, l3);
4295 default:
4296 interr("scalar_communication: unknown expression", std, 2);
4297 return 0;
4298 }
4299 }
4300