/*
 * Copyright (c) 1995-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/* clang-format off */

/* dist.c -- distribution management routines */

#include "stdioInterf.h"
#include "fioMacros.h"
#include "scatter.h"
#include "fort_vars.h"

void *ENTFTN(COMM_START, comm_start)(sked **skp, void *rb, F90_Desc *rd,
                                     void *sb, F90_Desc *sd);

sked *ENTFTN(COMM_COPY, comm_copy)(void *rb, void *sb, F90_Desc *rs,
                                   F90_Desc *ss);

static __INT_T f90DummyGenBlock = 0;

__INT_T *I8(__fort_new_gen_block)(F90_Desc *d, int dim)
{
  return &f90DummyGenBlock;
}

void I8(__fort_gen_block_bounds)(F90_Desc *d, int dim, __INT_T *the_olb,
                                 __INT_T *the_oub, __INT_T pcoord)
{

  /* calculate olb and oub for gen_block dimension */

  __INT_T i;
  __INT_T olb, oub, dtStride;
  __INT_T *gb, *tempGB;
  __INT_T pshape;
  __INT_T direction;
  __INT_T dUbound;
  __INT_T tExtent, aExtent;
  DECL_DIM_PTRS(dd);

  SET_DIM_PTRS(dd, d, dim);

  /*pcoord = DIST_DPTR_PCOORD_G(dd);*/

  if (pcoord < 0) {
    *the_oub = 0; /* force off processor grid status */
    *the_olb = 1;
    return;
  }

  dUbound = DPTR_UBOUND_G(dd);
  dtStride = DIST_DPTR_TSTRIDE_G(dd);
  pshape = DIST_DPTR_PSHAPE_G(dd);
  direction = (dtStride < 0);

  tExtent = (DIST_DPTR_TUB_G(dd) - DIST_DPTR_TLB_G(dd)) + 1;
  aExtent = (dUbound - F90_DPTR_LBOUND_G(dd)) + 1;

  if (tExtent != aExtent) {
    tempGB = I8(__fort_new_gen_block)(d, dim);
    gb = tempGB;
  } else {
    tempGB = 0;
    gb = DIST_DPTR_GEN_BLOCK_G(dd);
  }

  if (gb[pcoord] == 0) {
    oub = 0; /* force off processor grid status */
    olb = 1;
  } else {
    gb += ((!direction) ? 0 : (pshape - 1));
    olb = F90_DPTR_LBOUND_G(dd);
    oub = *gb + (olb - 1);
    if (!direction)
      i = 0;
    else
      i = (pshape - 1);
    for (; i != pcoord;) {
      olb += *gb;
      if (!direction) {
        ++gb;
        ++i;
      } else {
        --gb;
        --i;
      }
      oub += *gb;
    }
  }

  if (tempGB)
    __fort_free(tempGB);

  *the_olb = olb;
  *the_oub = oub;

#if defined(DEBUG)
  if (olb < F90_DPTR_LBOUND_G(dd) || oub > dUbound) {
    __fort_abort("__fort_gen_block_bounds: bad gen_block");
  }
#endif
}
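
/* For illustration (hypothetical values): with gen_block sizes {3, 4, 5},
   pshape = 3, lbound = 1, and a positive template stride, pcoord 0 owns
   1:3, pcoord 1 owns 4:7, and pcoord 2 owns 8:12.  A negative stride
   walks the same block list from the other end. */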

/* given a divisor n which is a power of two, compute the (positive)
   right shift amount equivalent to division by n.  return -1 if n is
   not a power of two. */

static int
div_shift(int n)
{
  int z = 0;
  int k = 4 * sizeof(int);
  unsigned int m = n;
  while (k) {
    if (m >> k) {
      m >>= k;
      z += k;
    }
    k >>= 1;
  }
  if (n == 1 << z)
    return z;
  else
    return -1; /* n not a power of 2 */
}
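
/* The loop above is a binary search for the index of the highest set bit:
   k halves from half the word width down to 1.  For example, div_shift(16)
   finds z == 4 and 16 == 1 << 4, so division by 16 can be replaced by
   ">> 4"; div_shift(12) finds z == 3 but 12 != 1 << 3, so it returns -1. */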

/* T3E, T3D, (Cray)Sparc, and T90 w/IEEE have int_mult_upper
   intrinsic; C90 has fast 64-bit multiply allowing emulation */

#ifndef DESC_I8

__INT_T
ENTRY(INT_MULT_UPPER, int_mult_upper)
(__INT_T *x, __INT_T *y)
{
  register unsigned long a, b;
  a = (unsigned long)*x;
  b = (unsigned long)*y;
  return (unsigned long long)a * (unsigned long long)b >> RECIP_FRACBITS;
}

unsigned long
_int_mult_upper(int x, int y)
{
  register unsigned long a, b;
  a = (unsigned long)x;
  b = (unsigned long)y;
  return (unsigned long long)a * (unsigned long long)b >> RECIP_FRACBITS;
}
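
/* These multiply-high helpers back the RECIP()/RECIP_DIVMOD() macros used
   below (see proc_setup): a division by a fixed shape is replaced by a
   multiply by a precomputed fixed-point reciprocal followed by a right
   shift of RECIP_FRACBITS bits.  (Sketch of the idea only, not the exact
   macro expansion, which lives in fioMacros.h.) */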

#endif

/* greatest common divisor */

#ifndef DESC_I8
int
__fort_gcd(int u, int v)
{
  int k, m, n, t;

  if (u < 0)
    u = -u; /* gcd(u,v) = gcd(-u,v) */
  if (v == 0)
    return u; /* gcd(u,0) = abs(u) */
  if (v < 0)
    v = -v;
  if (u == 0)
    return v;

  /* Knuth V.2, 4.5.2, Algorithm B (Binary gcd algorithm). */

  m = u | v;
  m &= -m; /* == least significant bit of (u|v) */

  k = 0;
  n = 4 * sizeof(int);
  while (m != 1) {
    t = m >> n;
    if (t != 0) {
      k += n;
      m = t;
    }
    n >>= 1;
  }

  u >>= k;
  v >>= k;

  if (u & 1)
    t = -v;
  else
    t = u;

  while (t != 0) {
    while ((t & 1) == 0)
      t /= 2;
    if (t > 0)
      u = t;
    else
      v = -t;
    t = u - v;
  }

  return (u << k);
}
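
/* Worked example: gcd(12, 18).  12|18 == 30, whose least significant set
   bit gives k == 1 (one shared factor of 2).  After shifting out that
   factor, the odd/even reduction loop runs on 6 and 9 and converges to 3,
   so the result is 3 << 1 == 6. */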

/* least common multiple */

int
__fort_lcm(int u, int v)
{
  int p;

  p = u * v;
  return (p == 0 ? 0 : (p > 0 ? p : -p) / __fort_gcd(u, v));
}
#endif

int I8(__fort_owner)(F90_Desc *d, __INT_T *idxv)
{
  return 0;
}

/* Given indices of an array element, return the processor number of
   its nearest owner.  The nearest owner is the one whose processor
   coordinate in each replicated dimension is equal to this
   processor's coordinate. */

__INT_T
ENTFTN(OWNER, owner)
(F90_Desc *d, ...)
{
  va_list va;
  proc *p;
  procdim *pd;
  __INT_T dx, idxv[MAXDIMS], owner, px, repl;

#if defined(DEBUG)
  if (d == NULL || F90_TAG_G(d) != __DESC)
    __fort_abort("OWNER: invalid descriptor");
#endif
  va_start(va, d);
  for (dx = 0; dx < F90_RANK_G(d); ++dx)
    idxv[dx] = *va_arg(va, __INT_T *);
  va_end(va);

  owner = I8(__fort_owner)(d, idxv);

  /* add the "scalar subscript" component of the processor number
     (which was subtracted in __fort_owner!) */

  p = DIST_DIST_TARGET_G(d);
  repl = DIST_REPLICATED_G(d);
  for (px = 0; repl != 0; repl >>= 1, ++px) {
    if (repl & 1) {
      pd = &p->dim[px];
      if (pd->coord > 0)
        owner += pd->stride * pd->coord;
    }
  }

  return owner;
}

/* Construct a description of the replication of an object over the
   processor grid.  Replication occurs over the subset of processor
   grid dimensions onto which no array or template dimensions have
   been mapped.  This divides the processor grid into sets of
   processors having identical copies of mapped portions of the array
   or template. */

void I8(__fort_describe_replication)(F90_Desc *d, repl_t *r)
{
  DECL_DIM_PTRS(dd);
  DECL_HDR_PTRS(t);
  DECL_DIM_PTRS(td);
  proc *p;
  procdim *pd;
  __INT_T grpi, dx, m, ncopies, ndim, ngrp, plow, px;

  ngrp = 1;
  grpi = 0;
  plow = DIST_PBASE_G(d);
  m = DIST_MAPPED_G(d);
  for (dx = 0; dx < F90_RANK_G(d); m >>= 1, ++dx) {
    if (m & 1) {
      SET_DIM_PTRS(dd, d, dx);
      if (DIST_DPTR_PCOORD_G(dd) > 0) {
        grpi += DIST_DPTR_PCOORD_G(dd) * ngrp;
        plow += DIST_DPTR_PCOORD_G(dd) * DIST_DPTR_PSTRIDE_G(dd);
      }
      r->gstr[dx] = ngrp; /* replication group multiplier */
      ngrp *= DIST_DPTR_PSHAPE_G(dd);
    } else
      r->gstr[dx] = 0;
  }
  r->grpi = grpi; /* my replication group index */
  r->ngrp = ngrp; /* number of replication groups */

  p = DIST_DIST_TARGET_G(d);
  ndim = 0;
  ncopies = 1;
  m = DIST_REPLICATED_G(d);
  for (px = 0; m != 0; m >>= 1, ++px) {
    pd = &p->dim[px];
    if (m & 1 && pd->shape > 1) {
      if (pd->coord > 0)
        plow -= pd->coord * pd->stride;
      r->pcnt[ndim] = pd->shape;  /* processor counts */
      r->pstr[ndim] = pd->stride; /* processor strides */
      ncopies *= pd->shape;
      ++ndim;
    }
  }
  r->ncopies = ncopies; /* number of replicated copies */
  r->ndim = ndim;       /* number of replicated proc dims */
  r->plow = plow;       /* my replication group low proc number */
}
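
/* For illustration (hypothetical mapping): a 1-D array mapped onto the
   first axis of a 2x4 processor grid leaves the second axis unmapped, so
   there are ngrp == 2 replication groups, each holding ncopies == 4
   identical copies; plow is the lowest processor number in this
   processor's group. */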

/* Iterate over all processors owning a copy of the first element of
   section d.  pcoord is an integer vector of size MAXDIMS which saves the
   processor coordinates in the replication group. */

int I8(__fort_next_owner)(F90_Desc *d, repl_t *r, int *pcoord, int owner)
{
  int i;

  for (i = 0; i < r->ndim; ++i) {
    pcoord[i]++;
    owner += r->pstr[i];
    if (pcoord[i] < r->pcnt[i])
      return owner; /* keep going */
    pcoord[i] = 0;
    owner -= r->pcnt[i] * r->pstr[i];
  }
  return -1; /* finished */
}
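
/* This is odometer-style iteration: the first replicated dimension turns
   fastest, and each wrap-around (pcoord[i] reaching pcnt[i]) carries into
   the next dimension; once every dimension has wrapped, -1 signals that
   all r->ncopies owners have been visited. */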

/* Determine whether or not a global index in dimension dim of an
   array is local to this processor. */

__LOG_T
ENTFTN(ISLOCAL_IDX, islocal_idx)
(F90_Desc *d, __INT_T *dimb, __INT_T *idxb)
{
  DECL_DIM_PTRS(dd);
  __INT_T dfmt, dim, idx, pcoord, tidx;

  dim = *dimb;
  idx = *idxb;
#if defined(DEBUG)
  if (d == NULL || F90_TAG_G(d) != __DESC)
    __fort_abort("ISLOCAL_IDX: invalid descriptor");
  if (dim < 1 || dim > F90_RANK_G(d))
    __fort_abort("ISLOCAL_IDX: invalid dimension");
#endif
  SET_DIM_PTRS(dd, d, dim - 1);
  return GET_DIST_TRUE_LOG;
}

/* Determine whether or not the element of section d referenced by the
   indices is local to this processor. */

int I8(__fort_islocal)(F90_Desc *d, __INT_T *idxv)
{
  DECL_DIM_PTRS(dd);
  __INT_T dfmt, dx, pcoord, tidx;

  if (F90_FLAGS_G(d) & __OFF_TEMPLATE)
    return 0;
  for (dx = 0, dfmt = DIST_DFMT_G(d); dfmt != 0; dfmt >>= DFMT__WIDTH, ++dx) {
    SET_DIM_PTRS(dd, d, dx);
  }
  return 1;
}

/* Determine whether or not the array element referenced by the
   indices is local to this processor. */

__LOG_T
ENTFTN(ISLOCAL, islocal)
(F90_Desc *d, ...)
{
  va_list va;
  DECL_DIM_PTRS(dd);
  __INT_T dx, idxv[MAXDIMS], pcoord, tidx;

#if defined(DEBUG)
  if (d == NULL || F90_TAG_G(d) != __DESC)
    __fort_abort("ISLOCAL: invalid descriptor");
#endif
  va_start(va, d);
  for (dx = 0; dx < F90_RANK_G(d); ++dx)
    idxv[dx] = *va_arg(va, __INT_T *);
  va_end(va);
  return I8(__fort_islocal)(d, idxv) ? GET_DIST_TRUE_LOG : 0;
}

/* Determine the prime owner processor number and the local array
   offset for the element of section d identified by the index
   vector. */

void I8(__fort_localize)(F90_Desc *d, __INT_T *idxv, int *cpu, __INT_T *off)
{
  DECL_DIM_PTRS(dd);
  proc *p;
  procdim *pd;
  __INT_T dfmt, dx, lab, lidx, lof, offset, owner, pcoord, px, repl, tidx;

  owner = DIST_PBASE_G(d);
  offset = 0;
  for (dx = 0, dfmt = DIST_DFMT_G(d); dx < F90_RANK_G(d);
       dfmt >>= DFMT__WIDTH, ++dx) {
    SET_DIM_PTRS(dd, d, dx);
#if defined(DEBUG)
    if (idxv[dx] < F90_DPTR_LBOUND_G(dd) || idxv[dx] > DPTR_UBOUND_G(dd)) {
      printf("%d localize: index %d out of bounds %d:%d rank=%d\n",
             GET_DIST_LCPU, idxv[dx], F90_DPTR_LBOUND_G(dd), DPTR_UBOUND_G(dd),
             F90_RANK_G(d));
      __fort_abort((char *)0);
    }
#endif
    lidx = F90_DPTR_SSTRIDE_G(dd) * idxv[dx] + F90_DPTR_SOFFSET_G(dd);

    switch (dfmt & DFMT__MASK) {
    case DFMT_COLLAPSED:
      offset += F90_DPTR_LSTRIDE_G(dd) * (lidx - DIST_DPTR_LAB_G(dd));
      continue;

    default:
      __fort_abort("localize: unsupported dist-format");
    }

    /* find remote processor's lower allocated bound */

    /*
     * We calculated lab in the gen_block case ...
     */

    if (!DIST_DPTR_GEN_BLOCK_G(dd)) {
      lab = DIST_DPTR_CLB_G(dd) + pcoord * DIST_DPTR_BLOCK_G(dd) -
            DIST_DPTR_AOFFSET_G(dd);

      if (DIST_DPTR_ASTRIDE_G(dd) != 1) {
        if (DIST_DPTR_ASTRIDE_G(dd) < 0)
          lab += DIST_DPTR_BLOCK_G(dd) - DIST_DPTR_CYCLE_G(dd);
        if (DIST_DPTR_ASTRIDE_G(dd) == -1)
          lab = -lab;
        else
          lab = Ceil(lab, DIST_DPTR_ASTRIDE_G(dd));
      }
    }

    lab -= DIST_DPTR_NO_G(dd);

    /* accumulate processor number and offset */

    owner += DIST_DPTR_PSTRIDE_G(dd) * pcoord;
    offset += F90_DPTR_LSTRIDE_G(dd) * (lidx - lab);
  }

  /* compensate for the "scalar subscript" component of the
     processor number in pbase.  The difference really ought to be
     kept in a separate descriptor item. */

  p = DIST_DIST_TARGET_G(d);
  repl = DIST_REPLICATED_G(d);
  for (px = 0; repl != 0; repl >>= 1, ++px) {
    if (repl & 1) {
      pd = &p->dim[px];
      if (pd->coord > 0)
        owner -= pd->stride * pd->coord;
    }
  }

  *cpu = owner;
  *off = offset;

#if defined(DEBUG)
  if (__fort_test & DEBUG_DIST) {
    printf("%d localize: cpu=%d off=%d + lstride=%d * (lidx=%d - lab=%d)\n",
           GET_DIST_LCPU, owner, offset, F90_DPTR_LSTRIDE_G(dd), lidx, lab);
  }
#endif
}

/* localize an index in dimension dim of the array described by d */

void ENTFTN(LOCALIZE_DIM, localize_dim)(F90_Desc *d, __INT_T *dimp,
                                        __INT_T *idxp, __INT_T *pcoordp,
                                        __INT_T *lindexp)
{
  DECL_DIM_PTRS(dd);
  __INT_T dim, idx, lab, lidx, lof, pcoord, tidx;

  dim = *dimp;
  idx = *idxp;

  SET_DIM_PTRS(dd, d, dim - 1);
#if defined(DEBUG)
  if (idx < F90_DPTR_LBOUND_G(dd) || idx > DPTR_UBOUND_G(dd))
    __fort_abort("LOCALIZE_DIM: index out of bounds");
#endif

  lidx = F90_DPTR_SSTRIDE_G(dd) * idx + F90_DPTR_SOFFSET_G(dd);

  switch (DFMT(d, dim)) {
  case DFMT_COLLAPSED:
    *pcoordp = 0;
    *lindexp = lidx;
    return;

  default:
    __fort_abort("LOCALIZE_DIM: unsupported dist-format");
  }

  /* remote proc's lab calculated for gen_block above */

  if (!DIST_DPTR_GEN_BLOCK_G(dd)) {

    /* find remote processor's lower allocated bound */

    lab = DIST_DPTR_CLB_G(dd) + pcoord * DIST_DPTR_BLOCK_G(dd) -
          DIST_DPTR_AOFFSET_G(dd);
    if (DIST_DPTR_ASTRIDE_G(dd) != 1) {
      if (DIST_DPTR_ASTRIDE_G(dd) < 0)
        lab += DIST_DPTR_BLOCK_G(dd) - DIST_DPTR_CYCLE_G(dd);
      if (DIST_DPTR_ASTRIDE_G(dd) == -1)
        lab = -lab;
      else
        lab = Ceil(lab, DIST_DPTR_ASTRIDE_G(dd));
    }
  }
  lab -= DIST_DPTR_NO_G(dd);

  /* return the remote processor's coordinate and the local index on
     this processor with the same element offset */

  *pcoordp = pcoord;
  *lindexp = lidx - lab + DIST_DPTR_LAB_G(dd);
}

/* Given the indices of an element of section d, return its local
   offset or -1 if it is not local.  The offset does not reflect any
   scalar subscript. */

__INT_T
I8(__fort_local_offset)(F90_Desc *d, __INT_T *idxv)
{
  DECL_DIM_PTRS(dd);
  __INT_T dx, idx, lidx, offset;

  if (F90_FLAGS_G(d) & __OFF_TEMPLATE)
    return -1;

  offset = F90_LBASE_G(d) - 1;

  if (F90_FLAGS_G(d) & __SEQUENCE) {
    for (dx = F90_RANK_G(d); --dx >= 0;) {
      SET_DIM_PTRS(dd, d, dx);
      lidx = F90_DPTR_SSTRIDE_G(dd) * idxv[dx] + F90_DPTR_SOFFSET_G(dd);
      offset += F90_DPTR_LSTRIDE_G(dd) * lidx;
    }
    return offset;
  }

  for (dx = 0; dx < F90_RANK_G(d); ++dx) {
    SET_DIM_PTRS(dd, d, dx);
    idx = idxv[dx];
#if defined(DEBUG)
    if (idx < F90_DPTR_LBOUND_G(dd) || idx > DPTR_UBOUND_G(dd))
      __fort_abort("local_offset: index out of bounds");
#endif
    lidx = F90_DPTR_SSTRIDE_G(dd) * idx + F90_DPTR_SOFFSET_G(dd);
    offset += F90_DPTR_LSTRIDE_G(dd) * lidx;
  }
#if defined(DEBUG)
  if (__fort_test & DEBUG_DIST) {
    printf("%d local_offset: offset=%d + lstride=%d * lidx=%d\n",
           GET_DIST_LCPU, offset, F90_DPTR_LSTRIDE_G(dd), lidx);
  }
#endif
  return offset;
}

/* Given the indices of an element of section d, return its local
   address or NULL if it is not local.  The base address is assumed to
   be adjusted for scalar subscripts. */

void *I8(__fort_local_address)(void *base, F90_Desc *d, __INT_T *idxv)
{
  DECL_DIM_PTRS(dd);
  __INT_T dfmt, dx, idx, lidx, lof, offset, pcoord, tidx;

  if (F90_FLAGS_G(d) & __OFF_TEMPLATE)
    return NULL;

  offset = F90_LBASE_G(d) - 1 - DIST_SCOFF_G(d);

  if (F90_FLAGS_G(d) & __SEQUENCE) {
    for (dx = F90_RANK_G(d); --dx >= 0;) {
      SET_DIM_PTRS(dd, d, dx);
      idx = idxv[dx];
      lidx = F90_DPTR_SSTRIDE_G(dd) * idx + F90_DPTR_SOFFSET_G(dd);
      offset += F90_DPTR_LSTRIDE_G(dd) * lidx;
    }
    return (char *)base + offset * F90_LEN_G(d);
  }

  for (dx = 0, dfmt = DIST_DFMT_G(d); dx < F90_RANK_G(d);
       dfmt >>= DFMT__WIDTH, ++dx) {
    SET_DIM_PTRS(dd, d, dx);
    idx = idxv[dx];
#if defined(DEBUG)
    if (DPTR_UBOUND_G(dd) < 0)
      __fort_abort("local_address: index out of bounds");
#endif
    lidx = F90_DPTR_SSTRIDE_G(dd) * idx + F90_DPTR_SOFFSET_G(dd);

    switch (dfmt & DFMT__MASK) {
    case DFMT_COLLAPSED:
      break;
    case DFMT_BLOCK:
    case DFMT_BLOCK_K:
    case DFMT_GEN_BLOCK:
      if (idx < DIST_DPTR_OLB_G(dd) || idx > DIST_DPTR_OUB_G(dd))
        return NULL;
      break;

    default:
      __fort_abort("local_address: unsupported dist-format");
    }
    offset += F90_DPTR_LSTRIDE_G(dd) * lidx;
  }
  return (char *)base + offset * F90_LEN_G(d);
}

/* Localize a global index in dimension dim of array a.  This is only
   necessary for dimensions with cyclic or block-cyclic distributions.
   It is assumed that the index is local. */

__INT_T
ENTFTN(LOCALIZE_INDEX, localize_index)
(F90_Desc *d, __INT_T *dimb, __INT_T *idxb)
{
  DECL_DIM_PTRS(dd);
  int dim, idx, lidx, lof;

  dim = *dimb;
  idx = *idxb;
#if defined(DEBUG)
  if (d == NULL || F90_TAG_G(d) != __DESC)
    __fort_abort("LOCALIZE_INDEX: invalid descriptor");
  if (dim < 1 || dim > F90_RANK_G(d))
    __fort_abort("LOCALIZE_INDEX: invalid dimension");
#endif
  SET_DIM_PTRS(dd, d, dim - 1);
#if defined(DEBUG)
  if (idx < F90_DPTR_LBOUND_G(dd) || idx > DPTR_UBOUND_G(dd))
    __fort_abort("LOCALIZE_INDEX: index out of bounds");
#endif
  lidx = F90_DPTR_SSTRIDE_G(dd) * idx + F90_DPTR_SOFFSET_G(dd);
  return lidx;
}

/* Given the loop bounds l, u, and s that range over dimension dim of
   section d, return cycle loop lower bound cl, iteration count cn,
   stride cs, initial cyclic offset lof, and cyclic offset stride los
   that localize the loop to this processor. */

static int I8(cyclic_setup)(F90_Desc *d, __INT_T dim, __INT_T l, __INT_T u,
                            __INT_T s, __INT_T *pcl, __INT_T *pcs,
                            __INT_T *plof, __INT_T *plos)
{
  DECL_DIM_PTRS(dd);
  __INT_T abstr, ck, cl, cn, cs, cs0, cu, lof, los, los0, m, n, q, r, tl, tu,
      ts, ts0, x;

  SET_DIM_PTRS(dd, d, dim - 1);

  /* adjust lower bound to fall within array index range */

  if (s > 0) {
    n = F90_DPTR_LBOUND_G(dd) - l + s - 1;
    if (n > 0) {
      if (s != 1)
        n /= s;
      l += n * s;
    }
  } else {
    n = DPTR_UBOUND_G(dd) - l + s + 1;
    if (n < 0) {
      if (s == -1)
        n = -n;
      else
        n /= s;
      l += n * s;
    }
  }

  ts = DIST_DPTR_TSTRIDE_G(dd) * s; /* stride in ultimate template */

  cs = (ts < 0) ? -DIST_DPTR_CYCLE_G(dd)
                : DIST_DPTR_CYCLE_G(dd); /* cycle stride */

  /* check for zero-trip loop, no local data, or not cyclic */

  if (s > 0 ? (l > u || l > DIST_DPTR_OUB_G(dd) || u < DIST_DPTR_OLB_G(dd))
            : (l < u || l < DIST_DPTR_OLB_G(dd) || u > DIST_DPTR_OUB_G(dd))) {

    /* no local data or zero-trip loop */

    cl = DIST_DPTR_CLB_G(dd);
    cu = cl - cs;
    cn = lof = los = 0;
  } else
    switch (DFMT(d, dim)) {

    default:
      __fort_abort("cyclic_setup: unsupported dist-format");
    }
#if defined(DEBUG)
  if (__fort_test & DEBUG_DIST)
    printf("%d cyclic dim=%d %d:%d:%d -> %d:%d:%d cn=%d lof=%d los=%d\n",
           GET_DIST_LCPU, dim, l, u, s, cl, cu, cs, cn, lof, los);
#endif
  *pcl = cl;
  *pcs = cs;
  *plof = lof;
  *plos = los;
  return cn;
}

/* Cache the cycle loop bounds in the section descriptor.  The cached
   parameters describe loops over the entire section. */

void I8(__fort_cycle_bounds)(F90_Desc *d)
{
  DECL_DIM_PTRS(dd);
  __INT_T dim;

  for (dim = F90_RANK_G(d); dim > 0; --dim) {
    if ((~DIST_CACHED_G(d) >> (dim - 1)) & 1) {
      SET_DIM_PTRS(dd, d, dim - 1);
      DIST_DPTR_CN_P(
          dd, I8(cyclic_setup)(d, dim, F90_DPTR_LBOUND_G(dd), DPTR_UBOUND_G(dd),
                               1, &DIST_DPTR_CL_G(dd), &DIST_DPTR_CS_G(dd),
                               &DIST_DPTR_CLOF_G(dd), &DIST_DPTR_CLOS_G(dd)));
    }
  }
  DIST_CACHED_P(d, (DIST_CACHED_G(d) | ~(-1 << F90_RANK_G(d))));
}
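
/* DIST_CACHED_G is a per-dimension bit mask: bit (dim-1) set means the
   cycle loop bounds for that dimension are already cached.  The final
   update ORs in ~(-1 << rank), i.e. the low rank bits; for a rank-3
   descriptor that is binary 111, marking all three dimensions cached. */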

/* Set the lower and upper cycle loop bounds, cycle loop stride,
   cyclic index offset, and cyclic offset stride and return the cycle
   loop trip count (number of local blocks) for the loop specified by
   l:u:s over dimension dim of section d.  For a stride 1 loop over
   the entire dimension, the cycle loop parameters are cached in the
   descriptor. */

__INT_T
I8(__fort_cyclic_loop)(F90_Desc *d, __INT_T dim, __INT_T l, __INT_T u,
                       __INT_T s, __INT_T *cl, __INT_T *cu, __INT_T *cs,
                       __INT_T *clof, __INT_T *clos)
{
  DECL_DIM_PTRS(dd);
  __INT_T cn, m;

#if defined(DEBUG)
  if (d == NULL || F90_TAG_G(d) != __DESC)
    __fort_abort("cyclic_loop: invalid descriptor");
  if (dim < 1 || dim > F90_RANK_G(d))
    __fort_abort("cyclic_loop: invalid dimension");
  if (s == 0)
    __fort_abort("cyclic_loop: invalid stride");
#endif

  SET_DIM_PTRS(dd, d, dim - 1);

  if (l == F90_DPTR_LBOUND_G(dd) && u == DPTR_UBOUND_G(dd) && s == 1) {

    /* loop bounds match section bounds */

    m = 1 << (dim - 1);
    if (~DIST_CACHED_G(d) & m) {

      /* not cached - store cycle loop bounds in descriptor */

      DIST_DPTR_CN_P(dd,
                     I8(cyclic_setup)(d, dim, l, u, s, &DIST_DPTR_CL_G(dd),
                                      &DIST_DPTR_CS_G(dd), &DIST_DPTR_CLOF_G(dd),
                                      &DIST_DPTR_CLOS_G(dd)));
      DIST_CACHED_P(d, DIST_CACHED_G(d) | m);
    }

    /* return previously cached cycle loop bounds */

    *cl = DIST_DPTR_CL_G(dd);
    *cs = DIST_DPTR_CS_G(dd);
    *clof = DIST_DPTR_CLOF_G(dd);
    *clos = DIST_DPTR_CLOS_G(dd);

    cn = DIST_DPTR_CN_G(dd);
  } else {

    /* loop bounds don't match section bounds */

    cn = I8(cyclic_setup)(d, dim, l, u, s, cl, cs, clof, clos);
  }

  *cu = *cl + (cn - 1) * (*cs);
  return cn;
}

void ENTFTN(CYCLIC_LOOP, cyclic_loop)(F90_Desc *d, __INT_T *dim, __INT_T *l,
                                      __INT_T *u, __INT_T *s, __INT_T *cl,
                                      __INT_T *cu, __INT_T *cs, __INT_T *clof,
                                      __INT_T *clos)
{
  __INT_T xcl, xcu, xcs, xclof, xclos;

  (void)I8(__fort_cyclic_loop)(d, *dim, *l, *u, *s, &xcl, &xcu, &xcs, &xclof,
                               &xclos);
  *cl = xcl;
  *cu = xcu;
  *cs = xcs;
  *clof = xclof;
  *clos = xclos;
}

/* Given loop bounds l:u:s over dimension dim of array d (not
   necessarily spanning the entire dimension), localize the loop to
   the local block specified by cycle index ci (which must increment
   through the outer cycle loop in order to cover all elements in the
   l:u:s section).  Set block loop bounds bl, bu. */

void I8(block_setup)(F90_Desc *d, int dim, __INT_T l, __INT_T u, int s, int ci,
                     __INT_T *bl, __INT_T *bu)
{
  DECL_DIM_PTRS(dd);
  __INT_T bb, lob, uob, m, n;
#if defined(DEBUG)
  __INT_T gl = l;
  __INT_T gu = u;

  if (d == NULL || F90_TAG_G(d) != __DESC)
    __fort_abort("block_setup: invalid descriptor");
  if (dim < 1 || dim > F90_RANK_G(d))
    __fort_abort("block_setup: invalid dimension");
  if (s == 0)
    __fort_abort("block_setup: invalid stride");
#endif

  SET_DIM_PTRS(dd, d, dim - 1);

  /* adjust lower bound to fall within array index range */

  m = s > 0 ? F90_DPTR_LBOUND_G(dd) - 1 : DPTR_UBOUND_G(dd) + 1;
  n = m - l + s;
  if (s != 1)
    n /= s;
  if (n < 0)
    n = 0;
  l += n * s;

  switch (DFMT(d, dim)) {
  case DFMT_COLLAPSED:
  case DFMT_BLOCK:
  case DFMT_BLOCK_K:
  case DFMT_GEN_BLOCK:
    lob = DIST_DPTR_OLB_G(dd);
    uob = DIST_DPTR_OUB_G(dd);
    break;

  case DFMT_CYCLIC:
  case DFMT_CYCLIC_K:
    m = DIST_DPTR_TSTRIDE_G(dd);
    bb = DIST_DPTR_BLOCK_G(dd) - 1;
    if ((m ^ s) < 0)
      bb = -bb;
    lob = uob = ci - DIST_DPTR_TOFFSET_G(dd);
    if (s > 0)
      uob += bb;
    else
      lob += bb;
    if (m != 1) {
      lob = Ceil(lob, m);
      uob = Floor(uob, m);
    }
    break;

  default:
    __fort_abort("block_setup: unsupported dist-format");
  }

  if (s > 0) {
    if (l < lob) {
      if (s != 1)
        l += s * ((lob - l + s - 1) / s);
      else
        l = lob;
    }
    if (u > uob)
      u = uob;
  } else {
    if (l > uob) {
      if (s != -1)
        l += s * ((uob - l + s + 1) / s);
      else
        l = uob;
    }
    if (u < lob)
      u = lob;
  }
#if defined(DEBUG)
  if (__fort_test & DEBUG_DIST)
    printf("%d block dim=%d %d:%d:%d ci=%d -> %d:%d:%d\n", GET_DIST_LCPU, dim,
           gl, gu, s, ci, l, u, s);
#endif
  *bl = l;
  *bu = u;
}
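
/* The clamp above advances l to the first loop iterate at or inside the
   owned block: for s > 0, l' = l + s * ceil((lob - l) / s).  For example
   (hypothetical values), l=1, s=3, lob=5 gives l' = 1 + 3*ceil(4/3) = 7,
   the first index of 1:u:3 that is >= 5. */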

void ENTFTN(BLOCK_LOOP, block_loop)(F90_Desc *d, __INT_T *dim, __INT_T *l,
                                    __INT_T *u, __INT_T *s, __INT_T *ci,
                                    __INT_T *bl, __INT_T *bu)
{
  __INT_T xbl, xbu;

  I8(block_setup)(d, *dim, *l, *u, *s, *ci, &xbl, &xbu);
  *bl = xbl;
  *bu = xbu;
}

/* Same as block_setup, but return loop trip count. */

int I8(__fort_block_loop)(F90_Desc *d, int dim, __INT_T l, __INT_T u, int s,
                          __INT_T ci, __INT_T *bl, __INT_T *bu)
{
  int bn;

  I8(block_setup)(d, dim, l, u, s, ci, bl, bu);
  bn = (*bu - *bl + s) / s;

  return bn;
}

/* Set the lower and upper index bounds and return the number of local
   elements for the local block at cycle index ci in dimension dim of
   section d.  Same as block_setup, but assumes a stride 1 loop over
   the entire dimension and returns the loop trip count. */

__INT_T
I8(__fort_block_bounds)(F90_Desc *d, __INT_T dim, __INT_T ci,
                        __INT_T *bl, __INT_T *bu)
{
  DECL_DIM_PTRS(dd);

  SET_DIM_PTRS(dd, d, dim - 1);
  I8(block_setup)(d, dim, F90_DPTR_LBOUND_G(dd), DPTR_UBOUND_G(dd),
                  1, ci, bl, bu);
  return *bu - *bl + 1;
}

/* Given loop bounds and stride that range over dimension dim of array
   a which does NOT have a cyclic distribution, return new loop bounds
   bl, bu that localize the bounds to this processor. */

void ENTFTN(LOCALIZE_BOUNDS,
            localize_bounds)(F90_Desc *d, __INT_T *gdim, __INT_T *gl,
                             __INT_T *gu, __INT_T *gs, __INT_T *bl, __INT_T *bu)
{
  DECL_DIM_PTRS(dd);
  int dim, l, u, s, m, n, lob, uob;

  dim = *gdim;
  l = *gl;
  u = *gu;
  s = *gs;

#if defined(DEBUG)
  if (d == NULL || F90_TAG_G(d) != __DESC)
    __fort_abort("LOCALIZE_BOUNDS: invalid descriptor");
  if (dim < 1 || dim > F90_RANK_G(d))
    __fort_abort("LOCALIZE_BOUNDS: invalid dimension");
  if (s == 0)
    __fort_abort("LOCALIZE_BOUNDS: invalid stride");
#endif

  SET_DIM_PTRS(dd, d, dim - 1);

  /* adjust lower bound to fall within array index range */

  m = s > 0 ? F90_DPTR_LBOUND_G(dd) - 1 : DPTR_UBOUND_G(dd) + 1;
  n = m - l + s;
  if (s != 1)
    n /= s;
  if (n < 0)
    n = 0;
  l += n * s;

  lob = DIST_DPTR_OLB_G(dd);
  uob = DIST_DPTR_OUB_G(dd);
  if (s == 1) {
    if (l < lob)
      l = lob;
    if (u > uob)
      u = uob;
  } else if (s > 0) {
    if (l < lob)
      l += s * ((lob - l + s - 1) / s);
    if (u > uob)
      u = uob;
  } else {
    if (l > uob) {
      if (s != -1)
        l += s * ((uob - l + s + 1) / s);
      else
        l = uob;
    }
    if (u < lob)
      u = lob;
  }
  *bl = l;
  *bu = u;
}

/* Create a new processor descriptor */

static void
proc_setup(proc *p)
{
  procdim *pd;
  int i, m, size;
  char msg[80];

  size = 1;
  for (i = 0; i < p->rank; ++i) {
    pd = &p->dim[i];
    pd->shape_shift = div_shift(pd->shape);
    pd->shape_recip = RECIP(pd->shape);
    pd->stride = size;
    size *= pd->shape;
  }
  p->size = size;
  if (p->base + size > GET_DIST_TCPUS) {
    sprintf(msg, "Too few processors. Need %d, got %d.", p->base + size,
            GET_DIST_TCPUS);
    __fort_abort(msg);
  }
  m = GET_DIST_LCPU - p->base;
  if (m >= 0 && m < size) {
    for (i = 0; i < p->rank; ++i) {
      pd = &p->dim[i];
      RECIP_DIVMOD(&m, &pd->coord, m, pd->shape);
    }
  } else {
    for (i = 0; i < p->rank; ++i) {
      pd = &p->dim[i];
      pd->coord = -1;
    }
    p->flags |= __OFF_TEMPLATE;
  }
}
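
/* Processor numbers map to grid coordinates in column-major order: the
   strides are running products of the shapes, and the coordinate loop
   peels them off with successive divmods.  For example (hypothetical
   shape), on a 2x3 grid the local processor number 5 decomposes as
   coord[0] = 5 % 2 = 1, then coord[1] = (5 / 2) % 3 = 2. */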

void ENTFTN(PROCESSORS, processors)(proc *p, __INT_T *rankp, ...)
{
  va_list va;
  procdim *pd;
  __INT_T i, rank;

  rank = *rankp;
#if defined(DEBUG)
  if (rank < 0 || rank > MAXDIMS)
    __fort_abort("PROCESSORS: invalid rank");
#endif
  p->tag = __PROC;
  p->rank = rank;
  p->flags = 0;
  p->base = 0;
  va_start(va, rankp);
  for (i = 0; i < rank; ++i) {
    pd = &p->dim[i];
    pd->shape = *va_arg(va, __INT_T *);
    if (pd->shape < 1)
      __fort_abort("PROCESSORS: invalid shape");
  }
  va_end(va);
  proc_setup(p);
}

/* Create a default processor grid of given rank.  Factor the number
   of processors into the squarest possible set of rank terms, in
   ascending order.  Default processor descriptors are cached by
   rank and never need to be freed. */

#if !defined(DESC_I8)

static proc *default_proc_list[MAXDIMS + 1] = {NULL};

#define NPRIMES 31
static int prime[NPRIMES] = {2,  3,  5,  7,  11,  13,  17,  19,  23,  29, 31,
                             37, 41, 43, 47, 53,  59,  61,  67,  71,  73, 79,
                             83, 89, 97, 101, 103, 107, 109, 113, 127};

proc *
__fort_defaultproc(int rank)
{
  proc *p;
  int i, k, np, power[NPRIMES], shape[MAXDIMS];

  if (rank < 0 || rank > MAXDIMS)
    __fort_abort("DEFAULTPROC: invalid processor rank");

  if (rank == 0)
    rank = 1; /* substitute rank 1 for rank 0 */

  p = default_proc_list[rank];
  if (p != NULL)
    return p;

  for (i = 0; i < rank; ++i)
    shape[i] = 1;

  np = GET_DIST_TCPUS;
  if (rank > 1 && np > 1) {

    /* first determine the power of each prime factor */

    power[0] = 0; /* powers of two */
    while ((np & 1) == 0) {
      power[0]++;
      np >>= 1;
    }
    for (k = 1; k < NPRIMES && np >= prime[k]; ++k) {
      power[k] = 0;
      while (np % prime[k] == 0) {
        power[k]++;
        np /= prime[k];
      }
    }

    /* now construct the shape vector, using the prime factors
       from largest to smallest.  keep the shape vector sorted in
       ascending order at each step. */

    shape[rank - 1] = np;

    while (--k >= 0) {
      while (--power[k] >= 0) {
        shape[0] *= prime[k];
        for (i = 1; i < rank && shape[i - 1] > shape[i]; ++i) {
          int t = shape[i - 1];
          shape[i - 1] = shape[i];
          shape[i] = t;
        }
      }
    }
  } else if (rank == 1)
    shape[0] = np;

  /* set up the descriptor */

  p = (proc *)__fort_malloc(sizeof(proc) - (MAXDIMS - rank) * sizeof(procdim));
  p->tag = __PROC;
  p->rank = rank;
  p->flags = 0;
  p->base = 0;
  for (i = 0; i < rank; ++i)
    p->dim[i].shape = shape[i];
  proc_setup(p);
  default_proc_list[rank] = p;
  return p;
}
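
/* Worked example: 12 processors at rank 2 factor as 2*2*3; folding the
   factors from largest to smallest while keeping the shape sorted yields
   {1,1} -> {1,3} -> {2,3} -> {3,4}, i.e. a 3x4 default grid. */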

/* rank 0 processor descriptor for the local processor */

static proc *local_proc;

proc *
__fort_localproc()
{
  if (local_proc == NULL) {
    local_proc =
        (proc *)__fort_malloc(sizeof(proc) - MAXDIMS * sizeof(procdim));
    local_proc->tag = __PROC;
    local_proc->rank = 0;
    local_proc->flags = __LOCAL;
    local_proc->base = GET_DIST_LCPU;
    proc_setup(local_proc);
  }
  return local_proc;
}
#endif

void I8(__fort_set_alignment)(F90_Desc *d, __INT_T dim, __INT_T lbound,
                              __INT_T ubound, __INT_T taxis, __INT_T tstride,
                              __INT_T toffset, ...)
{
  DECL_DIM_PTRS(dd);
  __INT_T extent;

#if defined(DEBUG)
  if (d == NULL || F90_TAG_G(d) != __DESC)
    __fort_abort("set_alignment: invalid descriptor");
  if (dim < 1 || dim > F90_RANK_G(d))
    __fort_abort("set_alignment: invalid dim");
#endif

  extent = ubound - lbound + 1;
  if (extent < 0) {
    lbound = 1;
    ubound = 0;
    extent = 0;
  }

  SET_DIM_PTRS(dd, d, dim - 1);

  F90_DPTR_LBOUND_P(dd, lbound);
  DPTR_UBOUND_P(dd, ubound);
  F90_DPTR_SSTRIDE_P(dd, 1); /* section stride */
  F90_DPTR_SOFFSET_P(dd, 0); /* section offset */
  F90_DPTR_LSTRIDE_P(dd, 0);

#if defined(DEBUG)
  if (__fort_test & DEBUG_DIST)
    printf("%d set_align dim=%d lb=%d ub=%d tx=%d st=%d of=%d"
           " tlb=%d tub=%d clb=%d cno=%d olb=%d oub=%d\n",
           GET_DIST_LCPU, dim, F90_DPTR_LBOUND_G(dd), DPTR_UBOUND_G(dd),
           DIST_DPTR_TAXIS_G(dd), DIST_DPTR_TSTRIDE_G(dd), DIST_DPTR_TOFFSET_G(dd),
           DIST_DPTR_TLB_G(dd), DIST_DPTR_TUB_G(dd), DIST_DPTR_CLB_G(dd),
           DIST_DPTR_CNO_G(dd), DIST_DPTR_OLB_G(dd), DIST_DPTR_OUB_G(dd));
#endif
}

/* Set the local bounds for dimension dim of array d to use the local
   storage associated with the corresponding dimension of array a. */

void I8(__fort_use_allocation)(F90_Desc *d, __INT_T dim, __INT_T no, __INT_T po,
                               F90_Desc *a)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T aextent, dextent, k, lof;

#if defined(DEBUG)
  if (d == NULL || F90_TAG_G(d) != __DESC)
    __fort_abort("use_allocation: invalid descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("use_allocation: invalid array descriptor");
  if (F90_RANK_G(a) != F90_RANK_G(d))
    __fort_abort("use_allocation: descriptor ranks differ");
  if (dim < 1 || dim > F90_RANK_G(d))
    __fort_abort("use_allocation: invalid dim");
#endif

  /* array descriptor; not template */
  F90_FLAGS_P(d, F90_FLAGS_G(d) & ~__TEMPLATE);

  DIST_NONSEQUENCE_P(d, DIST_NONSEQUENCE_G(a));

  SET_DIM_PTRS(dd, d, dim - 1);
  SET_DIM_PTRS(ad, a, dim - 1);

#if defined(DEBUG)
  if (F90_DPTR_SSTRIDE_G(ad) != 1)
    __fort_abort("use_allocation: can't use strided section");
  if (no > DIST_DPTR_NO_G(ad) || po > DIST_DPTR_PO_G(ad))
    __fort_abort("use_allocation: can't increase overlaps");
#endif

  if (DPTR_UBOUND_G(dd) < F90_DPTR_LBOUND_G(dd)) { /* zero-size */
#if defined(DEBUG)
    if (DIST_DPTR_OUB_G(dd) != DIST_DPTR_OLB_G(dd) - 1 ||
        DIST_DPTR_UAB_G(dd) != DIST_DPTR_LAB_G(dd) - 1)
      __fort_abort("use_allocation: bad bounds for zero-size");
#endif
    DIST_DPTR_NO_P(dd, 0);     /* negative overlap allowance */
    DIST_DPTR_PO_P(dd, 0);     /* positive overlap allowance */
    DIST_DPTR_COFSTR_P(dd, 0); /* cyclic offset stride */
  } else {
    DIST_DPTR_NO_P(dd, no); /* negative overlap allowance */
    DIST_DPTR_PO_P(dd, po); /* positive overlap allowance */

    if (~F90_FLAGS_G(a) & F90_FLAGS_G(d) & __LOCAL) { /* coercing to local */
      DIST_DPTR_COFSTR_P(dd, 0);
      k = DIST_DPTR_OLB_G(dd) - DIST_DPTR_OLB_G(ad);
    } else {
      DIST_DPTR_COFSTR_P(dd, DIST_DPTR_COFSTR_G(ad));
      k = F90_DPTR_LBOUND_G(dd) - F90_DPTR_LBOUND_G(ad);
    }
    k -= F90_DPTR_SOFFSET_G(ad);
    DIST_DPTR_LAB_P(dd, (DIST_DPTR_LAB_G(ad) + (DIST_DPTR_NO_G(ad) - no) + k));
    DIST_DPTR_UAB_P(dd, (DIST_DPTR_UAB_G(ad) - (DIST_DPTR_PO_G(ad) - po) + k));

#if defined(DEBUG)
    aextent = DIST_DPTR_UAB_G(ad) - DIST_DPTR_LAB_G(ad) + 1;
    dextent = DIST_DPTR_UAB_G(dd) - DIST_DPTR_LAB_G(dd) + 1;
    if (dim < F90_RANK_G(d)) {
      if (aextent != dextent)
        __fort_abort("use_allocation: allocated extent changed");
    } else if (dextent > aextent)
      __fort_abort("use_allocation: allocated extent increased");
#endif
  }

#if defined(DEBUG)
  if (__fort_test & DEBUG_DIST)
    printf("%d use_alloc dim=%d lb=%d ub=%d no=%d po=%d"
           " lab=%d uab=%d cofstr=%d\n",
           GET_DIST_LCPU, dim, F90_DPTR_LBOUND_G(dd), DPTR_UBOUND_G(dd),
           DIST_DPTR_NO_G(dd), DIST_DPTR_PO_G(dd), DIST_DPTR_LAB_G(dd),
           DIST_DPTR_UAB_G(dd), DIST_DPTR_COFSTR_G(dd));
#endif
}

/* Map descriptor d onto the single/scalar-subscript coordinate idx in
   axis dim of template/array a.  For a scalar subscript, adjust the
   index base offsets. */

void I8(__fort_set_single)(F90_Desc *d, F90_Desc *a, __INT_T dim, __INT_T idx,
                           _set_single_enum what)
{
  DECL_DIM_PTRS(ad);
  __INT_T k, lidx;

#if defined(DEBUG)
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("set_single: invalid array descriptor");
  if (dim < 1 || dim > F90_RANK_G(a))
    __fort_abort("set_single: invalid array dimension");
#endif

  SET_DIM_PTRS(ad, a, dim - 1);

  /* localize scalar subscript */
  if (what != __SINGLE) {
    /* adjust index base offsets */
    lidx = F90_DPTR_SSTRIDE_G(ad) * idx + F90_DPTR_SOFFSET_G(ad);
    k = F90_DPTR_LSTRIDE_G(ad) * (lidx - F90_DPTR_LBOUND_G(ad));
    F90_LBASE_P(d, F90_LBASE_G(d) + k - DIST_DPTR_LOFFSET_G(ad));
  }

#if defined(DEBUG)
  if (__fort_test & DEBUG_DIST)
    printf("%d set_single dim=%d idx=%d pbase=%d lbase=%d scoff=%d%s\n",
           GET_DIST_LCPU, dim, idx, DIST_PBASE_G(d), F90_LBASE_G(d),
           DIST_SCOFF_G(d),
           F90_FLAGS_G(d) & __OFF_TEMPLATE ? " OFF_TEMPLATE" : "");
#endif
}

/* Compute the global array size, local array size, local index
   multiplier and offset, and local index base offset.  This routine
   should not be called for templates. */

void I8(__fort_finish_descriptor)(F90_Desc *d)
{
  DECL_DIM_PTRS(dd);
  __INT_T gsize, i, lextent, lsize, lbase;
  __INT_T rank = F90_RANK_G(d);

  gsize = lsize = lbase = 1;
  for (i = 0; i < rank; ++i) {
    SET_DIM_PTRS(dd, d, i);
    gsize *= F90_DPTR_EXTENT_G(dd);
    F90_DPTR_LSTRIDE_P(dd, lsize);
    DIST_DPTR_LOFFSET_P(dd,
                        -lsize * DIST_DPTR_LAB_G(dd)); /* local index offset */
    lbase -= lsize * F90_DPTR_LBOUND_G(dd);
    lextent = F90_DPTR_EXTENT_G(dd);
    if (lextent > 0)
      lsize *= lextent;
    else
      lsize = 0;
  }
  F90_GSIZE_P(d, gsize); /* global array size */
  F90_LSIZE_P(d, lsize); /* local array size */
  F90_LBASE_P(d, lbase);

  /* global heap block multiplier is the per-processor heap block
     size divided by the data item length */

  if (__fort_heap_block > 0 && F90_LEN_G(d) > 0) {
    if (F90_KIND_G(d) == __STR || F90_KIND_G(d) == __DERIVED ||
        F90_KIND_G(d) == __NONE)
      DIST_HEAPB_P(d, __fort_heap_block / F90_LEN_G(d));
    else
      DIST_HEAPB_P(d, __fort_heap_block >> GET_DIST_SHIFTS(F90_KIND_G(d)));
    if (DIST_HEAPB_G(d) <= 0)
      __fort_abort("heap block overflow; -heapz too large");
  } else
    DIST_HEAPB_P(d, 0);
}
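
/* The strides form the usual column-major (Fortran) layout: the lstride
   of dimension i is the product of the extents of dimensions 1..i-1, and
   lbase is chosen so that element (lb1, lb2, ...) lands at offset 0.
   For a hypothetical 4x5 array with lower bounds (1,1): lstride = {1,4},
   lbase = 1 - 1*1 - 4*1 = -4, and element (i,j) sits at offset
   lbase - 1 + i + 4*j relative to the allocated base. */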

/* Map axis ddim of descriptor d onto section l:u:s of dimension adim
   of array a. */

/* for F90 */
void I8(__fort_set_sectionx)(F90_Desc *d, __INT_T ddim, F90_Desc *a,
                             __INT_T adim, __INT_T l, __INT_T u, __INT_T s,
                             __INT_T noreindex)
{
  DECL_DIM_PTRS(dd);
  DECL_DIM_PTRS(ad);
  __INT_T extent, myoffset;

#if defined(DEBUG)
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("set_section: invalid array descriptor");
  if (adim < 1 || adim > F90_RANK_G(a))
    __fort_abort("set_section: invalid array dimension");
#endif

  SET_DIM_PTRS(ad, a, adim - 1);
  SET_DIM_PTRS(dd, d, ddim - 1);

#if defined(DEBUG)
  if ((F90_FLAGS_G(d) & (__SEQUENCE | __ASSUMED_SIZE | __BOGUSBOUNDS)) == 0 &&
      (l < F90_DPTR_LBOUND_G(ad) || u > DPTR_UBOUND_G(ad)))
    __fort_abort("set_section: index out of bounds");
#endif

  extent = u - l + s; /* section extent */
  if (s != 1) {
    if (s == -1)
      extent = -extent;
    else
      extent /= s;
  }
  if (extent < 0)
    extent = 0;

  if (noreindex && s == 1) {
    F90_DPTR_LBOUND_P(dd, l);                   /* lower bound */
    DPTR_UBOUND_P(dd, extent == 0 ? l - 1 : u); /* upper bound */
    myoffset = 0;
  } else {
    F90_DPTR_LBOUND_P(dd, 1);  /* lower bound */
    DPTR_UBOUND_P(dd, extent); /* upper bound */
    myoffset = l - s;
  }

  /* adjust section stride and offset; local array index mapping is
     unchanged */

  /* no longer need section stride/section offset */
  F90_DPTR_SSTRIDE_P(dd, F90_DPTR_SSTRIDE_G(ad));
  F90_DPTR_SOFFSET_P(dd, (F90_DPTR_SOFFSET_G(ad)));
  F90_DPTR_LSTRIDE_P(dd, F90_DPTR_LSTRIDE_G(ad) * s);

#if defined(DEBUG)
  if (__fort_test & DEBUG_DIST)
    printf("%d set_section %d(%d:%d)->%d(%d:%d:%d) o(%d:%d) a(%d:%d)"
           " %dx+%d lbase=%d scoff=%d F90_DPTR_SOFFSET_G(ad)=%d"
           " F90_DPTR_SOFFSET_G(dd)=%d\n",
           GET_DIST_LCPU, ddim, F90_DPTR_LBOUND_G(dd), DPTR_UBOUND_G(dd), adim,
           l, u, s, DIST_DPTR_OLB_G(dd), DIST_DPTR_OUB_G(dd), DIST_DPTR_LAB_G(dd),
           DIST_DPTR_UAB_G(dd), F90_DPTR_LSTRIDE_G(dd), DIST_DPTR_LOFFSET_G(dd),
           F90_LBASE_G(d), DIST_SCOFF_G(d), F90_DPTR_SOFFSET_G(ad),
           F90_DPTR_SOFFSET_G(dd));
#endif
}
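
/* The extent computation is the Fortran triplet count
   max(0, (u - l + s) / s): the section A(2:10:3) has (10 - 2 + 3) / 3 == 3
   elements, and a backwards triplet such as A(1:10:-1) correctly yields
   extent 0. */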

void I8(__fort_set_section)(F90_Desc *d, __INT_T ddim, F90_Desc *a,
                            __INT_T adim, __INT_T l, __INT_T u, __INT_T s)
{
  __DIST_SET_SECTIONX(d, ddim, a, adim, l, u, s, 0);
}

/* Compute the global section size.  Scalar subscript offset, local
   base, and local index offsets were adjusted in set_section.  Local
   size does not change. */

void I8(__fort_finish_section)(F90_Desc *d)
{
  DECL_DIM_PTRS(dd);
  __INT_T gsize, i;
  __INT_T rank = F90_RANK_G(d);

  if (DIST_NONSEQUENCE_G(d))
    F90_FLAGS_P(d, F90_FLAGS_G(d) & ~__SEQUENCE);

  gsize = 1;
  for (i = 0; i < rank; ++i) {
    gsize *= F90_DIM_EXTENT_G(d, i);
  }
  F90_GSIZE_P(d, gsize); /* global section size */
}

/* Create a new descriptor for a section of array a.  Variable length
   argument list gives lower, upper, and stride for each array
   dimension followed by a bitmask indicating vector dimensions (not
   scalar subscripts). */

#define BOGUSFLAG 0x100

/* for F90 */
void ENTFTN(SECT, sect)(F90_Desc *d, F90_Desc *a,
                        ...) /* ... = {lower, upper, stride,}* flags */
{
  va_list va;
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, flags, rank;
  __INT_T lower[MAXDIMS], upper[MAXDIMS], stride[MAXDIMS];
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* get bounds, strides, and flags argument */

  va_start(va, a);
  wrk_rank = F90_RANK_G(a);
  for (ax = 0; ax < wrk_rank; ++ax) {
    lower[ax] = *va_arg(va, __INT_T *);
    upper[ax] = *va_arg(va, __INT_T *);
    stride[ax] = *va_arg(va, __INT_T *);
  }
  flags = *va_arg(va, __INT_T *);
  va_end(va);

  /* determine section rank - popcnt of flags bits */

#if MAXDIMS != 7
  __fort_abort("SECT: need to recode for different MAXDIMS");
#endif
  rank = (flags & 0x55) + (flags >> 1 & 0x15);
  rank = (rank & 0x33) + (rank >> 2 & 0x13);
  rank += rank >> 4;
  rank &= 0x7;
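
  /* This is a SWAR popcount specialized to seven flag bits: the first
     line sums adjacent bit pairs, the second folds the pairs into 4-bit
     groups, and the final fold-and-mask leaves the number of vector
     dimensions (0..7) selected by the low bits of flags. */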

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);
  if (F90_LEN_G(d) == GET_DIST_SIZE_OF(F90_KIND_G(d)))
    F90_FLAGS_P(d, F90_FLAGS_G(d) | __SEQUENTIAL_SECTION);

  /* bogus bounds: defer section setup until copy */

  gsize = 1;
  if (flags & BOGUSFLAG) {
    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    return;
  }

  /* normal section: set up each dimension and compute gsize */

  wrk_rank = F90_RANK_G(a);
  for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
    if ((flags >> (ax - 1)) & 1) {
      dx++;
      __DIST_SET_SECTIONXX(d, dx, a, ax, lower[ax - 1], upper[ax - 1],
                           stride[ax - 1], (flags & __NOREINDEX), gsize);
    } else {
      I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
  }
  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
}

/* ASECTION invokes __DIST_SET_SECTIONXX to
 * set bounds, strides; dx and gsize are updated directly */
#define ASECTION(d, dx, a, ax, lb, ub, st, gsize, flags)                       \
  if (flags & (1 << (ax - 1))) {                                               \
    dx++;                                                                      \
    __DIST_SET_SECTIONXX(d, dx, a, ax, lb, ub, st, (flags & __NOREINDEX),      \
                         gsize);                                               \
  } else                                                                       \
    I8(__fort_set_single)(d, a, ax, lb, __SCALAR);

/* TSECTION is used when the address to be used
 * is the address of the first element of the section */
#define TSECTION(d, dx, a, ax, lb, ub, st, gsize, flags)                       \
  if (flags & (1 << (ax - 1))) {                                               \
    DECL_DIM_PTRS(__dd);                                                       \
    DECL_DIM_PTRS(__ad);                                                       \
    __INT_T __extent, __myoffset, u, l, s;                                     \
    dx++;                                                                      \
    SET_DIM_PTRS(__ad, a, ax - 1);                                             \
    SET_DIM_PTRS(__dd, d, dx - 1);                                             \
    l = lb;                                                                    \
    u = ub;                                                                    \
    s = st;                                                                    \
    __extent = u - l + s; /* section extent */                                 \
    if (s != 1) {                                                              \
      if (s == -1) {                                                           \
        __extent = -__extent;                                                  \
      } else {                                                                 \
        __extent /= s;                                                         \
      }                                                                        \
    }                                                                          \
    if (__extent < 0) {                                                        \
      __extent = 0;                                                            \
    }                                                                          \
    F90_DPTR_LBOUND_P(__dd, 1);                                                \
    DPTR_UBOUND_P(__dd, __extent);                                             \
    F90_DPTR_SSTRIDE_P(__dd, 1);                                               \
    F90_DPTR_SOFFSET_P(__dd, 0);                                               \
    F90_DPTR_LSTRIDE_P(__dd, F90_DPTR_LSTRIDE_G(__ad) * s);                    \
    F90_LBASE_P(d, F90_LBASE_G(d) - F90_DPTR_LSTRIDE_G(__dd));                 \
    if (F90_DPTR_LSTRIDE_G(__dd) != gsize)                                     \
      F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));                \
    gsize *= __extent;                                                         \
  }

/* for F90 */
void ENTF90(SECT, sect)(F90_Desc *d, F90_Desc *a, __INT_T *prank,
                        ...) /* ... = {lower, upper, stride,}* flags */
{
  va_list va;
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, flags, rank;
  __INT_T lower[MAXDIMS], upper[MAXDIMS], stride[MAXDIMS];
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* get bounds, strides, and flags argument */

  va_start(va, prank);
  wrk_rank = *prank;
  for (ax = 0; ax < wrk_rank; ++ax) {
    lower[ax] = *va_arg(va, __INT_T *);
    upper[ax] = *va_arg(va, __INT_T *);
    stride[ax] = *va_arg(va, __INT_T *);
  }
  flags = *va_arg(va, __INT_T *);
  va_end(va);

  /* determine section rank - popcnt of flags bits */

#if MAXDIMS != 7
  __fort_abort("SECT: need to recode for different MAXDIMS");
#endif
  rank = (flags & 0x55) + (flags >> 1 & 0x15);
  rank = (rank & 0x33) + (rank >> 2 & 0x13);
  rank += rank >> 4;
  rank &= 0x7;

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);
  if (F90_LEN_G(d) == GET_DIST_SIZE_OF(F90_KIND_G(d)))
    F90_FLAGS_P(d, F90_FLAGS_G(d) | __SEQUENTIAL_SECTION);

  /* bogus bounds: defer section setup until copy */

  gsize = 1;
  if (flags & BOGUSFLAG) {
    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    F90_LSIZE_P(d, gsize); /* local section size */
    return;
  }

  /* normal section: set up each dimension and compute gsize */

  wrk_rank = F90_RANK_G(a);
  if (flags & __SECTZBASE) {
    F90_LBASE_P(d, 1);
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      TSECTION(d, dx, a, ax, lower[ax - 1], upper[ax - 1], stride[ax - 1],
               gsize, flags);
    }
  } else {
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      ASECTION(d, dx, a, ax, lower[ax - 1], upper[ax - 1], stride[ax - 1],
               gsize, flags);
    }
  }
  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
  F90_LSIZE_P(d, gsize); /* local section size */
}

/* for F90 */
void ENTF90(SECT1, sect1)(F90_Desc *d, F90_Desc *a, __INT_T *prank,
                          /* ... = {lower, upper, stride,}* flags */
                          __INT_T *lw0, __INT_T *up0, __INT_T *st0,
                          __INT_T *bfg)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, flags, rank;
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* get flags argument */

  flags = *bfg;

#if MAXDIMS != 7
  __fort_abort("SECT: need to recode for different MAXDIMS");
#endif
  /* determine section rank - popcnt of flags bits */
  /* rank is at most 1 */
  rank = (flags & 0x1);

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);

  /* bogus bounds: defer section setup until copy */

  gsize = 1;
  if (flags & BOGUSFLAG) {
    __INT_T lower[1], upper[1], stride[1];
    lower[0] = *lw0;
    upper[0] = *up0;
    stride[0] = *st0;

    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    F90_LSIZE_P(d, gsize); /* local section size */
    return;
  }

  dx = 0;
  if (flags & __SECTZBASE) {
    F90_LBASE_P(d, 1);
    TSECTION(d, dx, a, 1, *lw0, *up0, *st0, gsize, flags);
  } else {
    ASECTION(d, dx, a, 1, *lw0, *up0, *st0, gsize, flags);
  }

  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
  F90_LSIZE_P(d, gsize); /* local section size */
}

/* for F90 */
void ENTF90(SECT1v, sect1v)(F90_Desc *d, F90_Desc *a, __INT_T *prank,
                            /* ... = {lower, upper, stride,}* flags */
                            __INT_T lw0, __INT_T up0, __INT_T st0,
                            __INT_T flags)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, rank;
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* determine section rank - popcnt of flags bits */

  /* rank is at most 1 */
  rank = (flags & 0x1);

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);
  gsize = 1;

  /* bogus bounds: defer section setup until copy */

  if (flags & BOGUSFLAG) {
    __INT_T lower[1], upper[1], stride[1];
    lower[0] = lw0;
    upper[0] = up0;
    stride[0] = st0;

    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    F90_LSIZE_P(d, gsize); /* local section size */
    return;
  }

  /* normal section: set up each dimension and compute gsize */

  dx = 0;
  if (flags & __SECTZBASE) {
    F90_LBASE_P(d, 1);
    TSECTION(d, dx, a, 1, lw0, up0, st0, gsize, flags);
  } else {
    ASECTION(d, dx, a, 1, lw0, up0, st0, gsize, flags);
  }

  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
  F90_LSIZE_P(d, gsize); /* local section size */
}

/* for F90 */
void ENTF90(SECT2, sect2)(F90_Desc *d, F90_Desc *a, __INT_T *prank,
                          /* ... = {lower, upper, stride,}* flags */
                          __INT_T *lw0, __INT_T *up0, __INT_T *st0,
                          __INT_T *lw1, __INT_T *up1, __INT_T *st1,
                          __INT_T *bfg)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, flags, rank;
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* get flags argument */

  flags = *bfg;

#if MAXDIMS != 7
  __fort_abort("SECT: need to recode for different MAXDIMS");
#endif
  /* determine section rank - popcnt of flags bits */
  /* rank is at most 2 */
  rank = (flags & 0x1) + (flags >> 1 & 0x1);

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);

  /* bogus bounds: defer section setup until copy */

  gsize = 1;
  if (flags & BOGUSFLAG) {
    __INT_T lower[2], upper[2], stride[2];
    lower[0] = *lw0;
    upper[0] = *up0;
    stride[0] = *st0;
    lower[1] = *lw1;
    upper[1] = *up1;
    stride[1] = *st1;

    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    F90_LSIZE_P(d, gsize); /* local section size */
    return;
  }

  dx = 0;
  if (flags & __SECTZBASE) {
    F90_LBASE_P(d, 1);
    TSECTION(d, dx, a, 1, *lw0, *up0, *st0, gsize, flags);
    TSECTION(d, dx, a, 2, *lw1, *up1, *st1, gsize, flags);
  } else {
    ASECTION(d, dx, a, 1, *lw0, *up0, *st0, gsize, flags);
    ASECTION(d, dx, a, 2, *lw1, *up1, *st1, gsize, flags);
  }

  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
  F90_LSIZE_P(d, gsize); /* local section size */
}
1987
1988 /* for F90 */
ENTF90(SECT2v,sect2v)1989 void ENTF90(SECT2v, sect2v)(F90_Desc *d, F90_Desc *a, __INT_T *prank,
1990 /* ... = {lower, upper, stride,}* flags */
1991 __INT_T lw0, __INT_T up0, __INT_T st0, __INT_T lw1,
1992 __INT_T up1, __INT_T st1, __INT_T flags)
1993 {
1994 DECL_DIM_PTRS(ad);
1995 DECL_DIM_PTRS(dd);
1996 __INT_T ax, dx, rank;
1997 __INT_T gsize;
1998 __INT_T wrk_rank;
1999
2000 #if defined(DEBUG)
2001 if (d == NULL)
2002 __fort_abort("SECT: missing section descriptor");
2003 if (a == NULL || F90_TAG_G(a) != __DESC)
2004 __fort_abort("SECT: invalid array descriptor");
2005 #endif
2006
2007 /* determine section rank - popcnt of flags bits */
2008
2009 /* rank is at most 2 */
2010 rank = (flags & 0x1) + (flags >> 1 & 0x1);
2011
2012 /* initialize descriptor */
2013
2014 SET_F90_DIST_DESC_PTR(d, rank);
2015 __DIST_INIT_SECTION(d, rank, a);
2016 gsize = 1;
2017
2018 /* bogus bounds: defer section setup until copy */
2019
2020 if (flags & BOGUSFLAG) {
2021 __INT_T lower[2], upper[2], stride[2];
2022 lower[0] = lw0;
2023 upper[0] = up0;
2024 stride[0] = st0;
2025 lower[1] = lw1;
2026 upper[1] = up1;
2027 stride[1] = st1;
2028
2029 F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
2030 wrk_rank = F90_RANK_G(a);
2031 for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
2032 if ((flags >> (ax - 1)) & 1) {
2033 SET_DIM_PTRS(dd, d, dx);
2034 dx++;
2035 SET_DIM_PTRS(ad, a, ax - 1);
2036 F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
2037 DPTR_UBOUND_P(dd, upper[ax - 1]);
2038 F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
2039 if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
2040 F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
2041 }
2042 gsize *= F90_DPTR_EXTENT_G(dd);
2043 } else
2044 I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
2045 }
2046 F90_GSIZE_P(d, gsize); /* global section size */
2047 F90_LSIZE_P(d, gsize); /* global section size */
2048 return;
2049 }
2050
2051 /* normal section : set up each dimension and compute GSIZE*/
2052
2053 dx = 0;
2054 if (flags & __SECTZBASE) {
2055 F90_LBASE_P(d, 1);
2056 TSECTION(d, dx, a, 1, lw0, up0, st0, gsize, flags);
2057 TSECTION(d, dx, a, 2, lw1, up1, st1, gsize, flags);
2058 } else {
2059 ASECTION(d, dx, a, 1, lw0, up0, st0, gsize, flags);
2060 ASECTION(d, dx, a, 2, lw1, up1, st1, gsize, flags);
2061 }
2062
2063 /* no longer need section stride/section offset */
2064 F90_GSIZE_P(d, gsize); /* global section size */
2065 F90_LSIZE_P(d, gsize); /* global section size */
2066 }
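
/* Illustrative call pattern (an assumed lowering, not verified against any
 * particular compiler version): SECT2 receives its bounds and flags by
 * reference, while SECT2v is the by-value variant; both build the same
 * descriptor. For a pointer assignment like P => A(1:N, J), only dimension
 * 1 is a triplet, so flags has bit 0 set and bit 1 clear:
 *
 *   __INT_T flags = 0x1;
 *   __INT_T rank = (flags & 0x1) + (flags >> 1 & 0x1);  (evaluates to 1)
 *
 * and dimension 2 collapses to the scalar subscript J through
 * I8(__fort_set_single) in the loops above. */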

/* for F90 */
void ENTF90(SECT3, sect3)(F90_Desc *d, F90_Desc *a, __INT_T *prank,
                          /* ... = {lower, upper, stride,}* flags */
                          __INT_T *lw0, __INT_T *up0, __INT_T *st0,
                          __INT_T *lw1, __INT_T *up1, __INT_T *st1,
                          __INT_T *lw2, __INT_T *up2, __INT_T *st2,
                          __INT_T *bfg)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, flags, rank;
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* get flags argument */

  flags = *bfg;

#if MAXDIMS != 7
  __fort_abort("SECT: need to recode for different MAXDIMS");
#endif
  /* determine section rank - popcount of flags bits */
  /* rank is at most 3 */
  rank = (flags & 0x5) + (flags >> 1 & 0x1);
  rank = (rank & 0x3) + (rank >> 2 & 0x1);
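
  /* Example of the two-step popcount above, assuming flags = 0x7 (all
     three dimensions sectioned): (0x7 & 0x5) + (0x7 >> 1 & 0x1) = 5 + 1
     = 6, then (6 & 0x3) + (6 >> 2 & 0x1) = 2 + 1 = 3, the expected rank. */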

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);

  /* bogus bounds: defer section setup until copy */

  gsize = 1;
  if (flags & BOGUSFLAG) {
    __INT_T lower[MAXDIMS], upper[MAXDIMS], stride[MAXDIMS];
    lower[0] = *lw0;
    upper[0] = *up0;
    stride[0] = *st0;
    lower[1] = *lw1;
    upper[1] = *up1;
    stride[1] = *st1;
    lower[2] = *lw2;
    upper[2] = *up2;
    stride[2] = *st2;

    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    F90_LSIZE_P(d, gsize); /* local section size */
    return;
  }

  dx = 0;
  if (flags & __SECTZBASE) {
    F90_LBASE_P(d, 1);
    TSECTION(d, dx, a, 1, *lw0, *up0, *st0, gsize, flags);
    TSECTION(d, dx, a, 2, *lw1, *up1, *st1, gsize, flags);
    TSECTION(d, dx, a, 3, *lw2, *up2, *st2, gsize, flags);
  } else {
    ASECTION(d, dx, a, 1, *lw0, *up0, *st0, gsize, flags);
    ASECTION(d, dx, a, 2, *lw1, *up1, *st1, gsize, flags);
    ASECTION(d, dx, a, 3, *lw2, *up2, *st2, gsize, flags);
  }

  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
  F90_LSIZE_P(d, gsize); /* local section size */
}

/* for F90 */
void ENTF90(SECT3v, sect3v)(F90_Desc *d, F90_Desc *a, __INT_T *prank,
                            /* ... = {lower, upper, stride,}* flags */
                            __INT_T lw0, __INT_T up0, __INT_T st0, __INT_T lw1,
                            __INT_T up1, __INT_T st1, __INT_T lw2, __INT_T up2,
                            __INT_T st2, __INT_T flags)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, rank;
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* determine section rank - popcount of flags bits */

  /* rank is at most 3 */
  rank = (flags & 0x5) + (flags >> 1 & 0x1);
  rank = (rank & 0x3) + (rank >> 2 & 0x1);

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);
  gsize = 1;

  /* bogus bounds: defer section setup until copy */

  if (flags & BOGUSFLAG) {
    __INT_T lower[3], upper[3], stride[3];
    lower[0] = lw0;
    upper[0] = up0;
    stride[0] = st0;
    lower[1] = lw1;
    upper[1] = up1;
    stride[1] = st1;
    lower[2] = lw2;
    upper[2] = up2;
    stride[2] = st2;

    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    F90_LSIZE_P(d, gsize); /* local section size */
    return;
  }

  /* normal section: set up each dimension and compute gsize */

  dx = 0;
  if (flags & __SECTZBASE) {
    F90_LBASE_P(d, 1);
    TSECTION(d, dx, a, 1, lw0, up0, st0, gsize, flags);
    TSECTION(d, dx, a, 2, lw1, up1, st1, gsize, flags);
    TSECTION(d, dx, a, 3, lw2, up2, st2, gsize, flags);
  } else {
    ASECTION(d, dx, a, 1, lw0, up0, st0, gsize, flags);
    ASECTION(d, dx, a, 2, lw1, up1, st1, gsize, flags);
    ASECTION(d, dx, a, 3, lw2, up2, st2, gsize, flags);
  }

  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
  F90_LSIZE_P(d, gsize); /* local section size */
}

#undef ASECTION
#undef TSECTION

/* BSECTION updates dx, gsize directly */
#define BSECTION(d, dx, a, ax, lb, ub, st, gsize, flags) \
  if (flags & (1 << (ax - 1))) { \
    dx++; \
    __DIST_SET_SECTIONXX(d, dx, a, ax, lb, ub, st, (flags & __NOREINDEX), \
                         gsize); \
  } else \
    I8(__fort_set_single)(d, a, ax, lb, __SCALAR);
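
/* Illustrative expansion (not generated code): BSECTION(d, dx, a, 1, lb,
 * ub, st, gsize, flags) tests flags bit 0; when set it advances dx and
 * lets __DIST_SET_SECTIONXX fill section dimension dx from array dimension
 * 1 and fold that dimension's extent into gsize, otherwise the subscript
 * lb is frozen as a scalar:
 *
 *   if (flags & 0x1) {
 *     dx++;
 *     __DIST_SET_SECTIONXX(d, dx, a, 1, lb, ub, st,
 *                          (flags & __NOREINDEX), gsize);
 *   } else
 *     I8(__fort_set_single)(d, a, 1, lb, __SCALAR);
 */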

/* for F90 */
void ENTFTN(SECT3, sect3)(F90_Desc *d, F90_Desc *a,
                          /* ... = {lower, upper, stride,}* flags */
                          __INT_T *lw0, __INT_T *up0, __INT_T *st0,
                          __INT_T *lw1, __INT_T *up1, __INT_T *st1,
                          __INT_T *lw2, __INT_T *up2, __INT_T *st2,
                          __INT_T *bfg)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, flags, rank;
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* get flags argument */

  flags = *bfg;

#if MAXDIMS != 7
  __fort_abort("SECT: need to recode for different MAXDIMS");
#endif
  /* determine section rank - popcount of flags bits */
  /* rank is at most 3 */
  rank = (flags & 0x5) + (flags >> 1 & 0x1);
  rank = (rank & 0x3) + (rank >> 2 & 0x1);

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);

  /* bogus bounds: defer section setup until copy */

  gsize = 1;
  if (flags & BOGUSFLAG) {
    __INT_T lower[MAXDIMS], upper[MAXDIMS], stride[MAXDIMS];
    lower[0] = *lw0;
    upper[0] = *up0;
    stride[0] = *st0;
    lower[1] = *lw1;
    upper[1] = *up1;
    stride[1] = *st1;
    lower[2] = *lw2;
    upper[2] = *up2;
    stride[2] = *st2;

    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    return;
  }

  dx = 0;
  BSECTION(d, dx, a, 1, *lw0, *up0, *st0, gsize, flags);
  BSECTION(d, dx, a, 2, *lw1, *up1, *st1, gsize, flags);
  BSECTION(d, dx, a, 3, *lw2, *up2, *st2, gsize, flags);

  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
}

/* for F90 */
void ENTFTN(SECT3v, sect3v)(F90_Desc *d, F90_Desc *a,
                            /* ... = {lower, upper, stride,}* flags */
                            __INT_T lw0, __INT_T up0, __INT_T st0, __INT_T lw1,
                            __INT_T up1, __INT_T st1, __INT_T lw2, __INT_T up2,
                            __INT_T st2, __INT_T flags)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, rank;
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* determine section rank - popcount of flags bits */

  /* rank is at most 3 */
  rank = (flags & 0x5) + (flags >> 1 & 0x1);
  rank = (rank & 0x3) + (rank >> 2 & 0x1);

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);

  /* bogus bounds: defer section setup until copy */

  gsize = 1;
  if (flags & BOGUSFLAG) {
    __INT_T lower[3], upper[3], stride[3];
    lower[0] = lw0;
    upper[0] = up0;
    stride[0] = st0;
    lower[1] = lw1;
    upper[1] = up1;
    stride[1] = st1;
    lower[2] = lw2;
    upper[2] = up2;
    stride[2] = st2;

    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    return;
  }

  /* normal section: set up each dimension and compute gsize */

  dx = 0;
  BSECTION(d, dx, a, 1, lw0, up0, st0, gsize, flags);
  BSECTION(d, dx, a, 2, lw1, up1, st1, gsize, flags);
  BSECTION(d, dx, a, 3, lw2, up2, st2, gsize, flags);

  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
}
#undef BSECTION

/* Copy the contents of descriptor d0 into d. Descriptor d is assumed
   to be large enough. */

void I8(__fort_copy_descriptor)(F90_Desc *d, F90_Desc *d0)
{
  if (F90_TAG_G(d0) == __DESC) {
    __fort_bcopy((char *)d, (char *)d0,
                 SIZE_OF_RANK_n_ARRAY_DESC(F90_RANK_G(d0)));
    SET_F90_DIST_DESC_PTR(d, F90_RANK_G(d));
  } else {
    F90_TAG_P(d, F90_TAG_G(d0));
  }
}
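
/* Note: __fort_bcopy is a flat byte copy, so the copied header still holds
 * d0's embedded pointer to its own distribution block; SET_F90_DIST_DESC_PTR
 * recomputes that pointer so it addresses the space trailing d instead.
 * A minimal sketch of the same pattern, with hypothetical types:
 *
 *   struct hdr { int rank; struct dist *dist; };  (dist trails the header)
 *   void copy_hdr(struct hdr *dst, const struct hdr *src, size_t n)
 *   {
 *     memcpy(dst, src, n);                   (shallow copy, stale pointer)
 *     dst->dist = (struct dist *)(dst + 1);  (re-point into dst's storage)
 *   }
 */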

/* Create a copy of the align-target template in space reserved
   following the descriptor. */

F90_Desc *I8(__fort_inherit_template)(F90_Desc *d, __INT_T rank,
                                      F90_Desc *target)
{
  DECL_HDR_PTRS(t);
  __INT_T dz, dzr;

#if defined(DEBUG)
  if (rank < 0 || rank > MAXDIMS)
    __fort_abort("inherit_descriptor: invalid rank");
  if (target == NULL || F90_TAG_G(target) != __DESC)
    __fort_abort("inherit_descriptor: invalid align-target descriptor");
#endif

  dz = SIZE_OF_RANK_n_ARRAY_DESC(rank);
  dzr = ALIGNR(dz);
  t = (F90_Desc *)((char *)d + dzr);

  I8(__fort_copy_descriptor)(t, target);

  F90_FLAGS_P(t, F90_FLAGS_G(t) | __TEMPLATE);
  F90_FLAGS_P(t, F90_FLAGS_G(t) & ~__NOT_COPIED);

  F90_LSIZE_P(t, 0);

  DIST_ALIGN_TARGET_P(t, t);
  DIST_NEXT_ALIGNEE_P(t, NULL);
  DIST_ACTUAL_ARG_P(t, NULL);

  return t;
}
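
/* Memory layout produced above (illustrative): d points at a block large
 * enough for both descriptors, and t lands on the first aligned boundary
 * past d's rank-sized header:
 *
 *   d                         t = (char *)d + ALIGNR(dz)
 *   |<------- dz ------->pad->|<----- copy of *target ----->|
 *
 * so callers can reach the inherited template from d without a separate
 * allocation. */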

/* Return the section extent. */

__INT_T
ENTFTN(EXTENT, extent)
(F90_Desc *d, __INT_T *gdim)
{
  __INT_T dim;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("EXTENT: invalid descriptor");
#endif

  if (F90_TAG_G(d) != __DESC)
    return 1; /* scalar or sequential */

  dim = *gdim;

#if defined(DEBUG)
  if (dim < 1 || dim > F90_RANK_G(d))
    __fort_abort("EXTENT: invalid dimension");
#endif

  return F90_DIM_EXTENT_G(d, dim - 1);
}

/* This is just like the above, but with an extra argument that is set
 * for 'local' bounds and zero for 'global' bounds. */
__INT_T
ENTFTN(GLEXTENT, glextent)
(F90_Desc *d, __INT_T *gdim, __INT_T *glocal)
{
  DECL_DIM_PTRS(dd);
  __INT_T cl, cn, dim, extent, l, u, local;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("GLEXTENT: invalid descriptor");
#endif

  if (F90_TAG_G(d) != __DESC)
    return 1; /* scalar or sequential */

  dim = *gdim;
  local = *glocal;

#if defined(DEBUG)
  if (dim < 1 || dim > F90_RANK_G(d))
    __fort_abort("GLEXTENT: invalid dimension");
#endif

  SET_DIM_PTRS(dd, d, dim - 1);

  if (local && (~F90_FLAGS_G(d) & __LOCAL)) {

    /* coercing global to local: return local extent */

    if (F90_FLAGS_G(d) & __OFF_TEMPLATE)
      return 0;

    I8(__fort_cycle_bounds)(d);

    extent = 0;
    for (cl = DIST_DPTR_CL_G(dd), cn = DIST_DPTR_CN_G(dd); --cn >= 0;
         cl += DIST_DPTR_CS_G(dd))
      extent += I8(__fort_block_bounds)(d, dim, cl, &l, &u);
  } else {

    /* normal case: return global extent */

    extent = F90_DPTR_EXTENT_G(dd);
  }

  return extent;
}
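
/* Worked example (illustrative): for a 10-element dimension distributed
 * CYCLIC(2) over 2 processors, the loop above visits this processor's
 * cycles and sums the owned block widths. Processor 0 owns global index
 * blocks [1:2], [5:6], [9:10], so its local extent is 2 + 2 + 2 = 6, while
 * processor 1 owns [3:4], [7:8] for a local extent of 4; the global extent
 * in either case is 10. */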

/* Return the lower bound for the specified dimension. */

__INT_T
ENTFTN(LBOUND, lbound)(__INT_T *dim, F90_Desc *pd)
{
  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("LBOUND: arg not associated with array");
  if (!ISPRESENT(dim) || *dim < 1 || *dim > F90_RANK_G(pd))
    __fort_abort("LBOUND: invalid dim");
  return F90_DIM_LBOUND_G(pd, *dim - 1);
}
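
/* Illustrative mapping (not part of this file): for a Fortran array
 * declared REAL :: A(0:9, 5), LBOUND(A, 1) lowers to a call through this
 * entry with *dim == 1 and returns 0, while LBOUND(A, 2) returns the
 * default lower bound 1. KLBOUND below is the -i8 variant returning an
 * 8-byte integer. */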

__INT8_T
ENTFTN(KLBOUND, klbound)(__INT_T *dim, F90_Desc *pd)
{
  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("LBOUND: arg not associated with array");
  if (!ISPRESENT(dim) || *dim < 1 || *dim > F90_RANK_G(pd))
    __fort_abort("LBOUND: invalid dim");
  return F90_DIM_LBOUND_G(pd, *dim - 1);
}

/* Return the upper bound for the specified dimension. */

__INT_T
ENTFTN(UBOUND, ubound)(__INT_T *dim, F90_Desc *pd)
{
  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("UBOUND: arg not associated with array");
  if (!ISPRESENT(dim) || *dim < 1 || *dim > F90_RANK_G(pd))
    __fort_abort("UBOUND: invalid dim");
  return DIM_UBOUND_G(pd, *dim - 1);
}

__INT8_T
ENTFTN(KUBOUND, kubound)(__INT_T *dim, F90_Desc *pd)
{
  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("UBOUND: arg not associated with array");
  if (!ISPRESENT(dim) || *dim < 1 || *dim > F90_RANK_G(pd))
    __fort_abort("UBOUND: invalid dim");
  return DIM_UBOUND_G(pd, *dim - 1);
}

/* Return lower bounds for all dimensions as a rank 1 array. */

void ENTFTN(LBOUNDA, lbounda)(__INT_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("LBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_LBOUND_G(pd, dim);
}

void ENTFTN(LBOUNDA1, lbounda1)(__INT1_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("LBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_LBOUND_G(pd, dim);
}

void ENTFTN(LBOUNDA2, lbounda2)(__INT2_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("LBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_LBOUND_G(pd, dim);
}

void ENTFTN(LBOUNDA4, lbounda4)(__INT4_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("LBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_LBOUND_G(pd, dim);
}

void ENTFTN(LBOUNDA8, lbounda8)(__INT8_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("LBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_LBOUND_G(pd, dim);
}

void ENTFTN(LBOUNDAZ, lboundaz)(__INT4_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("LBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_LBOUND_G(pd, dim);
}

void ENTFTN(LBOUNDAZ1, lboundaz1)(__INT1_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("LBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_LBOUND_G(pd, dim);
}

void ENTFTN(LBOUNDAZ2, lboundaz2)(__INT2_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("LBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_LBOUND_G(pd, dim);
}

void ENTFTN(LBOUNDAZ4, lboundaz4)(__INT4_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("LBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_LBOUND_G(pd, dim);
}

void ENTFTN(LBOUNDAZ8, lboundaz8)(__INT8_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("LBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_LBOUND_G(pd, dim);
}

void ENTFTN(KLBOUNDA, klbounda)(__INT_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("LBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_LBOUND_G(pd, dim);
}

void ENTFTN(KLBOUNDAZ, klboundaz)(__INT8_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("LBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_LBOUND_G(pd, dim);
}

/* Return upper bounds for all dimensions as a rank 1 array. */

void ENTFTN(UBOUNDA, ubounda)(__INT_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("UBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = DIM_UBOUND_G(pd, dim);
}

void ENTFTN(UBOUNDA1, ubounda1)(__INT1_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("UBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_UBOUND_G(pd, dim);
}

void ENTFTN(UBOUNDA2, ubounda2)(__INT2_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("UBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_UBOUND_G(pd, dim);
}

void ENTFTN(UBOUNDA4, ubounda4)(__INT4_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("UBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_UBOUND_G(pd, dim);
}

void ENTFTN(UBOUNDA8, ubounda8)(__INT8_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("UBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_UBOUND_G(pd, dim);
}

void ENTFTN(UBOUNDAZ, uboundaz)(__INT4_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("UBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = DIM_UBOUND_G(pd, dim);
}

void ENTFTN(UBOUNDAZ1, uboundaz1)(__INT1_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("UBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_UBOUND_G(pd, dim);
}

void ENTFTN(UBOUNDAZ2, uboundaz2)(__INT2_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("UBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_UBOUND_G(pd, dim);
}

void ENTFTN(UBOUNDAZ4, uboundaz4)(__INT4_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("UBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_UBOUND_G(pd, dim);
}

void ENTFTN(UBOUNDAZ8, uboundaz8)(__INT8_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("UBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_UBOUND_G(pd, dim);
}

void ENTFTN(KUBOUNDA, kubounda)(__INT_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("UBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = DIM_UBOUND_G(pd, dim);
}

void ENTFTN(KUBOUNDAZ, kuboundaz)(__INT8_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("UBOUND: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = DIM_UBOUND_G(pd, dim);
}

/* If dim is present, return the extent for the specified dimension;
   otherwise, return the size of the array (the product of all
   extents). */

__INT_T
ENTFTN(SIZE, size)(__INT_T *dim, F90_Desc *pd)
{
  __INT_T size;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("SIZE: arg not associated with array");
  if (!ISPRESENT(dim))
    size = F90_GSIZE_G(pd);
  else if (*dim < 1 || *dim > F90_RANK_G(pd))
    __fort_abort("SIZE: invalid dim");
  else {
    size = F90_DIM_EXTENT_G(pd, *dim - 1);
  }
  return size;
}
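
/* Illustrative values: for REAL :: A(0:9, 5), SIZE(A) returns the product
 * of the extents, 10 * 5 = 50, and SIZE(A, 2) returns 5. KSIZE below is
 * the -i8 variant. */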

__INT8_T
ENTFTN(KSIZE, ksize)(__INT_T *dim, F90_Desc *pd)
{
  /* -i8 variant of SIZE */

  __INT_T size;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("SIZE: arg not associated with array");
  if (!ISPRESENT(dim))
    size = F90_GSIZE_G(pd);
  else if (*dim < 1 || *dim > F90_RANK_G(pd))
    __fort_abort("SIZE: invalid dim");
  else {
    size = F90_DIM_EXTENT_G(pd, *dim - 1);
  }
  return (__INT8_T)size;
}

/* Return the array shape as a rank 1 array. */

void ENTFTN(SHAPE, shape)(__INT4_T *arr, F90_Desc *pd)
{
  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("SHAPE: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim)
    arr[dim] = F90_DIM_EXTENT_G(pd, dim);
}
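
/* Illustrative values: for REAL :: A(0:9, 5), SHAPE(A) fills arr with
 * {10, 5}; the element width of arr (4- vs 8-byte) selects the SHAPE or
 * KSHAPE entry. */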

void ENTFTN(KSHAPE, kshape)(__INT8_T *arr, F90_Desc *pd)
{
  /* -i8 variant of SHAPE */

  __INT_T dim, rank;

  if (F90_TAG_G(pd) != __DESC)
    __fort_abort("SHAPE: arg not associated with array");
  rank = F90_RANK_G(pd);
  for (dim = 0; dim < rank; ++dim) {
    arr[dim] = F90_DIM_EXTENT_G(pd, dim);
  }
}

void I8(__fort_reverse_array)(char *db, char *ab, F90_Desc *dd, F90_Desc *ad)
{
  /* Make a "reversed" copy of descriptor ad and store it in dd, then
   * copy the data in ab into db in reversed element order.
   *
   * Internal procedure: we assume only the run-time calls this routine.
   * (The compiler could generate much better code for this operation.)
   */

  __INT_T flags, i;
  __INT_T rank;
  __INT_T kind, len;
  __INT_T _0 = 0;
  __INT_T isstar;
  DECL_HDR_VARS(dd2);
  DECL_DIM_PTRS(add);
  __INT_T lbound[MAXDIMS], ubound[MAXDIMS], stride[MAXDIMS], dstfmt[MAXDIMS];
  __INT_T paxis[MAXDIMS], no[MAXDIMS], po[MAXDIMS];
  __INT_T *gen_block[MAXDIMS];
  sked *s;
  void *xfer;

  rank = F90_RANK_G(ad);

  SET_F90_DIST_DESC_PTR(dd2, rank);

  flags = (__PRESCRIPTIVE_DIST_TARGET + __PRESCRIPTIVE_DIST_FORMAT +
           __DIST_TARGET_AXIS + __ASSUMED_GB_EXTENT + __DUMMY_COLLAPSE_PAXIS);

  isstar = 0;
  for (i = 0; i < rank; ++i) {
    SET_DIM_PTRS(add, ad, i);
    stride[i] = -DIST_DPTR_TSTRIDE_G(add);
    lbound[i] = DIST_DPTR_TLB_G(add);
    ubound[i] = DIST_DPTR_TUB_G(add);
    paxis[i] = DIST_DPTR_PAXIS_G(add);
    gen_block[i] = DIST_DPTR_GEN_BLOCK_G(add);
    no[i] = DIST_DPTR_NO_G(add);
    po[i] = DIST_DPTR_PO_G(add);

    switch (DFMT(ad, i + 1)) {
    case DFMT_GEN_BLOCK:
      isstar |= 0x01 << (7 + 3 * i);
      dstfmt[i] = 0;
      break;
    case DFMT_COLLAPSED:
      isstar |= 0x01 << i;
      dstfmt[i] = 0;
      break;
    case DFMT_BLOCK:
      dstfmt[i] = 0;
      break;
    case DFMT_BLOCK_K:
      dstfmt[i] = DIST_DPTR_BLOCK_G(add);
      break;
    case DFMT_CYCLIC:
      dstfmt[i] = -1;
      break;
    case DFMT_CYCLIC_K:
      dstfmt[i] = -(DIST_DPTR_BLOCK_G(add));
      break;
    default:
      __fort_abort("__fort_reverse: invalid dist format (internal)");
    }
  }

  switch (rank) {
  case 1:
    ENTFTN(TEMPLATE, template)
    (dd2, &rank, &flags, DIST_DIST_TARGET_G(ad), &isstar, &paxis[0],
     (!gen_block[0]) ? &dstfmt[0] : gen_block[0], &lbound[0], &ubound[0]);
    break;
  case 2:
    ENTFTN(TEMPLATE, template)
    (dd2, &rank, &flags, DIST_DIST_TARGET_G(ad), &isstar, &paxis[0],
     (!gen_block[0]) ? &dstfmt[0] : gen_block[0], &lbound[0], &ubound[0],
     &paxis[1], (!gen_block[1]) ? &dstfmt[1] : gen_block[1], &lbound[1],
     &ubound[1]);
    break;
  case 3:
    ENTFTN(TEMPLATE, template)
    (dd2, &rank, &flags, DIST_DIST_TARGET_G(ad), &isstar, &paxis[0],
     (!gen_block[0]) ? &dstfmt[0] : gen_block[0], &lbound[0], &ubound[0],
     &paxis[1], (!gen_block[1]) ? &dstfmt[1] : gen_block[1], &lbound[1],
     &ubound[1], &paxis[2], (!gen_block[2]) ? &dstfmt[2] : gen_block[2],
     &lbound[2], &ubound[2]);
    break;
  case 4:
    ENTFTN(TEMPLATE, template)
    (dd2, &rank, &flags, DIST_DIST_TARGET_G(ad), &isstar, &paxis[0],
     (!gen_block[0]) ? &dstfmt[0] : gen_block[0], &lbound[0], &ubound[0],
     &paxis[1], (!gen_block[1]) ? &dstfmt[1] : gen_block[1], &lbound[1],
     &ubound[1], &paxis[2], (!gen_block[2]) ? &dstfmt[2] : gen_block[2],
     &lbound[2], &ubound[2], &paxis[3],
     (!gen_block[3]) ? &dstfmt[3] : gen_block[3], &lbound[3], &ubound[3]);
    break;
  case 5:
    ENTFTN(TEMPLATE, template)
    (dd2, &rank, &flags, DIST_DIST_TARGET_G(ad), &isstar, &paxis[0],
     (!gen_block[0]) ? &dstfmt[0] : gen_block[0], &lbound[0], &ubound[0],
     &paxis[1], (!gen_block[1]) ? &dstfmt[1] : gen_block[1], &lbound[1],
     &ubound[1], &paxis[2], (!gen_block[2]) ? &dstfmt[2] : gen_block[2],
     &lbound[2], &ubound[2], &paxis[3],
     (!gen_block[3]) ? &dstfmt[3] : gen_block[3], &lbound[3], &ubound[3],
     &paxis[4], (!gen_block[4]) ? &dstfmt[4] : gen_block[4], &lbound[4],
     &ubound[4]);
    break;
  case 6:
    ENTFTN(TEMPLATE, template)
    (dd2, &rank, &flags, DIST_DIST_TARGET_G(ad), &isstar, &paxis[0],
     (!gen_block[0]) ? &dstfmt[0] : gen_block[0], &lbound[0], &ubound[0],
     &paxis[1], (!gen_block[1]) ? &dstfmt[1] : gen_block[1], &lbound[1],
     &ubound[1], &paxis[2], (!gen_block[2]) ? &dstfmt[2] : gen_block[2],
     &lbound[2], &ubound[2], &paxis[3],
     (!gen_block[3]) ? &dstfmt[3] : gen_block[3], &lbound[3], &ubound[3],
     &paxis[4], (!gen_block[4]) ? &dstfmt[4] : gen_block[4], &lbound[4],
     &ubound[4], &paxis[5], (!gen_block[5]) ? &dstfmt[5] : gen_block[5],
     &lbound[5], &ubound[5]);
    break;
  case 7:
    ENTFTN(TEMPLATE, template)
    (dd2, &rank, &flags, DIST_DIST_TARGET_G(ad), &isstar, &paxis[0],
     (!gen_block[0]) ? &dstfmt[0] : gen_block[0], &lbound[0], &ubound[0],
     &paxis[1], (!gen_block[1]) ? &dstfmt[1] : gen_block[1], &lbound[1],
     &ubound[1], &paxis[2], (!gen_block[2]) ? &dstfmt[2] : gen_block[2],
     &lbound[2], &ubound[2], &paxis[3],
     (!gen_block[3]) ? &dstfmt[3] : gen_block[3], &lbound[3], &ubound[3],
     &paxis[4], (!gen_block[4]) ? &dstfmt[4] : gen_block[4], &lbound[4],
     &ubound[4], &paxis[5], (!gen_block[5]) ? &dstfmt[5] : gen_block[5],
     &lbound[5], &ubound[5], &paxis[6],
     (!gen_block[6]) ? &dstfmt[6] : gen_block[6], &lbound[6], &ubound[6]);
    break;
  default:
    __fort_abort("reverse_array: Temp Invalid Rank (internal error)");
  }

  kind = F90_KIND_G(ad);
  len = F90_LEN_G(ad);

  switch (rank) {
  case 1:
    ENTFTN(INSTANCE, instance)
    (dd2, dd2, &kind, &len, &_0, &no[0], &po[0]);
    break;
  case 2:
    ENTFTN(INSTANCE, instance)
    (dd2, dd2, &kind, &len, &_0, &no[0], &po[0], &no[1], &po[1]);
    break;
  case 3:
    ENTFTN(INSTANCE, instance)
    (dd2, dd2, &kind, &len, &_0, &no[0], &po[0], &no[1], &po[1], &no[2],
     &po[2]);
    break;
  case 4:
    ENTFTN(INSTANCE, instance)
    (dd2, dd2, &kind, &len, &_0, &no[0], &po[0], &no[1], &po[1], &no[2], &po[2],
     &no[3], &po[3]);
    break;
  case 5:
    ENTFTN(INSTANCE, instance)
    (dd2, dd2, &kind, &len, &_0, &no[0], &po[0], &no[1], &po[1], &no[2], &po[2],
     &no[3], &po[3], &no[4], &po[4]);
    break;
  case 6:
    ENTFTN(INSTANCE, instance)
    (dd2, dd2, &kind, &len, &_0, &no[0], &po[0], &no[1], &po[1], &no[2], &po[2],
     &no[3], &po[3], &no[4], &po[4], &no[5], &po[5]);
    break;
  case 7:
    ENTFTN(INSTANCE, instance)
    (dd2, dd2, &kind, &len, &_0, &no[0], &po[0], &no[1], &po[1], &no[2], &po[2],
     &no[3], &po[3], &no[4], &po[4], &no[5], &po[5], &no[6], &po[6]);
    break;
  default:
    __fort_abort("reverse_array: Instance Invalid Rank (internal error)");
  }

  /* swap bounds for negative stride */

  for (i = 0; i < rank; ++i) {
    if (stride[i] < 0) {
      __INT_T t;

      t = ubound[i];
      ubound[i] = lbound[i];
      lbound[i] = t;
    }
  }
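
  /* Illustrative case: a template dimension 1:10 with tstride 1 becomes
     stride -1 above, so the swap yields lbound = 10, ubound = 1 and the
     SECT call below describes the reversed section 10:1:-1. */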

  switch (rank) {
  case 1:
    ENTFTN(SECT, sect)(dd, dd2, &lbound[0], &ubound[0], &stride[0], &rank);
    break;
  case 2:
    ENTFTN(SECT, sect)(dd, dd2, &lbound[0], &ubound[0], &stride[0],
                       &lbound[1], &ubound[1], &stride[1], &rank);
    break;
  case 3:
    ENTFTN(SECT, sect)(dd, dd2, &lbound[0], &ubound[0], &stride[0],
                       &lbound[1], &ubound[1], &stride[1],
                       &lbound[2], &ubound[2], &stride[2], &rank);
    break;
  case 4:
    ENTFTN(SECT, sect)(dd, dd2, &lbound[0], &ubound[0], &stride[0],
                       &lbound[1], &ubound[1], &stride[1],
                       &lbound[2], &ubound[2], &stride[2],
                       &lbound[3], &ubound[3], &stride[3], &rank);
    break;
  case 5:
    ENTFTN(SECT, sect)(dd, dd2, &lbound[0], &ubound[0], &stride[0],
                       &lbound[1], &ubound[1], &stride[1],
                       &lbound[2], &ubound[2], &stride[2],
                       &lbound[3], &ubound[3], &stride[3],
                       &lbound[4], &ubound[4], &stride[4], &rank);
    break;
  case 6:
    ENTFTN(SECT, sect)(dd, dd2, &lbound[0], &ubound[0], &stride[0],
                       &lbound[1], &ubound[1], &stride[1],
                       &lbound[2], &ubound[2], &stride[2],
                       &lbound[3], &ubound[3], &stride[3],
                       &lbound[4], &ubound[4], &stride[4],
                       &lbound[5], &ubound[5], &stride[5], &rank);
    break;
  case 7:
    ENTFTN(SECT, sect)(dd, dd2, &lbound[0], &ubound[0], &stride[0],
                       &lbound[1], &ubound[1], &stride[1],
                       &lbound[2], &ubound[2], &stride[2],
                       &lbound[3], &ubound[3], &stride[3],
                       &lbound[4], &ubound[4], &stride[4],
                       &lbound[5], &ubound[5], &stride[5],
                       &lbound[6], &ubound[6], &stride[6], &rank);
    break;
  default:
    __fort_abort("reverse_array: Sect Invalid rank (internal error)");
  }

  /* copy ab to db */

  s = (sked *)ENTFTN(COMM_COPY, comm_copy)(db, ab, dd, ad);

  xfer = (void *)ENTFTN(COMM_START, comm_start)(&s, db, dd, (char *)ab, ad);

  ENTFTN(COMM_FINISH, comm_finish)(xfer);
}
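
/* A minimal sketch of the sequence above for a rank-1 array (illustrative
 * only; assumes descriptors a_desc/r_desc and buffers a/r already exist):
 *
 *   I8(__fort_reverse_array)((char *)r, (char *)a, r_desc, a_desc);
 *
 * builds a template and instance matching a_desc, describes the section
 * with swapped bounds and negated stride, then runs the three-phase
 * communication (comm_copy schedule, comm_start transfer, comm_finish)
 * to land the reversed elements in r. */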