1 /*
2  * Copyright (c) 1995-2018, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 /* clang-format off */
19 
20 /* dist.c -- distribution management routines */
21 
22 #include "stdioInterf.h"
23 #include "fioMacros.h"
24 #include "scatter.h"
25 #include "fort_vars.h"
26 
27 void *ENTFTN(COMM_START, comm_start)(sked **skp, void *rb, F90_Desc *rd,
28                                      void *sb, F90_Desc *sd);
29 
30 sked *ENTFTN(COMM_COPY, comm_copy)(void *rb, void *sb, F90_Desc *rs,
31                                    F90_Desc *ss);
32 
33 static __INT_T dummyGenBlock = 0;
34 
I8(__fort_new_gen_block)35 __INT_T *I8(__fort_new_gen_block)(F90_Desc *d, int dim)
36 {
37   return &f90DummyGenBlock;
38 }
39 
/* Compute the owned lower/upper bounds (*the_olb, *the_oub) for the
   gen_block-distributed dimension dim of descriptor d, for the
   processor whose coordinate in that dimension is pcoord.  A negative
   pcoord means this processor is off the grid; that is reported by
   returning the empty range olb=1, oub=0. */
void I8(__fort_gen_block_bounds)(F90_Desc *d, int dim, __INT_T *the_olb,
                                 __INT_T *the_oub, __INT_T pcoord)
{

  /* calculate olb and oub for gen_block dimension */

  __INT_T i;
  __INT_T olb, oub, dtStride;
  __INT_T *gb, *tempGB;
  __INT_T pshape;
  __INT_T direction;
  __INT_T dUbound;
  __INT_T tExtent, aExtent;
  DECL_DIM_PTRS(dd);

  SET_DIM_PTRS(dd, d, dim);

  /*pcoord = DIST_DPTR_PCOORD_G(dd);*/

  if (pcoord < 0) {
    *the_oub = 0; /*force off processor grid status*/
    *the_olb = 1;
    return;
  }

  dUbound = DPTR_UBOUND_G(dd);
  dtStride = DIST_DPTR_TSTRIDE_G(dd);
  pshape = DIST_DPTR_PSHAPE_G(dd);
  direction = (dtStride < 0); /* nonzero when template stride is negative */

  /* template extent vs. array extent: when they differ, the
     descriptor's gen_block vector does not apply directly */
  tExtent = (DIST_DPTR_TUB_G(dd) - DIST_DPTR_TLB_G(dd)) + 1;
  aExtent = (dUbound - F90_DPTR_LBOUND_G(dd)) + 1;

  if (tExtent != aExtent) {

    /* NOTE(review): __fort_new_gen_block in this file returns the
       address of a static element, yet tempGB is passed to
       __fort_free below — confirm the intended allocation contract. */
    tempGB = I8(__fort_new_gen_block)(d, dim);
    gb = tempGB;

  } else {

    tempGB = 0;
    gb = DIST_DPTR_GEN_BLOCK_G(dd);
  }

  if (gb[pcoord] == 0) {
    oub = 0; /*force off processor grid status*/
    olb = 1;
  } else {
    /* walk the per-processor block sizes from the first (or, for a
       negative stride, the last) processor toward pcoord, summing
       block sizes into the owned bounds */
    gb += ((!direction) ? 0 : (pshape - 1));
    olb = F90_DPTR_LBOUND_G(dd);
    oub = *gb + (olb - 1);
    if (!direction)
      i = 0;
    else
      i = (pshape - 1);
    for (; i != pcoord;) {
      olb += *gb;
      if (!direction) {
        ++gb;
        ++i;
      } else {
        --gb;
        --i;
      }
      oub += *gb;
    }
  }

  if (tempGB)
    __fort_free(tempGB);

  *the_olb = olb;
  *the_oub = oub;

#if defined(DEBUG)

  if (olb < F90_DPTR_LBOUND_G(dd) || oub > dUbound) {

    __fort_abort("__fort_gen_block_bounds: bad gen_block");
  }

#endif
}
123 
124 /* given a divisor n which is a power of two, compute the (positive)
125    right shift amount equivalent to division by n.  return -1 if n is
126    not a power of two. */
127 
/* For a divisor n that is a power of two, return the right-shift
   amount equivalent to dividing by n; return -1 when n is not a
   power of two. */
static int
div_shift(int n)
{
  unsigned int v = n;
  int shift = 0;
  int step;

  /* binary search for the position of the highest set bit */
  for (step = 4 * (int)sizeof(int); step != 0; step >>= 1) {
    if (v >> step) {
      v >>= step;
      shift += step;
    }
  }
  return (n == 1 << shift) ? shift : -1; /* exact power of two? */
}
146 
147 /* T3E, T3D, (Cray)Sparc, and T90 w/IEEE have int_mult_upper
148    intrinsic; C90 has fast 64-bit multiply allowing emulation */
149 
150 #ifndef DESC_I8
151 
/* Fortran-callable entry: return the high bits (above RECIP_FRACBITS)
   of the 64-bit product of *x and *y — the fixed-point reciprocal
   multiply used to replace integer division. */
__INT_T
ENTRY(INT_MULT_UPPER, int_mult_upper)
(__INT_T *x, __INT_T *y)
{
  register unsigned long a, b;
  a = (unsigned long)*x;
  b = (unsigned long)*y;
  return (unsigned long long)a * (unsigned long long)b >> RECIP_FRACBITS;
}
161 
162 unsigned long
_int_mult_upper(int x,int y)163 _int_mult_upper(int x, int y)
164 {
165   register unsigned long a, b;
166   a = (unsigned long)x;
167   b = (unsigned long)y;
168   return (unsigned long long)a * (unsigned long long)b >> RECIP_FRACBITS;
169 }
170 
171 #endif
172 
173 /* greatest common divisor */
174 
175 #ifndef DESC_I8
/* Greatest common divisor of u and v; always non-negative.
   gcd(x,0) == |x| and gcd(0,0) == 0. */
int
__fort_gcd(int u, int v)
{
  int shift, lsb, width, t;

  /* gcd is invariant under negation, so work with magnitudes */
  if (u < 0)
    u = -u;
  if (v == 0)
    return u;
  if (v < 0)
    v = -v;
  if (u == 0)
    return v;

  /* Knuth V.2, 4.5.2, Algorithm B (binary gcd).  First extract the
     common power-of-two factor: lsb = least significant set bit of
     (u|v), then convert it to a shift count. */
  lsb = u | v;
  lsb &= -lsb;

  shift = 0;
  width = 4 * sizeof(int);
  while (lsb != 1) {
    t = lsb >> width;
    if (t != 0) {
      shift += width;
      lsb = t;
    }
    width >>= 1;
  }

  u >>= shift;
  v >>= shift;

  /* seed the odd-part iteration */
  t = (u & 1) ? -v : u;

  while (t != 0) {
    while ((t & 1) == 0)
      t /= 2; /* strip factors of two */
    if (t > 0)
      u = t;
    else
      v = -t;
    t = u - v;
  }

  /* restore the common power-of-two factor */
  return u << shift;
}
226 
227 /* least common multiple */
228 
/* Least common multiple of u and v; always non-negative.
   lcm(x,0) == 0 by convention. */
int
__fort_lcm(int u, int v)
{
  int prod = u * v;

  if (prod == 0)
    return 0;
  if (prod < 0)
    prod = -prod;
  return prod / __fort_gcd(u, v);
}
237 #endif
238 
/* Owner of the element of d at indices idxv.  In this build the
   function ignores its arguments and always reports processor 0. */
int I8(__fort_owner)(F90_Desc *d, __INT_T *idxv)
{
  return 0;
}
243 
244 /* Given indices of an array element, return the processor number of
245    its nearest owner.  The nearest owner is the one whose processor
246    coordinate in each replicated dimension is equal to this
247    processor's coordinate. */
248 
__INT_T
ENTFTN(OWNER, owner)
(F90_Desc *d, ...)
{
  va_list va;
  proc *p;
  procdim *pd;
  __INT_T dx, idxv[MAXDIMS], owner, px, repl;

#if defined(DEBUG)
  if (d == NULL || F90_TAG_G(d) != __DESC)
    __fort_abort("OWNER: invalid descriptor");
#endif
  /* collect one index per rank from the by-reference varargs */
  va_start(va, d);
  for (dx = 0; dx < F90_RANK_G(d); ++dx)
    idxv[dx] = *va_arg(va, __INT_T *);
  va_end(va);

  owner = I8(__fort_owner)(d, idxv);

  /* add the "scalar subscript" component of the processor number
     (which was subtracted in __fort_owner!) */

  p = DIST_DIST_TARGET_G(d);
  repl = DIST_REPLICATED_G(d);
  for (px = 0; repl != 0; repl >>= 1, ++px) {
    if (repl & 1) {
      /* this processor dimension is replicated; add back this
         processor's coordinate contribution */
      pd = &p->dim[px];
      if (pd->coord > 0)
        owner += pd->stride * pd->coord;
    }
  }

  return owner;
}
284 
285 /* Construct a description of the replication of an object over the
286    processor grid.  Replication occurs over the subset of processor
287    grid dimensions onto which no array or template dimensions have
288    been mapped.  This divides the processor grid into sets of
289    processors having identical copies of mapped portions of the array
290    or template. */
291 
/* Fill in *r with a description of how section d is replicated over
   its processor grid (see the block comment above): the replication
   group index and count, per-dimension group multipliers, the counts
   and strides of the replicated processor dimensions, the number of
   replicated copies, and the low processor number of this
   processor's replication group. */
void I8(__fort_describe_replication)(F90_Desc *d, repl_t *r)
{
  DECL_DIM_PTRS(dd);
  DECL_HDR_PTRS(t);
  DECL_DIM_PTRS(td);
  proc *p;
  procdim *pd;
  __INT_T grpi, dx, m, ncopies, ndim, ngrp, plow, px;

  /* pass 1: mapped array dimensions define the replication groups */
  ngrp = 1;
  grpi = 0;
  plow = DIST_PBASE_G(d);
  m = DIST_MAPPED_G(d);
  for (dx = 0; dx < F90_RANK_G(d); m >>= 1, ++dx) {
    if (m & 1) {
      SET_DIM_PTRS(dd, d, dx);
      if (DIST_DPTR_PCOORD_G(dd) > 0) {
        grpi += DIST_DPTR_PCOORD_G(dd) * ngrp;
        plow += DIST_DPTR_PCOORD_G(dd) * DIST_DPTR_PSTRIDE_G(dd);
      }
      r->gstr[dx] = ngrp; /* replication group multiplier */
      ngrp *= DIST_DPTR_PSHAPE_G(dd);
    } else
      r->gstr[dx] = 0;
  }
  r->grpi = grpi; /* my replication group index */
  r->ngrp = ngrp; /* number of replication groups */

  /* pass 2: processor dimensions with no mapped array/template
     dimension hold the replicated copies */
  p = DIST_DIST_TARGET_G(d);
  ndim = 0;
  ncopies = 1;
  m = DIST_REPLICATED_G(d);
  for (px = 0; m != 0; m >>= 1, ++px) {
    pd = &p->dim[px];
    if (m & 1 && pd->shape > 1) {

      /* normalize plow down to the group's lowest processor */
      if (pd->coord > 0)
        plow -= pd->coord * pd->stride;

      r->pcnt[ndim] = pd->shape;  /* processor counts */
      r->pstr[ndim] = pd->stride; /* processor strides */
      ncopies *= pd->shape;
      ++ndim;
    }
  }
  r->ncopies = ncopies; /* number of replicated copies */
  r->ndim = ndim;       /* number of replicated proc dims */
  r->plow = plow;       /* my replication group low proc number */
}
341 
342 /* Iterate over all processors owning a copy of the first element of
343    section d.  pcoord is an integer vector of size MAXDIMS which saves the
344    processor coordinates in the replication group. */
345 
I8(__fort_next_owner)346 int I8(__fort_next_owner)(F90_Desc *d, repl_t *r, int *pcoord, int owner)
347 {
348   int i;
349 
350   for (i = 0; i < r->ndim; ++i) {
351     pcoord[i]++;
352     owner += r->pstr[i];
353     if (pcoord[i] < r->pcnt[i])
354       return owner; /* keep going */
355     pcoord[i] = 0;
356     owner -= r->pcnt[i] * r->pstr[i];
357   }
358   return -1; /* finished */
359 }
360 
361 /* Determine whether or not a global index in dimension dim of an
362    array is local to this processor. */
363 
364 __LOG_T
ENTFTN(ISLOCAL_IDX,islocal_idx)365 ENTFTN(ISLOCAL_IDX, islocal_idx)
366 (F90_Desc *d, __INT_T *dimb, __INT_T *idxb)
367 {
368   DECL_DIM_PTRS(dd);
369   __INT_T dfmt, dim, idx, pcoord, tidx;
370 
371   dim = *dimb;
372   idx = *idxb;
373 #if defined(DEBUG)
374   if (d == NULL || F90_TAG_G(d) != __DESC)
375     __fort_abort("ISLOCAL_IDX: invalid descriptor");
376   if (dim < 1 || dim > F90_RANK_G(d))
377     __fort_abort("ISLOCAL_IDX: invalid dimension");
378 #endif
379   SET_DIM_PTRS(dd, d, dim - 1);
380   return GET_DIST_TRUE_LOG;
381 }
382 
383 /* Determine whether or not the element of section d referenced by the
384    indices is local to this processor. */
385 
I8(__fort_islocal)386 int I8(__fort_islocal)(F90_Desc *d, __INT_T *idxv)
387 {
388   DECL_DIM_PTRS(dd);
389   __INT_T dfmt, dx, pcoord, tidx;
390 
391   if (F90_FLAGS_G(d) & __OFF_TEMPLATE)
392     return 0;
393   for (dx = 0, dfmt = DIST_DFMT_G(d); dfmt != 0; dfmt >>= DFMT__WIDTH, ++dx) {
394     SET_DIM_PTRS(dd, d, dx);
395   }
396   return 1;
397 }
398 
399 /* Determine whether or not the array element referenced by the
400    indices is local to this processor. */
401 
402 __LOG_T
ENTFTN(ISLOCAL,islocal)403 ENTFTN(ISLOCAL, islocal)
404 (F90_Desc *d, ...)
405 {
406   va_list va;
407   DECL_DIM_PTRS(dd);
408   __INT_T dx, idxv[MAXDIMS], pcoord, tidx;
409 
410 #if defined(DEBUG)
411   if (d == NULL || F90_TAG_G(d) != __DESC)
412     __fort_abort("ISLOCAL: invalid descriptor");
413 #endif
414   va_start(va, d);
415   for (dx = 0; dx < F90_RANK_G(d); ++dx)
416     idxv[dx] = *va_arg(va, __INT_T *);
417   va_end(va);
418   return I8(__fort_islocal)(d, idxv) ? GET_DIST_TRUE_LOG : 0;
419 }
420 
421 /* Determine the prime owner processor number and the local array
422    offset for the element of section d identified by the index
423    vector. */
424 
/* Determine the prime owner processor number (*cpu) and local array
   offset (*off) for the element of section d at indices idxv.  Only
   collapsed (undistributed) dimensions are handled; any other
   distribution format aborts. */
void I8(__fort_localize)(F90_Desc *d, __INT_T *idxv, int *cpu, __INT_T *off)
{
  DECL_DIM_PTRS(dd);
  proc *p;
  procdim *pd;
  __INT_T dfmt, dx, lab, lidx, lof, offset, owner, pcoord, px, repl, tidx;

  owner = DIST_PBASE_G(d);
  offset = 0;
  for (dx = 0, dfmt = DIST_DFMT_G(d); dx < F90_RANK_G(d);
       dfmt >>= DFMT__WIDTH, ++dx) {
    SET_DIM_PTRS(dd, d, dx);
#if defined(DEBUG)
    if (idxv[dx] < F90_DPTR_LBOUND_G(dd) || idxv[dx] > DPTR_UBOUND_G(dd)) {
      printf("%d localize: index %d out of bounds %d:%d rank=%d\n",
             GET_DIST_LCPU, idxv[dx], F90_DPTR_LBOUND_G(dd), DPTR_UBOUND_G(dd),
             F90_RANK_G(d));
      __fort_abort((char *)0);
    }
#endif
    /* apply section stride/offset to get the template-relative index */
    lidx = F90_DPTR_SSTRIDE_G(dd) * idxv[dx] + F90_DPTR_SOFFSET_G(dd);

    switch (dfmt & DFMT__MASK) {
    case DFMT_COLLAPSED:
      offset += F90_DPTR_LSTRIDE_G(dd) * (lidx - DIST_DPTR_LAB_G(dd));
      continue;

    default:
      __fort_abort("localize: unsupported dist-format");
    }

    /* NOTE(review): the remainder of this loop body is unreachable —
       every switch path either continues or aborts — and it reads
       pcoord (and possibly lab) uninitialized.  It appears to be a
       remnant of removed block/cyclic distribution support. */

    /* find remote processor's lower allocated bound */

    /*
     * We calculated lab in the gen_block case ...
     */

    if (!DIST_DPTR_GEN_BLOCK_G(dd)) {
      lab = DIST_DPTR_CLB_G(dd) + pcoord * DIST_DPTR_BLOCK_G(dd) -
            DIST_DPTR_AOFFSET_G(dd);

      if (DIST_DPTR_ASTRIDE_G(dd) != 1) {
        if (DIST_DPTR_ASTRIDE_G(dd) < 0)
          lab += DIST_DPTR_BLOCK_G(dd) - DIST_DPTR_CYCLE_G(dd);
        if (DIST_DPTR_ASTRIDE_G(dd) == -1)
          lab = -lab;
        else
          lab = Ceil(lab, DIST_DPTR_ASTRIDE_G(dd));
      }
    }

    lab -= DIST_DPTR_NO_G(dd);

    /* accumulate processor number and offset */

    owner += DIST_DPTR_PSTRIDE_G(dd) * pcoord;
    offset += F90_DPTR_LSTRIDE_G(dd) * (lidx - lab);
  }

  /* compensate for the "scalar subscript" component of the
     processor number in pbase. The difference really ought to be
     kept in a separate descriptor item. */

  p = DIST_DIST_TARGET_G(d);
  repl = DIST_REPLICATED_G(d);
  for (px = 0; repl != 0; repl >>= 1, ++px) {
    if (repl & 1) {
      pd = &p->dim[px];
      if (pd->coord > 0)
        owner -= pd->stride * pd->coord;
    }
  }

  *cpu = owner;
  *off = offset;

#if defined(DEBUG)
  if (__fort_test & DEBUG_DIST) {
    printf("%d localize: cpu=%d off=%d + lstride=%d * (lidx=%d - lab=%d)\n",
           GET_DIST_LCPU, owner, offset, F90_DPTR_LSTRIDE_G(dd), lidx, lab);
  }
#endif
}
508 
509 /* localize an index in dimension dim of the array described by d */
510 
/* Localize index *idxp in dimension *dimp of the array described by
   d: return the owning processor coordinate in *pcoordp and the
   corresponding local index in *lindexp.  Only collapsed dimensions
   are handled; any other distribution format aborts. */
void ENTFTN(LOCALIZE_DIM, localize_dim)(F90_Desc *d, __INT_T *dimp,
                                        __INT_T *idxp, __INT_T *pcoordp,
                                        __INT_T *lindexp)
{
  DECL_DIM_PTRS(dd);
  __INT_T dim, idx, lab, lidx, lof, pcoord, tidx;

  dim = *dimp;
  idx = *idxp;

  SET_DIM_PTRS(dd, d, dim - 1);
#if defined(DEBUG)
  if (idx < F90_DPTR_LBOUND_G(dd) || idx > DPTR_UBOUND_G(dd))
    __fort_abort("LOCALIZE_DIM: index out of bounds");
#endif

  /* apply section stride/offset */
  lidx = F90_DPTR_SSTRIDE_G(dd) * idx + F90_DPTR_SOFFSET_G(dd);

  switch (DFMT(d, dim)) {
  case DFMT_COLLAPSED:
    /* undistributed: processor coordinate 0, local == section index */
    *pcoordp = 0;
    *lindexp = lidx;
    return;

  default:
    __fort_abort("LOCALIZE_DIM: unsupported dist-format");
  }

  /* NOTE(review): everything below is unreachable — the switch either
     returns or aborts — and it reads pcoord/lab uninitialized.
     Remnant of removed block/cyclic distribution support. */

  /* remote proc's lab calculated for gen_block above */

  if (!DIST_DPTR_GEN_BLOCK_G(dd)) {

    /* find remote processor's lower allocated bound */

    lab = DIST_DPTR_CLB_G(dd) + pcoord * DIST_DPTR_BLOCK_G(dd) -
          DIST_DPTR_AOFFSET_G(dd);
    if (DIST_DPTR_ASTRIDE_G(dd) != 1) {
      if (DIST_DPTR_ASTRIDE_G(dd) < 0)
        lab += DIST_DPTR_BLOCK_G(dd) - DIST_DPTR_CYCLE_G(dd);
      if (DIST_DPTR_ASTRIDE_G(dd) == -1)
        lab = -lab;
      else
        lab = Ceil(lab, DIST_DPTR_ASTRIDE_G(dd));
    }
  }
  lab -= DIST_DPTR_NO_G(dd);

  /* return the remote processor's coordinate and the local index on
     this processor with the same element offset */

  *pcoordp = pcoord;
  *lindexp = lidx - lab + DIST_DPTR_LAB_G(dd);
}
564 
565 /* Given the indices of an element of section d, return its local
566    offset or -1 if it is not local.  The offset does not reflect any
567    scalar subscript. */
568 
__INT_T
I8(__fort_local_offset)(F90_Desc *d, __INT_T *idxv)
{
  DECL_DIM_PTRS(dd);
  __INT_T dx, idx, lidx, offset;

  /* off the template: this processor holds no part of the section */
  if (F90_FLAGS_G(d) & __OFF_TEMPLATE)
    return -1;

  offset = F90_LBASE_G(d) - 1;

  /* sequential (unmapped) storage: plain strided offset computation */
  if (F90_FLAGS_G(d) & __SEQUENCE) {
    for (dx = F90_RANK_G(d); --dx >= 0;) {
      SET_DIM_PTRS(dd, d, dx);
      lidx = F90_DPTR_SSTRIDE_G(dd) * idxv[dx] + F90_DPTR_SOFFSET_G(dd);
      offset += F90_DPTR_LSTRIDE_G(dd) * lidx;
    }
    return offset;
  }

  /* general case: apply section stride/offset per dimension, then
     the local stride */
  for (dx = 0; dx < F90_RANK_G(d); ++dx) {
    SET_DIM_PTRS(dd, d, dx);
    idx = idxv[dx];
#if defined(DEBUG)
    if (idx < F90_DPTR_LBOUND_G(dd) || idx > DPTR_UBOUND_G(dd))
      __fort_abort("local_offset: index out of bounds");
#endif
    lidx = F90_DPTR_SSTRIDE_G(dd) * idx + F90_DPTR_SOFFSET_G(dd);
    offset += F90_DPTR_LSTRIDE_G(dd) * lidx;
  }
#if defined(DEBUG)
  if (__fort_test & DEBUG_DIST) {
    printf("%d local_offset: offset=%d + lstride=%d * lidx=%d\n",
           GET_DIST_LCPU, offset, F90_DPTR_LSTRIDE_G(dd), lidx);
  }
#endif
  return offset;
}
607 
608 /* Given the indices of an element of section d, return its local
609    address or NULL if it is not local.  The base address is assumed to
610    be adjusted for scalar subscripts. */
611 
/* Return the local address of the element of section d at indices
   idxv, or NULL if it is not local.  base is assumed to be already
   adjusted for scalar subscripts (hence the DIST_SCOFF_G term). */
void *I8(__fort_local_address)(void *base, F90_Desc *d, __INT_T *idxv)
{
  DECL_DIM_PTRS(dd);
  __INT_T dfmt, dx, idx, lidx, lof, offset, pcoord, tidx;

  /* off the template: nothing is local */
  if (F90_FLAGS_G(d) & __OFF_TEMPLATE)
    return NULL;

  offset = F90_LBASE_G(d) - 1 - DIST_SCOFF_G(d);

  /* sequential (unmapped) storage: plain strided address */
  if (F90_FLAGS_G(d) & __SEQUENCE) {
    for (dx = F90_RANK_G(d); --dx >= 0;) {
      SET_DIM_PTRS(dd, d, dx);
      idx = idxv[dx];
      lidx = F90_DPTR_SSTRIDE_G(dd) * idx + F90_DPTR_SOFFSET_G(dd);
      offset += F90_DPTR_LSTRIDE_G(dd) * lidx;
    }
    return (char *)base + offset * F90_LEN_G(d);
  }

  for (dx = 0, dfmt = DIST_DFMT_G(d); dx < F90_RANK_G(d);
       dfmt >>= DFMT__WIDTH, ++dx) {
    SET_DIM_PTRS(dd, d, dx);
    idx = idxv[dx];
#if defined(DEBUG)
    /* NOTE(review): this checks the dimension's upper bound sign, not
       idx itself — confirm whether an index-range check was intended */
    if (DPTR_UBOUND_G(dd) < 0)
      __fort_abort("local_address: index out of bounds");
#endif
    lidx = F90_DPTR_SSTRIDE_G(dd) * idx + F90_DPTR_SOFFSET_G(dd);

    switch (dfmt & DFMT__MASK) {
    case DFMT_COLLAPSED:
      break;
    case DFMT_BLOCK:
    case DFMT_BLOCK_K:
    case DFMT_GEN_BLOCK:
      /* block-style mappings: index must fall within the owned range */
      if (idx < DIST_DPTR_OLB_G(dd) || idx > DIST_DPTR_OUB_G(dd))
        return NULL;
      break;

    default:
      __fort_abort("local_offset: unsupported dist-format");
    }
    offset += F90_DPTR_LSTRIDE_G(dd) * lidx;
  }
  return (char *)base + offset * F90_LEN_G(d);
}
659 
660 /* Localize a global index in dimension dim of array a.  This is only
661    necessary for dimensions with cyclic or block-cyclic distributions.
662    It is assumed that the index is local */
663 
664 __INT_T
ENTFTN(LOCALIZE_INDEX,localize_index)665 ENTFTN(LOCALIZE_INDEX, localize_index)
666 (F90_Desc *d, __INT_T *dimb, __INT_T *idxb)
667 {
668   DECL_DIM_PTRS(dd);
669   int dim, idx, lidx, lof;
670 
671   dim = *dimb;
672   idx = *idxb;
673 #if defined(DEBUG)
674   if (d == NULL || F90_TAG_G(d) != __DESC)
675     __fort_abort("LOCALIZE_INDEX: invalid descriptor");
676   if (dim < 1 || dim > F90_RANK_G(d))
677     __fort_abort("LOCALIZE_INDEX: invalid dimension");
678 #endif
679   SET_DIM_PTRS(dd, d, dim - 1);
680 #if defined(DEBUG)
681   if (idx < F90_DPTR_LBOUND_G(dd) || idx > DPTR_UBOUND_G(dd))
682     __fort_abort("LOCALIZE_INDEX: index out of bounds");
683 #endif
684   lidx = F90_DPTR_SSTRIDE_G(dd) * idx + F90_DPTR_SOFFSET_G(dd);
685   return lidx;
686 }
687 
688 /* Given the loop bounds l, u, and s that range over dimension dim of
689    section d, return cycle loop lower bound cl, iteration count cn,
690    stride cs, initial cyclic offset lof, and cyclic offset stride los
691    that localize the loop to this processor. */
692 
I8(cyclic_setup)693 static int I8(cyclic_setup)(F90_Desc *d, __INT_T dim, __INT_T l, __INT_T u,
694                             __INT_T s, __INT_T *pcl, __INT_T *pcs,
695                             __INT_T *plof, __INT_T *plos)
696 {
697   DECL_DIM_PTRS(dd);
698   __INT_T abstr, ck, cl, cn, cs, cs0, cu, lof, los, los0, m, n, q, r, tl, tu,
699       ts, ts0, x;
700 
701   SET_DIM_PTRS(dd, d, dim - 1);
702 
703   /* adjust lower bound to fall within array index range */
704 
705   if (s > 0) {
706     n = F90_DPTR_LBOUND_G(dd) - l + s - 1;
707     if (n > 0) {
708       if (s != 1)
709         n /= s;
710       l += n * s;
711     }
712   } else {
713     n = DPTR_UBOUND_G(dd) - l + s + 1;
714     if (n < 0) {
715       if (s == -1)
716         n = -n;
717       else
718         n /= s;
719       l += n * s;
720     }
721   }
722 
723   ts = DIST_DPTR_TSTRIDE_G(dd) * s; /* stride in ultimate template */
724 
725   cs = (ts < 0) ? -DIST_DPTR_CYCLE_G(dd)
726                 : DIST_DPTR_CYCLE_G(dd); /* cycle stride */
727 
728   /* check for zero-trip loop, no local data, or not cyclic */
729 
730   if (s > 0 ? (l > u || l > DIST_DPTR_OUB_G(dd) || u < DIST_DPTR_OLB_G(dd))
731             : (l < u || l < DIST_DPTR_OLB_G(dd) || u > DIST_DPTR_OUB_G(dd))) {
732 
733     /* no local data or zero-trip loop */
734 
735     cl = DIST_DPTR_CLB_G(dd);
736     cu = cl - cs;
737     cn = lof = los = 0;
738   } else
739     switch (DFMT(d, dim)) {
740 
741     default:
742       __fort_abort("cyclic_setup: unsupported dist-format");
743     }
744 #if defined(DEBUG)
745   if (__fort_test & DEBUG_DIST)
746     printf("%d cyclic dim=%d %d:%d:%d -> %d:%d:%d cn=%d lof=%d los=%d\n",
747            GET_DIST_LCPU, dim, l, u, s, cl, cu, cs, cn, lof, los);
748 #endif
749   *pcl = cl;
750   *pcs = cs;
751   *plof = lof;
752   *plos = los;
753   return cn;
754 }
755 
756 /* Cache the cycle loop bounds in the section descriptor. The cached
757    parameters describe loops over the entire section. */
758 
/* Cache whole-section cycle loop bounds in descriptor d: for each
   dimension whose cached bit is clear, recompute via cyclic_setup
   with a stride-1 loop over the full dimension, then mark all rank
   dimensions cached. */
void I8(__fort_cycle_bounds)(F90_Desc *d)
{
  DECL_DIM_PTRS(dd);
  __INT_T dim;

  for (dim = F90_RANK_G(d); dim > 0; --dim) {
    /* bit (dim-1) of the cached mask clear => needs recomputation */
    if ((~DIST_CACHED_G(d) >> (dim - 1)) & 1) {
      SET_DIM_PTRS(dd, d, dim - 1);
      DIST_DPTR_CN_P(
          dd, I8(cyclic_setup)(d, dim, F90_DPTR_LBOUND_G(dd), DPTR_UBOUND_G(dd),
                               1, &DIST_DPTR_CL_G(dd), &DIST_DPTR_CS_G(dd),
                               &DIST_DPTR_CLOF_G(dd), &DIST_DPTR_CLOS_G(dd)));
    }
  }
  /* set the low rank bits: all dimensions now cached */
  DIST_CACHED_P(d, (DIST_CACHED_G(d) | ~(-1 << F90_RANK_G(d))));
}
775 
776 /* Set the lower and upper cycle loop bounds, cycle loop stride,
777    cyclic index offset, and cyclic offset stride and return the cycle
778    loop trip count (number of local blocks) for the loop specified by
779    l:u:s over dimension dim of section d.  For a stride 1 loop over
780    the entire dimension, the cycle loop parameters are cached in the
781    descriptor. */
782 
__INT_T
I8(__fort_cyclic_loop)(F90_Desc *d, __INT_T dim, __INT_T l, __INT_T u,
                      __INT_T s, __INT_T *cl, __INT_T *cu, __INT_T *cs,
                      __INT_T *clof, __INT_T *clos)
{
  DECL_DIM_PTRS(dd);
  __INT_T cn, m;

#if defined(DEBUG)
  if (d == NULL || F90_TAG_G(d) != __DESC)
    __fort_abort("cyclic_loop: invalid descriptor");
  if (dim < 1 || dim > F90_RANK_G(d))
    __fort_abort("cyclic_loop: invalid dimension");
  if (s == 0)
    __fort_abort("cyclic_loop: invalid stride");
#endif

  SET_DIM_PTRS(dd, d, dim - 1);

  if (l == F90_DPTR_LBOUND_G(dd) && u == DPTR_UBOUND_G(dd) && s == 1) {

    /* loop bounds match section bounds */

    m = 1 << (dim - 1); /* this dimension's bit in the cached mask */
    if (~DIST_CACHED_G(d) & m) {

      /* not cached - store cycle loop bounds in descriptor */

      DIST_DPTR_CN_P(dd,
                    I8(cyclic_setup)(d, dim, l, u, s, &DIST_DPTR_CL_G(dd),
                                     &DIST_DPTR_CS_G(dd), &DIST_DPTR_CLOF_G(dd),
                                     &DIST_DPTR_CLOS_G(dd)));
      DIST_CACHED_P(d, DIST_CACHED_G(d) | m);
    }

    /* return previously cached cycle loop bounds */

    *cl = DIST_DPTR_CL_G(dd);
    *cs = DIST_DPTR_CS_G(dd);
    *clof = DIST_DPTR_CLOF_G(dd);
    *clos = DIST_DPTR_CLOS_G(dd);

    cn = DIST_DPTR_CN_G(dd);
  } else {

    /* loop bounds don't match section bounds */

    cn = I8(cyclic_setup)(d, dim, l, u, s, cl, cs, clof, clos);
  }

  /* upper bound follows from lower bound, trip count, and stride */
  *cu = *cl + (cn - 1) * (*cs);
  return cn;
}
836 
ENTFTN(CYCLIC_LOOP,cyclic_loop)837 void ENTFTN(CYCLIC_LOOP, cyclic_loop)(F90_Desc *d, __INT_T *dim, __INT_T *l,
838                                       __INT_T *u, __INT_T *s, __INT_T *cl,
839                                       __INT_T *cu, __INT_T *cs, __INT_T *clof,
840                                       __INT_T *clos)
841 {
842   __INT_T xcl, xcu, xcs, xclof, xclos;
843 
844   (void)I8(__fort_cyclic_loop)(d, *dim, *l, *u, *s, &xcl, &xcu, &xcs, &xclof,
845                                &xclos);
846   *cl = xcl;
847   *cu = xcu;
848   *cs = xcs;
849   *clof = xclof;
850   *clos = xclos;
851 }
852 
853 /* Given loop bounds l:u:s over dimension dim of array d (not
854    necessarily spanning the entire dimension), localize the loop to
855    the local block specified by cycle index ci (which must increment
856    through the outer cycle loop in order to cover all elements in the
857    l:u:s section). Set block loop bounds bl, bu. */
858 
/* Localize the loop l:u:s over dimension dim of section d to the
   local block selected by cycle index ci (see comment above), storing
   the clipped bounds in *bl, *bu. */
void I8(block_setup)(F90_Desc *d, int dim, __INT_T l, __INT_T u, int s, int ci,
                     __INT_T *bl, __INT_T *bu)
{
  DECL_DIM_PTRS(dd);
  __INT_T bb, lob, uob, m, n;
#if defined(DEBUG)
  __INT_T gl = l;
  __INT_T gu = u;

  if (d == NULL || F90_TAG_G(d) != __DESC)
    __fort_abort("block_setup: invalid descriptor");
  if (dim < 1 || dim > F90_RANK_G(d))
    __fort_abort("block_setup: invalid dimension");
  if (s == 0)
    __fort_abort("block_setup: invalid stride");
#endif

  SET_DIM_PTRS(dd, d, dim - 1);

  /* adjust lower bound to fall within array index range */

  m = s > 0 ? F90_DPTR_LBOUND_G(dd) - 1 : DPTR_UBOUND_G(dd) + 1;
  n = m - l + s;
  if (s != 1)
    n /= s;
  if (n < 0)
    n = 0;
  l += n * s;

  /* determine the owned block [lob, uob] for this distribution */
  switch (DFMT(d, dim)) {
  case DFMT_COLLAPSED:
  case DFMT_BLOCK:
  case DFMT_BLOCK_K:
  case DFMT_GEN_BLOCK:
    lob = DIST_DPTR_OLB_G(dd);
    uob = DIST_DPTR_OUB_G(dd);
    break;

  case DFMT_CYCLIC:
  case DFMT_CYCLIC_K:
    /* one block of the cycle identified by ci, adjusted for the
       template offset and stride */
    m = DIST_DPTR_TSTRIDE_G(dd);
    bb = DIST_DPTR_BLOCK_G(dd) - 1;
    if ((m ^ s) < 0)
      bb = -bb;
    lob = uob = ci - DIST_DPTR_TOFFSET_G(dd);
    if (s > 0)
      uob += bb;
    else
      lob += bb;
    if (m != 1) {
      lob = Ceil(lob, m);
      uob = Floor(uob, m);
    }
    break;

  default:
    __fort_abort("block_setup: unsupported dist-format");
  }

  /* clip l and u to the owned block, preserving stride alignment */
  if (s > 0) {
    if (l < lob) {
      if (s != 1)
        l += s * ((lob - l + s - 1) / s);
      else
        l = lob;
    }
    if (u > uob)
      u = uob;
  } else {
    if (l > uob) {
      if (s != -1)
        l += s * ((uob - l + s + 1) / s);
      else
        l = uob;
    }
    if (u < lob)
      u = lob;
  }
#if defined(DEBUG)
  if (__fort_test & DEBUG_DIST)
    printf("%d block dim=%d %d:%d:%d ci=%d -> %d:%d:%d\n", GET_DIST_LCPU, dim,
           gl, gu, s, ci, l, u, s);
#endif
  *bl = l;
  *bu = u;
}
945 
ENTFTN(BLOCK_LOOP,block_loop)946 void ENTFTN(BLOCK_LOOP, block_loop)(F90_Desc *d, __INT_T *dim, __INT_T *l,
947                                     __INT_T *u, __INT_T *s, __INT_T *ci,
948                                     __INT_T *bl, __INT_T *bu)
949 {
950   __INT_T xbl, xbu;
951 
952   I8(block_setup)(d, *dim, *l, *u, *s, *ci, &xbl, &xbu);
953   *bl = xbl;
954   *bu = xbu;
955 }
956 
957 /* Same as block_setup, but return loop trip count. */
958 
I8(__fort_block_loop)959 int I8(__fort_block_loop)(F90_Desc *d, int dim, __INT_T l, __INT_T u, int s,
960                           __INT_T ci, __INT_T *bl, __INT_T *bu)
961 {
962   int bn;
963 
964   I8(block_setup)(d, dim, l, u, s, ci, bl, bu);
965   bn = (*bu - *bl + s) / s;
966 
967   return bn;
968 }
969 
970 /* Set the lower and upper index bounds and return the number of local
971    elements for the local block at cycle index ci in dimension dim of
972    section d. Same as block_setup, but assumes a stride 1 loop over
973    the entire dimension and returns the loop trip count. */
974 
__INT_T
I8(__fort_block_bounds)(F90_Desc *d, __INT_T dim, __INT_T ci,
                       __INT_T *bl, __INT_T *bu)
{
  DECL_DIM_PTRS(dd);

  SET_DIM_PTRS(dd, d, dim - 1);
  /* stride-1 loop over the entire dimension */
  I8(block_setup)(d, dim, F90_DPTR_LBOUND_G(dd), DPTR_UBOUND_G(dd),
                    1, ci, bl, bu);
  /* number of local elements in the block */
  return *bu - *bl + 1;
}
986 
987 /* Given loop bounds and stride that range over dimension dim of array
988    a which does NOT have a cyclic distribution, return new loop bounds
989    bl, bu that localize the bounds to this processor. */
990 
/* Fortran entry: localize the loop *gl:*gu:*gs over dimension *gdim
   of array d (which must not be cyclically distributed — see comment
   above) to this processor's owned range, storing the clipped bounds
   in *bl and *bu. */
void ENTFTN(LOCALIZE_BOUNDS,
            localize_bounds)(F90_Desc *d, __INT_T *gdim, __INT_T *gl,
                             __INT_T *gu, __INT_T *gs, __INT_T *bl, __INT_T *bu)
{
  DECL_DIM_PTRS(dd);
  int dim, l, u, s, m, n, lob, uob;

  dim = *gdim;
  l = *gl;
  u = *gu;
  s = *gs;

#if defined(DEBUG)
  if (d == NULL || F90_TAG_G(d) != __DESC)
    __fort_abort("LOCALIZE_BOUNDS: invalid descriptor");
  if (dim < 1 || dim > F90_RANK_G(d))
    __fort_abort("LOCALIZE_BOUNDS: invalid dimension");
  if (s == 0)
    __fort_abort("LOCALIZE_BOUNDS: invalid stride");
#endif

  SET_DIM_PTRS(dd, d, dim - 1);

  /* adjust lower bound to fall within array index range */

  m = s > 0 ? F90_DPTR_LBOUND_G(dd) - 1 : DPTR_UBOUND_G(dd) + 1;
  n = m - l + s;
  if (s != 1)
    n /= s;
  if (n < 0)
    n = 0;
  l += n * s;

  /* clip to the owned range [lob, uob], keeping l aligned to the
     stride */
  lob = DIST_DPTR_OLB_G(dd);
  uob = DIST_DPTR_OUB_G(dd);
  if (s == 1) {
    if (l < lob)
      l = lob;
    if (u > uob)
      u = uob;
  } else if (s > 0) {
    if (l < lob)
      l += s * ((lob - l + s - 1) / s);
    if (u > uob)
      u = uob;
  } else {
    if (l > uob) {
      if (s != -1)
        l += s * ((uob - l + s + 1) / s);
      else
        l = uob;
    }
    if (u < lob)
      u = lob;
  }
  *bl = l;
  *bu = u;
}
1049 
1050 /* Create a new processor descriptor */
1051 
1052 static void
proc_setup(proc * p)1053 proc_setup(proc *p)
1054 {
1055   procdim *pd;
1056   int i, m, size;
1057   char msg[80];
1058 
1059   size = 1;
1060   for (i = 0; i < p->rank; ++i) {
1061     pd = &p->dim[i];
1062     pd->shape_shift = div_shift(pd->shape);
1063     pd->shape_recip = RECIP(pd->shape);
1064     pd->stride = size;
1065     size *= pd->shape;
1066   }
1067   p->size = size;
1068   if (p->base + size > GET_DIST_TCPUS) {
1069     sprintf(msg, "Too few processors.  Need %d, got %d.", p->base + size,
1070             GET_DIST_TCPUS);
1071     __fort_abort(msg);
1072   }
1073   m = GET_DIST_LCPU - p->base;
1074   if (m >= 0 && m < size) {
1075     for (i = 0; i < p->rank; ++i) {
1076       pd = &p->dim[i];
1077       RECIP_DIVMOD(&m, &pd->coord, m, pd->shape);
1078     }
1079   } else {
1080     for (i = 0; i < p->rank; ++i) {
1081       pd = &p->dim[i];
1082       pd->coord = -1;
1083     }
1084     p->flags |= __OFF_TEMPLATE;
1085   }
1086 }
1087 
ENTFTN(PROCESSORS,processors)1088 void ENTFTN(PROCESSORS, processors)(proc *p, __INT_T *rankp, ...)
1089 {
1090   va_list va;
1091   procdim *pd;
1092   __INT_T i, rank, shape[MAXDIMS];
1093 
1094   rank = *rankp;
1095 #if defined(DEBUG)
1096   if (rank < 0 || rank > MAXDIMS)
1097     __fort_abort("PROCESSORS: invalid rank");
1098 #endif
1099   p->tag = __PROC;
1100   p->rank = rank;
1101   p->flags = 0;
1102   p->base = 0;
1103   va_start(va, rankp);
1104   for (i = 0; i < rank; ++i) {
1105     pd = &p->dim[i];
1106     pd->shape = *va_arg(va, __INT_T *);
1107     if (pd->shape < 1)
1108       __fort_abort("PROCESSORS: invalid shape");
1109   }
1110   va_end(va);
1111   proc_setup(p);
1112 }
1113 
1114 /* Create a default processor grid of given rank.  Factor the number
1115    of processors into the squarest possible set of rank terms, in
1116    ascending order.  Default processor descriptors are cached by
1117    rank and never need to be freed. */
1118 
1119 #if !defined(DESC_I8)
1120 
/* cache of default processor descriptors, indexed by rank (0..MAXDIMS);
   entries are created on demand and never freed */
static proc *default_proc_list[MAXDIMS + 1] = {NULL};

/* small primes used to factor the processor count into a grid shape */
#define NPRIMES 31
static int prime[NPRIMES] = {2,  3,  5,  7,   11,  13,  17,  19,  23, 29, 31,
                             37, 41, 43, 47,  53,  59,  61,  67,  71, 73, 79,
                             83, 89, 97, 101, 103, 107, 109, 113, 127};
1127 
proc *
__fort_defaultproc(int rank)
{
  proc *p;
  int i, k, np, power[NPRIMES], shape[MAXDIMS];

  if (rank < 0 || rank > MAXDIMS)
    __fort_abort("DEFAULTPROC: invalid processor rank");

  if (rank == 0)
    rank = 1; /* substitute rank 1 for rank 0 */

  /* return the cached descriptor if one was already built for this rank */
  p = default_proc_list[rank];
  if (p != NULL)
    return p;

  for (i = 0; i < rank; ++i)
    shape[i] = 1;

  np = GET_DIST_TCPUS;
  if (rank > 1 && np > 1) {

    /* first determine the power of each prime factor */

    power[0] = 0; /* powers of two */
    while ((np & 1) == 0) {
      power[0]++;
      np >>= 1;
    }
    for (k = 1; k < NPRIMES && np >= prime[k]; ++k) {
      power[k] = 0;
      while (np % prime[k] == 0) {
        power[k]++;
        np /= prime[k];
      }
    }

    /* now construct the shape vector, using the prime factors
       from largest to smallest.  keep the shape vector sorted in
       ascending order at each step. */

    /* np now holds any residual factor not removed by the prime table;
       park it in the last (largest) slot */
    shape[rank - 1] = np;

    /* k was left one past the last prime examined by the loop above */
    while (--k >= 0) {
      while (--power[k] >= 0) {
        /* multiply the factor into the smallest dimension, then bubble
           it rightward to restore ascending order */
        shape[0] *= prime[k];
        for (i = 1; i < rank && shape[i - 1] > shape[i]; ++i) {
          int t = shape[i - 1];
          shape[i - 1] = shape[i];
          shape[i] = t;
        }
      }
    }
  } else if (rank == 1)
    shape[0] = np;

  /* set up the descriptor */

  /* allocate only as many procdim slots as the rank requires */
  p = (proc *)__fort_malloc(sizeof(proc) - (MAXDIMS - rank) * sizeof(procdim));
  p->tag = __PROC;
  p->rank = rank;
  p->flags = 0;
  p->base = 0;
  for (i = 0; i < rank; ++i)
    p->dim[i].shape = shape[i];
  proc_setup(p);
  default_proc_list[rank] = p;
  return p;
}
1197 
1198 /* rank 0 processor descriptor for the local processor */
1199 
1200 static proc *local_proc;
1201 
1202 proc *
__fort_localproc()1203 __fort_localproc()
1204 {
1205   if (local_proc == NULL) {
1206     local_proc =
1207         (proc *)__fort_malloc(sizeof(proc) - MAXDIMS * sizeof(procdim));
1208     local_proc->tag = __PROC;
1209     local_proc->rank = 0;
1210     local_proc->flags = __LOCAL;
1211     local_proc->base = GET_DIST_LCPU;
1212     proc_setup(local_proc);
1213   }
1214   return local_proc;
1215 }
1216 #endif
1217 
I8(__fort_set_alignment)1218 void I8(__fort_set_alignment)(F90_Desc *d, __INT_T dim, __INT_T lbound,
1219                               __INT_T ubound, __INT_T taxis, __INT_T tstride,
1220                               __INT_T toffset, ...)
1221 {
1222   DECL_DIM_PTRS(dd);
1223   __INT_T extent;
1224 
1225 #if defined(DEBUG)
1226   if (d == NULL || F90_TAG_G(d) != __DESC)
1227     __fort_abort("set_alignment: invalid descriptor");
1228   if (dim < 1 || dim > F90_RANK_G(d))
1229     __fort_abort("set_alignment: invalid dim");
1230 #endif
1231 
1232   extent = ubound - lbound + 1;
1233   if (extent < 0) {
1234     lbound = 1;
1235     ubound = 0;
1236     extent = 0;
1237   }
1238 
1239   SET_DIM_PTRS(dd, d, dim - 1);
1240 
1241   F90_DPTR_LBOUND_P(dd, lbound);
1242   DPTR_UBOUND_P(dd, ubound);
1243   F90_DPTR_SSTRIDE_P(dd, 1); /* section stride */
1244   F90_DPTR_SOFFSET_P(dd, 0); /* section offset */
1245   F90_DPTR_LSTRIDE_P(dd, 0);
1246 
1247 #if defined(DEBUG)
1248   if (__fort_test & DEBUG_DIST)
1249     printf("%d set_align dim=%d lb=%d ub=%d tx=%d st=%d of=%d"
1250            " tlb=%d tub=%d clb=%d cno=%d olb=%d oub=%d\n",
1251            GET_DIST_LCPU, dim, F90_DPTR_LBOUND_G(dd), DPTR_UBOUND_G(dd),
1252            DIST_DPTR_TAXIS_G(dd), DIST_DPTR_TSTRIDE_G(dd), DIST_DPTR_TOFFSET_G(dd),
1253            DIST_DPTR_TLB_G(dd), DIST_DPTR_TUB_G(dd), DIST_DPTR_CLB_G(dd),
1254            DIST_DPTR_CNO_G(dd), DIST_DPTR_OLB_G(dd), DIST_DPTR_OUB_G(dd));
1255 #endif
1256 }
1257 
1258 /* Set the local bounds for dimension dim of array d to use the local
1259    storage associated with the corresponding dimension of array a. */
1260 
void I8(__fort_use_allocation)(F90_Desc *d, __INT_T dim, __INT_T no, __INT_T po,
                               F90_Desc *a)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T aextent, dextent, k, lof;

#if defined(DEBUG)
  if (d == NULL || F90_TAG_G(d) != __DESC)
    __fort_abort("use_allocation: invalid descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("use_allocation: invalid array descriptor");
  if (F90_RANK_G(a) != F90_RANK_G(d))
    __fort_abort("use_allocation: descriptor ranks differ");
  if (dim < 1 || dim > F90_RANK_G(d))
    __fort_abort("use_allocation: invalid dim");
#endif

  /* array descriptor; not template */
  F90_FLAGS_P(d, F90_FLAGS_G(d) & ~__TEMPLATE);

  DIST_NONSEQUENCE_P(d, DIST_NONSEQUENCE_G(a));

  SET_DIM_PTRS(dd, d, dim - 1);
  SET_DIM_PTRS(ad, a, dim - 1);

#if defined(DEBUG)
  if (F90_DPTR_SSTRIDE_G(ad) != 1)
    __fort_abort("use_allocation: can't use strided section");
  if (no > DIST_DPTR_NO_G(ad) || po > DIST_DPTR_PO_G(ad))
    __fort_abort("use_allocation: can't increase overlaps");
#endif

  if (DPTR_UBOUND_G(dd) < F90_DPTR_LBOUND_G(dd)) { /* zero-size */
#if defined(DEBUG)
    if (DIST_DPTR_OUB_G(dd) != DIST_DPTR_OLB_G(dd) - 1 ||
        DIST_DPTR_UAB_G(dd) != DIST_DPTR_LAB_G(dd) - 1)
      __fort_abort("use_allocation: bad bounds for zero-size");

#endif
    /* zero-size dimension carries no overlap or cyclic offset */
    DIST_DPTR_NO_P(dd, 0);     /* negative overlap allowance */
    DIST_DPTR_PO_P(dd, 0);     /* positive overlap allowance */
    DIST_DPTR_COFSTR_P(dd, 0); /* cyclic offset stride */
  } else {
    DIST_DPTR_NO_P(dd, no); /* negative overlap allowance */
    DIST_DPTR_PO_P(dd, po); /* positive overlap allowance */

    /* k = index shift between d's and a's storage in this dimension */
    if (~F90_FLAGS_G(a) & F90_FLAGS_G(d) & __LOCAL) { /* coercing to local */
      DIST_DPTR_COFSTR_P(dd, 0);
      k = DIST_DPTR_OLB_G(dd) - DIST_DPTR_OLB_G(ad);
    } else {
      DIST_DPTR_COFSTR_P(dd, DIST_DPTR_COFSTR_G(ad));
      k = F90_DPTR_LBOUND_G(dd) - F90_DPTR_LBOUND_G(ad);
    }
    k -= F90_DPTR_SOFFSET_G(ad);
    /* derive d's allocated bounds from a's, narrowed by any reduction
       in the overlap allowances, then shifted by k */
    DIST_DPTR_LAB_P(dd, (DIST_DPTR_LAB_G(ad) + (DIST_DPTR_NO_G(ad) - no) + k));
    DIST_DPTR_UAB_P(dd, (DIST_DPTR_UAB_G(ad) - (DIST_DPTR_PO_G(ad) - po) + k));

#if defined(DEBUG)
    aextent = DIST_DPTR_UAB_G(ad) - DIST_DPTR_LAB_G(ad) + 1;
    dextent = DIST_DPTR_UAB_G(dd) - DIST_DPTR_LAB_G(dd) + 1;
    if (dim < F90_RANK_G(d)) {
      /* inner dimensions must keep their allocated extent so the
         higher-dimension strides still line up */
      if (aextent != dextent)
        __fort_abort("use_allocation: allocated extent changed");
    } else if (dextent > aextent)
      __fort_abort("use_allocation: allocated extent increased");
#endif
  }

#if defined(DEBUG)
  if (__fort_test & DEBUG_DIST)
    printf("%d use_alloc dim=%d lb=%d ub=%d no=%d po=%d"
           " lab=%d uab=%d cofstr=%d\n",
           GET_DIST_LCPU, dim, F90_DPTR_LBOUND_G(dd), DPTR_UBOUND_G(dd),
           DIST_DPTR_NO_G(dd), DIST_DPTR_PO_G(dd), DIST_DPTR_LAB_G(dd),
           DIST_DPTR_UAB_G(dd), DIST_DPTR_COFSTR_G(dd));
#endif
}
1339 
1340 /* Map descriptor d onto the single/scalar-subscript coordinate idx in
1341    axis dim of template/array a.  For a scalar subscript, adjust the
1342    index base offsets */
1343 
I8(__fort_set_single)1344 void I8(__fort_set_single)(F90_Desc *d, F90_Desc *a, __INT_T dim, __INT_T idx,
1345                            _set_single_enum what)
1346 {
1347   DECL_DIM_PTRS(ad);
1348   __INT_T k, lidx;
1349 
1350 #if defined(DEBUG)
1351   if (a == NULL || F90_TAG_G(a) != __DESC)
1352     __fort_abort("set_single: invalid array descriptor");
1353   if (dim < 1 || dim > F90_RANK_G(a))
1354     __fort_abort("set_single: invalid array dimension");
1355 #endif
1356 
1357   SET_DIM_PTRS(ad, a, dim - 1);
1358 
1359   /* localize scalar subscript */
1360   if (what != __SINGLE) {
1361     /* adjust index base offsets */
1362     lidx = F90_DPTR_SSTRIDE_G(ad) * idx + F90_DPTR_SOFFSET_G(ad);
1363     k = F90_DPTR_LSTRIDE_G(ad) * (lidx - F90_DPTR_LBOUND_G(ad));
1364     F90_LBASE_P(d, F90_LBASE_G(d) + k - DIST_DPTR_LOFFSET_G(ad));
1365   }
1366 
1367 #if defined(DEBUG)
1368   if (__fort_test & DEBUG_DIST)
1369     printf("%d set_single dim=%d idx=%d pbase=%d lbase=%d scoff=%d%s\n",
1370            GET_DIST_LCPU, dim, idx, DIST_PBASE_G(d), F90_LBASE_G(d),
1371            DIST_SCOFF_G(d),
1372            F90_FLAGS_G(d) & __OFF_TEMPLATE ? " OFF_TEMPLATE" : "");
1373 #endif
1374 }
1375 
1376 /* Compute the global array size, local array size, local index
1377    multiplier and offset, and local index base offset.  This routine
1378    should not be called for templates. */
1379 
I8(__fort_finish_descriptor)1380 void I8(__fort_finish_descriptor)(F90_Desc *d)
1381 {
1382   DECL_DIM_PTRS(dd);
1383   __INT_T gsize, i, lextent, lsize, lbase;
1384   __INT_T rank = F90_RANK_G(d);
1385 
1386   gsize = lsize = lbase = 1;
1387   for (i = 0; i < rank; ++i) {
1388     SET_DIM_PTRS(dd, d, i);
1389     gsize *= F90_DPTR_EXTENT_G(dd);
1390     F90_DPTR_LSTRIDE_P(dd, lsize);
1391     DIST_DPTR_LOFFSET_P(dd,
1392                        -lsize * DIST_DPTR_LAB_G(dd)); /* local index offset */
1393     lbase -= lsize * F90_DPTR_LBOUND_G(dd);
1394     lextent = F90_DPTR_EXTENT_G(dd);
1395     if (lextent > 0)
1396       lsize *= lextent;
1397     else
1398       lsize = 0;
1399   }
1400   F90_GSIZE_P(d, gsize); /* global array size */
1401   F90_LSIZE_P(d, lsize); /* local array size */
1402   F90_LBASE_P(d, lbase);
1403 
1404   /* global heap block multiplier is the per-processor heap block
1405      size divided by the data item length */
1406 
1407   if (__fort_heap_block > 0 && F90_LEN_G(d) > 0) {
1408     if (F90_KIND_G(d) == __STR || F90_KIND_G(d) == __DERIVED ||
1409         F90_KIND_G(d) == __NONE)
1410       DIST_HEAPB_P(d, __fort_heap_block / F90_LEN_G(d));
1411     else
1412       DIST_HEAPB_P(d, __fort_heap_block >> GET_DIST_SHIFTS(F90_KIND_G(d)));
1413     if (DIST_HEAPB_G(d) <= 0)
1414       __fort_abort("heap block overflow; -heapz too large");
1415   } else
1416     DIST_HEAPB_P(d, 0);
1417 }
1418 
1419 /* Map axis ddim of descriptor d onto section l:u:s of dimension adim
1420    of array a. */
1421 
1422 /* for F90 */
I8(__fort_set_sectionx)1423 void I8(__fort_set_sectionx)(F90_Desc *d, __INT_T ddim, F90_Desc *a,
1424                              __INT_T adim, __INT_T l, __INT_T u, __INT_T s,
1425                              __INT_T noreindex)
1426 {
1427   DECL_DIM_PTRS(dd);
1428   DECL_DIM_PTRS(ad);
1429   __INT_T extent, myoffset;
1430 
1431 #if defined(DEBUG)
1432   if (a == NULL || F90_TAG_G(a) != __DESC)
1433     __fort_abort("set_section: invalid array descriptor");
1434   if (adim < 1 || adim > F90_RANK_G(a))
1435     __fort_abort("set_section: invalid array dimension");
1436 #endif
1437 
1438   SET_DIM_PTRS(ad, a, adim - 1);
1439   SET_DIM_PTRS(dd, d, ddim - 1);
1440 
1441 #if defined(DEBUG)
1442   if ((F90_FLAGS_G(d) & (__SEQUENCE | __ASSUMED_SIZE | __BOGUSBOUNDS)) == 0 &&
1443       (l < F90_DPTR_LBOUND_G(ad) || u > DPTR_UBOUND_G(ad)))
1444     __fort_abort("set_section: index out of bounds");
1445 #endif
1446 
1447   extent = u - l + s; /* section extent */
1448   if (s != 1) {
1449     if (s == -1)
1450       extent = -extent;
1451     else
1452       extent /= s;
1453   }
1454   if (extent < 0)
1455     extent = 0;
1456 
1457   if (noreindex && s == 1) {
1458     F90_DPTR_LBOUND_P(dd, l);                   /* lower bound */
1459     DPTR_UBOUND_P(dd, extent == 0 ? l - 1 : u); /* upper bound */
1460     myoffset = 0;
1461   } else {
1462     F90_DPTR_LBOUND_P(dd, 1);  /* lower bound */
1463     DPTR_UBOUND_P(dd, extent); /* upper bound */
1464     myoffset = l - s;
1465   }
1466 
1467 /* adjust section stride and offset; local array index mapping is
1468    unchanged */
1469 
1470   /* no longer need section stride/section offset */
1471   F90_DPTR_SSTRIDE_P(dd, F90_DPTR_SSTRIDE_G(ad));
1472   F90_DPTR_SOFFSET_P(dd, (F90_DPTR_SOFFSET_G(ad)));
1473   F90_DPTR_LSTRIDE_P(dd, F90_DPTR_LSTRIDE_G(ad) * s);
1474 
1475 #if defined(DEBUG)
1476   if (__fort_test & DEBUG_DIST)
1477     printf("%d set_section %d(%d:%d)->%d(%d:%d:%d) o(%d:%d) a(%d:%d)"
1478            " %dx+%d lbase=%d scoff=%d F90_DPTR_SOFFSET_G(ad)=%d"
1479            " F90_DPTR_SOFFSET_G(dd)=%d\n",
1480            GET_DIST_LCPU, ddim, F90_DPTR_LBOUND_G(dd), DPTR_UBOUND_G(dd), adim,
1481            l, u, s, DIST_DPTR_OLB_G(dd), DIST_DPTR_OUB_G(dd), DIST_DPTR_LAB_G(dd),
1482            DIST_DPTR_UAB_G(dd), F90_DPTR_LSTRIDE_G(dd), DIST_DPTR_LOFFSET_G(dd),
1483            F90_LBASE_G(d), DIST_SCOFF_G(d), F90_DPTR_SOFFSET_G(ad),
1484            F90_DPTR_SOFFSET_G(dd));
1485 #endif
1486 }
1487 
/* Map axis ddim of descriptor d onto section l:u:s of dimension adim of
   array a, with noreindex = 0 (section bounds are renumbered from 1). */
void I8(__fort_set_section)(F90_Desc *d, __INT_T ddim, F90_Desc *a,
                            __INT_T adim, __INT_T l, __INT_T u, __INT_T s)
{
  __DIST_SET_SECTIONX(d, ddim, a, adim, l, u, s, 0);
}
1493 
1494 /* Compute the global section size.  Scalar subscript offset, local
1495    base, and local index offsets were adjusted in set_section.  Local
1496    size does not change. */
1497 
I8(__fort_finish_section)1498 void I8(__fort_finish_section)(F90_Desc *d)
1499 {
1500   DECL_DIM_PTRS(dd);
1501   __INT_T gsize, i;
1502   __INT_T rank = F90_RANK_G(d);
1503 
1504   if (DIST_NONSEQUENCE_G(d))
1505     F90_FLAGS_P(d, F90_FLAGS_G(d) & ~__SEQUENCE);
1506 
1507   gsize = 1;
1508   for (i = 0; i < rank; ++i) {
1509     gsize *= F90_DIM_EXTENT_G(d, i);
1510   }
1511   F90_GSIZE_P(d, gsize); /* global section size */
1512 }
1513 
1514 /* Create a new descriptor for a section of array a.  Variable length
1515    argument list gives lower, upper, and stride for each array
1516    dimension followed by a bitmask indicating vector dimensions (not
1517    scalar subscripts.) */
1518 
#define BOGUSFLAG 0x100 /* flags bit: bounds are placeholders; section setup is deferred until copy */
1520 
1521 /* for F90 */
/* for F90 */
void ENTFTN(SECT, sect)(F90_Desc *d, F90_Desc *a,
                        ...) /* ... = {lower, upper, stride,}* flags */
{
  va_list va;
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, flags, rank;
  __INT_T lower[MAXDIMS], upper[MAXDIMS], stride[MAXDIMS];
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* get bounds, strides, and flags argument */

  /* one (lower, upper, stride) triple per array dimension, each passed
     by reference, followed by the flags word */
  va_start(va, a);
  wrk_rank = F90_RANK_G(a);
  for (ax = 0; ax < wrk_rank; ++ax) {
    lower[ax] = *va_arg(va, __INT_T *);
    upper[ax] = *va_arg(va, __INT_T *);
    stride[ax] = *va_arg(va, __INT_T *);
  }
  flags = *va_arg(va, __INT_T *);
  va_end(va);

/* determine section rank - popcnt of flags bits */

#if MAXDIMS != 7
  __fort_abort("SECT: need to recode for different MAXDIMS");
#endif
  /* parallel bit-count of the low 7 flag bits; each set bit marks a
     vector (non-scalar-subscript) dimension */
  rank = (flags & 0x55) + (flags >> 1 & 0x15);
  rank = (rank & 0x33) + (rank >> 2 & 0x13);
  rank += rank >> 4;
  rank &= 0x7;

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);
  if (F90_LEN_G(d) == GET_DIST_SIZE_OF(F90_KIND_G(d)))
    F90_FLAGS_P(d, F90_FLAGS_G(d) | __SEQUENTIAL_SECTION);

  /* bogus bounds: defer section setup until copy */

  gsize = 1;
  if (flags & BOGUSFLAG) {
    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        /* vector dimension: record the given bounds/stride verbatim */
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        /* any stride break means the section is no longer contiguous */
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        /* scalar subscript dimension */
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    return;
  }

  /* normal section : set up each dimension and compute GSIZE*/

  wrk_rank = F90_RANK_G(a);
  for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
    if ((flags >> (ax - 1)) & 1) {
      dx++;
      __DIST_SET_SECTIONXX(d, dx, a, ax, lower[ax - 1], upper[ax - 1],
                          stride[ax - 1], (flags & __NOREINDEX), gsize);
    } else {
      I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
  }
  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
}
1609 
/* ASECTION invokes DIST_SET_SECTIONXX to
 * set bounds, strides; dx and gsize are updated directly.
 * When the flags bit for axis ax is clear, lb is treated as a scalar
 * subscript instead.  NOTE(review): the expansion is a bare if/else,
 * not wrapped in do { } while (0) — do not use as the unbraced body of
 * an outer if. */
#define ASECTION(d, dx, a, ax, lb, ub, st, gsize, flags)                       \
  if (flags & (1 << (ax - 1))) {                                               \
    dx++;                                                                      \
    __DIST_SET_SECTIONXX(d, dx, a, ax, lb, ub, st, (flags & __NOREINDEX),       \
                        gsize);                                                \
  } else                                                                       \
    I8(__fort_set_single)(d, a, ax, lb, __SCALAR);
1619 
/* TSECTION is used when the address to be used
 * is the address of the first element of the section: the dimension is
 * renumbered 1:extent with unit section stride/zero offset, and LBASE
 * is adjusted down by the dimension's new local stride.  Expands only
 * when the flags bit for axis ax is set; dx and gsize are updated in
 * place.  A dimension whose local stride differs from the running size
 * clears the __SEQUENTIAL_SECTION flag. */
#define TSECTION(d, dx, a, ax, lb, ub, st, gsize, flags)                       \
  if (flags & (1 << (ax - 1))) {                                               \
    DECL_DIM_PTRS(__dd);                                                       \
    DECL_DIM_PTRS(__ad);                                                       \
    __INT_T __extent, __myoffset, u, l, s;                                     \
    dx++;                                                                      \
    SET_DIM_PTRS(__ad, a, ax - 1);                                             \
    SET_DIM_PTRS(__dd, d, dx - 1);                                             \
    l = lb;                                                                    \
    u = ub;                                                                    \
    s = st;                                                                    \
    __extent = u - l + s; /* section extent */                                 \
    if (s != 1) {                                                              \
      if (s == -1) {                                                           \
        __extent = -__extent;                                                  \
      } else {                                                                 \
        __extent /= s;                                                         \
      }                                                                        \
    }                                                                          \
    if (__extent < 0) {                                                        \
      __extent = 0;                                                            \
    }                                                                          \
    F90_DPTR_LBOUND_P(__dd, 1);                                                \
    DPTR_UBOUND_P(__dd, __extent);                                             \
    F90_DPTR_SSTRIDE_P(__dd, 1);                                               \
    F90_DPTR_SOFFSET_P(__dd, 0);                                               \
    F90_DPTR_LSTRIDE_P(__dd, F90_DPTR_LSTRIDE_G(__ad) * s);                    \
    F90_LBASE_P(d, F90_LBASE_G(d) - F90_DPTR_LSTRIDE_G(__dd));                 \
    if (F90_DPTR_LSTRIDE_G(__dd) != gsize)                                     \
      F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));                \
    gsize *= __extent;                                                         \
  }
1654 
1655 /* for F90 */
/* for F90 */
void ENTF90(SECT, sect)(F90_Desc *d, F90_Desc *a, __INT_T *prank,
                        ...) /* ... = {lower, upper, stride,}* flags */
{
  va_list va;
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, flags, rank;
  __INT_T lower[MAXDIMS], upper[MAXDIMS], stride[MAXDIMS];
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* get bounds, strides, and flags argument */

  /* one (lower, upper, stride) triple per dimension (count *prank),
     each passed by reference, followed by the flags word */
  va_start(va, prank);
  wrk_rank = *prank;
  for (ax = 0; ax < wrk_rank; ++ax) {
    lower[ax] = *va_arg(va, __INT_T *);
    upper[ax] = *va_arg(va, __INT_T *);
    stride[ax] = *va_arg(va, __INT_T *);
  }
  flags = *va_arg(va, __INT_T *);
  va_end(va);

/* determine section rank - popcnt of flags bits */

#if MAXDIMS != 7
  __fort_abort("SECT: need to recode for different MAXDIMS");
#endif
  /* parallel bit-count of the low 7 flag bits; each set bit marks a
     vector (non-scalar-subscript) dimension */
  rank = (flags & 0x55) + (flags >> 1 & 0x15);
  rank = (rank & 0x33) + (rank >> 2 & 0x13);
  rank += rank >> 4;
  rank &= 0x7;

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);
  if (F90_LEN_G(d) == GET_DIST_SIZE_OF(F90_KIND_G(d)))
    F90_FLAGS_P(d, F90_FLAGS_G(d) | __SEQUENTIAL_SECTION);

  /* bogus bounds: defer section setup until copy */

  gsize = 1;
  if (flags & BOGUSFLAG) {
    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        /* vector dimension: record the given bounds/stride verbatim */
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        /* scalar subscript dimension */
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    F90_LSIZE_P(d, gsize); /* local section size */
    return;
  }

  /* normal section : set up each dimension and compute GSIZE*/

  wrk_rank = F90_RANK_G(a);
  if (flags & __SECTZBASE) {
    /* rebase so element addressing starts from the section's first
       element (see TSECTION) */
    F90_LBASE_P(d, 1);
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      TSECTION(d, dx, a, ax, lower[ax - 1], upper[ax - 1], stride[ax - 1],
               gsize, flags);
    }
  } else {
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      ASECTION(d, dx, a, ax, lower[ax - 1], upper[ax - 1], stride[ax - 1],
               gsize, flags);
    }
  }
  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
  F90_LSIZE_P(d, gsize); /* local section size */
}
1748 
1749 /* for F90 */
/* for F90 */
/* Rank-1 specialization of ENTF90(SECT): the single dimension's bounds
   and stride are passed by reference instead of as varargs. */
void ENTF90(SECT1, sect1)(F90_Desc *d, F90_Desc *a, __INT_T *prank,
                          /* ... = {lower, upper, stride,}* flags */
                          __INT_T *lw0, __INT_T *up0, __INT_T *st0,
                          __INT_T *bfg)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, flags, rank;
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* get flags argument */

  flags = *bfg;

#if MAXDIMS != 7
  __fort_abort("SECT: need to recode for different MAXDIMS");
#endif
  /* determine section rank - popcnt of flags bits */
  /* rank is at most 1 */
  rank = (flags & 0x1);

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);

  /* bogus bounds: defer section setup until copy */

  gsize = 1;
  if (flags & BOGUSFLAG) {
    __INT_T lower[1], upper[1], stride[1];
    lower[0] = *lw0;
    upper[0] = *up0;
    stride[0] = *st0;

    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    /* NOTE(review): the loop runs to F90_RANK_G(a) but the local
       bounds arrays hold a single element; assumes a is rank 1 here —
       confirm against callers. */
    wrk_rank = F90_RANK_G(a);
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        /* vector dimension: record the given bounds/stride verbatim */
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        /* scalar subscript dimension */
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    F90_LSIZE_P(d, gsize); /* local section size */
    return;
  }

  dx = 0;
  if (flags & __SECTZBASE) {
    /* rebase so element addressing starts from the section's first
       element (see TSECTION) */
    F90_LBASE_P(d, 1);
    TSECTION(d, dx, a, 1, *lw0, *up0, *st0, gsize, flags);
  } else {
    ASECTION(d, dx, a, 1, *lw0, *up0, *st0, gsize, flags);
  }

  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
  F90_LSIZE_P(d, gsize); /* local section size */
}
1827 
1828 /* for F90 */
/* for F90 */
/* Same as ENTF90(SECT1) but with the bounds, stride, and flags passed
   by value. */
void ENTF90(SECT1v, sect1v)(F90_Desc *d, F90_Desc *a, __INT_T *prank,
                            /* ... = {lower, upper, stride,}* flags */
                            __INT_T lw0, __INT_T up0, __INT_T st0,
                            __INT_T flags)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, rank;
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* determine section rank - popcnt of flags bits */

  /* rank is at most 1 */
  rank = (flags & 0x1);

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);
  gsize = 1;

  /* bogus bounds: defer section setup until copy */

  if (flags & BOGUSFLAG) {
    __INT_T lower[1], upper[1], stride[1];
    lower[0] = lw0;
    upper[0] = up0;
    stride[0] = st0;

    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    /* NOTE(review): the loop runs to F90_RANK_G(a) but the local
       bounds arrays hold a single element; assumes a is rank 1 here —
       confirm against callers. */
    wrk_rank = F90_RANK_G(a);
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        /* vector dimension: record the given bounds/stride verbatim */
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        /* scalar subscript dimension */
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    F90_LSIZE_P(d, gsize); /* local section size */
    return;
  }

  /* normal section : set up each dimension and compute GSIZE*/

  dx = 0;
  if (flags & __SECTZBASE) {
    /* rebase so element addressing starts from the section's first
       element (see TSECTION) */
    F90_LBASE_P(d, 1);
    TSECTION(d, dx, a, 1, lw0, up0, st0, gsize, flags);
  } else {
    ASECTION(d, dx, a, 1, lw0, up0, st0, gsize, flags);
  }

  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
  F90_LSIZE_P(d, gsize); /* local section size */
}
1902 
1903 /* for F90 */
/* F90 rank-2 array section setup: build section descriptor d from array
   descriptor a and two subscript triplets.  Bounds/strides arrive by
   reference; *bfg packs one "this dimension is a section" bit per array
   dimension plus control bits (BOGUSFLAG, __SECTZBASE, __NOREINDEX). */
void ENTF90(SECT2, sect2)(F90_Desc *d, F90_Desc *a, __INT_T *prank,
                          /* ... = {lower, upper, stride,}* flags */
                          __INT_T *lw0, __INT_T *up0, __INT_T *st0,
                          __INT_T *lw1, __INT_T *up1, __INT_T *st1,
                          __INT_T *bfg)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, flags, rank;
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* get flags argument */

  flags = *bfg;

#if MAXDIMS != 7
  __fort_abort("SECT: need to recode for different MAXDIMS");
#endif
  /* determine section rank - popcnt of flags bits */
  /* rank is at most 2 */
  rank = (flags & 0x1) + (flags >> 1 & 0x1);

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);

  /* bogus bounds: defer section setup until copy */

  gsize = 1;
  if (flags & BOGUSFLAG) {
    /* NOTE(review): lower/upper/stride hold only 2 entries, so this
       path assumes F90_RANK_G(a) <= 2 — confirm against callers. */
    __INT_T lower[2], upper[2], stride[2];
    lower[0] = *lw0;
    upper[0] = *up0;
    stride[0] = *st0;
    lower[1] = *lw1;
    upper[1] = *up1;
    stride[1] = *st1;

    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    /* ax walks array dims (1-based); dx counts section dims */
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        /* non-unit stride or non-contiguous layout: clear sequential flag */
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        /* flag bit clear: dimension reduced by a scalar subscript */
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    F90_LSIZE_P(d, gsize); /* global section size */
    return;
  }

  /* normal section: set up each dimension and accumulate gsize */
  dx = 0;
  if (flags & __SECTZBASE) {
    /* __SECTZBASE: reset local base; TSECTION path */
    F90_LBASE_P(d, 1);
    TSECTION(d, dx, a, 1, *lw0, *up0, *st0, gsize, flags);
    TSECTION(d, dx, a, 2, *lw1, *up1, *st1, gsize, flags);
  } else {
    ASECTION(d, dx, a, 1, *lw0, *up0, *st0, gsize, flags);
    ASECTION(d, dx, a, 2, *lw1, *up1, *st1, gsize, flags);
  }

  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
  F90_LSIZE_P(d, gsize); /* global section size */
}
1987 
1988 /* for F90 */
/* By-value variant of ENTF90(SECT2,sect2): identical logic, but the
   triplet bounds/strides and the flags word are passed by value. */
void ENTF90(SECT2v, sect2v)(F90_Desc *d, F90_Desc *a, __INT_T *prank,
                            /* ... = {lower, upper, stride,}* flags */
                            __INT_T lw0, __INT_T up0, __INT_T st0, __INT_T lw1,
                            __INT_T up1, __INT_T st1, __INT_T flags)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, rank;
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* determine section rank - popcnt of flags bits */

  /* rank is at most 2 */
  rank = (flags & 0x1) + (flags >> 1 & 0x1);

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);
  gsize = 1;

  /* bogus bounds: defer section setup until copy */

  if (flags & BOGUSFLAG) {
    /* NOTE(review): arrays hold only 2 entries — assumes
       F90_RANK_G(a) <= 2; confirm against callers. */
    __INT_T lower[2], upper[2], stride[2];
    lower[0] = lw0;
    upper[0] = up0;
    stride[0] = st0;
    lower[1] = lw1;
    upper[1] = up1;
    stride[1] = st1;

    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    /* ax walks array dims (1-based); dx counts section dims */
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        /* non-unit stride or non-contiguous layout: clear sequential flag */
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        /* flag bit clear: dimension reduced by a scalar subscript */
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    F90_LSIZE_P(d, gsize); /* global section size */
    return;
  }

  /* normal section : set up each dimension and compute GSIZE*/

  dx = 0;
  if (flags & __SECTZBASE) {
    /* __SECTZBASE: reset local base; TSECTION path */
    F90_LBASE_P(d, 1);
    TSECTION(d, dx, a, 1, lw0, up0, st0, gsize, flags);
    TSECTION(d, dx, a, 2, lw1, up1, st1, gsize, flags);
  } else {
    ASECTION(d, dx, a, 1, lw0, up0, st0, gsize, flags);
    ASECTION(d, dx, a, 2, lw1, up1, st1, gsize, flags);
  }

  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
  F90_LSIZE_P(d, gsize); /* global section size */
}
2067 
2068 /* for F90 */
/* F90 rank-3 array section setup: build section descriptor d from array
   descriptor a and three subscript triplets passed by reference.
   *bfg packs per-dimension section bits plus control bits. */
void ENTF90(SECT3, sect3)(F90_Desc *d, F90_Desc *a, __INT_T *prank,
                          /* ... = {lower, upper, stride,}* flags */
                          __INT_T *lw0, __INT_T *up0, __INT_T *st0,
                          __INT_T *lw1, __INT_T *up1, __INT_T *st1,
                          __INT_T *lw2, __INT_T *up2, __INT_T *st2,
                          __INT_T *bfg)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, flags, rank;
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* get flags argument */

  flags = *bfg;

#if MAXDIMS != 7
  __fort_abort("SECT: need to recode for different MAXDIMS");
#endif
  /* determine section rank - popcnt of flags bits */
  /* rank is at most 3: two-step add of bits 0,1,2 */
  rank = (flags & 0x5) + (flags >> 1 & 0x1);
  rank = (rank & 0x3) + (rank >> 2 & 0x1);

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);

  /* bogus bounds: defer section setup until copy */

  gsize = 1;
  if (flags & BOGUSFLAG) {
    __INT_T lower[MAXDIMS], upper[MAXDIMS], stride[MAXDIMS];
    lower[0] = *lw0;
    upper[0] = *up0;
    stride[0] = *st0;
    lower[1] = *lw1;
    upper[1] = *up1;
    stride[1] = *st1;
    lower[2] = *lw2;
    upper[2] = *up2;
    stride[2] = *st2;

    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    /* ax walks array dims (1-based); dx counts section dims */
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        /* non-unit stride or non-contiguous layout: clear sequential flag */
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        /* flag bit clear: dimension reduced by a scalar subscript */
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    F90_LSIZE_P(d, gsize); /* global section size */
    return;
  }

  /* normal section: set up each dimension and accumulate gsize */
  dx = 0;
  if (flags & __SECTZBASE) {
    /* __SECTZBASE: reset local base; TSECTION path */
    F90_LBASE_P(d, 1);
    TSECTION(d, dx, a, 1, *lw0, *up0, *st0, gsize, flags);
    TSECTION(d, dx, a, 2, *lw1, *up1, *st1, gsize, flags);
    TSECTION(d, dx, a, 3, *lw2, *up2, *st2, gsize, flags);
  } else {
    ASECTION(d, dx, a, 1, *lw0, *up0, *st0, gsize, flags);
    ASECTION(d, dx, a, 2, *lw1, *up1, *st1, gsize, flags);
    ASECTION(d, dx, a, 3, *lw2, *up2, *st2, gsize, flags);
  }

  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
  F90_LSIZE_P(d, gsize); /* global section size */
}
2159 
2160 /* for F90 */
/* By-value variant of ENTF90(SECT3,sect3): identical logic, but the
   triplet bounds/strides and the flags word are passed by value. */
void ENTF90(SECT3v, sect3v)(F90_Desc *d, F90_Desc *a, __INT_T *prank,
                            /* ... = {lower, upper, stride,}* flags */
                            __INT_T lw0, __INT_T up0, __INT_T st0, __INT_T lw1,
                            __INT_T up1, __INT_T st1, __INT_T lw2, __INT_T up2,
                            __INT_T st2, __INT_T flags)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, rank;
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* determine section rank - popcnt of flags bits */

  /* rank is at most 3: two-step add of bits 0,1,2 */
  rank = (flags & 0x5) + (flags >> 1 & 0x1);
  rank = (rank & 0x3) + (rank >> 2 & 0x1);

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);
  gsize = 1;

  /* bogus bounds: defer section setup until copy */

  if (flags & BOGUSFLAG) {
    /* NOTE(review): arrays hold only 3 entries — assumes
       F90_RANK_G(a) <= 3; confirm against callers (SECT3 uses
       MAXDIMS-sized arrays here). */
    __INT_T lower[3], upper[3], stride[3];
    lower[0] = lw0;
    upper[0] = up0;
    stride[0] = st0;
    lower[1] = lw1;
    upper[1] = up1;
    stride[1] = st1;
    lower[2] = lw2;
    upper[2] = up2;
    stride[2] = st2;

    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    /* ax walks array dims (1-based); dx counts section dims */
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        /* non-unit stride or non-contiguous layout: clear sequential flag */
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        /* flag bit clear: dimension reduced by a scalar subscript */
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    F90_LSIZE_P(d, gsize); /* global section size */
    return;
  }

  /* normal section : set up each dimension and compute GSIZE*/

  dx = 0;
  if (flags & __SECTZBASE) {
    /* __SECTZBASE: reset local base; TSECTION path */
    F90_LBASE_P(d, 1);
    TSECTION(d, dx, a, 1, lw0, up0, st0, gsize, flags);
    TSECTION(d, dx, a, 2, lw1, up1, st1, gsize, flags);
    TSECTION(d, dx, a, 3, lw2, up2, st2, gsize, flags);
  } else {
    ASECTION(d, dx, a, 1, lw0, up0, st0, gsize, flags);
    ASECTION(d, dx, a, 2, lw1, up1, st1, gsize, flags);
    ASECTION(d, dx, a, 3, lw2, up2, st2, gsize, flags);
  }

  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
  F90_LSIZE_P(d, gsize); /* global section size */
}
2246 
2247 #undef ASECTION
2248 #undef TSECTION
2249 
/* BSECTION: configure dimension ax of section descriptor d from array
   descriptor a and triplet lb:ub:st.  Updates dx (section-dimension
   counter) and gsize (running global size) directly via
   __DIST_SET_SECTIONXX; if the flag bit for ax is clear, the dimension
   is a scalar subscript at lb. */
#define BSECTION(d, dx, a, ax, lb, ub, st, gsize, flags)                       \
  if (flags & (1 << (ax - 1))) {                                               \
    dx++;                                                                      \
    __DIST_SET_SECTIONXX(d, dx, a, ax, lb, ub, st, (flags & __NOREINDEX),       \
                        gsize);                                                \
  } else                                                                       \
    I8(__fort_set_single)(d, a, ax, lb, __SCALAR);
2258 
2259 /* for F90 */
/* ENTFTN rank-3 section setup (older entry point): like ENTF90 SECT3
   but dimensions are configured via BSECTION, and only the global
   size (not LSIZE) is stored. */
void ENTFTN(SECT3, sect3)(F90_Desc *d, F90_Desc *a,
                          /* ... = {lower, upper, stride,}* flags */
                          __INT_T *lw0, __INT_T *up0, __INT_T *st0,
                          __INT_T *lw1, __INT_T *up1, __INT_T *st1,
                          __INT_T *lw2, __INT_T *up2, __INT_T *st2,
                          __INT_T *bfg)
{
  DECL_DIM_PTRS(ad);
  DECL_DIM_PTRS(dd);
  __INT_T ax, dx, flags, rank;
  __INT_T gsize;
  __INT_T wrk_rank;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("SECT: missing section descriptor");
  if (a == NULL || F90_TAG_G(a) != __DESC)
    __fort_abort("SECT: invalid array descriptor");
#endif

  /* get flags argument */

  flags = *bfg;

#if MAXDIMS != 7
  __fort_abort("SECT: need to recode for different MAXDIMS");
#endif
  /* determine section rank - popcnt of flags bits */
  /* rank is at most 3: two-step add of bits 0,1,2 */
  rank = (flags & 0x5) + (flags >> 1 & 0x1);
  rank = (rank & 0x3) + (rank >> 2 & 0x1);

  /* initialize descriptor */

  SET_F90_DIST_DESC_PTR(d, rank);
  __DIST_INIT_SECTION(d, rank, a);

  /* bogus bounds: defer section setup until copy */

  gsize = 1;
  if (flags & BOGUSFLAG) {
    __INT_T lower[MAXDIMS], upper[MAXDIMS], stride[MAXDIMS];
    lower[0] = *lw0;
    upper[0] = *up0;
    stride[0] = *st0;
    lower[1] = *lw1;
    upper[1] = *up1;
    stride[1] = *st1;
    lower[2] = *lw2;
    upper[2] = *up2;
    stride[2] = *st2;

    F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
    wrk_rank = F90_RANK_G(a);
    /* ax walks array dims (1-based); dx counts section dims */
    for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
      if ((flags >> (ax - 1)) & 1) {
        SET_DIM_PTRS(dd, d, dx);
        dx++;
        SET_DIM_PTRS(ad, a, ax - 1);
        F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
        DPTR_UBOUND_P(dd, upper[ax - 1]);
        F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
        /* non-unit stride or non-contiguous layout: clear sequential flag */
        if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
          F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
        }
        gsize *= F90_DPTR_EXTENT_G(dd);
      } else
        /* flag bit clear: dimension reduced by a scalar subscript */
        I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
    }
    F90_GSIZE_P(d, gsize); /* global section size */
    return;
  }

  dx = 0;
  BSECTION(d, dx, a, 1, *lw0, *up0, *st0, gsize, flags);
  BSECTION(d, dx, a, 2, *lw1, *up1, *st1, gsize, flags);
  BSECTION(d, dx, a, 3, *lw2, *up2, *st2, gsize, flags);

  /* no longer need section stride/section offset */
  F90_GSIZE_P(d, gsize); /* global section size */
}
2341 
2342 /* for F90 */
ENTFTN(SECT3v,sect3v)2343 void ENTFTN(SECT3v, sect3v)(F90_Desc *d, F90_Desc *a,
2344                             /* ... = {lower, upper, stride,}* flags */
2345                             __INT_T lw0, __INT_T up0, __INT_T st0, __INT_T lw1,
2346                             __INT_T up1, __INT_T st1, __INT_T lw2, __INT_T up2,
2347                             __INT_T st2, __INT_T flags)
2348 {
2349   DECL_DIM_PTRS(ad);
2350   DECL_DIM_PTRS(dd);
2351   __INT_T ax, dx, rank;
2352   __INT_T gsize;
2353   __INT_T wrk_rank;
2354 
2355 #if defined(DEBUG)
2356   if (d == NULL)
2357     __fort_abort("SECT: missing section descriptor");
2358   if (a == NULL || F90_TAG_G(a) != __DESC)
2359     __fort_abort("SECT: invalid array descriptor");
2360 #endif
2361 
2362   /* determine section rank - popcnt of flags bits */
2363 
2364   /* rank is at most 3 */
2365   rank = (flags & 0x5) + (flags >> 1 & 0x1);
2366   rank = (rank & 0x3) + (rank >> 2 & 0x1);
2367 
2368   /* initialize descriptor */
2369 
2370   SET_F90_DIST_DESC_PTR(d, rank);
2371   __DIST_INIT_SECTION(d, rank, a);
2372 
2373   /* bogus bounds: defer section setup until copy */
2374 
2375   if (flags & BOGUSFLAG) {
2376     __INT_T lower[3], upper[3], stride[3];
2377     lower[0] = lw0;
2378     upper[0] = up0;
2379     stride[0] = st0;
2380     lower[1] = lw1;
2381     upper[1] = up1;
2382     stride[1] = st1;
2383     lower[2] = lw2;
2384     upper[2] = up2;
2385     stride[2] = st2;
2386 
2387     F90_FLAGS_P(d, F90_FLAGS_G(d) | __BOGUSBOUNDS);
2388     wrk_rank = F90_RANK_G(a);
2389     for (dx = 0, ax = 1; ax <= wrk_rank; ++ax) {
2390       if ((flags >> (ax - 1)) & 1) {
2391         SET_DIM_PTRS(dd, d, dx);
2392         dx++;
2393         SET_DIM_PTRS(ad, a, ax - 1);
2394         F90_DPTR_LBOUND_P(dd, lower[ax - 1]);
2395         DPTR_UBOUND_P(dd, upper[ax - 1]);
2396         F90_DPTR_SSTRIDE_P(dd, stride[ax - 1]);
2397         if (F90_DPTR_SSTRIDE_G(dd) != 1 || F90_DPTR_LSTRIDE_G(dd) != gsize) {
2398           F90_FLAGS_P(d, (F90_FLAGS_G(d) & ~__SEQUENTIAL_SECTION));
2399         }
2400         gsize *= F90_DPTR_EXTENT_G(dd);
2401       } else
2402         I8(__fort_set_single)(d, a, ax, lower[ax - 1], __SCALAR);
2403     }
2404     F90_GSIZE_P(d, gsize); /* global section size */
2405     return;
2406   }
2407 
2408   /* normal section : set up each dimension and compute GSIZE*/
2409 
2410   dx = 0;
2411   BSECTION(d, dx, a, 1, lw0, up0, st0, gsize, flags);
2412   BSECTION(d, dx, a, 2, lw1, up1, st1, gsize, flags);
2413   BSECTION(d, dx, a, 3, lw2, up2, st2, gsize, flags);
2414 
2415   /* no longer need section stride/section offset */
2416   F90_GSIZE_P(d, gsize); /* global section size */
2417 }
2418 #undef BSECTION
2419 
2420 /* Copy the contents of descriptor d0 into d.  Descriptor d is assumed
2421    to be large enough. */
2422 
I8(__fort_copy_descriptor)2423 void I8(__fort_copy_descriptor)(F90_Desc *d, F90_Desc *d0)
2424 {
2425   if (F90_TAG_G(d0) == __DESC) {
2426     __fort_bcopy((char *)d, (char *)d0,
2427                  SIZE_OF_RANK_n_ARRAY_DESC(F90_RANK_G(d0)));
2428     SET_F90_DIST_DESC_PTR(d, F90_RANK_G(d));
2429   } else {
2430     F90_TAG_P(d, F90_TAG_G(d0));
2431   }
2432 }
2433 
2434 /* Create a copy of the align-target template in space reserved
2435    following the descriptor. */
2436 
/* Create a copy of the align-target template in space reserved
   immediately after the rank-`rank` descriptor d, and return a pointer
   to that copy.  The copy is flagged as a template, made self-aligned,
   and detached from the alignee/actual-arg chains. */
F90_Desc *I8(__fort_inherit_template)(F90_Desc *d, __INT_T rank,
                                      F90_Desc *target)
{
  DECL_HDR_PTRS(t);
  __INT_T dz, dzr, tz;

#if defined(DEBUG)
  if (rank < 0 || rank > MAXDIMS)
    __fort_abort("inherit_descriptor: invalid  rank");
  if (target == NULL || F90_TAG_G(target) != __DESC)
    __fort_abort("inherit_descriptor: invalid align-target descriptor");
#endif

  /* the template lives right after d, at the next aligned offset */
  dz = SIZE_OF_RANK_n_ARRAY_DESC(rank);
  dzr = ALIGNR(dz);
  t = (F90_Desc *)((char *)d + dzr);

  I8(__fort_copy_descriptor)(t, target);

  /* mark as template; clear the not-copied flag */
  F90_FLAGS_P(t, F90_FLAGS_G(t) | __TEMPLATE);
  F90_FLAGS_P(t, F90_FLAGS_G(t) & ~__NOT_COPIED);

  F90_LSIZE_P(t, 0);

  /* template aligns to itself and belongs to no chain */
  DIST_ALIGN_TARGET_P(t, t);
  DIST_NEXT_ALIGNEE_P(t, NULL);
  DIST_ACTUAL_ARG_P(t, NULL);

  return t;
}
2467 
2468 /* Return the section extent. */
2469 
2470 __INT_T
ENTFTN(EXTENT,extent)2471 ENTFTN(EXTENT, extent)
2472 (F90_Desc *d, __INT_T *gdim)
2473 {
2474   __INT_T dim;
2475 
2476 #if defined(DEBUG)
2477   if (d == NULL)
2478     __fort_abort("EXTENT: invalid descriptor");
2479 #endif
2480 
2481   if (F90_TAG_G(d) != __DESC)
2482     return 1; /* scalar or sequential */
2483 
2484   dim = *gdim;
2485 
2486 #if defined(DEBUG)
2487   if (dim < 1 || dim > F90_RANK_G(d))
2488     __fort_abort("EXTENT: invalid dimension");
2489 #endif
2490 
2491   return F90_DIM_EXTENT_G(d, dim - 1);
2492 }
2493 
2494 /* this is just like the above, but with an extra argument
2495  * that is set for 'local' bounds, zero for 'global' bounds.
2496  */
/* Like EXTENT, but with an extra argument *glocal: nonzero requests the
   'local' extent (sum of this processor's block extents), zero the
   'global' extent. */
__INT_T
ENTFTN(GLEXTENT, glextent)
(F90_Desc *d, __INT_T *gdim, __INT_T *glocal)
{
  DECL_DIM_PTRS(dd);
  __INT_T cl, cn, dim, extent, l, u, local;

#if defined(DEBUG)
  if (d == NULL)
    __fort_abort("GLEXTENT: invalid descriptor");
#endif

  if (F90_TAG_G(d) != __DESC)
    return 1; /* scalar or sequential */

  dim = *gdim;
  local = *glocal;

#if defined(DEBUG)
  if (dim < 1 || dim > F90_RANK_G(d))
    __fort_abort("GLEXTENT: invalid dimension");
#endif

  SET_DIM_PTRS(dd, d, dim - 1);

  if (local && ~F90_FLAGS_G(d) & __LOCAL) {

    /* coercing global to local: return local extent */

    /* nothing mapped to this processor */
    if (F90_FLAGS_G(d) & __OFF_TEMPLATE)
      return 0;

    I8(__fort_cycle_bounds)(d);

    /* sum the extents of each local block over the cycle list
       (cl = first cycle, cn = cycle count, CS = cycle stride) */
    extent = 0;
    for (cl = DIST_DPTR_CL_G(dd), cn = DIST_DPTR_CN_G(dd); --cn >= 0;
         cl += DIST_DPTR_CS_G(dd))
      extent += I8(__fort_block_bounds)(d, dim, cl, &l, &u);
  } else {

    /* normal case; return global extent */

    extent = F90_DPTR_EXTENT_G(dd);
  }

  return extent;
}
2544 
2545 /* Return the lower bound for the specified dimension */
2546 
2547 __INT_T
ENTFTN(LBOUND,lbound)2548 ENTFTN(LBOUND, lbound)(__INT_T *dim, F90_Desc *pd)
2549 {
2550   if (F90_TAG_G(pd) != __DESC)
2551     __fort_abort("LBOUND: arg not associated with array");
2552   if (!ISPRESENT(dim) || *dim < 1 || *dim > F90_RANK_G(pd))
2553     __fort_abort("LBOUND: invalid dim");
2554   return F90_DIM_LBOUND_G(pd, *dim - 1);
2555 }
2556 
2557 __INT8_T
ENTFTN(KLBOUND,klbound)2558 ENTFTN(KLBOUND, klbound)(__INT_T *dim, F90_Desc *pd)
2559 {
2560   if (F90_TAG_G(pd) != __DESC)
2561     __fort_abort("LBOUND: arg not associated with array");
2562   if (!ISPRESENT(dim) || *dim < 1 || *dim > F90_RANK_G(pd))
2563     __fort_abort("LBOUND: invalid dim");
2564   return F90_DIM_LBOUND_G(pd, *dim - 1);
2565 }
2566 
2567 /* return the upper bound for the specified dimension */
2568 
2569 __INT_T
ENTFTN(UBOUND,ubound)2570 ENTFTN(UBOUND, ubound)(__INT_T *dim, F90_Desc *pd)
2571 {
2572   if (F90_TAG_G(pd) != __DESC)
2573     __fort_abort("UBOUND: arg not associated with array");
2574   if (!ISPRESENT(dim) || *dim < 1 || *dim > F90_RANK_G(pd))
2575     __fort_abort("UBOUND: invalid dim");
2576   return DIM_UBOUND_G(pd, *dim - 1);
2577 }
2578 
2579 __INT8_T
ENTFTN(KUBOUND,kubound)2580 ENTFTN(KUBOUND, kubound)(__INT_T *dim, F90_Desc *pd)
2581 {
2582   if (F90_TAG_G(pd) != __DESC)
2583     __fort_abort("UBOUND: arg not associated with array");
2584   if (!ISPRESENT(dim) || *dim < 1 || *dim > F90_RANK_G(pd))
2585     __fort_abort("UBOUND: invalid dim");
2586   return DIM_UBOUND_G(pd, *dim - 1);
2587 }
2588 
2589 /* Return lower bounds for all dimensions as a rank 1 array */
2590 
ENTFTN(LBOUNDA,lbounda)2591 void ENTFTN(LBOUNDA, lbounda)(__INT_T *arr, F90_Desc *pd)
2592 {
2593   __INT_T dim, rank;
2594 
2595   if (F90_TAG_G(pd) != __DESC)
2596     __fort_abort("LBOUND: arg not associated with array");
2597   rank = F90_RANK_G(pd);
2598   for (dim = 0; dim < rank; ++dim)
2599     arr[dim] = F90_DIM_LBOUND_G(pd, dim);
2600 }
2601 
ENTFTN(LBOUNDA1,lbounda1)2602 void ENTFTN(LBOUNDA1, lbounda1)(__INT1_T *arr, F90_Desc *pd)
2603 {
2604   __INT_T dim, rank;
2605 
2606   if (F90_TAG_G(pd) != __DESC)
2607     __fort_abort("LBOUND: arg not associated with array");
2608   rank = F90_RANK_G(pd);
2609   for (dim = 0; dim < rank; ++dim)
2610     arr[dim] = F90_DIM_LBOUND_G(pd, dim);
2611 }
2612 
ENTFTN(LBOUNDA2,lbounda2)2613 void ENTFTN(LBOUNDA2, lbounda2)(__INT2_T *arr, F90_Desc *pd)
2614 {
2615   __INT_T dim, rank;
2616 
2617   if (F90_TAG_G(pd) != __DESC)
2618     __fort_abort("LBOUND: arg not associated with array");
2619   rank = F90_RANK_G(pd);
2620   for (dim = 0; dim < rank; ++dim)
2621     arr[dim] = F90_DIM_LBOUND_G(pd, dim);
2622 }
2623 
ENTFTN(LBOUNDA4,lbounda4)2624 void ENTFTN(LBOUNDA4, lbounda4)(__INT4_T *arr, F90_Desc *pd)
2625 {
2626   __INT_T dim, rank;
2627 
2628   if (F90_TAG_G(pd) != __DESC)
2629     __fort_abort("LBOUND: arg not associated with array");
2630   rank = F90_RANK_G(pd);
2631   for (dim = 0; dim < rank; ++dim)
2632     arr[dim] = F90_DIM_LBOUND_G(pd, dim);
2633 }
2634 
ENTFTN(LBOUNDA8,lbounda8)2635 void ENTFTN(LBOUNDA8, lbounda8)(__INT8_T *arr, F90_Desc *pd)
2636 {
2637   __INT_T dim, rank;
2638 
2639   if (F90_TAG_G(pd) != __DESC)
2640     __fort_abort("LBOUND: arg not associated with array");
2641   rank = F90_RANK_G(pd);
2642   for (dim = 0; dim < rank; ++dim)
2643     arr[dim] = F90_DIM_LBOUND_G(pd, dim);
2644 }
2645 
ENTFTN(LBOUNDAZ,lboundaz)2646 void ENTFTN(LBOUNDAZ, lboundaz)(__INT4_T *arr, F90_Desc *pd)
2647 {
2648   __INT_T dim, rank;
2649 
2650   if (F90_TAG_G(pd) != __DESC)
2651     __fort_abort("LBOUND: arg not associated with array");
2652   rank = F90_RANK_G(pd);
2653   for (dim = 0; dim < rank; ++dim)
2654     arr[dim] = F90_DIM_LBOUND_G(pd, dim);
2655 }
2656 
ENTFTN(LBOUNDAZ1,lboundaz1)2657 void ENTFTN(LBOUNDAZ1, lboundaz1)(__INT1_T *arr, F90_Desc *pd)
2658 {
2659   __INT_T dim, rank;
2660 
2661   if (F90_TAG_G(pd) != __DESC)
2662     __fort_abort("LBOUND: arg not associated with array");
2663   rank = F90_RANK_G(pd);
2664   for (dim = 0; dim < rank; ++dim)
2665     arr[dim] = F90_DIM_LBOUND_G(pd, dim);
2666 }
2667 
ENTFTN(LBOUNDAZ2,lboundaz2)2668 void ENTFTN(LBOUNDAZ2, lboundaz2)(__INT2_T *arr, F90_Desc *pd)
2669 {
2670   __INT_T dim, rank;
2671 
2672   if (F90_TAG_G(pd) != __DESC)
2673     __fort_abort("LBOUND: arg not associated with array");
2674   rank = F90_RANK_G(pd);
2675   for (dim = 0; dim < rank; ++dim)
2676     arr[dim] = F90_DIM_LBOUND_G(pd, dim);
2677 }
2678 
ENTFTN(LBOUNDAZ4,lboundaz4)2679 void ENTFTN(LBOUNDAZ4, lboundaz4)(__INT4_T *arr, F90_Desc *pd)
2680 {
2681   __INT_T dim, rank;
2682 
2683   if (F90_TAG_G(pd) != __DESC)
2684     __fort_abort("LBOUND: arg not associated with array");
2685   rank = F90_RANK_G(pd);
2686   for (dim = 0; dim < rank; ++dim)
2687     arr[dim] = F90_DIM_LBOUND_G(pd, dim);
2688 }
2689 
ENTFTN(LBOUNDAZ8,lboundaz8)2690 void ENTFTN(LBOUNDAZ8, lboundaz8)(__INT8_T *arr, F90_Desc *pd)
2691 {
2692   __INT_T dim, rank;
2693 
2694   if (F90_TAG_G(pd) != __DESC)
2695     __fort_abort("LBOUND: arg not associated with array");
2696   rank = F90_RANK_G(pd);
2697   for (dim = 0; dim < rank; ++dim)
2698     arr[dim] = F90_DIM_LBOUND_G(pd, dim);
2699 }
2700 
ENTFTN(KLBOUNDA,klbounda)2701 void ENTFTN(KLBOUNDA, klbounda)(__INT_T *arr, F90_Desc *pd)
2702 {
2703   __INT_T dim, rank;
2704 
2705   if (F90_TAG_G(pd) != __DESC)
2706     __fort_abort("LBOUND: arg not associated with array");
2707   rank = F90_RANK_G(pd);
2708   for (dim = 0; dim < rank; ++dim)
2709     arr[dim] = F90_DIM_LBOUND_G(pd, dim);
2710 }
2711 
ENTFTN(KLBOUNDAZ,klboundaz)2712 void ENTFTN(KLBOUNDAZ, klboundaz)(__INT8_T *arr, F90_Desc *pd)
2713 {
2714   __INT_T dim, rank;
2715 
2716   if (F90_TAG_G(pd) != __DESC)
2717     __fort_abort("LBOUND: arg not associated with array");
2718   rank = F90_RANK_G(pd);
2719   for (dim = 0; dim < rank; ++dim)
2720     arr[dim] = F90_DIM_LBOUND_G(pd, dim);
2721 }
2722 
2723 /* Return upper bounds for all dimensions as a rank 1 array */
2724 
ENTFTN(UBOUNDA,ubounda)2725 void ENTFTN(UBOUNDA, ubounda)(__INT_T *arr, F90_Desc *pd)
2726 {
2727   __INT_T dim, rank;
2728 
2729   if (F90_TAG_G(pd) != __DESC)
2730     __fort_abort("UBOUND: arg not associated with array");
2731   rank = F90_RANK_G(pd);
2732   for (dim = 0; dim < rank; ++dim)
2733     arr[dim] = DIM_UBOUND_G(pd, dim);
2734 }
2735 
ENTFTN(UBOUNDA1,ubounda1)2736 void ENTFTN(UBOUNDA1, ubounda1)(__INT1_T *arr, F90_Desc *pd)
2737 {
2738   __INT_T dim, rank;
2739 
2740   if (F90_TAG_G(pd) != __DESC)
2741     __fort_abort("UBOUND: arg not associated with array");
2742   rank = F90_RANK_G(pd);
2743   for (dim = 0; dim < rank; ++dim)
2744     arr[dim] = F90_DIM_UBOUND_G(pd, dim);
2745 }
2746 
ENTFTN(UBOUNDA2,ubounda2)2747 void ENTFTN(UBOUNDA2, ubounda2)(__INT2_T *arr, F90_Desc *pd)
2748 {
2749   __INT_T dim, rank;
2750 
2751   if (F90_TAG_G(pd) != __DESC)
2752     __fort_abort("UBOUND: arg not associated with array");
2753   rank = F90_RANK_G(pd);
2754   for (dim = 0; dim < rank; ++dim)
2755     arr[dim] = F90_DIM_UBOUND_G(pd, dim);
2756 }
2757 
ENTFTN(UBOUNDA4,ubounda4)2758 void ENTFTN(UBOUNDA4, ubounda4)(__INT4_T *arr, F90_Desc *pd)
2759 {
2760   __INT_T dim, rank;
2761 
2762   if (F90_TAG_G(pd) != __DESC)
2763     __fort_abort("UBOUND: arg not associated with array");
2764   rank = F90_RANK_G(pd);
2765   for (dim = 0; dim < rank; ++dim)
2766     arr[dim] = F90_DIM_UBOUND_G(pd, dim);
2767 }
2768 
ENTFTN(UBOUNDA8,ubounda8)2769 void ENTFTN(UBOUNDA8, ubounda8)(__INT8_T *arr, F90_Desc *pd)
2770 {
2771   __INT_T dim, rank;
2772 
2773   if (F90_TAG_G(pd) != __DESC)
2774     __fort_abort("UBOUND: arg not associated with array");
2775   rank = F90_RANK_G(pd);
2776   for (dim = 0; dim < rank; ++dim)
2777     arr[dim] = F90_DIM_UBOUND_G(pd, dim);
2778 }
2779 
ENTFTN(UBOUNDAZ,uboundaz)2780 void ENTFTN(UBOUNDAZ, uboundaz)(__INT4_T *arr, F90_Desc *pd)
2781 {
2782   __INT_T dim, rank;
2783 
2784   if (F90_TAG_G(pd) != __DESC)
2785     __fort_abort("UBOUND: arg not associated with array");
2786   rank = F90_RANK_G(pd);
2787   for (dim = 0; dim < rank; ++dim)
2788     arr[dim] = DIM_UBOUND_G(pd, dim);
2789 }
2790 
ENTFTN(UBOUNDAZ1,uboundaz1)2791 void ENTFTN(UBOUNDAZ1, uboundaz1)(__INT1_T *arr, F90_Desc *pd)
2792 {
2793   __INT_T dim, rank;
2794 
2795   if (F90_TAG_G(pd) != __DESC)
2796     __fort_abort("UBOUND: arg not associated with array");
2797   rank = F90_RANK_G(pd);
2798   for (dim = 0; dim < rank; ++dim)
2799     arr[dim] = F90_DIM_UBOUND_G(pd, dim);
2800 }
2801 
ENTFTN(UBOUNDAZ2,uboundaz2)2802 void ENTFTN(UBOUNDAZ2, uboundaz2)(__INT2_T *arr, F90_Desc *pd)
2803 {
2804   __INT_T dim, rank;
2805 
2806   if (F90_TAG_G(pd) != __DESC)
2807     __fort_abort("UBOUND: arg not associated with array");
2808   rank = F90_RANK_G(pd);
2809   for (dim = 0; dim < rank; ++dim)
2810     arr[dim] = F90_DIM_UBOUND_G(pd, dim);
2811 }
2812 
ENTFTN(UBOUNDAZ4,uboundaz4)2813 void ENTFTN(UBOUNDAZ4, uboundaz4)(__INT4_T *arr, F90_Desc *pd)
2814 {
2815   __INT_T dim, rank;
2816 
2817   if (F90_TAG_G(pd) != __DESC)
2818     __fort_abort("UBOUND: arg not associated with array");
2819   rank = F90_RANK_G(pd);
2820   for (dim = 0; dim < rank; ++dim)
2821     arr[dim] = F90_DIM_UBOUND_G(pd, dim);
2822 }
2823 
ENTFTN(UBOUNDAZ8,uboundaz8)2824 void ENTFTN(UBOUNDAZ8, uboundaz8)(__INT8_T *arr, F90_Desc *pd)
2825 {
2826   __INT_T dim, rank;
2827 
2828   if (F90_TAG_G(pd) != __DESC)
2829     __fort_abort("UBOUND: arg not associated with array");
2830   rank = F90_RANK_G(pd);
2831   for (dim = 0; dim < rank; ++dim)
2832     arr[dim] = F90_DIM_UBOUND_G(pd, dim);
2833 }
2834 
ENTFTN(KUBOUNDA,kubounda)2835 void ENTFTN(KUBOUNDA, kubounda)(__INT_T *arr, F90_Desc *pd)
2836 {
2837   __INT_T dim, rank;
2838 
2839   if (F90_TAG_G(pd) != __DESC)
2840     __fort_abort("UBOUND: arg not associated with array");
2841   rank = F90_RANK_G(pd);
2842   for (dim = 0; dim < rank; ++dim)
2843     arr[dim] = DIM_UBOUND_G(pd, dim);
2844 }
2845 
ENTFTN(KUBOUNDAZ,kuboundaz)2846 void ENTFTN(KUBOUNDAZ, kuboundaz)(__INT8_T *arr, F90_Desc *pd)
2847 {
2848   __INT_T dim, rank;
2849 
2850   if (F90_TAG_G(pd) != __DESC)
2851     __fort_abort("UBOUND: arg not associated with array");
2852   rank = F90_RANK_G(pd);
2853   for (dim = 0; dim < rank; ++dim)
2854     arr[dim] = DIM_UBOUND_G(pd, dim);
2855 }
2856 
2857 /* If dim is present, return the extent for the specified dimension.
2858    otherwise, return the size of the array (the product of all
2859    extents) */
2860 
2861 __INT_T
ENTFTN(SIZE,size)2862 ENTFTN(SIZE, size)(__INT_T *dim, F90_Desc *pd)
2863 {
2864   __INT_T size;
2865 
2866   if (F90_TAG_G(pd) != __DESC)
2867     __fort_abort("SIZE: arg not associated with array");
2868   if (!ISPRESENT(dim))
2869     size = F90_GSIZE_G(pd);
2870   else if (*dim < 1 || *dim > F90_RANK_G(pd))
2871     __fort_abort("SIZE: invalid dim");
2872   else {
2873     size = F90_DIM_EXTENT_G(pd, *dim - 1);
2874   }
2875   return size;
2876 }
2877 
2878 __INT8_T
ENTFTN(KSIZE,ksize)2879 ENTFTN(KSIZE, ksize)(__INT_T *dim, F90_Desc *pd)
2880 {
2881 
2882   /*
2883    * -i8 variant of __size
2884    */
2885 
2886   __INT_T size;
2887 
2888   if (F90_TAG_G(pd) != __DESC)
2889     __fort_abort("SIZE: arg not associated with array");
2890   if (!ISPRESENT(dim))
2891     size = F90_GSIZE_G(pd);
2892   else if (*dim < 1 || *dim > F90_RANK_G(pd))
2893     __fort_abort("SIZE: invalid dim");
2894   else {
2895     size = F90_DIM_EXTENT_G(pd, *dim - 1);
2896   }
2897   return (__INT8_T)size;
2898 }
2899 
2900 /* Return the array shape as a rank 1 array */
2901 
ENTFTN(SHAPE,shape)2902 void ENTFTN(SHAPE, shape)(__INT4_T *arr, F90_Desc *pd)
2903 {
2904   DECL_DIM_PTRS(pdd);
2905   __INT_T dim, rank;
2906 
2907   if (F90_TAG_G(pd) != __DESC)
2908     __fort_abort("SHAPE: arg not associated with array");
2909   rank = F90_RANK_G(pd);
2910   for (dim = 0; dim < rank; ++dim) {
2911     SET_DIM_PTRS(pdd, pd, dim);
2912     arr[dim] = F90_DIM_EXTENT_G(pd, dim);
2913   }
2914 }
2915 
ENTFTN(KSHAPE,kshape)2916 void ENTFTN(KSHAPE, kshape)(__INT8_T *arr, F90_Desc *pd)
2917 {
2918 
2919   /*
2920    * -i8 variant of SHAPE
2921    */
2922 
2923   __INT_T dim, rank;
2924 
2925   if (F90_TAG_G(pd) != __DESC)
2926     __fort_abort("SHAPE: arg not associated with array");
2927   rank = F90_RANK_G(pd);
2928   for (dim = 0; dim < rank; ++dim) {
2929     arr[dim] = F90_DIM_EXTENT_G(pd, dim);
2930   }
2931 }
2932 
I8(__fort_reverse_array)2933 void I8(__fort_reverse_array)(char *db, char *ab, F90_Desc *dd, F90_Desc *ad)
2934 {
2935 
2936   /* make a "reversed" copy of descriptor ad and store it in dd,
2937    * then copy the data in ab and store it in reverse in db ...
2938    *
2939    * Internal procedure....we assume only the run-time calls this
2940    * routine... (The compiler could do a much better job at generating
2941    *             equivalent code for performing this function)
2942    *
2943    */
2944 
2945   __INT_T flags, i;
2946   __INT_T rank;
2947   __INT_T kind, len;
2948   __INT_T _0 = 0;
2949   __INT_T isstar;
2950   DECL_HDR_VARS(dd2);
2951   DECL_DIM_PTRS(add);
2952   __INT_T lbound[MAXDIMS], ubound[MAXDIMS], stride[MAXDIMS], dstfmt[MAXDIMS];
2953   __INT_T paxis[MAXDIMS], no[MAXDIMS], po[MAXDIMS];
2954   __INT_T *gen_block[MAXDIMS];
2955   sked *s;
2956   void *xfer;
2957 
2958   rank = F90_RANK_G(ad);
2959 
2960   SET_F90_DIST_DESC_PTR(dd2, rank);
2961 
2962   flags = (__PRESCRIPTIVE_DIST_TARGET + __PRESCRIPTIVE_DIST_FORMAT +
2963            __DIST_TARGET_AXIS + __ASSUMED_GB_EXTENT + __DUMMY_COLLAPSE_PAXIS);
2964 
2965   isstar = 0;
2966   for (i = 0; i < rank; ++i) {
2967     SET_DIM_PTRS(add, ad, i);
2968     stride[i] = -DIST_DPTR_TSTRIDE_G(add);
2969     lbound[i] = DIST_DPTR_TLB_G(add);
2970     ubound[i] = DIST_DPTR_TUB_G(add);
2971     paxis[i] = DIST_DPTR_PAXIS_G(add);
2972     gen_block[i] = DIST_DPTR_GEN_BLOCK_G(add);
2973     no[i] = DIST_DPTR_NO_G(add);
2974     po[i] = DIST_DPTR_PO_G(add);
2975 
2976     switch (DFMT(ad, i + 1)) {
2977 
2978     case DFMT_GEN_BLOCK:
2979       isstar |= 0x01 << (7 + 3 * i);
2980       dstfmt[i] = 0;
2981       break;
2982     case DFMT_COLLAPSED:
2983       isstar |= 0x01 << i;
2984       dstfmt[i] = 0;
2985       break;
2986     case DFMT_BLOCK:
2987       dstfmt[i] = 0;
2988       break;
2989     case DFMT_BLOCK_K:
2990       dstfmt[i] = DIST_DPTR_BLOCK_G(add);
2991       break;
2992     case DFMT_CYCLIC:
2993       dstfmt[i] = -1;
2994       break;
2995     case DFMT_CYCLIC_K:
2996       dstfmt[i] = -(DIST_DPTR_BLOCK_G(add));
2997       break;
2998 
2999     default:
3000       __fort_abort("__fort_reverse: invalid dist format (internal)");
3001     }
3002   }
3003 
3004   switch (rank) {
3005 
3006   case 1:
3007 
3008     ENTFTN(TEMPLATE, template)
3009     (dd2, &rank, &flags, DIST_DIST_TARGET_G(ad), &isstar, &paxis[0],
3010      (!gen_block[0]) ? &dstfmt[0] : gen_block[0], &lbound[0], &ubound[0]);
3011     break;
3012 
3013   case 2:
3014 
3015     ENTFTN(TEMPLATE, template)
3016     (dd2, &rank, &flags, DIST_DIST_TARGET_G(ad), &isstar, &paxis[0],
3017      (!gen_block[0]) ? &dstfmt[0] : gen_block[0], &lbound[0], &ubound[0],
3018      &paxis[1], (!gen_block[1]) ? &dstfmt[1] : gen_block[1], &lbound[1],
3019      &ubound[1]);
3020     break;
3021 
3022   case 3:
3023 
3024     ENTFTN(TEMPLATE, template)
3025     (dd2, &rank, &flags, DIST_DIST_TARGET_G(ad), &isstar, &paxis[0],
3026      (!gen_block[0]) ? &dstfmt[0] : gen_block[0], &lbound[0], &ubound[0],
3027      &paxis[1], (!gen_block[1]) ? &dstfmt[1] : gen_block[1], &lbound[1],
3028      &ubound[1], &paxis[2], (!gen_block[2]) ? &dstfmt[2] : gen_block[2],
3029      &lbound[2], &ubound[2]);
3030     break;
3031 
3032   case 4:
3033 
3034     ENTFTN(TEMPLATE, template)
3035     (dd2, &rank, &flags, DIST_DIST_TARGET_G(ad), &isstar, &paxis[0],
3036      (!gen_block[0]) ? &dstfmt[0] : gen_block[0], &lbound[0], &ubound[0],
3037      &paxis[1], (!gen_block[1]) ? &dstfmt[1] : gen_block[1], &lbound[1],
3038      &ubound[1], &paxis[2], (!gen_block[2]) ? &dstfmt[2] : gen_block[2],
3039      &lbound[2], &ubound[2], &paxis[3],
3040      (!gen_block[3]) ? &dstfmt[3] : gen_block[3], &lbound[3], &ubound[3]);
3041 
3042     break;
3043 
3044   case 5:
3045 
3046     ENTFTN(TEMPLATE, template)
3047     (dd2, &rank, &flags, DIST_DIST_TARGET_G(ad), &isstar, &paxis[0],
3048      (!gen_block[0]) ? &dstfmt[0] : gen_block[0], &lbound[0], &ubound[0],
3049      &paxis[1], (!gen_block[1]) ? &dstfmt[1] : gen_block[1], &lbound[1],
3050      &ubound[1], &paxis[2], (!gen_block[2]) ? &dstfmt[2] : gen_block[2],
3051      &lbound[2], &ubound[2], &paxis[3],
3052      (!gen_block[3]) ? &dstfmt[3] : gen_block[3], &lbound[3], &ubound[3],
3053      &paxis[4], (!gen_block[4]) ? &dstfmt[4] : gen_block[4], &lbound[4],
3054      &ubound[4]);
3055     break;
3056 
3057   case 6:
3058 
3059     ENTFTN(TEMPLATE, template)
3060     (dd2, &rank, &flags, DIST_DIST_TARGET_G(ad), &isstar, &paxis[0],
3061      (!gen_block[0]) ? &dstfmt[0] : gen_block[0], &lbound[0], &ubound[0],
3062      &paxis[1], (!gen_block[1]) ? &dstfmt[1] : gen_block[1], &lbound[1],
3063      &ubound[1], &paxis[2], (!gen_block[2]) ? &dstfmt[2] : gen_block[2],
3064      &lbound[2], &ubound[2], &paxis[3],
3065      (!gen_block[3]) ? &dstfmt[3] : gen_block[3], &lbound[3], &ubound[3],
3066      &paxis[4], (!gen_block[4]) ? &dstfmt[4] : gen_block[4], &lbound[4],
3067      &ubound[4], &paxis[5], (!gen_block[5]) ? &dstfmt[5] : gen_block[5],
3068      &lbound[5], &ubound[5]);
3069     break;
3070 
3071   case 7:
3072 
3073     ENTFTN(TEMPLATE, template)
3074     (dd2, &rank, &flags, DIST_DIST_TARGET_G(ad), &isstar, &paxis[0],
3075      (!gen_block[0]) ? &dstfmt[0] : gen_block[0], &lbound[0], &ubound[0],
3076      &paxis[1], (!gen_block[1]) ? &dstfmt[1] : gen_block[1], &lbound[1],
3077      &ubound[1], &paxis[2], (!gen_block[2]) ? &dstfmt[2] : gen_block[2],
3078      &lbound[2], &ubound[2], &paxis[3],
3079      (!gen_block[3]) ? &dstfmt[3] : gen_block[3], &lbound[3], &ubound[3],
3080      &paxis[4], (!gen_block[4]) ? &dstfmt[4] : gen_block[4], &lbound[4],
3081      &ubound[4], &paxis[5], (!gen_block[5]) ? &dstfmt[5] : gen_block[5],
3082      &lbound[5], &ubound[5], &paxis[6],
3083      (!gen_block[6]) ? &dstfmt[6] : gen_block[6], &lbound[6], &ubound[6]);
3084 
3085     break;
3086 
3087   default:
3088 
3089     __fort_abort("reverse_array: Temp Invalid Rank (internal error)");
3090   }
3091 
3092   kind = F90_KIND_G(ad);
3093   len = F90_LEN_G(ad);
3094 
3095   switch (rank) {
3096 
3097   case 1:
3098 
3099     ENTFTN(INSTANCE, instance)
3100     (dd2, dd2, &kind, &len, &_0, &no[0], &po[0]);
3101     break;
3102   case 2:
3103 
3104     ENTFTN(INSTANCE, instance)
3105     (dd2, dd2, &kind, &len, &_0, &no[0], &po[0], &no[1], &po[1]);
3106     break;
3107   case 3:
3108 
3109     ENTFTN(INSTANCE, instance)
3110     (dd2, dd2, &kind, &len, &_0, &no[0], &po[0], &no[1], &po[1], &no[2],
3111      &po[2]);
3112     break;
3113   case 4:
3114 
3115     ENTFTN(INSTANCE, instance)
3116     (dd2, dd2, &kind, &len, &_0, &no[0], &po[0], &no[1], &po[1], &no[2], &po[2],
3117      &no[3], &po[3]);
3118     break;
3119   case 5:
3120 
3121     ENTFTN(INSTANCE, instance)
3122     (dd2, dd2, &kind, &len, &_0, &no[0], &po[0], &no[1], &po[1], &no[2], &po[2],
3123      &no[3], &po[3], &no[4], &po[4]);
3124     break;
3125   case 6:
3126 
3127     ENTFTN(INSTANCE, instance)
3128     (dd2, dd2, &kind, &len, &_0, &no[0], &po[0], &no[1], &po[1], &no[2], &po[2],
3129      &no[3], &po[3], &no[4], &po[4], &no[5], &po[5]);
3130     break;
3131   case 7:
3132 
3133     ENTFTN(INSTANCE, instance)
3134     (dd2, dd2, &kind, &len, &_0, &no[0], &po[0], &no[1], &po[1], &no[2], &po[2],
3135      &no[3], &po[3], &no[4], &po[4], &no[5], &po[5], &no[6], &po[6]);
3136     break;
3137 
3138   default:
3139     __fort_abort("reverse_array: Instance Invalid Rank (internal error)");
3140   }
3141 
3142   /* swap bounds for negative stride */
3143 
3144   for (i = 0; i < rank; ++i) {
3145     if (stride[i] < 0) {
3146       __INT_T t;
3147 
3148       t = ubound[i];
3149       ubound[i] = lbound[i];
3150       lbound[i] = t;
3151     }
3152   }
3153 
3154   switch (rank) {
3155 
3156   case 1:
3157 
3158     ENTFTN(SECT, sect)(dd, dd2, &lbound[0], &ubound[0], &stride[0], &rank);
3159     break;
3160 
3161   case 2:
3162 
3163     ENTFTN(SECT,sect) (dd,dd2,&lbound[0],&ubound[0],&stride[0],
3164                                &lbound[1],&ubound[1],&stride[1],&rank);
3165     break;
3166 
3167   case 3:
3168 
3169     ENTFTN(SECT,sect) (dd,dd2,&lbound[0],&ubound[0],&stride[0],
3170                                &lbound[1],&ubound[1],&stride[1],
3171                                &lbound[2],&ubound[2],&stride[2],&rank);
3172     break;
3173 
3174   case 4:
3175 
3176     ENTFTN(SECT,sect) (dd,dd2,&lbound[0],&ubound[0],&stride[0],
3177                                &lbound[1],&ubound[1],&stride[1],
3178                                &lbound[2],&ubound[2],&stride[2],
3179                                &lbound[3],&ubound[3],&stride[3],&rank);
3180     break;
3181 
3182   case 5:
3183     ENTFTN(SECT,sect) (dd,dd2,&lbound[0],&lbound[0],&stride[0],
3184                                &lbound[1],&ubound[1],&stride[1],
3185                                &lbound[2],&ubound[2],&stride[2],
3186                                &lbound[3],&ubound[3],&stride[3],
3187                                &lbound[4],&ubound[4],&stride[4],&rank);
3188     break;
3189 
3190   case 6:
3191 
3192     ENTFTN(SECT,sect) (dd,dd2,&ubound[0],&lbound[0],&stride[0],
3193                                &lbound[1],&ubound[1],&stride[1],
3194                                &lbound[2],&ubound[2],&stride[2],
3195                                &lbound[3],&ubound[3],&stride[3],
3196                                &lbound[4],&ubound[4],&stride[4],
3197                                &lbound[5],&ubound[5],&stride[5],&rank);
3198     break;
3199 
3200   case 7:
3201 
3202     ENTFTN(SECT,sect) (dd,dd2,&ubound[0],&lbound[0],&stride[0],
3203                                &lbound[1],&ubound[1],&stride[1],
3204                                &lbound[2],&ubound[2],&stride[2],
3205                                &lbound[3],&ubound[3],&stride[3],
3206                                &lbound[4],&ubound[4],&stride[4],
3207                                &lbound[5],&ubound[5],&stride[5],
3208                                &lbound[6],&ubound[6],&stride[6],&rank);
3209     break;
3210 
3211   default:
3212 
3213     __fort_abort("reverse_array: Sect Invalid rank (internal error)");
3214   }
3215 
3216   /* copy ab to db */
3217 
3218   s = (sked *)ENTFTN(COMM_COPY, comm_copy)(db, ab, dd, ad);
3219 
3220   xfer = (void *)ENTFTN(COMM_START, comm_start)(&s, db, dd, (char *)ab, ad);
3221 
3222   ENTFTN(COMM_FINISH, comm_finish)(xfer);
3223 }
3224