1 /*
2  * Copyright (c) 1996-2018, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 /* clang-format off */
19 
20 /** \file
21  * Intrinsic reduction functions
22  */
23 
24 #include "stdioInterf.h"
25 #include "fioMacros.h"
26 #include "red.h"
27 
28 extern void (*__fort_scalar_copy[__NTYPES])(void *rp, void *sp, int len);
29 
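/* Prototype for the Fortran copy-in entry point used below by the DIM-present
 * reduction drivers: when the result section is mapped or nonsequential, it is
 * copied into a temporary that the local loops can address directly (see
 * __fort_red_array and the *arraylk variants). */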
30 void ENTFTN(QOPY_IN, qopy_in)(char **dptr, __POINT_T *doff, char *dbase,
31                               F90_Desc *dd, char *ab, F90_Desc *ad,
32                               __INT_T *p_rank, __INT_T *p_kind, __INT_T *p_len,
33                               __INT_T *p_flags, ...);
34 
35 #if !defined(DESC_I8)
36 void
__fort_red_unimplemented()
38 {
39   char str[80];
40 
41   sprintf(str, "%s: unimplemented for data type", __fort_red_what);
42   __fort_abort(str);
43 }
44 
45 void
__fort_red_abort(char *msg)
47 {
48   char str[80];
49 
50   sprintf(str, "%s: %s", __fort_red_what, msg);
51   __fort_abort(str);
52 }
53 #endif
54 
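/* Reading of the loop below: red_scalar_loop recurses from the highest array
 * dimension down to dimension 1, walking the cyclic blocks local to this
 * processor.  At dimension 1 it applies the local reduction function z->l_fn
 * (or z->l_fn_b when a BACK argument is in play) over each contiguous block.
 * A fast path handles the cyclic(1) case when the mask, if present, is stored
 * alike with the array. */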
55 /** \brief reduction, dim argument absent */
static void I8(red_scalar_loop)(red_parm *z, __INT_T aof, __INT_T ll, int dim)
57 {
58   DECL_HDR_PTRS(as);
59   DECL_HDR_PTRS(ms);
60   DECL_DIM_PTRS(asd);
61   DECL_DIM_PTRS(msd);
62   char *ap;
63   __LOG_T *mp;
64 
65   __INT_T abl, abn, abu, acl, acn, aclof, ahop, ao, extent, i, li, ls, mhop,
66       mlow;
67 
68   as = z->as;
69   SET_DIM_PTRS(asd, as, dim - 1);
70   acn = DIST_DPTR_CN_G(asd);
71   acl = DIST_DPTR_CL_G(asd);
72   aclof = DIST_DPTR_CLOF_G(asd);
73   ahop = F90_DPTR_SSTRIDE_G(asd) * F90_DPTR_LSTRIDE_G(asd);
74   if (z->mask_present) {
75     ms = z->ms;
76     SET_DIM_PTRS(msd, ms, dim - 1);
77     mlow = F90_DPTR_LBOUND_G(msd);
78     mhop = F90_DPTR_SSTRIDE_G(msd) * F90_DPTR_LSTRIDE_G(msd);
79   } else {
80     mlow = mhop = 0;
81     mp = z->mb;
82   }
83 
84   extent = F90_DPTR_EXTENT_G(asd);
85   if (extent < 0)
86     extent = 0;
87   ll *= extent;
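  /* li below is a 1-based linearized element location: each recursion level
   * scales the location accumulated so far by this dimension's extent and
   * adds the zero-based index plus one.  __fort_red_scalar decodes it back
   * into per-dimension subscripts after the global reduction. */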
88 
89   if (dim == 1 && acn > 1 && DIST_DPTR_BLOCK_G(asd) == DIST_DPTR_TSTRIDE_G(asd) &&
90       (!z->mask_present || z->mask_stored_alike)) {
91 
92     /* cyclic(1) optimization */
93 
94     abl = acl - DIST_DPTR_TOFFSET_G(asd);
95     if (DIST_DPTR_TSTRIDE_G(asd) != 1)
96       abl /= DIST_DPTR_TSTRIDE_G(asd);
97     ao = aof +
98          (F90_DPTR_SSTRIDE_G(asd) * abl + F90_DPTR_SOFFSET_G(asd) - aclof) *
99              F90_DPTR_LSTRIDE_G(asd);
100 
101     i = abl - F90_DPTR_LBOUND_G(asd); /* section ordinal index */
102     li = ll + i + 1;                  /* linearized location */
103     ls = DIST_DPTR_PSHAPE_G(asd);      /* location stride */
104 
105     if (z->mask_present)
106       mp = (__LOG_T *)((char *)(z->mb) + (ao << z->lk_shift));
107 
108     ap = z->ab + ao * F90_LEN_G(as);
109     if (z->l_fn_b) {
110       z->l_fn_b(z->rb, acn, ap, ahop, mp, mhop, z->xb, li, ls, z->len, z->back);
111     } else {
112       z->l_fn(z->rb, acn, ap, ahop, mp, mhop, z->xb, li, ls, z->len);
113     }
114 
115     return;
116   }
117   while (acn > 0) {
118     abn = I8(__fort_block_bounds)(as, dim, acl, &abl, &abu);
119     ao = aof +
120          (F90_DPTR_SSTRIDE_G(asd) * abl + F90_DPTR_SOFFSET_G(asd) - aclof) *
121              F90_DPTR_LSTRIDE_G(asd);
122 
123     i = abl - F90_DPTR_LBOUND_G(asd); /* ordinal index (zero-based) */
124     li = ll + i + 1;                  /* linearized location */
125 
126     z->mi[dim - 1] = mlow + i; /* mask index */
127 
128     if (dim > 1) {
129       while (abn > 0) {
130         I8(red_scalar_loop)(z, ao, li, dim - 1);
131         li++;
132         z->mi[dim - 1]++;
133         ao += ahop;
134         abn--;
135       }
136     } else {
137       if (z->mask_present) {
138         if (z->mask_stored_alike)
139           mp = (__LOG_T *)((char *)(z->mb) + (ao << z->lk_shift));
140         else {
141           mp = I8(__fort_local_address)(z->mb, ms, z->mi);
142           if (mp == NULL)
143             __fort_red_abort("mask misalignment");
144         }
145       }
146       ap = z->ab + ao * F90_LEN_G(as);
147       if (z->l_fn_b) {
148         z->l_fn_b(z->rb, abn, ap, ahop, mp, mhop, z->xb, li, 1, z->len,
149                   z->back);
150       } else {
151         z->l_fn(z->rb, abn, ap, ahop, mp, mhop, z->xb, li, 1, z->len);
152       }
153     }
154     acl += DIST_DPTR_CS_G(asd);
155     aclof += DIST_DPTR_CLOS_G(asd);
156     --acn;
157   }
158 }
159 
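/* Driver for reductions without a DIM argument.  The result is seeded with
 * the identity value z->zb and the location buffer xb (if any) is cleared; a
 * scalar .FALSE. mask short-circuits the operation, the local pass is skipped
 * when this processor holds no part of the array (__OFF_TEMPLATE), and the
 * partial results are then combined and replicated across processors with
 * __fort_reduce_section / __fort_replicate_result. */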
void I8(__fort_red_scalar)(red_parm *z, char *rb, char *ab, char *mb,
161                           F90_Desc *rs, F90_Desc *as, F90_Desc *ms, __INT_T *xb,
162                           red_enum op)
163 {
164   DECL_DIM_PTRS(asd);
165   __INT_T ao, i, len, m, p, q;
166   int loop;
167 
168   z->rb = rb;
169   z->rs = rs;
170   z->ab = ab;
171   z->as = as;
172   z->mb = (__LOG_T *)mb;
173   z->ms = ms;
174   z->xb = xb;
175   z->dim = 0;
176 
177   I8(__fort_cycle_bounds)(as);
178 
179   __fort_scalar_copy[z->kind](rb, z->zb, z->len);
180 
181   if (xb != NULL) {
182     for (i = F90_RANK_G(as); --i >= 0;)
183       xb[i] = 0;
184   }
185 
186   z->mask_present = (F90_TAG_G(ms) == __DESC && F90_RANK_G(ms) > 0);
187   if (z->mask_present) {
188     z->mask_stored_alike = I8(__fort_stored_alike)(as, ms);
189     if (z->mask_stored_alike)
190       z->mb += F90_LBASE_G(ms);
191     for (i = F90_RANK_G(ms); --i >= 0;)
192       z->mi[i] = F90_DIM_LBOUND_G(ms, i);
193   } else if (!ISPRESENT(mb) || I8(__fort_fetch_log)(mb, ms))
194     z->mb = GET_DIST_TRUE_LOG_ADDR;
195   else
196     return; /* scalar mask == .false. */
197 
198   if (~F90_FLAGS_G(as) & __OFF_TEMPLATE) {
199     z->ab += F90_LBASE_G(as) * F90_LEN_G(as);
200     ao = -1;
201     I8(red_scalar_loop)(z, ao, 0, F90_RANK_G(as));
202   }
203 
204   I8(__fort_reduce_section)(rb, z->kind, z->len,
205 			     xb, __INT, sizeof(__INT_T), 1,
206 			     z->g_fn, -1, as);
207 
208   I8(__fort_replicate_result)(rb, z->kind, z->len,
209 			       xb, __INT, sizeof(__INT_T), 1, as);
210 
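  /* xb[0] now holds the selected element's linearized 1-based location (0 if
   * no element was selected); unpack it into one 1-based subscript per
   * dimension.  For example, with extents 3 x 4 a location of 10 decodes to
   * subscripts (1, 3). */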
211   if (xb != NULL && (p = xb[0]) > 0) {
212     for (i = 0; i < F90_RANK_G(as); i++) {
213       SET_DIM_PTRS(asd, as, i);
214       m = F90_DPTR_EXTENT_G(asd);
215       q = (p - 1) / m;
216       xb[i] = p - q * m;
217       p = q;
218     }
219   }
220 }
221 
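/* The "lk" variants do not assume the default logical kind for the mask: mask
 * addresses are formed as byte offsets scaled by z->lk_shift, which presumably
 * holds log2 of the mask element size, so (off << z->lk_shift) plays the role
 * of off * sizeof(mask element). */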
/** \brief Same as __fort_red_scalar, but allows any logical kind for the mask */
void I8(__fort_red_scalarlk)(red_parm *z, char *rb, char *ab, char *mb,
224                             F90_Desc *rs, F90_Desc *as, F90_Desc *ms,
225                             __INT_T *xb, red_enum op)
226 {
227   DECL_DIM_PTRS(asd);
228   __INT_T ao, i, len, m, p, q;
229   int loop;
230 
231   z->rb = rb;
232   z->rs = rs;
233   z->ab = ab;
234   z->as = as;
235   z->mb = (__LOG_T *)mb;
236   z->ms = ms;
237   z->xb = xb;
238   z->dim = 0;
239 
240   I8(__fort_cycle_bounds)(as);
241 
242   __fort_scalar_copy[z->kind](rb, z->zb, z->len);
243 
244   if (xb != NULL) {
245     for (i = F90_RANK_G(as); --i >= 0;)
246       xb[i] = 0;
247   }
248 
249   z->mask_present = (F90_TAG_G(ms) == __DESC && F90_RANK_G(ms) > 0);
250   if (z->mask_present) {
251     z->mask_stored_alike = I8(__fort_stored_alike)(as, ms);
252     if (z->mask_stored_alike)
253       z->mb = (__LOG_T *)((char *)(z->mb) + (F90_LBASE_G(ms) << z->lk_shift));
254     for (i = F90_RANK_G(ms); --i >= 0;)
255       z->mi[i] = F90_DIM_LBOUND_G(ms, i);
256   } else if (!ISPRESENT(mb) || I8(__fort_fetch_log)(mb, ms))
257     z->mb = GET_DIST_TRUE_LOG_ADDR;
258   else
259     return; /* scalar mask == .false. */
260 
261   if (~F90_FLAGS_G(as) & __OFF_TEMPLATE) {
262     z->ab += F90_LBASE_G(as) * F90_LEN_G(as);
263     ao = -1;
264 
265     I8(red_scalar_loop)(z, ao, 0, F90_RANK_G(as));
266   }
267 
268   I8(__fort_reduce_section)(rb, z->kind, z->len,
269 			     xb, __INT, sizeof(__INT_T), 1,
270 			     z->g_fn, -1, as);
271 
272   I8(__fort_replicate_result)(rb, z->kind, z->len,
273 			       xb, __INT, sizeof(__INT_T), 1, as);
274   if (xb != NULL && (p = xb[0]) > 0) {
275     for (i = 0; i < F90_RANK_G(as); i++) {
276       SET_DIM_PTRS(asd, as, i);
277       m = F90_DPTR_EXTENT_G(asd);
278       q = (p - 1) / m;
279       xb[i] = p - q * m;
280       p = q;
281     }
282   }
283 }
284 
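/* For the DIM-present case the loop nest walks two descriptors at once: rdim
 * counts down over the result dimensions while adim tracks the corresponding
 * array dimensions, skipping the reduced dimension z->dim.  Once rdim reaches
 * zero, the remaining array dimension is the one being reduced, and the local
 * reduction function accumulates along it into the single result element rp. */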
285 /** \brief reduction, dim argument present */
static void I8(red_array_loop)(red_parm *z, __INT_T rof, __INT_T aof, int rdim,
287                                int adim)
288 {
289   DECL_HDR_PTRS(as);
290   DECL_HDR_PTRS(rs);
291   DECL_HDR_PTRS(ms);
292   DECL_DIM_PTRS(asd);
293   DECL_DIM_PTRS(rsd);
294   DECL_DIM_PTRS(msd);
295   char *ap, *rp;
296   __LOG_T *mp;
297   __INT4_T *lp;
298 
299   __INT_T abl, abn, abu, acl, acn, aclof, ahop, ao, i, li, ls, mhop, mlow, rbl,
300       rbn, rbu, rcl, rclof, rhop, ro;
301 
302 #if defined(DEBUG)
303   if (__fort_test & DEBUG_REDU) {
304     printf("%d red_array_loop rdim=%d rof=%d adim=%d aof=%d\n", GET_DIST_LCPU,
305            rdim, rof, adim, aof);
306   }
307 #endif
308 
309   if (rdim > 0) {
310     rs = z->rs;
311     SET_DIM_PTRS(rsd, rs, rdim - 1);
312     rcl = DIST_DPTR_CL_G(rsd);
313     rclof = DIST_DPTR_CLOF_G(rsd);
314     rhop = F90_DPTR_SSTRIDE_G(rsd) * F90_DPTR_LSTRIDE_G(rsd);
315 
316     if (adim == z->dim)
317       --adim;
318   } else {
319     rp = z->rb + rof * z->len;
320     adim = z->dim;
321   }
322 
323   as = z->as;
324   SET_DIM_PTRS(asd, as, adim - 1);
325   acn = DIST_DPTR_CN_G(asd);
326   acl = DIST_DPTR_CL_G(asd);
327   aclof = DIST_DPTR_CLOF_G(asd);
328   ahop = F90_DPTR_SSTRIDE_G(asd) * F90_DPTR_LSTRIDE_G(asd);
329 
330   if (z->mask_present) {
331     ms = z->ms;
332     SET_DIM_PTRS(msd, ms, adim - 1);
333     mlow = F90_DPTR_LBOUND_G(msd);
334     mhop = F90_DPTR_SSTRIDE_G(msd) * F90_DPTR_LSTRIDE_G(msd);
335   } else {
336     mlow = mhop = 0;
337     mp = z->mb;
338   }
339 
340   if (rdim <= 0 && acn > 1 &&
341       DIST_DPTR_BLOCK_G(asd) == DIST_DPTR_TSTRIDE_G(asd) &&
342       (!z->mask_present || z->mask_stored_alike)) {
343 
344     /* cyclic(1) optimization */
345 
346     abl = acl - DIST_DPTR_TOFFSET_G(asd);
347     if (DIST_DPTR_TSTRIDE_G(asd) != 1)
348       abl /= DIST_DPTR_TSTRIDE_G(asd);
349     ao = aof +
350          (F90_DPTR_SSTRIDE_G(asd) * abl + F90_DPTR_SOFFSET_G(asd) - aclof) *
351              F90_DPTR_LSTRIDE_G(asd);
352 
353     i = abl - F90_DPTR_LBOUND_G(asd); /* section ordinal index */
354     li = i + 1;                       /* linearized location */
355     ls = DIST_DPTR_PSHAPE_G(asd);      /* location stride */
356 
357     if (z->mask_present)
358       mp = (__LOG_T *)((char *)(z->mb) + (ao << z->lk_shift));
359 
360     if (z->xb != NULL)
361       lp = ((__INT4_T *)z->xb) + rof;
362     else
363       lp = NULL;
364 
365     ap = z->ab + ao * F90_LEN_G(as);
366     if (z->l_fn_b) {
367       z->l_fn_b(rp, acn, ap, ahop, mp, mhop, lp, li, ls, z->len, z->back);
368     } else {
369       z->l_fn(rp, acn, ap, ahop, mp, mhop, lp, li, ls, z->len);
370     }
371 
372     return;
373   }
374 
375   while (acn > 0) {
376     abn = I8(__fort_block_bounds)(as, adim, acl, &abl, &abu);
377     ao = aof +
378          (F90_DPTR_SSTRIDE_G(asd) * abl + F90_DPTR_SOFFSET_G(asd) - aclof) *
379              F90_DPTR_LSTRIDE_G(asd);
380 
381     i = abl - F90_DPTR_LBOUND_G(asd); /* ordinal index (zero-based) */
382     li = i + 1;                       /* linearized location */
383     z->mi[adim - 1] = mlow + i;       /* mask array index */
384 
385     if (rdim > 0) {
386 
387       /* this array dimension is not being reduced.  there is a
388          result element corresponding to every element of this
389          array dimension. */
390 
391       rbn = I8(__fort_block_bounds)(rs, rdim, rcl, &rbl, &rbu);
392 #if defined(DEBUG)
393       if (rbn != abn)
394         __fort_red_abort("result misalignment");
395 #endif
396       ro = rof +
397            (F90_DPTR_SSTRIDE_G(rsd) * rbl + F90_DPTR_SOFFSET_G(rsd) - rclof) *
398                F90_DPTR_LSTRIDE_G(rsd);
399 
400       while (abn > 0) {
401         I8(red_array_loop)(z, ro, ao, rdim - 1, adim - 1);
402         ro += rhop;
403         ao += ahop;
404         z->mi[adim - 1]++;
405         --abn;
406       }
407       rcl += DIST_DPTR_CS_G(rsd);
408       rclof += DIST_DPTR_CLOS_G(rsd);
409     } else {
410 
411       /* this is the array dimension being reduced */
412 
413       if (z->mask_present) {
414         if (z->mask_stored_alike)
415           mp = (__LOG_T *)((char *)(z->mb) + (ao << z->lk_shift));
416         else {
417           mp = I8(__fort_local_address)(z->mb, ms, z->mi);
418           if (mp == NULL)
419             __fort_red_abort("mask misalignment");
420         }
421       }
422 
423       if (z->xb != NULL)
424         lp = ((__INT4_T *)z->xb) + rof;
425       else
426         lp = NULL;
427 
428       ap = z->ab + ao * F90_LEN_G(as);
429       if (z->l_fn_b) {
430         z->l_fn_b(rp, abn, ap, ahop, mp, mhop, lp, li, 1, z->len, z->back);
431       } else {
432         z->l_fn(rp, abn, ap, ahop, mp, mhop, lp, li, 1, z->len);
433       }
434     }
435     acl += DIST_DPTR_CS_G(asd);
436     aclof += DIST_DPTR_CLOS_G(asd);
437     --acn;
438   }
439 #if defined(DEBUG)
440   if (__fort_test & DEBUG_REDU && rdim <= 0) {
441     printf("%d red_array_loop rp=%x *rp=", GET_DIST_LCPU, rp);
442     __fort_print_scalar(rp, z->kind);
443     printf("\n");
444   }
445 #endif
446 }
447 
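/* The "kred" variants are the same drivers with 64-bit location results: xb is
 * an __INT8_T buffer (presumably for MINLOC/MAXLOC/FINDLOC with KIND=8
 * results), and kred_array_loop below indexes it with 8-byte strides. */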
void I8(__fort_kred_scalarlk)(red_parm *z, char *rb, char *ab, char *mb,
449                              F90_Desc *rs, F90_Desc *as, F90_Desc *ms,
450                              __INT8_T *xb, red_enum op)
451 {
452   DECL_DIM_PTRS(asd);
453   __INT_T ao, i, len, m, p, q;
454   int loop;
455 
456   z->rb = rb;
457   z->rs = rs;
458   z->ab = ab;
459   z->as = as;
460   z->mb = (__LOG_T *)mb;
461   z->ms = ms;
462   z->xb = (__INT_T *)xb;
463   z->dim = 0;
464 
465   I8(__fort_cycle_bounds)(as);
466 
467   __fort_scalar_copy[z->kind](rb, z->zb, z->len);
468 
469   if (xb != NULL) {
470     for (i = F90_RANK_G(as); --i >= 0;)
471       xb[i] = 0;
472   }
473 
474   z->mask_present = (F90_TAG_G(ms) == __DESC && F90_RANK_G(ms) > 0);
475   if (z->mask_present) {
476     z->mask_stored_alike = I8(__fort_stored_alike)(as, ms);
477     if (z->mask_stored_alike)
478       z->mb = (__LOG_T *)((char *)(z->mb) + (F90_LBASE_G(ms) << z->lk_shift));
479     for (i = F90_RANK_G(ms); --i >= 0;)
480       z->mi[i] = F90_DIM_LBOUND_G(ms, i);
481   } else if (!ISPRESENT(mb) || I8(__fort_fetch_log)(mb, ms))
482     z->mb = GET_DIST_TRUE_LOG_ADDR;
483   else
484     return; /* scalar mask == .false. */
485 
486   if (~F90_FLAGS_G(as) & __OFF_TEMPLATE) {
487     z->ab += F90_LBASE_G(as) * F90_LEN_G(as);
488     ao = -1;
489     I8(red_scalar_loop)(z, ao, 0, F90_RANK_G(as));
490   }
491 
492   I8(__fort_reduce_section)(rb, z->kind, z->len,
493 			     xb, __INT, sizeof(__INT_T), 1,
494 			     z->g_fn, -1, as);
495 
496   I8(__fort_replicate_result)(rb, z->kind, z->len,
497 			       xb, __INT, sizeof(__INT_T), 1, as);
498 
499   if (xb != NULL && (p = xb[0]) > 0) {
500     for (i = 0; i < F90_RANK_G(as); i++) {
501       SET_DIM_PTRS(asd, as, i);
502       m = F90_DPTR_EXTENT_G(asd);
503       q = (p - 1) / m;
504       xb[i] = p - q * m;
505       p = q;
506     }
507   }
508 }
509 
static void I8(kred_array_loop)(red_parm *z, __INT_T rof, __INT_T aof, int rdim,
511                                 int adim)
512 {
513   DECL_HDR_PTRS(as);
514   DECL_HDR_PTRS(rs);
515   DECL_HDR_PTRS(ms);
516   DECL_DIM_PTRS(asd);
517   DECL_DIM_PTRS(rsd);
518   DECL_DIM_PTRS(msd);
519   char *ap, *rp;
520   __LOG_T *mp;
521   __INT_T *lp;
522 
523   __INT_T abl, abn, abu, acl, acn, aclof, ahop, ao, i, li, ls, mhop, mlow, rbl,
524       rbn, rbu, rcl, rclof, rhop, ro;
525 
526 #if defined(DEBUG)
527   if (__fort_test & DEBUG_REDU) {
    printf("%d kred_array_loop rdim=%d rof=%d adim=%d aof=%d\n", GET_DIST_LCPU,
529            rdim, rof, adim, aof);
530   }
531 #endif
532 
533   if (rdim > 0) {
534     rs = z->rs;
535     SET_DIM_PTRS(rsd, rs, rdim - 1);
536     rcl = DIST_DPTR_CL_G(rsd);
537     rclof = DIST_DPTR_CLOF_G(rsd);
538     rhop = F90_DPTR_SSTRIDE_G(rsd) * F90_DPTR_LSTRIDE_G(rsd);
539 
540     if (adim == z->dim)
541       --adim;
542   } else {
543     rp = z->rb + rof * z->len;
544     adim = z->dim;
545   }
546 
547   as = z->as;
548   SET_DIM_PTRS(asd, as, adim - 1);
549   acn = DIST_DPTR_CN_G(asd);
550   acl = DIST_DPTR_CL_G(asd);
551   aclof = DIST_DPTR_CLOF_G(asd);
552   ahop = F90_DPTR_SSTRIDE_G(asd) * F90_DPTR_LSTRIDE_G(asd);
553 
554   if (z->mask_present) {
555     ms = z->ms;
556     SET_DIM_PTRS(msd, ms, adim - 1);
557     mlow = F90_DPTR_LBOUND_G(msd);
558     mhop = F90_DPTR_SSTRIDE_G(msd) * F90_DPTR_LSTRIDE_G(msd);
559   } else {
560     mlow = mhop = 0;
561     mp = z->mb;
562   }
563 
564   if (rdim <= 0 && acn > 1 &&
565       DIST_DPTR_BLOCK_G(asd) == DIST_DPTR_TSTRIDE_G(asd) &&
566       (!z->mask_present || z->mask_stored_alike)) {
567 
568     /* cyclic(1) optimization */
569 
570     abl = acl - DIST_DPTR_TOFFSET_G(asd);
571     if (DIST_DPTR_TSTRIDE_G(asd) != 1)
572       abl /= DIST_DPTR_TSTRIDE_G(asd);
573     ao = aof +
574          (F90_DPTR_SSTRIDE_G(asd) * abl + F90_DPTR_SOFFSET_G(asd) - aclof) *
575              F90_DPTR_LSTRIDE_G(asd);
576 
577     i = abl - F90_DPTR_LBOUND_G(asd); /* section ordinal index */
578     li = i + 1;                       /* linearized location */
579     ls = DIST_DPTR_PSHAPE_G(asd);      /* location stride */
580 
581     if (z->mask_present)
582       mp = (__LOG_T *)((char *)(z->mb) + (ao << z->lk_shift));
583 
584     if (z->xb != NULL)
585       lp = (__INT_T *)((char *)z->xb + rof * sizeof(__INT8_T));
586     else
587       lp = NULL;
588 
589     ap = z->ab + ao * F90_LEN_G(as);
590     if (z->l_fn_b) {
591       z->l_fn_b(rp, acn, ap, ahop, mp, mhop, lp, li, ls, z->len, z->back);
592     } else {
593       z->l_fn(rp, acn, ap, ahop, mp, mhop, lp, li, ls, z->len);
594     }
595 
596     return;
597   }
598 
599   while (acn > 0) {
600     abn = I8(__fort_block_bounds)(as, adim, acl, &abl, &abu);
601     ao = aof +
602          (F90_DPTR_SSTRIDE_G(asd) * abl + F90_DPTR_SOFFSET_G(asd) - aclof) *
603              F90_DPTR_LSTRIDE_G(asd);
604 
605     i = abl - F90_DPTR_LBOUND_G(asd); /* ordinal index (zero-based) */
606     li = i + 1;                       /* linearized location */
607     z->mi[adim - 1] = mlow + i;       /* mask array index */
608 
609     if (rdim > 0) {
610 
611       /* this array dimension is not being reduced.  there is a
612          result element corresponding to every element of this
613          array dimension. */
614 
615       rbn = I8(__fort_block_bounds)(rs, rdim, rcl, &rbl, &rbu);
616 #if defined(DEBUG)
617       if (rbn != abn)
618         __fort_red_abort("result misalignment");
619 #endif
620       ro = rof +
621            (F90_DPTR_SSTRIDE_G(rsd) * rbl + F90_DPTR_SOFFSET_G(rsd) - rclof) *
622                F90_DPTR_LSTRIDE_G(rsd);
623 
624       while (abn > 0) {
625         I8(kred_array_loop)(z, ro, ao, rdim - 1, adim - 1);
626         ro += rhop;
627         ao += ahop;
628         z->mi[adim - 1]++;
629         --abn;
630       }
631       rcl += DIST_DPTR_CS_G(rsd);
632       rclof += DIST_DPTR_CLOS_G(rsd);
633     } else {
634 
635       /* this is the array dimension being reduced */
636 
637       if (z->mask_present) {
638         if (z->mask_stored_alike)
639           mp = (__LOG_T *)((char *)(z->mb) + (ao << z->lk_shift));
640         else {
641           mp = I8(__fort_local_address)(z->mb, ms, z->mi);
642           if (mp == NULL)
643             __fort_red_abort("mask misalignment");
644         }
645       }
646 
647       if (z->xb != NULL)
648         lp = (__INT_T *)((char *)z->xb + rof * sizeof(__INT8_T));
649       else
650         lp = NULL;
651 
652       ap = z->ab + ao * F90_LEN_G(as);
653       if (z->l_fn_b) {
654         z->l_fn_b(rp, abn, ap, ahop, mp, mhop, lp, li, 1, z->len, z->back);
655       } else {
656         z->l_fn(rp, abn, ap, ahop, mp, mhop, lp, li, 1, z->len);
657       }
658     }
659     acl += DIST_DPTR_CS_G(asd);
660     aclof += DIST_DPTR_CLOS_G(asd);
661     --acn;
662   }
663 #if defined(DEBUG)
664   if (__fort_test & DEBUG_REDU && rdim <= 0) {
    printf("%d kred_array_loop rp=%x *rp=", GET_DIST_LCPU, rp);
666     __fort_print_scalar(rp, z->kind);
667     printf("\n");
668   }
669 #endif
670 }
671 
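/* Driver for reductions with a DIM argument.  Outline of the code below: fetch
 * DIM and validate the array descriptor; if the result section is mapped or
 * nonsequential, copy it in through qopy_in so the local loops can address it
 * directly.  For MINLOC/MAXLOC/FINDLOC the user result buffer is kept for the
 * integer locations (xb) and a temporary is gmalloc'd for the running extreme
 * values.  Every result element is seeded with the identity value z->zb, the
 * loop nest runs unless this processor is off the template, partial results
 * are reduced and replicated across processors, and any copied-in result is
 * finally copied back out. */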
void I8(__fort_red_array)(red_parm *z, char *rb0, char *ab, char *mb, char *db,
673                          F90_Desc *rs0, F90_Desc *as, F90_Desc *ms,
674                          F90_Desc *ds, red_enum op)
675 {
676   DECL_HDR_PTRS(rs);
677   DECL_HDR_VARS(rs1);
678   char *rb = 0, *xb, *zb;
679   __INT_T flags, kind, len, rank, _1 = 1;
680   int i, n, rc, rl;
681   __INT_T ao, ro;
682 
683   z->dim = I8(__fort_fetch_int)(db, ds);
684 
685 #if defined(DEBUG)
686   if (__fort_test & DEBUG_REDU) {
687     printf("%d r", GET_DIST_LCPU);
688     I8(__fort_show_section)(rs0);
689     printf("@%x = %s(a", rb0, __fort_red_what);
690     I8(__fort_show_section)(as);
691     printf("@%x, dim=%d, mask", ab, z->dim);
692     I8(__fort_show_section)(ms);
693     printf("@%x)\n", mb);
694   }
695 #endif
696 
697   if (as == NULL || F90_TAG_G(as) != __DESC)
698     __fort_red_abort("invalid array argument descriptor");
699   if (z->dim < 1 || z->dim > F90_RANK_G(as))
700     __fort_red_abort("invalid DIM argument");
701   rank = F90_RANK_G(as) - 1;
702 
703   I8(__fort_cycle_bounds)(as);
704 
705   rs = rs0;
706   rb = rb0;
707   if (F90_TAG_G(rs0) == __DESC) {
708     if ((op == __MINLOC || op == __MAXLOC || op == __FINDLOC) &&
709         z->kind != __STR) {
710       kind = __INT;
711       len = sizeof(__INT_T);
712     } else {
713       kind = z->kind;
714       len = z->len;
715     }
716     if (DIST_MAPPED_G(rs0) ||
717         I8(is_nonsequential_section)(rs0, F90_RANK_G(rs0))) {
718       rs = rs1;
719       flags = (__ASSUMED_SHAPE + __ASSUMED_OVERLAPS + __INTENT_OUT + __INHERIT +
720                __TRANSCRIPTIVE_DIST_TARGET + __TRANSCRIPTIVE_DIST_FORMAT);
721       ENTFTN(QOPY_IN, qopy_in)
722       (&rb, (__POINT_T *)ABSENT, rb0, rs, rb0, rs0, &rank, &kind, &len, &flags,
723        &_1, &_1, &_1, &_1, &_1, &_1, &_1); /* lb */
724     }
725     I8(__fort_cycle_bounds)(rs);
726     ro = F90_LBASE_G(rs) - 1;
727     rc = F90_LSIZE_G(rs);
728     rl = F90_LEN_G(rs);
729   } else {
730 #if defined(DEBUG)
731     if (rank != 0)
732       __fort_red_abort("result/array rank mismatch");
733 #endif
734     ro = 0;
735     rc = 1;
736     rl = GET_DIST_SIZE_OF(F90_TAG_G(rs0));
737   }
738 
739   if (op == __MINLOC || op == __MAXLOC || op == __FINDLOC) {
740     if (rc > 0)
741       memset(rb, '\0', rc * rl);
742     xb = rb;
743     rb = (char *)__fort_gmalloc(rc * F90_LEN_G(as));
744   } else
745     xb = NULL;
746 
747   z->rb = rb;
748   z->rs = rs;
749   z->ab = ab;
750   z->as = as;
751   z->mb = (__LOG_T *)mb;
752   z->ms = ms;
753   z->xb = (__INT_T *)xb;
754 
755   zb = z->zb;
756 #if defined(DEBUG)
757   if (zb == NULL)
758     __fort_red_abort("missing null constant (unimplemented)");
759 #endif
760   switch (z->kind) {
761   case __LOG1:
762     for (i = 0; i < rc; ++i)
763       ((__LOG1_T *)rb)[i] = *(__LOG1_T *)zb;
764     break;
765   case __LOG2:
766     for (i = 0; i < rc; ++i)
767       ((__LOG2_T *)rb)[i] = *(__LOG2_T *)zb;
768     break;
769   case __LOG4:
770     for (i = 0; i < rc; ++i)
771       ((__LOG4_T *)rb)[i] = *(__LOG4_T *)zb;
772     break;
773   case __LOG8:
774     for (i = 0; i < rc; ++i)
775       ((__LOG8_T *)rb)[i] = *(__LOG8_T *)zb;
776     break;
777   case __INT1:
778     for (i = 0; i < rc; ++i)
779       ((__INT1_T *)rb)[i] = *(__INT1_T *)zb;
780     break;
781   case __INT2:
782     for (i = 0; i < rc; ++i)
783       ((__INT2_T *)rb)[i] = *(__INT2_T *)zb;
784     break;
785   case __INT4:
786     for (i = 0; i < rc; ++i)
787       ((__INT4_T *)rb)[i] = *(__INT4_T *)zb;
788     break;
789   case __INT8:
790     for (i = 0; i < rc; ++i)
791       ((__INT8_T *)rb)[i] = *(__INT8_T *)zb;
792     break;
793   case __REAL4:
794     for (i = 0; i < rc; ++i)
795       ((__REAL4_T *)rb)[i] = *(__REAL4_T *)zb;
796     break;
797   case __REAL8:
798     for (i = 0; i < rc; ++i)
799       ((__REAL8_T *)rb)[i] = *(__REAL8_T *)zb;
800     break;
801   case __REAL16:
802     for (i = 0; i < rc; ++i)
803       ((__REAL16_T *)rb)[i] = *(__REAL16_T *)zb;
804     break;
805   case __CPLX8:
806     for (i = 0; i < rc; ++i)
807       ((__CPLX8_T *)rb)[i] = *(__CPLX8_T *)zb;
808     break;
809   case __CPLX16:
810     for (i = 0; i < rc; ++i)
811       ((__CPLX16_T *)rb)[i] = *(__CPLX16_T *)zb;
812     break;
813   case __CPLX32:
814     for (i = 0; i < rc; ++i)
815       ((__CPLX32_T *)rb)[i] = *(__CPLX32_T *)zb;
816     break;
817   case __STR:
818     if (op == __FINDLOC) {
819       for (i = 0; i < rc; ++i)
820         memcpy(&rb[i * z->len], (char *)zb, z->len);
821     } else {
822       for (i = 0; i < rc; ++i)
823         memset(&rb[i * z->len], *(__STR_T *)zb, z->len * sizeof(__STR_T));
824     }
825     break;
826   default:
827     __fort_red_abort("unsupported result type");
828   }
829 
830   z->mask_present = (F90_TAG_G(ms) == __DESC && F90_RANK_G(ms) > 0);
831   if (z->mask_present) {
832     z->mask_stored_alike = I8(__fort_stored_alike)(as, ms);
833     if (z->mask_stored_alike)
834       z->mb = (__LOG_T *)((char *)z->mb + (F90_LBASE_G(ms) << z->lk_shift));
835     for (i = F90_RANK_G(ms); --i >= 0;)
836       z->mi[i] = F90_DIM_LBOUND_G(ms, i);
837   } else if (!ISPRESENT(mb) || I8(__fort_fetch_log)(mb, ms))
838     z->mb = GET_DIST_TRUE_LOG_ADDR;
839   else
840     z->mb = (__LOG_T *)GET_DIST_ZED; /* scalar mask == .false. */
841 
842   if (~F90_FLAGS_G(as) & __OFF_TEMPLATE) {
843     z->ab += F90_LBASE_G(as) * F90_LEN_G(as);
844     ao = -1;
845     I8(red_array_loop)(z, ro, ao, rank, F90_RANK_G(as));
846   }
847 
848   I8(__fort_reduce_section)(rb, z->kind, z->len,
849 			     xb, __INT, sizeof(__INT_T), rc,
850 			     z->g_fn, z->dim, as);
851 
852   I8(__fort_replicate_result)(rb, z->kind, z->len,
853 			       xb, __INT, sizeof(__INT_T), rc, as);
854 
855   if (xb != NULL) {
856     __fort_gfree(rb);
857     rb = xb;
858   }
859   if (rs == rs1)
860     I8(__fort_copy_out)(rb0, rb, rs0, rs, __INTENT_OUT);
861 }
862 
/** \brief Same as __fort_red_array, but allows any logical kind for the mask */
void I8(__fort_red_arraylk)(red_parm *z, char *rb0, char *ab, char *mb, char *db,
865                            F90_Desc *rs0, F90_Desc *as, F90_Desc *ms,
866                            F90_Desc *ds, red_enum op)
867 {
868   DECL_HDR_PTRS(rs);
869   DECL_HDR_VARS(rs1);
870   char *rb = 0, *xb, *zb;
871   __INT_T flags, kind, len, rank, _1 = 1;
872   int i, n, rc, rl;
873   __INT_T ao, ro;
874 
875   z->dim = I8(__fort_fetch_int)(db, ds);
876 
877 #if defined(DEBUG)
878   if (__fort_test & DEBUG_REDU) {
879     printf("%d r", GET_DIST_LCPU);
880     I8(__fort_show_section)(rs0);
881     printf("@%x = %s(a", rb0, __fort_red_what);
882     I8(__fort_show_section)(as);
883     printf("@%x, dim=%d, mask", ab, z->dim);
884     I8(__fort_show_section)(ms);
885     printf("@%x)\n", mb);
886   }
887 #endif
888 
889   if (as == NULL || F90_TAG_G(as) != __DESC)
890     __fort_red_abort("invalid array argument descriptor");
891   if (z->dim < 1 || z->dim > F90_RANK_G(as))
892     __fort_red_abort("invalid DIM argument");
893   rank = F90_RANK_G(as) - 1;
894 
895   I8(__fort_cycle_bounds)(as);
896 
897   rs = rs0;
898   rb = rb0;
899   if (F90_TAG_G(rs0) == __DESC) {
900     if ((op == __MINLOC || op == __MAXLOC || op == __FINDLOC) &&
901         z->kind != __STR) {
902       kind = __INT;
903       len = sizeof(__INT_T);
904     } else {
905       kind = z->kind;
906       len = z->len;
907     }
908     if (DIST_MAPPED_G(rs0) ||
909         I8(is_nonsequential_section)(rs0, F90_RANK_G(rs0))) {
910       rs = rs1;
911       flags = (__ASSUMED_SHAPE + __ASSUMED_OVERLAPS + __INTENT_OUT + __INHERIT +
912                __TRANSCRIPTIVE_DIST_TARGET + __TRANSCRIPTIVE_DIST_FORMAT);
913       ENTFTN(QOPY_IN, qopy_in)
914       (&rb, (__POINT_T *)ABSENT, rb0, rs, rb0, rs0, &rank, &kind, &len, &flags,
915        &_1, &_1, &_1, &_1, &_1, &_1, &_1); /* lb */
916     }
917     I8(__fort_cycle_bounds)(rs);
918     ro = F90_LBASE_G(rs) - 1;
919     rc = F90_LSIZE_G(rs);
920     rl = F90_LEN_G(rs);
921   } else {
922 #if defined(DEBUG)
923     if (rank != 0)
924       __fort_red_abort("result/array rank mismatch");
925 #endif
926     rank = 0;
927     ro = 0;
928     rc = 1;
929     rl = GET_DIST_SIZE_OF(F90_TAG_G(rs0));
930   }
931 
932   if (op == __MINLOC || op == __MAXLOC || op == __FINDLOC) {
933     if (rc > 0)
934       memset(rb, '\0', rc * rl);
935     xb = rb;
936     rb = (char *)__fort_gmalloc(rc * F90_LEN_G(as));
937   } else
938     xb = NULL;
939 
940   z->rb = rb;
941   z->rs = rs;
942   z->ab = ab;
943   z->as = as;
944   z->mb = (__LOG_T *)mb;
945   z->ms = ms;
946   z->xb = (__INT_T *)xb;
947 
948   zb = z->zb;
949 #if defined(DEBUG)
950   if (zb == NULL)
951     __fort_red_abort("missing null constant (unimplemented)");
952 #endif
953   switch (z->kind) {
954   case __LOG1:
955     for (i = 0; i < rc; ++i)
956       ((__LOG1_T *)rb)[i] = *(__LOG1_T *)zb;
957     break;
958   case __LOG2:
959     for (i = 0; i < rc; ++i)
960       ((__LOG2_T *)rb)[i] = *(__LOG2_T *)zb;
961     break;
962   case __LOG4:
963     for (i = 0; i < rc; ++i)
964       ((__LOG4_T *)rb)[i] = *(__LOG4_T *)zb;
965     break;
966   case __LOG8:
967     for (i = 0; i < rc; ++i)
968       ((__LOG8_T *)rb)[i] = *(__LOG8_T *)zb;
969     break;
970   case __INT1:
971     for (i = 0; i < rc; ++i)
972       ((__INT1_T *)rb)[i] = *(__INT1_T *)zb;
973     break;
974   case __INT2:
975     for (i = 0; i < rc; ++i)
976       ((__INT2_T *)rb)[i] = *(__INT2_T *)zb;
977     break;
978   case __INT4:
979     for (i = 0; i < rc; ++i)
980       ((__INT4_T *)rb)[i] = *(__INT4_T *)zb;
981     break;
982   case __INT8:
983     for (i = 0; i < rc; ++i)
984       ((__INT8_T *)rb)[i] = *(__INT8_T *)zb;
985     break;
986   case __REAL4:
987     for (i = 0; i < rc; ++i)
988       ((__REAL4_T *)rb)[i] = *(__REAL4_T *)zb;
989     break;
990   case __REAL8:
991     for (i = 0; i < rc; ++i)
992       ((__REAL8_T *)rb)[i] = *(__REAL8_T *)zb;
993     break;
994   case __REAL16:
995     for (i = 0; i < rc; ++i)
996       ((__REAL16_T *)rb)[i] = *(__REAL16_T *)zb;
997     break;
998   case __CPLX8:
999     for (i = 0; i < rc; ++i)
1000       ((__CPLX8_T *)rb)[i] = *(__CPLX8_T *)zb;
1001     break;
1002   case __CPLX16:
1003     for (i = 0; i < rc; ++i)
1004       ((__CPLX16_T *)rb)[i] = *(__CPLX16_T *)zb;
1005     break;
1006   case __CPLX32:
1007     for (i = 0; i < rc; ++i)
1008       ((__CPLX32_T *)rb)[i] = *(__CPLX32_T *)zb;
1009     break;
1010   case __STR:
1011     if (op == __FINDLOC) {
1012       for (i = 0; i < rc; ++i)
1013         memcpy(&rb[i * z->len], (char *)zb, z->len);
1014     } else {
1015       for (i = 0; i < rc; ++i)
1016         memset(&rb[i * z->len], *(__STR_T *)zb, z->len * sizeof(__STR_T));
1017     }
1018     break;
1019   default:
1020     __fort_red_abort("unsupported result type");
1021   }
1022 
1023   if (z->mask_present) {
1024     z->mask_stored_alike = I8(__fort_stored_alike)(as, ms);
1025     if (z->mask_stored_alike)
1026       z->mb = (__LOG_T *)((char *)z->mb + (F90_LBASE_G(ms) << z->lk_shift));
1027     for (i = F90_RANK_G(ms); --i >= 0;)
1028       z->mi[i] = F90_DIM_LBOUND_G(ms, i);
1029   } else if (!ISPRESENT(mb) || I8(__fort_fetch_log)(mb, ms))
1030     z->mb = GET_DIST_TRUE_LOG_ADDR;
1031   else
1032     z->mb = (__LOG_T *)GET_DIST_ZED; /* scalar mask == .false. */
1033 
1034   if (~F90_FLAGS_G(as) & __OFF_TEMPLATE) {
1035     z->ab += F90_LBASE_G(as) * F90_LEN_G(as);
1036     ao = -1;
1037     I8(red_array_loop)(z, ro, ao, rank, F90_RANK_G(as));
1038   }
1039 
1040   I8(__fort_reduce_section)(rb, z->kind, z->len,
1041 			     xb, __INT, sizeof(__INT_T), rc,
1042 			     z->g_fn, z->dim, as);
1043 
1044   I8(__fort_replicate_result)(rb, z->kind, z->len,
1045 			       xb, __INT, sizeof(__INT_T), rc, as);
1046 
1047   if (xb != NULL) {
1048     __fort_gfree(rb);
1049     rb = xb;
1050   }
1051   if (rs == rs1)
1052     I8(__fort_copy_out)(rb0, rb, rs0, rs, __INTENT_OUT);
1053 }
1054 
void I8(__fort_kred_arraylk)(red_parm *z, char *rb0, char *ab, char *mb,
1056                             char *db, F90_Desc *rs0, F90_Desc *as, F90_Desc *ms,
1057                             F90_Desc *ds, red_enum op)
1058 {
1059   DECL_HDR_PTRS(rs);
1060   DECL_HDR_VARS(rs1);
1061   char *rb = 0, *xb, *zb;
1062   __INT_T flags, kind, len, rank, _1 = 1;
1063   int i, n, rc, rl;
1064   __INT_T ao, ro;
1065 
1066   z->dim = I8(__fort_fetch_int)(db, ds);
1067 
1068 #if defined(DEBUG)
1069   if (__fort_test & DEBUG_REDU) {
1070     printf("%d r", GET_DIST_LCPU);
1071     I8(__fort_show_section)(rs0);
1072     printf("@%x = %s(a", rb0, __fort_red_what);
1073     I8(__fort_show_section)(as);
1074     printf("@%x, dim=%d, mask", ab, z->dim);
1075     I8(__fort_show_section)(ms);
1076     printf("@%x)\n", mb);
1077   }
1078 #endif
1079 
1080   if (as == NULL || F90_TAG_G(as) != __DESC)
1081     __fort_red_abort("invalid array argument descriptor");
1082   if (z->dim < 1 || z->dim > F90_RANK_G(as))
1083     __fort_red_abort("invalid DIM argument");
1084   rank = F90_RANK_G(as) - 1;
1085 
1086   I8(__fort_cycle_bounds)(as);
1087 
1088   rs = rs0;
1089   rb = rb0;
1090   if (F90_TAG_G(rs0) == __DESC) {
1091     if ((op == __MINLOC || op == __MAXLOC || op == __FINDLOC) &&
1092         z->kind != __STR) {
1093       kind = __INT8;
1094       len = sizeof(__INT8_T);
1095     } else {
1096       kind = z->kind;
1097       len = z->len;
1098     }
1099     if (DIST_MAPPED_G(rs0) ||
1100         I8(is_nonsequential_section)(rs0, F90_RANK_G(rs0))) {
1101       rs = rs1;
1102       flags = (__ASSUMED_SHAPE + __ASSUMED_OVERLAPS + __INTENT_OUT + __INHERIT +
1103                __TRANSCRIPTIVE_DIST_TARGET + __TRANSCRIPTIVE_DIST_FORMAT);
1104       ENTFTN(QOPY_IN, qopy_in)
1105       (&rb, (__POINT_T *)ABSENT, rb0, rs, rb0, rs0, &rank, &kind, &len, &flags,
1106        &_1, &_1, &_1, &_1, &_1, &_1, &_1); /* lb */
1107     }
1108     I8(__fort_cycle_bounds)(rs);
1109     ro = F90_LBASE_G(rs) - 1;
1110     rc = F90_LSIZE_G(rs);
1111     rl = F90_LEN_G(rs);
1112   } else {
1113 #if defined(DEBUG)
1114     if (rank != 0)
1115       __fort_red_abort("result/array rank mismatch");
1116 #endif
1117     rank = 0;
1118     ro = 0;
1119     rc = 1;
1120     rl = GET_DIST_SIZE_OF(F90_TAG_G(rs0)); /* same as sizeof(__INT8_T) */
1121   }
1122 
1123   if (op == __MINLOC || op == __MAXLOC || op == __FINDLOC) {
1124     if (rc > 0)
1125       memset(rb, '\0', rc * rl);
1126     xb = rb;
1127     rb = (char *)__fort_gmalloc(rc * F90_LEN_G(as));
1128   } else
1129     xb = NULL;
1130 
1131   z->rb = rb;
1132   z->rs = rs;
1133   z->ab = ab;
1134   z->as = as;
1135   z->mb = (__LOG_T *)mb;
1136   z->ms = ms;
1137   z->xb = (__INT_T *)xb;
1138 
1139   zb = z->zb;
1140 #if defined(DEBUG)
1141   if (zb == NULL)
1142     __fort_red_abort("missing null constant (unimplemented)");
1143 #endif
1144   switch (z->kind) {
1145   case __LOG1:
1146     for (i = 0; i < rc; ++i)
1147       ((__LOG1_T *)rb)[i] = *(__LOG1_T *)zb;
1148     break;
1149   case __LOG2:
1150     for (i = 0; i < rc; ++i)
1151       ((__LOG2_T *)rb)[i] = *(__LOG2_T *)zb;
1152     break;
1153   case __LOG4:
1154     for (i = 0; i < rc; ++i)
1155       ((__LOG4_T *)rb)[i] = *(__LOG4_T *)zb;
1156     break;
1157   case __LOG8:
1158     for (i = 0; i < rc; ++i)
1159       ((__LOG8_T *)rb)[i] = *(__LOG8_T *)zb;
1160     break;
1161   case __INT1:
1162     for (i = 0; i < rc; ++i)
1163       ((__INT1_T *)rb)[i] = *(__INT1_T *)zb;
1164     break;
1165   case __INT2:
1166     for (i = 0; i < rc; ++i)
1167       ((__INT2_T *)rb)[i] = *(__INT2_T *)zb;
1168     break;
1169   case __INT4:
1170     for (i = 0; i < rc; ++i)
1171       ((__INT4_T *)rb)[i] = *(__INT4_T *)zb;
1172     break;
1173   case __INT8:
1174     for (i = 0; i < rc; ++i)
1175       ((__INT8_T *)rb)[i] = *(__INT8_T *)zb;
1176     break;
1177   case __REAL4:
1178     for (i = 0; i < rc; ++i)
1179       ((__REAL4_T *)rb)[i] = *(__REAL4_T *)zb;
1180     break;
1181   case __REAL8:
1182     for (i = 0; i < rc; ++i)
1183       ((__REAL8_T *)rb)[i] = *(__REAL8_T *)zb;
1184     break;
1185   case __REAL16:
1186     for (i = 0; i < rc; ++i)
1187       ((__REAL16_T *)rb)[i] = *(__REAL16_T *)zb;
1188     break;
1189   case __CPLX8:
1190     for (i = 0; i < rc; ++i)
1191       ((__CPLX8_T *)rb)[i] = *(__CPLX8_T *)zb;
1192     break;
1193   case __CPLX16:
1194     for (i = 0; i < rc; ++i)
1195       ((__CPLX16_T *)rb)[i] = *(__CPLX16_T *)zb;
1196     break;
1197   case __CPLX32:
1198     for (i = 0; i < rc; ++i)
1199       ((__CPLX32_T *)rb)[i] = *(__CPLX32_T *)zb;
1200     break;
1201   case __STR:
1202     if (op == __FINDLOC) {
1203       for (i = 0; i < rc; ++i)
1204         memcpy(&rb[i * z->len], (char *)zb, z->len);
1205     } else {
1206       for (i = 0; i < rc; ++i)
1207         memset(&rb[i * z->len], *(__STR_T *)zb, z->len * sizeof(__STR_T));
1208     }
1209     break;
1210   default:
1211     __fort_red_abort("unsupported result type");
1212   }
1213 
1214   if (z->mask_present) {
1215     z->mask_stored_alike = I8(__fort_stored_alike)(as, ms);
1216     if (z->mask_stored_alike)
1217       z->mb = (__LOG_T *)((char *)z->mb + (F90_LBASE_G(ms) << z->lk_shift));
1218     for (i = F90_RANK_G(ms); --i >= 0;)
1219       z->mi[i] = F90_DIM_LBOUND_G(ms, i);
1220   } else if (!ISPRESENT(mb) || I8(__fort_fetch_log)(mb, ms))
1221     z->mb = GET_DIST_TRUE_LOG_ADDR;
1222   else
1223     z->mb = (__LOG_T *)GET_DIST_ZED; /* scalar mask == .false. */
1224 
1225   if (~F90_FLAGS_G(as) & __OFF_TEMPLATE) {
1226     z->ab += F90_LBASE_G(as) * F90_LEN_G(as);
1227     ao = -1;
1228     I8(kred_array_loop)(z, ro, ao, rank, F90_RANK_G(as));
1229   }
1230 
1231   I8(__fort_reduce_section)(rb, z->kind, z->len,
1232 			     xb, __INT, sizeof(__INT_T), rc,
1233 			     z->g_fn, z->dim, as);
1234 
1235   I8(__fort_replicate_result)(rb, z->kind, z->len,
1236 			       xb, __INT, sizeof(__INT_T), rc, as);
1237 
1238   if (xb != NULL) {
1239     __fort_gfree(rb);
1240     rb = xb;
1241   }
1242   if (rs == rs1)
1243     I8(__fort_copy_out)(rb0, rb, rs0, rs, __INTENT_OUT);
1244 }
1245 
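/* Illustrative example (not taken from the original comments): for a rank-3
 * array A, a call corresponding to SUM(A, DIM=2) builds a rank-2 result
 * descriptor whose dimension 1 is aligned with A's dimension 1 and whose
 * dimension 2 is aligned with A's dimension 3; the reduced dimension is
 * simply skipped by the alignment loop below. */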
1246 /** \brief set up result descriptor for reduction intrinsic -- used when the
1247  * dim arg is variable.  result dimensions are aligned with the
1248  * corresponding source dimensions and the result array becomes
1249  * replicated over the reduced dimension.  lbounds are set to 1 and
1250  * overlap allowances are set to 0.
1251  */
void ENTFTN(REDUCE_DESCRIPTOR,
1253             reduce_descriptor)(F90_Desc *rd,   /* result descriptor */
1254                                __INT_T *kindb, /* result kind */
1255                                __INT_T *lenb,  /* result data item length */
1256                                F90_Desc *ad,   /* array descriptor */
1257                                __INT_T *dimb)  /* dimension */
1258 {
1259   DECL_DIM_PTRS(rdd);
1260   DECL_DIM_PTRS(add);
1261   DECL_HDR_PTRS(td);
1262   dtype kind;
1263   __INT_T dim, extent, len, m, offset, rx, ax, tx;
1264 
1265 #if defined(DEBUG)
1266   if (F90_TAG_G(ad) != __DESC)
1267     __fort_abort("reduction intrinsic: invalid array arg");
1268 #endif
1269 
1270   kind = (dtype)*kindb;
1271   len = *lenb;
1272   dim = *dimb;
1273   if (dim < 1 || dim > F90_RANK_G(ad))
1274     __fort_abort("reduction intrinsic: invalid dim");
1275 
1276   td = DIST_ALIGN_TARGET_G(ad);
1277   __DIST_INIT_DESCRIPTOR(rd, F90_RANK_G(ad) - 1, kind, len, F90_FLAGS_G(ad), td);
1278   for (rx = ax = 1; ax <= F90_RANK_G(ad); ++ax) {
1279     if (ax == dim)
1280       continue;
1281     SET_DIM_PTRS(add, ad, ax - 1);
1282     extent = F90_DPTR_EXTENT_G(add);
1283     offset = DIST_DPTR_TSTRIDE_G(add) * (F90_DPTR_LBOUND_G(add) - 1) +
1284              DIST_DPTR_TOFFSET_G(add);
1285 
1286     /*
1287      * added gen_block argument to __fort_set_alignment ...
1288      * Should last arg be &DIST_DIM_GEN_BLOCK_G(td,(DIST_DPTR_TAXIS_G(add))-1) or
1289      * &DIST_DPTR_GEN_BLOCK_G(add) ???
1290      */
1291     I8(__fort_set_alignment)(rd, rx, 1, extent, DIST_DPTR_TAXIS_G(add),
1292 			        DIST_DPTR_TSTRIDE_G(add), offset,
1293 			        &DIST_DIM_GEN_BLOCK_G(td,(DIST_DPTR_TAXIS_G(add))-1));
1294     __DIST_SET_ALLOCATION(rd, rx, 0, 0);
1295     ++rx;
1296   }
1297   m = DIST_SINGLE_G(ad);
1298   for (tx = 1; m > 0; ++tx, m >>= 1) {
1299     if (m & 1)
1300       I8(__fort_set_single)(rd, td, tx, DIST_INFO_G(ad, tx - 1), __SINGLE);
1301   }
1302   I8(__fort_finish_descriptor)(rd);
1303 }
1304 
void *I8(__fort_create_conforming_mask_array)(char *what, char *ab, char *mb,
1306                                              F90_Desc *as, F90_Desc *ms,
1307                                              F90_Desc *new_ms)
1308 {
1309 
1310   /* Create a conforming mask array. Returns a pointer to the
1311    * array and assigns a new descriptor in ms. Caller responsible
1312    * for the __fort_gfree() ...
1313    */
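  /* Outline: only a scalar logical mask is accepted here; a descriptor
   * conforming to the array descriptor `as' is built via ENTFTN(INSTANCE)
   * with no overlap allowances, space for it is allocated with
   * __fort_gmalloc, and the scalar mask value is then replicated into every
   * local element of the new mask array. */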
1314 
1315   __INT_T mask_kind;
1316   __INT_T mask_len;
1317   __INT_T i, _255 = 255;
1318   void *mask_array;
1319 
1320   if (!ISSCALAR(ms)) {
1321     __fort_abort("__fort_create_conforming_mask_array: bad mask descriptor");
1322   }
1323 
1324   mask_kind = F90_TAG_G(ms);
1325 
1326   switch (mask_kind) {
1327 
1328   case __LOG1:
1329     mask_len = sizeof(__LOG1_T);
1330     break;
1331 
1332   case __LOG2:
1333 
1334     mask_len = sizeof(__LOG2_T);
1335     break;
1336 
1337   case __LOG4:
1338     mask_len = sizeof(__LOG4_T);
1339     break;
1340 
1341   case __LOG8:
1342     mask_len = sizeof(__LOG8_T);
1343     break;
1344 
1345   default:
1346     printf("%d %s: bad type for mask loc=1\n", GET_DIST_LCPU, what);
1347     __fort_abort((char *)0);
1348   }
1349 
1350   ENTFTN(INSTANCE, instance)
1351   (new_ms, as, &mask_kind, &mask_len, &_255); /*no overlaps*/
1352   mask_array = (void *)__fort_gmalloc(F90_GSIZE_G(new_ms) * mask_len);
1353 
1354   switch (mask_kind) {
1355 
1356   case __LOG1:
1357     for (i = 0; i < F90_LSIZE_G(new_ms); ++i)
1358       *((__LOG1_T *)mask_array + i) = *((__LOG1_T *)mb);
1359     break;
1360 
1361   case __LOG2:
1362     for (i = 0; i < F90_LSIZE_G(new_ms); ++i)
1363       *((__LOG2_T *)mask_array + i) = *((__LOG2_T *)mb);
1364     break;
1365 
1366   case __LOG4:
1367     for (i = 0; i < F90_LSIZE_G(new_ms); ++i)
1368       *((__LOG4_T *)mask_array + i) = *((__LOG4_T *)mb);
1369     break;
1370 
1371   case __LOG8:
1372     for (i = 0; i < F90_LSIZE_G(new_ms); ++i)
1373       *((__LOG8_T *)mask_array + i) = *((__LOG8_T *)mb);
1374     break;
1375 
1376   default:
1377     printf("%d %s: bad type for mask loc=2\n", GET_DIST_LCPU, what);
1378     __fort_abort((char *)0);
1379   }
1380 
1381   return mask_array;
1382 }
1383