1 /*
2 * Copyright (c) 1996-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18 /* clang-format off */
19
20 /** \file
21 * Intrinsic reduction functions
22 */
23
24 #include "stdioInterf.h"
25 #include "fioMacros.h"
26 #include "red.h"
27
/* table of scalar copy routines, indexed by runtime type code */
extern void (*__fort_scalar_copy[__NTYPES])(void *rp, void *sp, int len);

/* Fortran pointer copy-in entry, used below to localize a mapped or
   nonsequential result section before reducing into it */
void ENTFTN(QOPY_IN, qopy_in)(char **dptr, __POINT_T *doff, char *dbase,
                              F90_Desc *dd, char *ab, F90_Desc *ad,
                              __INT_T *p_rank, __INT_T *p_kind, __INT_T *p_len,
                              __INT_T *p_flags, ...);
34
35 #if !defined(DESC_I8)
36 void
__fort_red_unimplemented()37 __fort_red_unimplemented()
38 {
39 char str[80];
40
41 sprintf(str, "%s: unimplemented for data type", __fort_red_what);
42 __fort_abort(str);
43 }
44
45 void
__fort_red_abort(char * msg)46 __fort_red_abort(char *msg)
47 {
48 char str[80];
49
50 sprintf(str, "%s: %s", __fort_red_what, msg);
51 __fort_abort(str);
52 }
53 #endif
54
/** \brief reduction, dim argument absent
 *
 * Recursively walks dimension \p dim of the array section (z->as); at
 * dim == 1 the local reduction function is applied to each locally owned
 * block of elements, accumulating into the scalar result buffer z->rb.
 *
 * \param z   reduction parameter block (bases, descriptors, local fns)
 * \param aof array offset accumulated from the outer dimensions
 * \param ll  linearized location base from the outer dimensions (used for
 *            MINLOC/MAXLOC-style position results)
 * \param dim current 1-based array dimension being traversed
 */
static void I8(red_scalar_loop)(red_parm *z, __INT_T aof, __INT_T ll, int dim)
{
  DECL_HDR_PTRS(as);
  DECL_HDR_PTRS(ms);
  DECL_DIM_PTRS(asd);
  DECL_DIM_PTRS(msd);
  char *ap;
  __LOG_T *mp;

  __INT_T abl, abn, abu, acl, acn, aclof, ahop, ao, extent, i, li, ls, mhop,
      mlow;

  as = z->as;
  SET_DIM_PTRS(asd, as, dim - 1);
  acn = DIST_DPTR_CN_G(asd);     /* cycle count for this dimension */
  acl = DIST_DPTR_CL_G(asd);     /* cycle lower bound */
  aclof = DIST_DPTR_CLOF_G(asd);
  /* per-element hop through local storage along this dimension */
  ahop = F90_DPTR_SSTRIDE_G(asd) * F90_DPTR_LSTRIDE_G(asd);
  if (z->mask_present) {
    ms = z->ms;
    SET_DIM_PTRS(msd, ms, dim - 1);
    mlow = F90_DPTR_LBOUND_G(msd);
    mhop = F90_DPTR_SSTRIDE_G(msd) * F90_DPTR_LSTRIDE_G(msd);
  } else {
    /* no array mask: hop of 0 makes the local fn reuse the single logical
       value pointed to by z->mb for every element */
    mlow = mhop = 0;
    mp = z->mb;
  }

  /* scale the location base by this dimension's extent so locations from
     all dimensions linearize uniquely */
  extent = F90_DPTR_EXTENT_G(asd);
  if (extent < 0)
    extent = 0;
  ll *= extent;

  if (dim == 1 && acn > 1 && DIST_DPTR_BLOCK_G(asd) == DIST_DPTR_TSTRIDE_G(asd) &&
      (!z->mask_present || z->mask_stored_alike)) {

    /* cyclic(1) optimization */

    abl = acl - DIST_DPTR_TOFFSET_G(asd);
    if (DIST_DPTR_TSTRIDE_G(asd) != 1)
      abl /= DIST_DPTR_TSTRIDE_G(asd);
    ao = aof +
         (F90_DPTR_SSTRIDE_G(asd) * abl + F90_DPTR_SOFFSET_G(asd) - aclof) *
             F90_DPTR_LSTRIDE_G(asd);

    i = abl - F90_DPTR_LBOUND_G(asd); /* section ordinal index */
    li = ll + i + 1;                  /* linearized location */
    ls = DIST_DPTR_PSHAPE_G(asd);     /* location stride */

    if (z->mask_present)
      mp = (__LOG_T *)((char *)(z->mb) + (ao << z->lk_shift));

    ap = z->ab + ao * F90_LEN_G(as);
    /* l_fn_b is the BACK= variant of the local reduction function */
    if (z->l_fn_b) {
      z->l_fn_b(z->rb, acn, ap, ahop, mp, mhop, z->xb, li, ls, z->len, z->back);
    } else {
      z->l_fn(z->rb, acn, ap, ahop, mp, mhop, z->xb, li, ls, z->len);
    }

    return;
  }
  /* general case: iterate cycle by cycle over the local blocks */
  while (acn > 0) {
    abn = I8(__fort_block_bounds)(as, dim, acl, &abl, &abu);
    ao = aof +
         (F90_DPTR_SSTRIDE_G(asd) * abl + F90_DPTR_SOFFSET_G(asd) - aclof) *
             F90_DPTR_LSTRIDE_G(asd);

    i = abl - F90_DPTR_LBOUND_G(asd); /* ordinal index (zero-based) */
    li = ll + i + 1;                  /* linearized location */

    z->mi[dim - 1] = mlow + i; /* mask index */

    if (dim > 1) {
      /* recurse into the next inner dimension for each element */
      while (abn > 0) {
        I8(red_scalar_loop)(z, ao, li, dim - 1);
        li++;
        z->mi[dim - 1]++;
        ao += ahop;
        abn--;
      }
    } else {
      if (z->mask_present) {
        if (z->mask_stored_alike)
          mp = (__LOG_T *)((char *)(z->mb) + (ao << z->lk_shift));
        else {
          mp = I8(__fort_local_address)(z->mb, ms, z->mi);
          if (mp == NULL)
            __fort_red_abort("mask misalignment");
        }
      }
      ap = z->ab + ao * F90_LEN_G(as);
      if (z->l_fn_b) {
        z->l_fn_b(z->rb, abn, ap, ahop, mp, mhop, z->xb, li, 1, z->len,
                  z->back);
      } else {
        z->l_fn(z->rb, abn, ap, ahop, mp, mhop, z->xb, li, 1, z->len);
      }
    }
    acl += DIST_DPTR_CS_G(asd);
    aclof += DIST_DPTR_CLOS_G(asd);
    --acn;
  }
}
159
I8(__fort_red_scalar)160 void I8(__fort_red_scalar)(red_parm *z, char *rb, char *ab, char *mb,
161 F90_Desc *rs, F90_Desc *as, F90_Desc *ms, __INT_T *xb,
162 red_enum op)
163 {
164 DECL_DIM_PTRS(asd);
165 __INT_T ao, i, len, m, p, q;
166 int loop;
167
168 z->rb = rb;
169 z->rs = rs;
170 z->ab = ab;
171 z->as = as;
172 z->mb = (__LOG_T *)mb;
173 z->ms = ms;
174 z->xb = xb;
175 z->dim = 0;
176
177 I8(__fort_cycle_bounds)(as);
178
179 __fort_scalar_copy[z->kind](rb, z->zb, z->len);
180
181 if (xb != NULL) {
182 for (i = F90_RANK_G(as); --i >= 0;)
183 xb[i] = 0;
184 }
185
186 z->mask_present = (F90_TAG_G(ms) == __DESC && F90_RANK_G(ms) > 0);
187 if (z->mask_present) {
188 z->mask_stored_alike = I8(__fort_stored_alike)(as, ms);
189 if (z->mask_stored_alike)
190 z->mb += F90_LBASE_G(ms);
191 for (i = F90_RANK_G(ms); --i >= 0;)
192 z->mi[i] = F90_DIM_LBOUND_G(ms, i);
193 } else if (!ISPRESENT(mb) || I8(__fort_fetch_log)(mb, ms))
194 z->mb = GET_DIST_TRUE_LOG_ADDR;
195 else
196 return; /* scalar mask == .false. */
197
198 if (~F90_FLAGS_G(as) & __OFF_TEMPLATE) {
199 z->ab += F90_LBASE_G(as) * F90_LEN_G(as);
200 ao = -1;
201 I8(red_scalar_loop)(z, ao, 0, F90_RANK_G(as));
202 }
203
204 I8(__fort_reduce_section)(rb, z->kind, z->len,
205 xb, __INT, sizeof(__INT_T), 1,
206 z->g_fn, -1, as);
207
208 I8(__fort_replicate_result)(rb, z->kind, z->len,
209 xb, __INT, sizeof(__INT_T), 1, as);
210
211 if (xb != NULL && (p = xb[0]) > 0) {
212 for (i = 0; i < F90_RANK_G(as); i++) {
213 SET_DIM_PTRS(asd, as, i);
214 m = F90_DPTR_EXTENT_G(asd);
215 q = (p - 1) / m;
216 xb[i] = p - q * m;
217 p = q;
218 }
219 }
220 }
221
222 /** \brief SAME as previous, but allow any logical kind for the mask */
I8(__fort_red_scalarlk)223 void I8(__fort_red_scalarlk)(red_parm *z, char *rb, char *ab, char *mb,
224 F90_Desc *rs, F90_Desc *as, F90_Desc *ms,
225 __INT_T *xb, red_enum op)
226 {
227 DECL_DIM_PTRS(asd);
228 __INT_T ao, i, len, m, p, q;
229 int loop;
230
231 z->rb = rb;
232 z->rs = rs;
233 z->ab = ab;
234 z->as = as;
235 z->mb = (__LOG_T *)mb;
236 z->ms = ms;
237 z->xb = xb;
238 z->dim = 0;
239
240 I8(__fort_cycle_bounds)(as);
241
242 __fort_scalar_copy[z->kind](rb, z->zb, z->len);
243
244 if (xb != NULL) {
245 for (i = F90_RANK_G(as); --i >= 0;)
246 xb[i] = 0;
247 }
248
249 z->mask_present = (F90_TAG_G(ms) == __DESC && F90_RANK_G(ms) > 0);
250 if (z->mask_present) {
251 z->mask_stored_alike = I8(__fort_stored_alike)(as, ms);
252 if (z->mask_stored_alike)
253 z->mb = (__LOG_T *)((char *)(z->mb) + (F90_LBASE_G(ms) << z->lk_shift));
254 for (i = F90_RANK_G(ms); --i >= 0;)
255 z->mi[i] = F90_DIM_LBOUND_G(ms, i);
256 } else if (!ISPRESENT(mb) || I8(__fort_fetch_log)(mb, ms))
257 z->mb = GET_DIST_TRUE_LOG_ADDR;
258 else
259 return; /* scalar mask == .false. */
260
261 if (~F90_FLAGS_G(as) & __OFF_TEMPLATE) {
262 z->ab += F90_LBASE_G(as) * F90_LEN_G(as);
263 ao = -1;
264
265 I8(red_scalar_loop)(z, ao, 0, F90_RANK_G(as));
266 }
267
268 I8(__fort_reduce_section)(rb, z->kind, z->len,
269 xb, __INT, sizeof(__INT_T), 1,
270 z->g_fn, -1, as);
271
272 I8(__fort_replicate_result)(rb, z->kind, z->len,
273 xb, __INT, sizeof(__INT_T), 1, as);
274 if (xb != NULL && (p = xb[0]) > 0) {
275 for (i = 0; i < F90_RANK_G(as); i++) {
276 SET_DIM_PTRS(asd, as, i);
277 m = F90_DPTR_EXTENT_G(asd);
278 q = (p - 1) / m;
279 xb[i] = p - q * m;
280 p = q;
281 }
282 }
283 }
284
/** \brief reduction, dim argument present
 *
 * Recursive loop for reductions with a DIM argument.  Result dimensions
 * (rdim) and array dimensions (adim) are walked in step; once rdim
 * reaches 0 the remaining array dimension is the one being reduced, and
 * the local reduction function is applied blockwise into the result
 * element selected by the accumulated result offset.
 *
 * \param z    reduction parameter block
 * \param rof  result offset accumulated from the outer dimensions
 * \param aof  array offset accumulated from the outer dimensions
 * \param rdim current 1-based result dimension (<= 0 once exhausted)
 * \param adim current 1-based array dimension
 */
static void I8(red_array_loop)(red_parm *z, __INT_T rof, __INT_T aof, int rdim,
                               int adim)
{
  DECL_HDR_PTRS(as);
  DECL_HDR_PTRS(rs);
  DECL_HDR_PTRS(ms);
  DECL_DIM_PTRS(asd);
  DECL_DIM_PTRS(rsd);
  DECL_DIM_PTRS(msd);
  char *ap, *rp;
  __LOG_T *mp;
  __INT4_T *lp; /* location results are stored as 32-bit ints here */

  __INT_T abl, abn, abu, acl, acn, aclof, ahop, ao, i, li, ls, mhop, mlow, rbl,
      rbn, rbu, rcl, rclof, rhop, ro;

#if defined(DEBUG)
  if (__fort_test & DEBUG_REDU) {
    printf("%d red_array_loop rdim=%d rof=%d adim=%d aof=%d\n", GET_DIST_LCPU,
           rdim, rof, adim, aof);
  }
#endif

  if (rdim > 0) {
    rs = z->rs;
    SET_DIM_PTRS(rsd, rs, rdim - 1);
    rcl = DIST_DPTR_CL_G(rsd);
    rclof = DIST_DPTR_CLOF_G(rsd);
    rhop = F90_DPTR_SSTRIDE_G(rsd) * F90_DPTR_LSTRIDE_G(rsd);

    /* skip the reduced array dimension; it has no result dimension */
    if (adim == z->dim)
      --adim;
  } else {
    /* innermost level: rp addresses this iteration's result element */
    rp = z->rb + rof * z->len;
    adim = z->dim;
  }

  as = z->as;
  SET_DIM_PTRS(asd, as, adim - 1);
  acn = DIST_DPTR_CN_G(asd);
  acl = DIST_DPTR_CL_G(asd);
  aclof = DIST_DPTR_CLOF_G(asd);
  ahop = F90_DPTR_SSTRIDE_G(asd) * F90_DPTR_LSTRIDE_G(asd);

  if (z->mask_present) {
    ms = z->ms;
    SET_DIM_PTRS(msd, ms, adim - 1);
    mlow = F90_DPTR_LBOUND_G(msd);
    mhop = F90_DPTR_SSTRIDE_G(msd) * F90_DPTR_LSTRIDE_G(msd);
  } else {
    /* no array mask: hop 0 reuses the single logical at z->mb */
    mlow = mhop = 0;
    mp = z->mb;
  }

  if (rdim <= 0 && acn > 1 &&
      DIST_DPTR_BLOCK_G(asd) == DIST_DPTR_TSTRIDE_G(asd) &&
      (!z->mask_present || z->mask_stored_alike)) {

    /* cyclic(1) optimization */

    abl = acl - DIST_DPTR_TOFFSET_G(asd);
    if (DIST_DPTR_TSTRIDE_G(asd) != 1)
      abl /= DIST_DPTR_TSTRIDE_G(asd);
    ao = aof +
         (F90_DPTR_SSTRIDE_G(asd) * abl + F90_DPTR_SOFFSET_G(asd) - aclof) *
             F90_DPTR_LSTRIDE_G(asd);

    i = abl - F90_DPTR_LBOUND_G(asd); /* section ordinal index */
    li = i + 1;                       /* linearized location */
    ls = DIST_DPTR_PSHAPE_G(asd);     /* location stride */

    if (z->mask_present)
      mp = (__LOG_T *)((char *)(z->mb) + (ao << z->lk_shift));

    if (z->xb != NULL)
      lp = ((__INT4_T *)z->xb) + rof;
    else
      lp = NULL;

    ap = z->ab + ao * F90_LEN_G(as);
    if (z->l_fn_b) {
      z->l_fn_b(rp, acn, ap, ahop, mp, mhop, lp, li, ls, z->len, z->back);
    } else {
      z->l_fn(rp, acn, ap, ahop, mp, mhop, lp, li, ls, z->len);
    }

    return;
  }

  while (acn > 0) {
    abn = I8(__fort_block_bounds)(as, adim, acl, &abl, &abu);
    ao = aof +
         (F90_DPTR_SSTRIDE_G(asd) * abl + F90_DPTR_SOFFSET_G(asd) - aclof) *
             F90_DPTR_LSTRIDE_G(asd);

    i = abl - F90_DPTR_LBOUND_G(asd); /* ordinal index (zero-based) */
    li = i + 1;                       /* linearized location */
    z->mi[adim - 1] = mlow + i;       /* mask array index */

    if (rdim > 0) {

      /* this array dimension is not being reduced. there is a
         result element corresponding to every element of this
         array dimension. */

      rbn = I8(__fort_block_bounds)(rs, rdim, rcl, &rbl, &rbu);
#if defined(DEBUG)
      if (rbn != abn)
        __fort_red_abort("result misalignment");
#endif
      ro = rof +
           (F90_DPTR_SSTRIDE_G(rsd) * rbl + F90_DPTR_SOFFSET_G(rsd) - rclof) *
               F90_DPTR_LSTRIDE_G(rsd);

      while (abn > 0) {
        I8(red_array_loop)(z, ro, ao, rdim - 1, adim - 1);
        ro += rhop;
        ao += ahop;
        z->mi[adim - 1]++;
        --abn;
      }
      rcl += DIST_DPTR_CS_G(rsd);
      rclof += DIST_DPTR_CLOS_G(rsd);
    } else {

      /* this is the array dimension being reduced */

      if (z->mask_present) {
        if (z->mask_stored_alike)
          mp = (__LOG_T *)((char *)(z->mb) + (ao << z->lk_shift));
        else {
          mp = I8(__fort_local_address)(z->mb, ms, z->mi);
          if (mp == NULL)
            __fort_red_abort("mask misalignment");
        }
      }

      if (z->xb != NULL)
        lp = ((__INT4_T *)z->xb) + rof;
      else
        lp = NULL;

      ap = z->ab + ao * F90_LEN_G(as);
      if (z->l_fn_b) {
        z->l_fn_b(rp, abn, ap, ahop, mp, mhop, lp, li, 1, z->len, z->back);
      } else {
        z->l_fn(rp, abn, ap, ahop, mp, mhop, lp, li, 1, z->len);
      }
    }
    acl += DIST_DPTR_CS_G(asd);
    aclof += DIST_DPTR_CLOS_G(asd);
    --acn;
  }
#if defined(DEBUG)
  if (__fort_test & DEBUG_REDU && rdim <= 0) {
    printf("%d red_array_loop rp=%x *rp=", GET_DIST_LCPU, rp);
    __fort_print_scalar(rp, z->kind);
    printf("\n");
  }
#endif
}
447
I8(__fort_kred_scalarlk)448 void I8(__fort_kred_scalarlk)(red_parm *z, char *rb, char *ab, char *mb,
449 F90_Desc *rs, F90_Desc *as, F90_Desc *ms,
450 __INT8_T *xb, red_enum op)
451 {
452 DECL_DIM_PTRS(asd);
453 __INT_T ao, i, len, m, p, q;
454 int loop;
455
456 z->rb = rb;
457 z->rs = rs;
458 z->ab = ab;
459 z->as = as;
460 z->mb = (__LOG_T *)mb;
461 z->ms = ms;
462 z->xb = (__INT_T *)xb;
463 z->dim = 0;
464
465 I8(__fort_cycle_bounds)(as);
466
467 __fort_scalar_copy[z->kind](rb, z->zb, z->len);
468
469 if (xb != NULL) {
470 for (i = F90_RANK_G(as); --i >= 0;)
471 xb[i] = 0;
472 }
473
474 z->mask_present = (F90_TAG_G(ms) == __DESC && F90_RANK_G(ms) > 0);
475 if (z->mask_present) {
476 z->mask_stored_alike = I8(__fort_stored_alike)(as, ms);
477 if (z->mask_stored_alike)
478 z->mb = (__LOG_T *)((char *)(z->mb) + (F90_LBASE_G(ms) << z->lk_shift));
479 for (i = F90_RANK_G(ms); --i >= 0;)
480 z->mi[i] = F90_DIM_LBOUND_G(ms, i);
481 } else if (!ISPRESENT(mb) || I8(__fort_fetch_log)(mb, ms))
482 z->mb = GET_DIST_TRUE_LOG_ADDR;
483 else
484 return; /* scalar mask == .false. */
485
486 if (~F90_FLAGS_G(as) & __OFF_TEMPLATE) {
487 z->ab += F90_LBASE_G(as) * F90_LEN_G(as);
488 ao = -1;
489 I8(red_scalar_loop)(z, ao, 0, F90_RANK_G(as));
490 }
491
492 I8(__fort_reduce_section)(rb, z->kind, z->len,
493 xb, __INT, sizeof(__INT_T), 1,
494 z->g_fn, -1, as);
495
496 I8(__fort_replicate_result)(rb, z->kind, z->len,
497 xb, __INT, sizeof(__INT_T), 1, as);
498
499 if (xb != NULL && (p = xb[0]) > 0) {
500 for (i = 0; i < F90_RANK_G(as); i++) {
501 SET_DIM_PTRS(asd, as, i);
502 m = F90_DPTR_EXTENT_G(asd);
503 q = (p - 1) / m;
504 xb[i] = p - q * m;
505 p = q;
506 }
507 }
508 }
509
I8(kred_array_loop)510 static void I8(kred_array_loop)(red_parm *z, __INT_T rof, __INT_T aof, int rdim,
511 int adim)
512 {
513 DECL_HDR_PTRS(as);
514 DECL_HDR_PTRS(rs);
515 DECL_HDR_PTRS(ms);
516 DECL_DIM_PTRS(asd);
517 DECL_DIM_PTRS(rsd);
518 DECL_DIM_PTRS(msd);
519 char *ap, *rp;
520 __LOG_T *mp;
521 __INT_T *lp;
522
523 __INT_T abl, abn, abu, acl, acn, aclof, ahop, ao, i, li, ls, mhop, mlow, rbl,
524 rbn, rbu, rcl, rclof, rhop, ro;
525
526 #if defined(DEBUG)
527 if (__fort_test & DEBUG_REDU) {
528 printf("%d red_array_loop rdim=%d rof=%d adim=%d aof=%d\n", GET_DIST_LCPU,
529 rdim, rof, adim, aof);
530 }
531 #endif
532
533 if (rdim > 0) {
534 rs = z->rs;
535 SET_DIM_PTRS(rsd, rs, rdim - 1);
536 rcl = DIST_DPTR_CL_G(rsd);
537 rclof = DIST_DPTR_CLOF_G(rsd);
538 rhop = F90_DPTR_SSTRIDE_G(rsd) * F90_DPTR_LSTRIDE_G(rsd);
539
540 if (adim == z->dim)
541 --adim;
542 } else {
543 rp = z->rb + rof * z->len;
544 adim = z->dim;
545 }
546
547 as = z->as;
548 SET_DIM_PTRS(asd, as, adim - 1);
549 acn = DIST_DPTR_CN_G(asd);
550 acl = DIST_DPTR_CL_G(asd);
551 aclof = DIST_DPTR_CLOF_G(asd);
552 ahop = F90_DPTR_SSTRIDE_G(asd) * F90_DPTR_LSTRIDE_G(asd);
553
554 if (z->mask_present) {
555 ms = z->ms;
556 SET_DIM_PTRS(msd, ms, adim - 1);
557 mlow = F90_DPTR_LBOUND_G(msd);
558 mhop = F90_DPTR_SSTRIDE_G(msd) * F90_DPTR_LSTRIDE_G(msd);
559 } else {
560 mlow = mhop = 0;
561 mp = z->mb;
562 }
563
564 if (rdim <= 0 && acn > 1 &&
565 DIST_DPTR_BLOCK_G(asd) == DIST_DPTR_TSTRIDE_G(asd) &&
566 (!z->mask_present || z->mask_stored_alike)) {
567
568 /* cyclic(1) optimization */
569
570 abl = acl - DIST_DPTR_TOFFSET_G(asd);
571 if (DIST_DPTR_TSTRIDE_G(asd) != 1)
572 abl /= DIST_DPTR_TSTRIDE_G(asd);
573 ao = aof +
574 (F90_DPTR_SSTRIDE_G(asd) * abl + F90_DPTR_SOFFSET_G(asd) - aclof) *
575 F90_DPTR_LSTRIDE_G(asd);
576
577 i = abl - F90_DPTR_LBOUND_G(asd); /* section ordinal index */
578 li = i + 1; /* linearized location */
579 ls = DIST_DPTR_PSHAPE_G(asd); /* location stride */
580
581 if (z->mask_present)
582 mp = (__LOG_T *)((char *)(z->mb) + (ao << z->lk_shift));
583
584 if (z->xb != NULL)
585 lp = (__INT_T *)((char *)z->xb + rof * sizeof(__INT8_T));
586 else
587 lp = NULL;
588
589 ap = z->ab + ao * F90_LEN_G(as);
590 if (z->l_fn_b) {
591 z->l_fn_b(rp, acn, ap, ahop, mp, mhop, lp, li, ls, z->len, z->back);
592 } else {
593 z->l_fn(rp, acn, ap, ahop, mp, mhop, lp, li, ls, z->len);
594 }
595
596 return;
597 }
598
599 while (acn > 0) {
600 abn = I8(__fort_block_bounds)(as, adim, acl, &abl, &abu);
601 ao = aof +
602 (F90_DPTR_SSTRIDE_G(asd) * abl + F90_DPTR_SOFFSET_G(asd) - aclof) *
603 F90_DPTR_LSTRIDE_G(asd);
604
605 i = abl - F90_DPTR_LBOUND_G(asd); /* ordinal index (zero-based) */
606 li = i + 1; /* linearized location */
607 z->mi[adim - 1] = mlow + i; /* mask array index */
608
609 if (rdim > 0) {
610
611 /* this array dimension is not being reduced. there is a
612 result element corresponding to every element of this
613 array dimension. */
614
615 rbn = I8(__fort_block_bounds)(rs, rdim, rcl, &rbl, &rbu);
616 #if defined(DEBUG)
617 if (rbn != abn)
618 __fort_red_abort("result misalignment");
619 #endif
620 ro = rof +
621 (F90_DPTR_SSTRIDE_G(rsd) * rbl + F90_DPTR_SOFFSET_G(rsd) - rclof) *
622 F90_DPTR_LSTRIDE_G(rsd);
623
624 while (abn > 0) {
625 I8(kred_array_loop)(z, ro, ao, rdim - 1, adim - 1);
626 ro += rhop;
627 ao += ahop;
628 z->mi[adim - 1]++;
629 --abn;
630 }
631 rcl += DIST_DPTR_CS_G(rsd);
632 rclof += DIST_DPTR_CLOS_G(rsd);
633 } else {
634
635 /* this is the array dimension being reduced */
636
637 if (z->mask_present) {
638 if (z->mask_stored_alike)
639 mp = (__LOG_T *)((char *)(z->mb) + (ao << z->lk_shift));
640 else {
641 mp = I8(__fort_local_address)(z->mb, ms, z->mi);
642 if (mp == NULL)
643 __fort_red_abort("mask misalignment");
644 }
645 }
646
647 if (z->xb != NULL)
648 lp = (__INT_T *)((char *)z->xb + rof * sizeof(__INT8_T));
649 else
650 lp = NULL;
651
652 ap = z->ab + ao * F90_LEN_G(as);
653 if (z->l_fn_b) {
654 z->l_fn_b(rp, abn, ap, ahop, mp, mhop, lp, li, 1, z->len, z->back);
655 } else {
656 z->l_fn(rp, abn, ap, ahop, mp, mhop, lp, li, 1, z->len);
657 }
658 }
659 acl += DIST_DPTR_CS_G(asd);
660 aclof += DIST_DPTR_CLOS_G(asd);
661 --acn;
662 }
663 #if defined(DEBUG)
664 if (__fort_test & DEBUG_REDU && rdim <= 0) {
665 printf("%d red_array_loop rp=%x *rp=", GET_DIST_LCPU, rp);
666 __fort_print_scalar(rp, z->kind);
667 printf("\n");
668 }
669 #endif
670 }
671
I8(__fort_red_array)672 void I8(__fort_red_array)(red_parm *z, char *rb0, char *ab, char *mb, char *db,
673 F90_Desc *rs0, F90_Desc *as, F90_Desc *ms,
674 F90_Desc *ds, red_enum op)
675 {
676 DECL_HDR_PTRS(rs);
677 DECL_HDR_VARS(rs1);
678 char *rb = 0, *xb, *zb;
679 __INT_T flags, kind, len, rank, _1 = 1;
680 int i, n, rc, rl;
681 __INT_T ao, ro;
682
683 z->dim = I8(__fort_fetch_int)(db, ds);
684
685 #if defined(DEBUG)
686 if (__fort_test & DEBUG_REDU) {
687 printf("%d r", GET_DIST_LCPU);
688 I8(__fort_show_section)(rs0);
689 printf("@%x = %s(a", rb0, __fort_red_what);
690 I8(__fort_show_section)(as);
691 printf("@%x, dim=%d, mask", ab, z->dim);
692 I8(__fort_show_section)(ms);
693 printf("@%x)\n", mb);
694 }
695 #endif
696
697 if (as == NULL || F90_TAG_G(as) != __DESC)
698 __fort_red_abort("invalid array argument descriptor");
699 if (z->dim < 1 || z->dim > F90_RANK_G(as))
700 __fort_red_abort("invalid DIM argument");
701 rank = F90_RANK_G(as) - 1;
702
703 I8(__fort_cycle_bounds)(as);
704
705 rs = rs0;
706 rb = rb0;
707 if (F90_TAG_G(rs0) == __DESC) {
708 if ((op == __MINLOC || op == __MAXLOC || op == __FINDLOC) &&
709 z->kind != __STR) {
710 kind = __INT;
711 len = sizeof(__INT_T);
712 } else {
713 kind = z->kind;
714 len = z->len;
715 }
716 if (DIST_MAPPED_G(rs0) ||
717 I8(is_nonsequential_section)(rs0, F90_RANK_G(rs0))) {
718 rs = rs1;
719 flags = (__ASSUMED_SHAPE + __ASSUMED_OVERLAPS + __INTENT_OUT + __INHERIT +
720 __TRANSCRIPTIVE_DIST_TARGET + __TRANSCRIPTIVE_DIST_FORMAT);
721 ENTFTN(QOPY_IN, qopy_in)
722 (&rb, (__POINT_T *)ABSENT, rb0, rs, rb0, rs0, &rank, &kind, &len, &flags,
723 &_1, &_1, &_1, &_1, &_1, &_1, &_1); /* lb */
724 }
725 I8(__fort_cycle_bounds)(rs);
726 ro = F90_LBASE_G(rs) - 1;
727 rc = F90_LSIZE_G(rs);
728 rl = F90_LEN_G(rs);
729 } else {
730 #if defined(DEBUG)
731 if (rank != 0)
732 __fort_red_abort("result/array rank mismatch");
733 #endif
734 ro = 0;
735 rc = 1;
736 rl = GET_DIST_SIZE_OF(F90_TAG_G(rs0));
737 }
738
739 if (op == __MINLOC || op == __MAXLOC || op == __FINDLOC) {
740 if (rc > 0)
741 memset(rb, '\0', rc * rl);
742 xb = rb;
743 rb = (char *)__fort_gmalloc(rc * F90_LEN_G(as));
744 } else
745 xb = NULL;
746
747 z->rb = rb;
748 z->rs = rs;
749 z->ab = ab;
750 z->as = as;
751 z->mb = (__LOG_T *)mb;
752 z->ms = ms;
753 z->xb = (__INT_T *)xb;
754
755 zb = z->zb;
756 #if defined(DEBUG)
757 if (zb == NULL)
758 __fort_red_abort("missing null constant (unimplemented)");
759 #endif
760 switch (z->kind) {
761 case __LOG1:
762 for (i = 0; i < rc; ++i)
763 ((__LOG1_T *)rb)[i] = *(__LOG1_T *)zb;
764 break;
765 case __LOG2:
766 for (i = 0; i < rc; ++i)
767 ((__LOG2_T *)rb)[i] = *(__LOG2_T *)zb;
768 break;
769 case __LOG4:
770 for (i = 0; i < rc; ++i)
771 ((__LOG4_T *)rb)[i] = *(__LOG4_T *)zb;
772 break;
773 case __LOG8:
774 for (i = 0; i < rc; ++i)
775 ((__LOG8_T *)rb)[i] = *(__LOG8_T *)zb;
776 break;
777 case __INT1:
778 for (i = 0; i < rc; ++i)
779 ((__INT1_T *)rb)[i] = *(__INT1_T *)zb;
780 break;
781 case __INT2:
782 for (i = 0; i < rc; ++i)
783 ((__INT2_T *)rb)[i] = *(__INT2_T *)zb;
784 break;
785 case __INT4:
786 for (i = 0; i < rc; ++i)
787 ((__INT4_T *)rb)[i] = *(__INT4_T *)zb;
788 break;
789 case __INT8:
790 for (i = 0; i < rc; ++i)
791 ((__INT8_T *)rb)[i] = *(__INT8_T *)zb;
792 break;
793 case __REAL4:
794 for (i = 0; i < rc; ++i)
795 ((__REAL4_T *)rb)[i] = *(__REAL4_T *)zb;
796 break;
797 case __REAL8:
798 for (i = 0; i < rc; ++i)
799 ((__REAL8_T *)rb)[i] = *(__REAL8_T *)zb;
800 break;
801 case __REAL16:
802 for (i = 0; i < rc; ++i)
803 ((__REAL16_T *)rb)[i] = *(__REAL16_T *)zb;
804 break;
805 case __CPLX8:
806 for (i = 0; i < rc; ++i)
807 ((__CPLX8_T *)rb)[i] = *(__CPLX8_T *)zb;
808 break;
809 case __CPLX16:
810 for (i = 0; i < rc; ++i)
811 ((__CPLX16_T *)rb)[i] = *(__CPLX16_T *)zb;
812 break;
813 case __CPLX32:
814 for (i = 0; i < rc; ++i)
815 ((__CPLX32_T *)rb)[i] = *(__CPLX32_T *)zb;
816 break;
817 case __STR:
818 if (op == __FINDLOC) {
819 for (i = 0; i < rc; ++i)
820 memcpy(&rb[i * z->len], (char *)zb, z->len);
821 } else {
822 for (i = 0; i < rc; ++i)
823 memset(&rb[i * z->len], *(__STR_T *)zb, z->len * sizeof(__STR_T));
824 }
825 break;
826 default:
827 __fort_red_abort("unsupported result type");
828 }
829
830 z->mask_present = (F90_TAG_G(ms) == __DESC && F90_RANK_G(ms) > 0);
831 if (z->mask_present) {
832 z->mask_stored_alike = I8(__fort_stored_alike)(as, ms);
833 if (z->mask_stored_alike)
834 z->mb = (__LOG_T *)((char *)z->mb + (F90_LBASE_G(ms) << z->lk_shift));
835 for (i = F90_RANK_G(ms); --i >= 0;)
836 z->mi[i] = F90_DIM_LBOUND_G(ms, i);
837 } else if (!ISPRESENT(mb) || I8(__fort_fetch_log)(mb, ms))
838 z->mb = GET_DIST_TRUE_LOG_ADDR;
839 else
840 z->mb = (__LOG_T *)GET_DIST_ZED; /* scalar mask == .false. */
841
842 if (~F90_FLAGS_G(as) & __OFF_TEMPLATE) {
843 z->ab += F90_LBASE_G(as) * F90_LEN_G(as);
844 ao = -1;
845 I8(red_array_loop)(z, ro, ao, rank, F90_RANK_G(as));
846 }
847
848 I8(__fort_reduce_section)(rb, z->kind, z->len,
849 xb, __INT, sizeof(__INT_T), rc,
850 z->g_fn, z->dim, as);
851
852 I8(__fort_replicate_result)(rb, z->kind, z->len,
853 xb, __INT, sizeof(__INT_T), rc, as);
854
855 if (xb != NULL) {
856 __fort_gfree(rb);
857 rb = xb;
858 }
859 if (rs == rs1)
860 I8(__fort_copy_out)(rb0, rb, rs0, rs, __INTENT_OUT);
861 }
862
/** \brief SAME as previous, but allow any logical kind for the mask
 *
 * Array reduction driver (DIM present) where mask byte offsets are scaled
 * by z->lk_shift.  Structure mirrors __fort_red_array; see that routine
 * for the overall flow.
 *
 * NOTE(review): unlike __fort_red_array, this routine reads
 * z->mask_present without assigning it first -- presumably callers
 * initialize it (and z->lk_shift) before calling; confirm against the
 * call sites.
 */
void I8(__fort_red_arraylk)(red_parm *z, char *rb0, char *ab, char *mb, char *db,
                            F90_Desc *rs0, F90_Desc *as, F90_Desc *ms,
                            F90_Desc *ds, red_enum op)
{
  DECL_HDR_PTRS(rs);
  DECL_HDR_VARS(rs1);
  char *rb = 0, *xb, *zb;
  __INT_T flags, kind, len, rank, _1 = 1;
  int i, n, rc, rl;
  __INT_T ao, ro;

  z->dim = I8(__fort_fetch_int)(db, ds);

#if defined(DEBUG)
  if (__fort_test & DEBUG_REDU) {
    printf("%d r", GET_DIST_LCPU);
    I8(__fort_show_section)(rs0);
    printf("@%x = %s(a", rb0, __fort_red_what);
    I8(__fort_show_section)(as);
    printf("@%x, dim=%d, mask", ab, z->dim);
    I8(__fort_show_section)(ms);
    printf("@%x)\n", mb);
  }
#endif

  if (as == NULL || F90_TAG_G(as) != __DESC)
    __fort_red_abort("invalid array argument descriptor");
  if (z->dim < 1 || z->dim > F90_RANK_G(as))
    __fort_red_abort("invalid DIM argument");
  rank = F90_RANK_G(as) - 1; /* result rank */

  I8(__fort_cycle_bounds)(as);

  rs = rs0;
  rb = rb0;
  if (F90_TAG_G(rs0) == __DESC) {
    if ((op == __MINLOC || op == __MAXLOC || op == __FINDLOC) &&
        z->kind != __STR) {
      /* location results are integers regardless of the data kind */
      kind = __INT;
      len = sizeof(__INT_T);
    } else {
      kind = z->kind;
      len = z->len;
    }
    if (DIST_MAPPED_G(rs0) ||
        I8(is_nonsequential_section)(rs0, F90_RANK_G(rs0))) {
      /* localize the result section; copied back out at the end */
      rs = rs1;
      flags = (__ASSUMED_SHAPE + __ASSUMED_OVERLAPS + __INTENT_OUT + __INHERIT +
               __TRANSCRIPTIVE_DIST_TARGET + __TRANSCRIPTIVE_DIST_FORMAT);
      ENTFTN(QOPY_IN, qopy_in)
      (&rb, (__POINT_T *)ABSENT, rb0, rs, rb0, rs0, &rank, &kind, &len, &flags,
       &_1, &_1, &_1, &_1, &_1, &_1, &_1); /* lb */
    }
    I8(__fort_cycle_bounds)(rs);
    ro = F90_LBASE_G(rs) - 1;
    rc = F90_LSIZE_G(rs);
    rl = F90_LEN_G(rs);
  } else {
    /* scalar result */
#if defined(DEBUG)
    if (rank != 0)
      __fort_red_abort("result/array rank mismatch");
#endif
    rank = 0;
    ro = 0;
    rc = 1;
    rl = GET_DIST_SIZE_OF(F90_TAG_G(rs0));
  }

  if (op == __MINLOC || op == __MAXLOC || op == __FINDLOC) {
    /* rb0 collects locations; reduce values into a temporary buffer */
    if (rc > 0)
      memset(rb, '\0', rc * rl);
    xb = rb;
    rb = (char *)__fort_gmalloc(rc * F90_LEN_G(as));
  } else
    xb = NULL;

  z->rb = rb;
  z->rs = rs;
  z->ab = ab;
  z->as = as;
  z->mb = (__LOG_T *)mb;
  z->ms = ms;
  z->xb = (__INT_T *)xb;

  /* initialize each result element with the operation's identity value */
  zb = z->zb;
#if defined(DEBUG)
  if (zb == NULL)
    __fort_red_abort("missing null constant (unimplemented)");
#endif
  switch (z->kind) {
  case __LOG1:
    for (i = 0; i < rc; ++i)
      ((__LOG1_T *)rb)[i] = *(__LOG1_T *)zb;
    break;
  case __LOG2:
    for (i = 0; i < rc; ++i)
      ((__LOG2_T *)rb)[i] = *(__LOG2_T *)zb;
    break;
  case __LOG4:
    for (i = 0; i < rc; ++i)
      ((__LOG4_T *)rb)[i] = *(__LOG4_T *)zb;
    break;
  case __LOG8:
    for (i = 0; i < rc; ++i)
      ((__LOG8_T *)rb)[i] = *(__LOG8_T *)zb;
    break;
  case __INT1:
    for (i = 0; i < rc; ++i)
      ((__INT1_T *)rb)[i] = *(__INT1_T *)zb;
    break;
  case __INT2:
    for (i = 0; i < rc; ++i)
      ((__INT2_T *)rb)[i] = *(__INT2_T *)zb;
    break;
  case __INT4:
    for (i = 0; i < rc; ++i)
      ((__INT4_T *)rb)[i] = *(__INT4_T *)zb;
    break;
  case __INT8:
    for (i = 0; i < rc; ++i)
      ((__INT8_T *)rb)[i] = *(__INT8_T *)zb;
    break;
  case __REAL4:
    for (i = 0; i < rc; ++i)
      ((__REAL4_T *)rb)[i] = *(__REAL4_T *)zb;
    break;
  case __REAL8:
    for (i = 0; i < rc; ++i)
      ((__REAL8_T *)rb)[i] = *(__REAL8_T *)zb;
    break;
  case __REAL16:
    for (i = 0; i < rc; ++i)
      ((__REAL16_T *)rb)[i] = *(__REAL16_T *)zb;
    break;
  case __CPLX8:
    for (i = 0; i < rc; ++i)
      ((__CPLX8_T *)rb)[i] = *(__CPLX8_T *)zb;
    break;
  case __CPLX16:
    for (i = 0; i < rc; ++i)
      ((__CPLX16_T *)rb)[i] = *(__CPLX16_T *)zb;
    break;
  case __CPLX32:
    for (i = 0; i < rc; ++i)
      ((__CPLX32_T *)rb)[i] = *(__CPLX32_T *)zb;
    break;
  case __STR:
    if (op == __FINDLOC) {
      for (i = 0; i < rc; ++i)
        memcpy(&rb[i * z->len], (char *)zb, z->len);
    } else {
      for (i = 0; i < rc; ++i)
        memset(&rb[i * z->len], *(__STR_T *)zb, z->len * sizeof(__STR_T));
    }
    break;
  default:
    __fort_red_abort("unsupported result type");
  }

  if (z->mask_present) {
    z->mask_stored_alike = I8(__fort_stored_alike)(as, ms);
    if (z->mask_stored_alike)
      /* lk_shift accounts for the mask's logical kind size */
      z->mb = (__LOG_T *)((char *)z->mb + (F90_LBASE_G(ms) << z->lk_shift));
    for (i = F90_RANK_G(ms); --i >= 0;)
      z->mi[i] = F90_DIM_LBOUND_G(ms, i);
  } else if (!ISPRESENT(mb) || I8(__fort_fetch_log)(mb, ms))
    z->mb = GET_DIST_TRUE_LOG_ADDR;
  else
    z->mb = (__LOG_T *)GET_DIST_ZED; /* scalar mask == .false. */

  /* reduce the locally owned elements */
  if (~F90_FLAGS_G(as) & __OFF_TEMPLATE) {
    z->ab += F90_LBASE_G(as) * F90_LEN_G(as);
    ao = -1;
    I8(red_array_loop)(z, ro, ao, rank, F90_RANK_G(as));
  }

  /* combine partial results across processors, then broadcast */
  I8(__fort_reduce_section)(rb, z->kind, z->len,
                            xb, __INT, sizeof(__INT_T), rc,
                            z->g_fn, z->dim, as);

  I8(__fort_replicate_result)(rb, z->kind, z->len,
                              xb, __INT, sizeof(__INT_T), rc, as);

  if (xb != NULL) {
    /* discard the temporary value buffer; locations are the result */
    __fort_gfree(rb);
    rb = xb;
  }
  if (rs == rs1)
    I8(__fort_copy_out)(rb0, rb, rs0, rs, __INTENT_OUT);
}
1054
I8(__fort_kred_arraylk)1055 void I8(__fort_kred_arraylk)(red_parm *z, char *rb0, char *ab, char *mb,
1056 char *db, F90_Desc *rs0, F90_Desc *as, F90_Desc *ms,
1057 F90_Desc *ds, red_enum op)
1058 {
1059 DECL_HDR_PTRS(rs);
1060 DECL_HDR_VARS(rs1);
1061 char *rb = 0, *xb, *zb;
1062 __INT_T flags, kind, len, rank, _1 = 1;
1063 int i, n, rc, rl;
1064 __INT_T ao, ro;
1065
1066 z->dim = I8(__fort_fetch_int)(db, ds);
1067
1068 #if defined(DEBUG)
1069 if (__fort_test & DEBUG_REDU) {
1070 printf("%d r", GET_DIST_LCPU);
1071 I8(__fort_show_section)(rs0);
1072 printf("@%x = %s(a", rb0, __fort_red_what);
1073 I8(__fort_show_section)(as);
1074 printf("@%x, dim=%d, mask", ab, z->dim);
1075 I8(__fort_show_section)(ms);
1076 printf("@%x)\n", mb);
1077 }
1078 #endif
1079
1080 if (as == NULL || F90_TAG_G(as) != __DESC)
1081 __fort_red_abort("invalid array argument descriptor");
1082 if (z->dim < 1 || z->dim > F90_RANK_G(as))
1083 __fort_red_abort("invalid DIM argument");
1084 rank = F90_RANK_G(as) - 1;
1085
1086 I8(__fort_cycle_bounds)(as);
1087
1088 rs = rs0;
1089 rb = rb0;
1090 if (F90_TAG_G(rs0) == __DESC) {
1091 if ((op == __MINLOC || op == __MAXLOC || op == __FINDLOC) &&
1092 z->kind != __STR) {
1093 kind = __INT8;
1094 len = sizeof(__INT8_T);
1095 } else {
1096 kind = z->kind;
1097 len = z->len;
1098 }
1099 if (DIST_MAPPED_G(rs0) ||
1100 I8(is_nonsequential_section)(rs0, F90_RANK_G(rs0))) {
1101 rs = rs1;
1102 flags = (__ASSUMED_SHAPE + __ASSUMED_OVERLAPS + __INTENT_OUT + __INHERIT +
1103 __TRANSCRIPTIVE_DIST_TARGET + __TRANSCRIPTIVE_DIST_FORMAT);
1104 ENTFTN(QOPY_IN, qopy_in)
1105 (&rb, (__POINT_T *)ABSENT, rb0, rs, rb0, rs0, &rank, &kind, &len, &flags,
1106 &_1, &_1, &_1, &_1, &_1, &_1, &_1); /* lb */
1107 }
1108 I8(__fort_cycle_bounds)(rs);
1109 ro = F90_LBASE_G(rs) - 1;
1110 rc = F90_LSIZE_G(rs);
1111 rl = F90_LEN_G(rs);
1112 } else {
1113 #if defined(DEBUG)
1114 if (rank != 0)
1115 __fort_red_abort("result/array rank mismatch");
1116 #endif
1117 rank = 0;
1118 ro = 0;
1119 rc = 1;
1120 rl = GET_DIST_SIZE_OF(F90_TAG_G(rs0)); /* same as sizeof(__INT8_T) */
1121 }
1122
1123 if (op == __MINLOC || op == __MAXLOC || op == __FINDLOC) {
1124 if (rc > 0)
1125 memset(rb, '\0', rc * rl);
1126 xb = rb;
1127 rb = (char *)__fort_gmalloc(rc * F90_LEN_G(as));
1128 } else
1129 xb = NULL;
1130
1131 z->rb = rb;
1132 z->rs = rs;
1133 z->ab = ab;
1134 z->as = as;
1135 z->mb = (__LOG_T *)mb;
1136 z->ms = ms;
1137 z->xb = (__INT_T *)xb;
1138
1139 zb = z->zb;
1140 #if defined(DEBUG)
1141 if (zb == NULL)
1142 __fort_red_abort("missing null constant (unimplemented)");
1143 #endif
1144 switch (z->kind) {
1145 case __LOG1:
1146 for (i = 0; i < rc; ++i)
1147 ((__LOG1_T *)rb)[i] = *(__LOG1_T *)zb;
1148 break;
1149 case __LOG2:
1150 for (i = 0; i < rc; ++i)
1151 ((__LOG2_T *)rb)[i] = *(__LOG2_T *)zb;
1152 break;
1153 case __LOG4:
1154 for (i = 0; i < rc; ++i)
1155 ((__LOG4_T *)rb)[i] = *(__LOG4_T *)zb;
1156 break;
1157 case __LOG8:
1158 for (i = 0; i < rc; ++i)
1159 ((__LOG8_T *)rb)[i] = *(__LOG8_T *)zb;
1160 break;
1161 case __INT1:
1162 for (i = 0; i < rc; ++i)
1163 ((__INT1_T *)rb)[i] = *(__INT1_T *)zb;
1164 break;
1165 case __INT2:
1166 for (i = 0; i < rc; ++i)
1167 ((__INT2_T *)rb)[i] = *(__INT2_T *)zb;
1168 break;
1169 case __INT4:
1170 for (i = 0; i < rc; ++i)
1171 ((__INT4_T *)rb)[i] = *(__INT4_T *)zb;
1172 break;
1173 case __INT8:
1174 for (i = 0; i < rc; ++i)
1175 ((__INT8_T *)rb)[i] = *(__INT8_T *)zb;
1176 break;
1177 case __REAL4:
1178 for (i = 0; i < rc; ++i)
1179 ((__REAL4_T *)rb)[i] = *(__REAL4_T *)zb;
1180 break;
1181 case __REAL8:
1182 for (i = 0; i < rc; ++i)
1183 ((__REAL8_T *)rb)[i] = *(__REAL8_T *)zb;
1184 break;
1185 case __REAL16:
1186 for (i = 0; i < rc; ++i)
1187 ((__REAL16_T *)rb)[i] = *(__REAL16_T *)zb;
1188 break;
1189 case __CPLX8:
1190 for (i = 0; i < rc; ++i)
1191 ((__CPLX8_T *)rb)[i] = *(__CPLX8_T *)zb;
1192 break;
1193 case __CPLX16:
1194 for (i = 0; i < rc; ++i)
1195 ((__CPLX16_T *)rb)[i] = *(__CPLX16_T *)zb;
1196 break;
1197 case __CPLX32:
1198 for (i = 0; i < rc; ++i)
1199 ((__CPLX32_T *)rb)[i] = *(__CPLX32_T *)zb;
1200 break;
1201 case __STR:
1202 if (op == __FINDLOC) {
1203 for (i = 0; i < rc; ++i)
1204 memcpy(&rb[i * z->len], (char *)zb, z->len);
1205 } else {
1206 for (i = 0; i < rc; ++i)
1207 memset(&rb[i * z->len], *(__STR_T *)zb, z->len * sizeof(__STR_T));
1208 }
1209 break;
1210 default:
1211 __fort_red_abort("unsupported result type");
1212 }
1213
1214 if (z->mask_present) {
1215 z->mask_stored_alike = I8(__fort_stored_alike)(as, ms);
1216 if (z->mask_stored_alike)
1217 z->mb = (__LOG_T *)((char *)z->mb + (F90_LBASE_G(ms) << z->lk_shift));
1218 for (i = F90_RANK_G(ms); --i >= 0;)
1219 z->mi[i] = F90_DIM_LBOUND_G(ms, i);
1220 } else if (!ISPRESENT(mb) || I8(__fort_fetch_log)(mb, ms))
1221 z->mb = GET_DIST_TRUE_LOG_ADDR;
1222 else
1223 z->mb = (__LOG_T *)GET_DIST_ZED; /* scalar mask == .false. */
1224
1225 if (~F90_FLAGS_G(as) & __OFF_TEMPLATE) {
1226 z->ab += F90_LBASE_G(as) * F90_LEN_G(as);
1227 ao = -1;
1228 I8(kred_array_loop)(z, ro, ao, rank, F90_RANK_G(as));
1229 }
1230
1231 I8(__fort_reduce_section)(rb, z->kind, z->len,
1232 xb, __INT, sizeof(__INT_T), rc,
1233 z->g_fn, z->dim, as);
1234
1235 I8(__fort_replicate_result)(rb, z->kind, z->len,
1236 xb, __INT, sizeof(__INT_T), rc, as);
1237
1238 if (xb != NULL) {
1239 __fort_gfree(rb);
1240 rb = xb;
1241 }
1242 if (rs == rs1)
1243 I8(__fort_copy_out)(rb0, rb, rs0, rs, __INTENT_OUT);
1244 }
1245
1246 /** \brief set up result descriptor for reduction intrinsic -- used when the
1247 * dim arg is variable. result dimensions are aligned with the
1248 * corresponding source dimensions and the result array becomes
1249 * replicated over the reduced dimension. lbounds are set to 1 and
1250 * overlap allowances are set to 0.
1251 */
void ENTFTN(REDUCE_DESCRIPTOR,
            reduce_descriptor)(F90_Desc *rd, /* result descriptor */
                               __INT_T *kindb, /* result kind */
                               __INT_T *lenb, /* result data item length */
                               F90_Desc *ad, /* array descriptor */
                               __INT_T *dimb) /* dimension */
{
  DECL_DIM_PTRS(rdd); /* NOTE(review): declared but unused in this body */
  DECL_DIM_PTRS(add);
  DECL_HDR_PTRS(td);
  dtype kind;
  __INT_T dim, extent, len, m, offset, rx, ax, tx;

#if defined(DEBUG)
  /* The array argument must carry a full descriptor, not a scalar tag. */
  if (F90_TAG_G(ad) != __DESC)
    __fort_abort("reduction intrinsic: invalid array arg");
#endif

  kind = (dtype)*kindb;
  len = *lenb;
  dim = *dimb;
  /* dim is a 1-based Fortran dimension index into the source array. */
  if (dim < 1 || dim > F90_RANK_G(ad))
    __fort_abort("reduction intrinsic: invalid dim");

  /* The result has rank one less than the source; it is aligned to the
   * same distribution target, inheriting the source's flags. */
  td = DIST_ALIGN_TARGET_G(ad);
  __DIST_INIT_DESCRIPTOR(rd, F90_RANK_G(ad) - 1, kind, len, F90_FLAGS_G(ad), td);
  /* Walk source dimensions, skipping the reduced one; each surviving
   * dimension ax maps to result dimension rx. */
  for (rx = ax = 1; ax <= F90_RANK_G(ad); ++ax) {
    if (ax == dim)
      continue;
    SET_DIM_PTRS(add, ad, ax - 1);
    extent = F90_DPTR_EXTENT_G(add);
    /* Rebase the alignment offset so the result's lbound of 1 lines up
     * with the source dimension's original lbound on the target. */
    offset = DIST_DPTR_TSTRIDE_G(add) * (F90_DPTR_LBOUND_G(add) - 1) +
             DIST_DPTR_TOFFSET_G(add);

    /*
     * added gen_block argument to __fort_set_alignment ...
     * Should last arg be &DIST_DIM_GEN_BLOCK_G(td,(DIST_DPTR_TAXIS_G(add))-1) or
     * &DIST_DPTR_GEN_BLOCK_G(add) ???
     */
    I8(__fort_set_alignment)(rd, rx, 1, extent, DIST_DPTR_TAXIS_G(add),
                             DIST_DPTR_TSTRIDE_G(add), offset,
                        &DIST_DIM_GEN_BLOCK_G(td,(DIST_DPTR_TAXIS_G(add))-1));
    /* Overlap allowances are zero for the result (comment above function). */
    __DIST_SET_ALLOCATION(rd, rx, 0, 0);
    ++rx;
  }
  /* Propagate single-alignment info: DIST_SINGLE_G is a bitmask over
   * target axes; replicate each singled axis onto the result. */
  m = DIST_SINGLE_G(ad);
  for (tx = 1; m > 0; ++tx, m >>= 1) {
    if (m & 1)
      I8(__fort_set_single)(rd, td, tx, DIST_INFO_G(ad, tx - 1), __SINGLE);
  }
  I8(__fort_finish_descriptor)(rd);
}
1304
I8(__fort_create_conforming_mask_array)1305 void *I8(__fort_create_conforming_mask_array)(char *what, char *ab, char *mb,
1306 F90_Desc *as, F90_Desc *ms,
1307 F90_Desc *new_ms)
1308 {
1309
1310 /* Create a conforming mask array. Returns a pointer to the
1311 * array and assigns a new descriptor in ms. Caller responsible
1312 * for the __fort_gfree() ...
1313 */
1314
1315 __INT_T mask_kind;
1316 __INT_T mask_len;
1317 __INT_T i, _255 = 255;
1318 void *mask_array;
1319
1320 if (!ISSCALAR(ms)) {
1321 __fort_abort("__fort_create_conforming_mask_array: bad mask descriptor");
1322 }
1323
1324 mask_kind = F90_TAG_G(ms);
1325
1326 switch (mask_kind) {
1327
1328 case __LOG1:
1329 mask_len = sizeof(__LOG1_T);
1330 break;
1331
1332 case __LOG2:
1333
1334 mask_len = sizeof(__LOG2_T);
1335 break;
1336
1337 case __LOG4:
1338 mask_len = sizeof(__LOG4_T);
1339 break;
1340
1341 case __LOG8:
1342 mask_len = sizeof(__LOG8_T);
1343 break;
1344
1345 default:
1346 printf("%d %s: bad type for mask loc=1\n", GET_DIST_LCPU, what);
1347 __fort_abort((char *)0);
1348 }
1349
1350 ENTFTN(INSTANCE, instance)
1351 (new_ms, as, &mask_kind, &mask_len, &_255); /*no overlaps*/
1352 mask_array = (void *)__fort_gmalloc(F90_GSIZE_G(new_ms) * mask_len);
1353
1354 switch (mask_kind) {
1355
1356 case __LOG1:
1357 for (i = 0; i < F90_LSIZE_G(new_ms); ++i)
1358 *((__LOG1_T *)mask_array + i) = *((__LOG1_T *)mb);
1359 break;
1360
1361 case __LOG2:
1362 for (i = 0; i < F90_LSIZE_G(new_ms); ++i)
1363 *((__LOG2_T *)mask_array + i) = *((__LOG2_T *)mb);
1364 break;
1365
1366 case __LOG4:
1367 for (i = 0; i < F90_LSIZE_G(new_ms); ++i)
1368 *((__LOG4_T *)mask_array + i) = *((__LOG4_T *)mb);
1369 break;
1370
1371 case __LOG8:
1372 for (i = 0; i < F90_LSIZE_G(new_ms); ++i)
1373 *((__LOG8_T *)mask_array + i) = *((__LOG8_T *)mb);
1374 break;
1375
1376 default:
1377 printf("%d %s: bad type for mask loc=2\n", GET_DIST_LCPU, what);
1378 __fort_abort((char *)0);
1379 }
1380
1381 return mask_array;
1382 }
1383