1 /*
2  * Copyright (c) 1995-2018, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 /* clang-format off */
19 
20 #include "stdioInterf.h"
21 #include "fioMacros.h"
22 
23 /* spread intrinsic -- copy sections for ncopies into appropriate
24    dimensions */
25 
ENTFTN(SPREAD,spread)26 void ENTFTN(SPREAD, spread)(void *rb,           /* result base */
27                             void *sb,           /* source base */
28                             void *dimb,         /* dimension base */
29                             void *ncopiesb,     /* ncopies base */
30                             F90_Desc *rd,       /* result descriptor */
31                             F90_Desc *sd,       /* source descriptor */
32                             F90_Desc *dimd,     /* dimension descriptor */
33                             F90_Desc *ncopiesd) /* ncopies descriptor */
34 {
35   char *rp, *sp;
36   chdr *c;
37   DECL_DIM_PTRS(rdd);
38   DECL_HDR_VARS(td);
39   int k, dim, ncopies;
40   __INT_T i, rank, rx, tx;
41   __INT_T olb[MAXDIMS], oub[MAXDIMS];
42   __INT_T flags, lbase, pbase, repli, scoff;
43 
44   dim = I8(__fort_fetch_int)(dimb, dimd);
45   ncopies = I8(__fort_fetch_int)(ncopiesb, ncopiesd);
46 
47   /* form temporary descriptor with a scalar subscript in the spread
48      dimension */
49 
50   rank = F90_RANK_G(rd) - 1;
51   __DIST_INIT_SECTION(td, rank, rd);
52 
53   /* set up non-spread dimensions */
54 
55   for (tx = 0, rx = 1; rx <= F90_RANK_G(rd); ++rx) {
56     if (rx != dim) {
57       SET_DIM_PTRS(rdd, rd, rx - 1);
58       I8(__fort_set_section)(td, ++tx, rd, rx, F90_DPTR_LBOUND_G(rdd),
59                                   DPTR_UBOUND_G(rdd), 1);
60     }
61   }
62 
63   flags = F90_FLAGS_G(td); /* save descriptor fields */
64   lbase = F90_LBASE_G(td);
65   pbase = DIST_PBASE_G(td);
66   repli = DIST_REPLICATED_G(td);
67   scoff = DIST_SCOFF_G(td);
68   for (i = rank; --i >= 0;) {
69     olb[i] = DIST_DIM_OLB_G(td, i);
70     oub[i] = DIST_DIM_OUB_G(td, i);
71   }
72 
73   sp = (char *)sb + DIST_SCOFF_G(sd) * F90_LEN_G(sd);
74   SET_DIM_PTRS(rdd, rd, dim - 1);
75   for (k = 0; k < ncopies; ++k) {
76 
77     /* set scalar subscript in spread dimension */
78 
79     I8(__fort_set_single)((td), rd, dim, F90_DPTR_LBOUND_G(rdd) + k, __SCALAR);
80     I8(__fort_finish_section)((td));
81 
82     rp = (char *)rb + DIST_SCOFF_G(td) * F90_LEN_G(td);
83     c = I8(__fort_copy)(rp, sp, td, sd, NULL);
84     __fort_doit(c);
85     __fort_frechn(c);
86 
87     F90_FLAGS_P(td, flags); /* restore descriptor fields */
88     F90_LBASE_P(td, lbase);
89     DIST_PBASE_P(td, pbase);
90     DIST_REPLICATED_P(td, repli);
91     DIST_SCOFF_P(td, scoff);
92     for (i = rank; --i >= 0;) {
93       DIST_DIM_OLB_P(td, i, olb[i]);
94       DIST_DIM_OUB_P(td, i, oub[i]);
95     }
96     DIST_CACHED_P(td, 0);
97   }
98 }
99 
100 /* spread of a scalar - copy the scalar to a rank 1 array, ignore 'dim' */
ENTFTN(SPREADSA,spreadsa)101 void ENTFTN(SPREADSA, spreadsa)(void *rb,           /* result base */
102                               void *sb,           /* source base */
103                               void *dimb,         /* dimension base */
104                               void *ncopiesb,     /* ncopies base */
105                               __CLEN_T *szb,       /* sizeof source base */
106                               F90_Desc *rd,       /* result descriptor */
107                               F90_Desc *sd,       /* source descriptor */
108                               F90_Desc *dimd,     /* dimension descriptor */
109                               F90_Desc *ncopiesd, /* ncopies descriptor */
110                               F90_Desc *szd)      /* sizeof source descriptor */
111 {
112   char *rp;
113   int ncopies;
114   __CLEN_T size;
115 
116   /* we assume that result is replicated and contiguous */
117 
118   ncopies = I8(__fort_fetch_int)(ncopiesb, ncopiesd);
119   size = *szb;
120   rp = (char *)rb;
121   while (ncopies-- > 0) {
122     __fort_bcopy(rp, sb, size);
123     rp = rp + size;
124   }
125 }
126 /* 32 bit CLEN version */
ENTFTN(SPREADS,spreads)127 void ENTFTN(SPREADS, spreads)(void *rb,           /* result base */
128                               void *sb,           /* source base */
129                               void *dimb,         /* dimension base */
130                               void *ncopiesb,     /* ncopies base */
131                               __INT_T *szb,       /* sizeof source base */
132                               F90_Desc *rd,       /* result descriptor */
133                               F90_Desc *sd,       /* source descriptor */
134                               F90_Desc *dimd,     /* dimension descriptor */
135                               F90_Desc *ncopiesd, /* ncopies descriptor */
136                               F90_Desc *szd)      /* sizeof source descriptor */
137 {
138   ENTFTN(SPREADSA, spreadsa)(rb, sb, dimb, ncopiesb, (__CLEN_T *)szb, rd, sd,
139          dimd, ncopiesd, szd);
140 }
141 
ENTFTN(SPREADCA,spreadca)142 void ENTFTN(SPREADCA, spreadca)(DCHAR(rb),         /* result char base */
143                               DCHAR(sb),         /* source char base */
144                               void *dimb,        /* dimension base */
145                               void *ncopiesb,    /* ncopies base */
146                               F90_Desc *rd,      /* result descriptor */
147                               F90_Desc *sd,      /* source descriptor */
148                               F90_Desc *dimd,    /* ncopies descriptor */
149                               F90_Desc *ncopiesd /* dimension descriptor */
150                               DCLEN64(rb)          /* result char len */
151                               DCLEN64(sb))         /* source char len */
152 {
153   ENTFTN(SPREAD,spread)(CADR(rb), CADR(sb), dimb, ncopiesb,
154 			  rd, sd, dimd, ncopiesd);
155 }
156 /* 32 bit CLEN version */
ENTFTN(SPREADC,spreadc)157 void ENTFTN(SPREADC, spreadc)(DCHAR(rb),         /* result char base */
158                               DCHAR(sb),         /* source char base */
159                               void *dimb,        /* dimension base */
160                               void *ncopiesb,    /* ncopies base */
161                               F90_Desc *rd,      /* result descriptor */
162                               F90_Desc *sd,      /* source descriptor */
163                               F90_Desc *dimd,    /* ncopies descriptor */
164                               F90_Desc *ncopiesd /* dimension descriptor */
165                               DCLEN(rb)          /* result char len */
166                               DCLEN(sb))         /* source char len */
167 {
168   ENTFTN(SPREADCA, spreadca)(CADR(rb), CADR(sb), dimb, ncopiesb, rd, sd, dimd,
169          ncopiesd, (__CLEN_T)CLEN(rb), (__CLEN_T)CLEN(sb));
170 }
171 
172 /* spread of a character scalar - copy the scalar to a rank 1 array, ignore
173  * 'dim' */
ENTFTN(SPREADCSA,spreadcsa)174 void ENTFTN(SPREADCSA,
175             spreadcsa)(DCHAR(rb),      /* result char base */
176                       DCHAR(sb),      /* source char base */
177                       void *dimb,     /* dimension base */
178                       void *ncopiesb, /* ncopies base */
179                       __CLEN_T *szb,   /* sizeof source base - 0 for spreadcs */
180                       F90_Desc *rd,   /* result descriptor */
181                       F90_Desc *sd,   /* source descriptor */
182                       F90_Desc *dimd, /* ncopies descriptor */
183                       F90_Desc *ncopiesd, /* dimension descriptor */
184                       F90_Desc *szd       /* sizeof source descriptor */
185                       DCLEN64(rb)           /* result char len */
186                       DCLEN64(sb))          /* source char len */
187 {
188   __CLEN_T size;
189   size = CLEN(sb);
190   ENTFTN(SPREADS,spreads)(CADR(rb), CADR(sb), dimb, ncopiesb, &size,
191 			    rd, sd, dimd, ncopiesd, szd);
192 }
193 /* 32 bit CLEN version */
ENTFTN(SPREADCS,spreadcs)194 void ENTFTN(SPREADCS,
195             spreadcs)(DCHAR(rb),      /* result char base */
196                       DCHAR(sb),      /* source char base */
197                       void *dimb,     /* dimension base */
198                       void *ncopiesb, /* ncopies base */
199                       __INT_T *szb,   /* sizeof source base - 0 for spreadcs */
200                       F90_Desc *rd,   /* result descriptor */
201                       F90_Desc *sd,   /* source descriptor */
202                       F90_Desc *dimd, /* ncopies descriptor */
203                       F90_Desc *ncopiesd, /* dimension descriptor */
204                       F90_Desc *szd       /* sizeof source descriptor */
205                       DCLEN(rb)           /* result char len */
206                       DCLEN(sb))          /* source char len */
207 {
208   ENTFTN(SPREADCSA, spreadcsa)(CADR(rb), CADR(sb), dimb, ncopiesb,
209          (__CLEN_T *)szb, rd, sd, dimd, ncopiesd, szd, (__CLEN_T)CLEN(rb),
210          (__CLEN_T)CLEN(sb));
211 }
212 
213 /* set up result descriptor for spread intrinsic -- used when the dim
214    arg is variable.  the added spread dimension is given a collapsed
215    distribution and the remaining dimensions are aligned with the
216    corresponding source dimensions.  lbounds are set to 1 and overlap
217    allowances are set to 0. */
218 
ENTFTN(SPREAD_DESCRIPTOR,spread_descriptor)219 void ENTFTN(SPREAD_DESCRIPTOR,
220             spread_descriptor)(F90_Desc *rd,      /* result descriptor */
221                                F90_Desc *sd,      /* source descriptor */
222                                __INT_T *dimb,     /* dimension */
223                                __INT_T *ncopiesb) /* ncopies base */
224 {
225   DECL_DIM_PTRS(rdd);
226   DECL_DIM_PTRS(sdd);
227   DECL_HDR_PTRS(td);
228   __INT_T dim, extent, m, ncopies, offset, rx, sx, tx;
229 
230 #if defined(DEBUG)
231   if (F90_TAG_G(sd) != __DESC)
232     __fort_abort("SPREAD: invalid source arg");
233 #endif
234 
235   dim = *dimb;
236   if (dim < 1 || dim > F90_RANK_G(sd) + 1)
237     __fort_abort("SPREAD: invalid dim");
238 
239   ncopies = *ncopiesb;
240   if (ncopies < 0)
241     ncopies = 0;
242 
243   td = DIST_ALIGN_TARGET_G(sd);
244   __DIST_INIT_DESCRIPTOR(rd, F90_RANK_G(sd) + 1, F90_KIND_G(sd), F90_LEN_G(sd),
245                         F90_FLAGS_G(sd), td);
246   for (rx = sx = 1; sx <= F90_RANK_G(sd); ++rx, ++sx) {
247     if (sx == dim)
248       ++rx;
249     SET_DIM_PTRS(sdd, sd, sx - 1);
250     extent = F90_DPTR_EXTENT_G(sdd);
251     offset = DIST_DPTR_TSTRIDE_G(sdd) * (F90_DPTR_LBOUND_G(sdd) - 1) +
252              DIST_DPTR_TOFFSET_G(sdd);
253 
254     /*
255      * added &DIST_DIM_GEN_BLOCK_G(td,(DIST_DPTR_TAXIS_G(sdd))-1) arg
256      */
257 
258     I8(__fort_set_alignment)(rd, rx, 1, extent, DIST_DPTR_TAXIS_G(sdd),
259                                 DIST_DPTR_TSTRIDE_G(sdd), offset,
260                                 &DIST_DIM_GEN_BLOCK_G(td,(DIST_DPTR_TAXIS_G(sdd))-1));
261     __DIST_SET_ALLOCATION(rd, rx, 0, 0);
262   }
263   I8(__fort_set_alignment)(rd, dim, 1, ncopies, 0, 1, 0);
264   __DIST_SET_ALLOCATION(rd, dim, 0, 0);
265   m = DIST_SINGLE_G(sd);
266   for (tx = 1; m > 0; ++tx, m >>= 1) {
267     if (m & 1)
268       I8(__fort_set_single)(rd, td, tx, DIST_INFO_G(sd, tx - 1), __SINGLE);
269   }
270   I8(__fort_finish_descriptor)(rd);
271 }
272