1 /*
2 * Copyright (c) 1995-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18 /* clang-format off */
19
20 #include "stdioInterf.h"
21 #include "fioMacros.h"
22
23 /* spread intrinsic -- copy sections for ncopies into appropriate
24 dimensions */
25
ENTFTN(SPREAD,spread)26 void ENTFTN(SPREAD, spread)(void *rb, /* result base */
27 void *sb, /* source base */
28 void *dimb, /* dimension base */
29 void *ncopiesb, /* ncopies base */
30 F90_Desc *rd, /* result descriptor */
31 F90_Desc *sd, /* source descriptor */
32 F90_Desc *dimd, /* dimension descriptor */
33 F90_Desc *ncopiesd) /* ncopies descriptor */
34 {
35 char *rp, *sp;
36 chdr *c;
37 DECL_DIM_PTRS(rdd);
38 DECL_HDR_VARS(td);
39 int k, dim, ncopies;
40 __INT_T i, rank, rx, tx;
41 __INT_T olb[MAXDIMS], oub[MAXDIMS];
42 __INT_T flags, lbase, pbase, repli, scoff;
43
44 dim = I8(__fort_fetch_int)(dimb, dimd);
45 ncopies = I8(__fort_fetch_int)(ncopiesb, ncopiesd);
46
47 /* form temporary descriptor with a scalar subscript in the spread
48 dimension */
49
50 rank = F90_RANK_G(rd) - 1;
51 __DIST_INIT_SECTION(td, rank, rd);
52
53 /* set up non-spread dimensions */
54
55 for (tx = 0, rx = 1; rx <= F90_RANK_G(rd); ++rx) {
56 if (rx != dim) {
57 SET_DIM_PTRS(rdd, rd, rx - 1);
58 I8(__fort_set_section)(td, ++tx, rd, rx, F90_DPTR_LBOUND_G(rdd),
59 DPTR_UBOUND_G(rdd), 1);
60 }
61 }
62
63 flags = F90_FLAGS_G(td); /* save descriptor fields */
64 lbase = F90_LBASE_G(td);
65 pbase = DIST_PBASE_G(td);
66 repli = DIST_REPLICATED_G(td);
67 scoff = DIST_SCOFF_G(td);
68 for (i = rank; --i >= 0;) {
69 olb[i] = DIST_DIM_OLB_G(td, i);
70 oub[i] = DIST_DIM_OUB_G(td, i);
71 }
72
73 sp = (char *)sb + DIST_SCOFF_G(sd) * F90_LEN_G(sd);
74 SET_DIM_PTRS(rdd, rd, dim - 1);
75 for (k = 0; k < ncopies; ++k) {
76
77 /* set scalar subscript in spread dimension */
78
79 I8(__fort_set_single)((td), rd, dim, F90_DPTR_LBOUND_G(rdd) + k, __SCALAR);
80 I8(__fort_finish_section)((td));
81
82 rp = (char *)rb + DIST_SCOFF_G(td) * F90_LEN_G(td);
83 c = I8(__fort_copy)(rp, sp, td, sd, NULL);
84 __fort_doit(c);
85 __fort_frechn(c);
86
87 F90_FLAGS_P(td, flags); /* restore descriptor fields */
88 F90_LBASE_P(td, lbase);
89 DIST_PBASE_P(td, pbase);
90 DIST_REPLICATED_P(td, repli);
91 DIST_SCOFF_P(td, scoff);
92 for (i = rank; --i >= 0;) {
93 DIST_DIM_OLB_P(td, i, olb[i]);
94 DIST_DIM_OUB_P(td, i, oub[i]);
95 }
96 DIST_CACHED_P(td, 0);
97 }
98 }
99
100 /* spread of a scalar - copy the scalar to a rank 1 array, ignore 'dim' */
ENTFTN(SPREADSA,spreadsa)101 void ENTFTN(SPREADSA, spreadsa)(void *rb, /* result base */
102 void *sb, /* source base */
103 void *dimb, /* dimension base */
104 void *ncopiesb, /* ncopies base */
105 __CLEN_T *szb, /* sizeof source base */
106 F90_Desc *rd, /* result descriptor */
107 F90_Desc *sd, /* source descriptor */
108 F90_Desc *dimd, /* dimension descriptor */
109 F90_Desc *ncopiesd, /* ncopies descriptor */
110 F90_Desc *szd) /* sizeof source descriptor */
111 {
112 char *rp;
113 int ncopies;
114 __CLEN_T size;
115
116 /* we assume that result is replicated and contiguous */
117
118 ncopies = I8(__fort_fetch_int)(ncopiesb, ncopiesd);
119 size = *szb;
120 rp = (char *)rb;
121 while (ncopies-- > 0) {
122 __fort_bcopy(rp, sb, size);
123 rp = rp + size;
124 }
125 }
126 /* 32 bit CLEN version */
ENTFTN(SPREADS,spreads)127 void ENTFTN(SPREADS, spreads)(void *rb, /* result base */
128 void *sb, /* source base */
129 void *dimb, /* dimension base */
130 void *ncopiesb, /* ncopies base */
131 __INT_T *szb, /* sizeof source base */
132 F90_Desc *rd, /* result descriptor */
133 F90_Desc *sd, /* source descriptor */
134 F90_Desc *dimd, /* dimension descriptor */
135 F90_Desc *ncopiesd, /* ncopies descriptor */
136 F90_Desc *szd) /* sizeof source descriptor */
137 {
138 ENTFTN(SPREADSA, spreadsa)(rb, sb, dimb, ncopiesb, (__CLEN_T *)szb, rd, sd,
139 dimd, ncopiesd, szd);
140 }
141
ENTFTN(SPREADCA,spreadca)142 void ENTFTN(SPREADCA, spreadca)(DCHAR(rb), /* result char base */
143 DCHAR(sb), /* source char base */
144 void *dimb, /* dimension base */
145 void *ncopiesb, /* ncopies base */
146 F90_Desc *rd, /* result descriptor */
147 F90_Desc *sd, /* source descriptor */
148 F90_Desc *dimd, /* ncopies descriptor */
149 F90_Desc *ncopiesd /* dimension descriptor */
150 DCLEN64(rb) /* result char len */
151 DCLEN64(sb)) /* source char len */
152 {
153 ENTFTN(SPREAD,spread)(CADR(rb), CADR(sb), dimb, ncopiesb,
154 rd, sd, dimd, ncopiesd);
155 }
156 /* 32 bit CLEN version */
ENTFTN(SPREADC,spreadc)157 void ENTFTN(SPREADC, spreadc)(DCHAR(rb), /* result char base */
158 DCHAR(sb), /* source char base */
159 void *dimb, /* dimension base */
160 void *ncopiesb, /* ncopies base */
161 F90_Desc *rd, /* result descriptor */
162 F90_Desc *sd, /* source descriptor */
163 F90_Desc *dimd, /* ncopies descriptor */
164 F90_Desc *ncopiesd /* dimension descriptor */
165 DCLEN(rb) /* result char len */
166 DCLEN(sb)) /* source char len */
167 {
168 ENTFTN(SPREADCA, spreadca)(CADR(rb), CADR(sb), dimb, ncopiesb, rd, sd, dimd,
169 ncopiesd, (__CLEN_T)CLEN(rb), (__CLEN_T)CLEN(sb));
170 }
171
172 /* spread of a character scalar - copy the scalar to a rank 1 array, ignore
173 * 'dim' */
ENTFTN(SPREADCSA,spreadcsa)174 void ENTFTN(SPREADCSA,
175 spreadcsa)(DCHAR(rb), /* result char base */
176 DCHAR(sb), /* source char base */
177 void *dimb, /* dimension base */
178 void *ncopiesb, /* ncopies base */
179 __CLEN_T *szb, /* sizeof source base - 0 for spreadcs */
180 F90_Desc *rd, /* result descriptor */
181 F90_Desc *sd, /* source descriptor */
182 F90_Desc *dimd, /* ncopies descriptor */
183 F90_Desc *ncopiesd, /* dimension descriptor */
184 F90_Desc *szd /* sizeof source descriptor */
185 DCLEN64(rb) /* result char len */
186 DCLEN64(sb)) /* source char len */
187 {
188 __CLEN_T size;
189 size = CLEN(sb);
190 ENTFTN(SPREADS,spreads)(CADR(rb), CADR(sb), dimb, ncopiesb, &size,
191 rd, sd, dimd, ncopiesd, szd);
192 }
193 /* 32 bit CLEN version */
ENTFTN(SPREADCS,spreadcs)194 void ENTFTN(SPREADCS,
195 spreadcs)(DCHAR(rb), /* result char base */
196 DCHAR(sb), /* source char base */
197 void *dimb, /* dimension base */
198 void *ncopiesb, /* ncopies base */
199 __INT_T *szb, /* sizeof source base - 0 for spreadcs */
200 F90_Desc *rd, /* result descriptor */
201 F90_Desc *sd, /* source descriptor */
202 F90_Desc *dimd, /* ncopies descriptor */
203 F90_Desc *ncopiesd, /* dimension descriptor */
204 F90_Desc *szd /* sizeof source descriptor */
205 DCLEN(rb) /* result char len */
206 DCLEN(sb)) /* source char len */
207 {
208 ENTFTN(SPREADCSA, spreadcsa)(CADR(rb), CADR(sb), dimb, ncopiesb,
209 (__CLEN_T *)szb, rd, sd, dimd, ncopiesd, szd, (__CLEN_T)CLEN(rb),
210 (__CLEN_T)CLEN(sb));
211 }
212
213 /* set up result descriptor for spread intrinsic -- used when the dim
214 arg is variable. the added spread dimension is given a collapsed
215 distribution and the remaining dimensions are aligned with the
216 corresponding source dimensions. lbounds are set to 1 and overlap
217 allowances are set to 0. */
218
ENTFTN(SPREAD_DESCRIPTOR,spread_descriptor)219 void ENTFTN(SPREAD_DESCRIPTOR,
220 spread_descriptor)(F90_Desc *rd, /* result descriptor */
221 F90_Desc *sd, /* source descriptor */
222 __INT_T *dimb, /* dimension */
223 __INT_T *ncopiesb) /* ncopies base */
224 {
225 DECL_DIM_PTRS(rdd);
226 DECL_DIM_PTRS(sdd);
227 DECL_HDR_PTRS(td);
228 __INT_T dim, extent, m, ncopies, offset, rx, sx, tx;
229
230 #if defined(DEBUG)
231 if (F90_TAG_G(sd) != __DESC)
232 __fort_abort("SPREAD: invalid source arg");
233 #endif
234
235 dim = *dimb;
236 if (dim < 1 || dim > F90_RANK_G(sd) + 1)
237 __fort_abort("SPREAD: invalid dim");
238
239 ncopies = *ncopiesb;
240 if (ncopies < 0)
241 ncopies = 0;
242
243 td = DIST_ALIGN_TARGET_G(sd);
244 __DIST_INIT_DESCRIPTOR(rd, F90_RANK_G(sd) + 1, F90_KIND_G(sd), F90_LEN_G(sd),
245 F90_FLAGS_G(sd), td);
246 for (rx = sx = 1; sx <= F90_RANK_G(sd); ++rx, ++sx) {
247 if (sx == dim)
248 ++rx;
249 SET_DIM_PTRS(sdd, sd, sx - 1);
250 extent = F90_DPTR_EXTENT_G(sdd);
251 offset = DIST_DPTR_TSTRIDE_G(sdd) * (F90_DPTR_LBOUND_G(sdd) - 1) +
252 DIST_DPTR_TOFFSET_G(sdd);
253
254 /*
255 * added &DIST_DIM_GEN_BLOCK_G(td,(DIST_DPTR_TAXIS_G(sdd))-1) arg
256 */
257
258 I8(__fort_set_alignment)(rd, rx, 1, extent, DIST_DPTR_TAXIS_G(sdd),
259 DIST_DPTR_TSTRIDE_G(sdd), offset,
260 &DIST_DIM_GEN_BLOCK_G(td,(DIST_DPTR_TAXIS_G(sdd))-1));
261 __DIST_SET_ALLOCATION(rd, rx, 0, 0);
262 }
263 I8(__fort_set_alignment)(rd, dim, 1, ncopies, 0, 1, 0);
264 __DIST_SET_ALLOCATION(rd, dim, 0, 0);
265 m = DIST_SINGLE_G(sd);
266 for (tx = 1; m > 0; ++tx, m >>= 1) {
267 if (m & 1)
268 I8(__fort_set_single)(rd, td, tx, DIST_INFO_G(sd, tx - 1), __SINGLE);
269 }
270 I8(__fort_finish_descriptor)(rd);
271 }
272