1 /*
2  * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 /** \file commopt.h
19     \brief macros, definitions, and prototypes for communications module
20 */
21 
/* Capacity limits for the fixed-size arrays embedded in FTABLE. */
#define MAXFUSE 10 /* second extent of forall.fuselp and forall.fusedstd */
#define MAXOMEM 40 /* extent of getsclr.omem */
24 typedef struct {
25   int lhs;
26   int ifast;
27   int endifast;
28   LITEMF *inner_cyclic;
29   int c_lof[7];
30   int idx[7];
31   int cb_init[7]; /* cyclic_block initilization asts */
32   int cb_do[7];
33   int cb_block[7];
34   int cb_inc[7];
35   int cb_enddo[7];
36 
37   int c_init[7]; /* cyclic initilization asts */
38   int c_inc[7];
39   int c_dupl[7];
40   int c_idx[7];
41   int c_dstt[7];
42 } CTYPE;
43 
44 typedef union {
45   struct {
46     int same;
47     int reuse;
48     int nomem;
49     int omem[MAXOMEM];
50     int omemed;
51   } getsclr;
52   struct {
53     int idx;
54     int lhs;
55     int same;
56     int call;
57   } bnd;
58   struct {
59     int alloc;
60     int lhs;
61     int rhs;
62     int sptr;
63     int sectl;
64     int sectr;
65     int uselhs;
66     int free;
67     int same;
68     int out;
69     int reuse;
70     int lhssec;
71     int notlhs;
72   } ccopy;
73   struct {
74     int alloc;
75     int lhs;
76     int rhs;
77     int vsub;
78     int nvsub;
79     int mask;
80     int sectvsub;
81     int sectnvsub;
82     int sectm;
83     int sectv[7];
84     int v[7];
85     int permute[7];
86     int vflag;
87     int pflag;
88     int vdim;
89     int pdim;
90     int nvec;
91     int nper;
92     int type;
93     int uselhs;
94     int free;
95     int same;
96     int out;
97     int reuse;
98     int indexreuse;
99     int lhssec;
100     int notlhs;
101   } cgather;
102   struct {
103     int rhs;
104     int free;
105     int same;
106     int out;
107     int reuse;
108     int type;
109     int boundary;
110   } shift;
111   struct {
112     int comm;
113     int lhs;
114     int rhs;
115     int sectl;
116     int sectr;
117     int free;
118     int same;
119     int out;
120     int alloc;
121     int ref;
122     int reuse;
123     int invmvd; /* invariant moved */
124     int type;
125     int uselhs;
126     int usedstd;
127   } cstart;
128   struct {
129     int arr;
130     int sptr;
131     int alloc;
132     int free;
133     int flag;
134     int same;
135     int out;
136     int reuse;
137   } sect;
138   struct {
139     int sptr;
140     int free;
141     int same;
142     int out;
143     int reuse;
144     int used;
145     int ptasgn;
146   } alloc;
147   struct {
148     int sptr;
149     int ncall;
150     LITEMF *call;
151     int pos;
152   } call;
153   struct {
154     int sectl;
155     int nrt;
156     LITEMF *rtl;
157     int nmcall;
158     LITEMF *mcall;
159     int nscall;
160     LITEMF *scall;
161     int npcall;
162     LITEMF *pcall;
163     int nmget;
164     LITEMF *mget;
165     int nsget;
166     LITEMF *sget;
167     CTYPE *cyclic;
168     int fuselp[7][MAXFUSE];
169     int fusedstd[7][MAXFUSE];
170     int nfuse[7];
171     int header;
172     int barr1;
173     int barr2;
174     int fg; /* fg node */
175     int ignore : 1;
176     int fused : 1;
177   } forall;
178 } FTABLE;
179 
180 typedef struct {
181   int fall;
182   int std;
183   FTABLE f;
184 } FT;
185 
186 typedef struct {
187   FT *base;
188   int size;
189   int avl;
190 } FTB;
191 
192 extern FTB ftb;
193 
/*
 * Accessors for the members common to every FT entry.  Each macro expands
 * to an lvalue inside ftb.base[ft], so it can be read or assigned; the
 * argument is evaluated exactly once.
 */
#define FT_STD(ft) ftb.base[(ft)].std
#define FT_FORALL(ft) ftb.base[(ft)].fall
196 
/*
 * Accessors for the `getsclr` member of an entry's FTABLE payload.  Each
 * expands to an lvalue.  FT_GETSCLR_OMEM takes a second index k into the
 * omem[] array, which has MAXOMEM slots.
 */
#define FT_GETSCLR_SAME(ft) ftb.base[(ft)].f.getsclr.same
#define FT_GETSCLR_REUSE(ft) ftb.base[(ft)].f.getsclr.reuse
#define FT_GETSCLR_NOMEM(ft) ftb.base[(ft)].f.getsclr.nomem
#define FT_GETSCLR_OMEM(ft, k) ftb.base[(ft)].f.getsclr.omem[(k)]
#define FT_GETSCLR_OMEMED(ft) ftb.base[(ft)].f.getsclr.omemed
202 
/*
 * Accessors for the `bnd` member of an entry's FTABLE payload.  Each
 * expands to an int lvalue.
 */
#define FT_BND_LHS(ft) ftb.base[(ft)].f.bnd.lhs
#define FT_BND_IDX(ft) ftb.base[(ft)].f.bnd.idx
#define FT_BND_SAME(ft) ftb.base[(ft)].f.bnd.same
#define FT_BND_CALL(ft) ftb.base[(ft)].f.bnd.call
207 
/*
 * Accessors for the `alloc` member of an entry's FTABLE payload.  Each
 * expands to an int lvalue.
 */
#define FT_ALLOC_SPTR(ft) ftb.base[(ft)].f.alloc.sptr
#define FT_ALLOC_FREE(ft) ftb.base[(ft)].f.alloc.free
#define FT_ALLOC_SAME(ft) ftb.base[(ft)].f.alloc.same
#define FT_ALLOC_OUT(ft) ftb.base[(ft)].f.alloc.out
#define FT_ALLOC_REUSE(ft) ftb.base[(ft)].f.alloc.reuse
#define FT_ALLOC_USED(ft) ftb.base[(ft)].f.alloc.used
#define FT_ALLOC_PTASGN(ft) ftb.base[(ft)].f.alloc.ptasgn
215 
/*
 * Accessors for the `sect` member of an entry's FTABLE payload.  Each
 * expands to an int lvalue.
 */
#define FT_SECT_ARR(ft) ftb.base[(ft)].f.sect.arr
#define FT_SECT_SPTR(ft) ftb.base[(ft)].f.sect.sptr
#define FT_SECT_ALLOC(ft) ftb.base[(ft)].f.sect.alloc
#define FT_SECT_FREE(ft) ftb.base[(ft)].f.sect.free
#define FT_SECT_FLAG(ft) ftb.base[(ft)].f.sect.flag
#define FT_SECT_SAME(ft) ftb.base[(ft)].f.sect.same
#define FT_SECT_OUT(ft) ftb.base[(ft)].f.sect.out
#define FT_SECT_REUSE(ft) ftb.base[(ft)].f.sect.reuse
224 
/*
 * Accessors for the `ccopy` member of an entry's FTABLE payload.  Each
 * expands to an int lvalue.  The macro name follows the member name except
 * for FT_CCOPY_TSPTR, which maps to ccopy.sptr.
 */
#define FT_CCOPY_ALLOC(ft) ftb.base[(ft)].f.ccopy.alloc
#define FT_CCOPY_LHS(ft) ftb.base[(ft)].f.ccopy.lhs
#define FT_CCOPY_RHS(ft) ftb.base[(ft)].f.ccopy.rhs
#define FT_CCOPY_TSPTR(ft) ftb.base[(ft)].f.ccopy.sptr
#define FT_CCOPY_SECTL(ft) ftb.base[(ft)].f.ccopy.sectl
#define FT_CCOPY_SECTR(ft) ftb.base[(ft)].f.ccopy.sectr
#define FT_CCOPY_USELHS(ft) ftb.base[(ft)].f.ccopy.uselhs
#define FT_CCOPY_FREE(ft) ftb.base[(ft)].f.ccopy.free
#define FT_CCOPY_SAME(ft) ftb.base[(ft)].f.ccopy.same
#define FT_CCOPY_OUT(ft) ftb.base[(ft)].f.ccopy.out
#define FT_CCOPY_REUSE(ft) ftb.base[(ft)].f.ccopy.reuse
#define FT_CCOPY_LHSSEC(ft) ftb.base[(ft)].f.ccopy.lhssec
#define FT_CCOPY_NOTLHS(ft) ftb.base[(ft)].f.ccopy.notlhs
238 
/*
 * Accessors for the `cgather` member of an entry's FTABLE payload.  Each
 * expands to an int lvalue.  FT_CGATHER_SECTV, FT_CGATHER_V and
 * FT_CGATHER_PERMUTE take a second index k into a seven-slot array.
 */
#define FT_CGATHER_ALLOC(ft) ftb.base[(ft)].f.cgather.alloc
#define FT_CGATHER_LHS(ft) ftb.base[(ft)].f.cgather.lhs
#define FT_CGATHER_RHS(ft) ftb.base[(ft)].f.cgather.rhs
#define FT_CGATHER_VSUB(ft) ftb.base[(ft)].f.cgather.vsub
#define FT_CGATHER_NVSUB(ft) ftb.base[(ft)].f.cgather.nvsub
#define FT_CGATHER_MASK(ft) ftb.base[(ft)].f.cgather.mask
#define FT_CGATHER_SECTVSUB(ft) ftb.base[(ft)].f.cgather.sectvsub
#define FT_CGATHER_SECTNVSUB(ft) ftb.base[(ft)].f.cgather.sectnvsub
#define FT_CGATHER_SECTM(ft) ftb.base[(ft)].f.cgather.sectm
#define FT_CGATHER_SECTV(ft, k) ftb.base[(ft)].f.cgather.sectv[(k)]
#define FT_CGATHER_V(ft, k) ftb.base[(ft)].f.cgather.v[(k)]
#define FT_CGATHER_PERMUTE(ft, k) ftb.base[(ft)].f.cgather.permute[(k)]
#define FT_CGATHER_VFLAG(ft) ftb.base[(ft)].f.cgather.vflag
#define FT_CGATHER_PFLAG(ft) ftb.base[(ft)].f.cgather.pflag
#define FT_CGATHER_VDIM(ft) ftb.base[(ft)].f.cgather.vdim
#define FT_CGATHER_PDIM(ft) ftb.base[(ft)].f.cgather.pdim
#define FT_CGATHER_NVEC(ft) ftb.base[(ft)].f.cgather.nvec
#define FT_CGATHER_NPER(ft) ftb.base[(ft)].f.cgather.nper
#define FT_CGATHER_TYPE(ft) ftb.base[(ft)].f.cgather.type
#define FT_CGATHER_USELHS(ft) ftb.base[(ft)].f.cgather.uselhs
#define FT_CGATHER_FREE(ft) ftb.base[(ft)].f.cgather.free
#define FT_CGATHER_SAME(ft) ftb.base[(ft)].f.cgather.same
#define FT_CGATHER_OUT(ft) ftb.base[(ft)].f.cgather.out
#define FT_CGATHER_REUSE(ft) ftb.base[(ft)].f.cgather.reuse
#define FT_CGATHER_INDEXREUSE(ft) ftb.base[(ft)].f.cgather.indexreuse
#define FT_CGATHER_LHSSEC(ft) ftb.base[(ft)].f.cgather.lhssec
#define FT_CGATHER_NOTLHS(ft) ftb.base[(ft)].f.cgather.notlhs
266 
/*
 * Accessors for the `shift` member of an entry's FTABLE payload.  Each
 * expands to an int lvalue.
 */
#define FT_SHIFT_RHS(ft) ftb.base[(ft)].f.shift.rhs
#define FT_SHIFT_FREE(ft) ftb.base[(ft)].f.shift.free
#define FT_SHIFT_SAME(ft) ftb.base[(ft)].f.shift.same
#define FT_SHIFT_OUT(ft) ftb.base[(ft)].f.shift.out
#define FT_SHIFT_REUSE(ft) ftb.base[(ft)].f.shift.reuse
#define FT_SHIFT_TYPE(ft) ftb.base[(ft)].f.shift.type
#define FT_SHIFT_BOUNDARY(ft) ftb.base[(ft)].f.shift.boundary
274 
/*
 * Accessors for the `cstart` member of an entry's FTABLE payload.  Each
 * expands to an int lvalue.
 */
#define FT_CSTART_COMM(ft) ftb.base[(ft)].f.cstart.comm
#define FT_CSTART_LHS(ft) ftb.base[(ft)].f.cstart.lhs
#define FT_CSTART_RHS(ft) ftb.base[(ft)].f.cstart.rhs
#define FT_CSTART_SECTL(ft) ftb.base[(ft)].f.cstart.sectl
#define FT_CSTART_SECTR(ft) ftb.base[(ft)].f.cstart.sectr
#define FT_CSTART_FREE(ft) ftb.base[(ft)].f.cstart.free
#define FT_CSTART_SAME(ft) ftb.base[(ft)].f.cstart.same
#define FT_CSTART_OUT(ft) ftb.base[(ft)].f.cstart.out
#define FT_CSTART_ALLOC(ft) ftb.base[(ft)].f.cstart.alloc
#define FT_CSTART_REF(ft) ftb.base[(ft)].f.cstart.ref
#define FT_CSTART_REUSE(ft) ftb.base[(ft)].f.cstart.reuse
#define FT_CSTART_INVMVD(ft) ftb.base[(ft)].f.cstart.invmvd
#define FT_CSTART_TYPE(ft) ftb.base[(ft)].f.cstart.type
#define FT_CSTART_USELHS(ft) ftb.base[(ft)].f.cstart.uselhs
#define FT_CSTART_USEDSTD(ft) ftb.base[(ft)].f.cstart.usedstd
290 
/*
 * Accessors for the `call` member of an entry's FTABLE payload.  Each
 * expands to an lvalue; FT_CALL_CALL yields the LITEMF pointer, the others
 * yield ints.
 */
#define FT_CALL_SPTR(ft) ftb.base[(ft)].f.call.sptr
#define FT_CALL_NCALL(ft) ftb.base[(ft)].f.call.ncall
#define FT_CALL_CALL(ft) ftb.base[(ft)].f.call.call
#define FT_CALL_POS(ft) ftb.base[(ft)].f.call.pos
295 
/*
 * Accessors for the `forall` member of an entry's FTABLE payload.  Unlike
 * the other families these carry no member-name prefix.  Each expands to an
 * lvalue, with two caveats that follow from the field declarations:
 *   - FT_IGNORE and FT_FUSED name one-bit bit-fields, so their address
 *     cannot be taken.
 *   - FT_NFUSE takes a second index k (seven slots); FT_FUSELP and
 *     FT_FUSEDSTD take indices k (seven slots) and n (MAXFUSE slots).
 */
#define FT_NRT(ft) ftb.base[(ft)].f.forall.nrt
#define FT_RTL(ft) ftb.base[(ft)].f.forall.rtl
#define FT_NMCALL(ft) ftb.base[(ft)].f.forall.nmcall
#define FT_MCALL(ft) ftb.base[(ft)].f.forall.mcall
#define FT_NSCALL(ft) ftb.base[(ft)].f.forall.nscall
#define FT_SCALL(ft) ftb.base[(ft)].f.forall.scall
#define FT_NPCALL(ft) ftb.base[(ft)].f.forall.npcall
#define FT_PCALL(ft) ftb.base[(ft)].f.forall.pcall
#define FT_NMGET(ft) ftb.base[(ft)].f.forall.nmget
#define FT_MGET(ft) ftb.base[(ft)].f.forall.mget
#define FT_NSGET(ft) ftb.base[(ft)].f.forall.nsget
#define FT_SGET(ft) ftb.base[(ft)].f.forall.sget
#define FT_IGNORE(ft) ftb.base[(ft)].f.forall.ignore
#define FT_CYCLIC(ft) ftb.base[(ft)].f.forall.cyclic
#define FT_SECTL(ft) ftb.base[(ft)].f.forall.sectl
#define FT_NFUSE(ft, k) ftb.base[(ft)].f.forall.nfuse[(k)]
#define FT_FUSELP(ft, k, n) ftb.base[(ft)].f.forall.fuselp[(k)][(n)]
#define FT_FUSEDSTD(ft, k, n) ftb.base[(ft)].f.forall.fusedstd[(k)][(n)]
#define FT_FUSED(ft) ftb.base[(ft)].f.forall.fused
#define FT_HEADER(ft) ftb.base[(ft)].f.forall.header
#define FT_BARR1(ft) ftb.base[(ft)].f.forall.barr1
#define FT_BARR2(ft) ftb.base[(ft)].f.forall.barr2
#define FT_FG(ft) ftb.base[(ft)].f.forall.fg
319 
/*
 * OPTSUM -- nine int members, one per named category (fuse, bnd, alloc,
 * sect, copysection, gatherx, scatterx, shift, start).  Presumably these
 * are per-category tallies kept by the optimizer -- TODO confirm against
 * the code that updates `optsum`.
 */
typedef struct {
  int fuse;
  int bnd, alloc, sect;
  int copysection, gatherx, scatterx;
  int shift, start;
} OPTSUM;

/* Shared OPTSUM object; defined outside this header. */
extern OPTSUM optsum;
333 
/*
 * Flag constants.  BOGUSFLAG and NOREINDEX are distinct single bits.
 * NOTSECTFLAG is defined as 0, so OR-ing it into a flag word sets nothing
 * and testing for it with & is always false.
 */
#define BOGUSFLAG 0x0000100
#define NOTSECTFLAG 0 /* remove this flag, 0x200 */
#define NOREINDEX 0x2000000
337 
338 extern void add_loop_hd(int);
339 extern LOGICAL same_forall_size(int, int, int);
340 extern void comm_analyze(void); /* comm.c */
341 extern void comm_optimize_post(void);
342 extern void comm_invar(void);     /* comminvar.c */
343 extern void comm_generator(void); /* commgen.c */
344 extern void comm_optimize_pre(void);
345