1 /* 2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 * 16 */ 17 18 /** \file commopt.h 19 \brief macros, definitions, and prototypes for communications module 20 */ 21 22 #define MAXFUSE 10 23 #define MAXOMEM 40 24 typedef struct { 25 int lhs; 26 int ifast; 27 int endifast; 28 LITEMF *inner_cyclic; 29 int c_lof[7]; 30 int idx[7]; 31 int cb_init[7]; /* cyclic_block initilization asts */ 32 int cb_do[7]; 33 int cb_block[7]; 34 int cb_inc[7]; 35 int cb_enddo[7]; 36 37 int c_init[7]; /* cyclic initilization asts */ 38 int c_inc[7]; 39 int c_dupl[7]; 40 int c_idx[7]; 41 int c_dstt[7]; 42 } CTYPE; 43 44 typedef union { 45 struct { 46 int same; 47 int reuse; 48 int nomem; 49 int omem[MAXOMEM]; 50 int omemed; 51 } getsclr; 52 struct { 53 int idx; 54 int lhs; 55 int same; 56 int call; 57 } bnd; 58 struct { 59 int alloc; 60 int lhs; 61 int rhs; 62 int sptr; 63 int sectl; 64 int sectr; 65 int uselhs; 66 int free; 67 int same; 68 int out; 69 int reuse; 70 int lhssec; 71 int notlhs; 72 } ccopy; 73 struct { 74 int alloc; 75 int lhs; 76 int rhs; 77 int vsub; 78 int nvsub; 79 int mask; 80 int sectvsub; 81 int sectnvsub; 82 int sectm; 83 int sectv[7]; 84 int v[7]; 85 int permute[7]; 86 int vflag; 87 int pflag; 88 int vdim; 89 int pdim; 90 int nvec; 91 int nper; 92 int type; 93 int uselhs; 94 int free; 95 int same; 96 int out; 97 int reuse; 98 int indexreuse; 99 int lhssec; 100 int notlhs; 101 } 
cgather; 102 struct { 103 int rhs; 104 int free; 105 int same; 106 int out; 107 int reuse; 108 int type; 109 int boundary; 110 } shift; 111 struct { 112 int comm; 113 int lhs; 114 int rhs; 115 int sectl; 116 int sectr; 117 int free; 118 int same; 119 int out; 120 int alloc; 121 int ref; 122 int reuse; 123 int invmvd; /* invariant moved */ 124 int type; 125 int uselhs; 126 int usedstd; 127 } cstart; 128 struct { 129 int arr; 130 int sptr; 131 int alloc; 132 int free; 133 int flag; 134 int same; 135 int out; 136 int reuse; 137 } sect; 138 struct { 139 int sptr; 140 int free; 141 int same; 142 int out; 143 int reuse; 144 int used; 145 int ptasgn; 146 } alloc; 147 struct { 148 int sptr; 149 int ncall; 150 LITEMF *call; 151 int pos; 152 } call; 153 struct { 154 int sectl; 155 int nrt; 156 LITEMF *rtl; 157 int nmcall; 158 LITEMF *mcall; 159 int nscall; 160 LITEMF *scall; 161 int npcall; 162 LITEMF *pcall; 163 int nmget; 164 LITEMF *mget; 165 int nsget; 166 LITEMF *sget; 167 CTYPE *cyclic; 168 int fuselp[7][MAXFUSE]; 169 int fusedstd[7][MAXFUSE]; 170 int nfuse[7]; 171 int header; 172 int barr1; 173 int barr2; 174 int fg; /* fg node */ 175 int ignore : 1; 176 int fused : 1; 177 } forall; 178 } FTABLE; 179 180 typedef struct { 181 int fall; 182 int std; 183 FTABLE f; 184 } FT; 185 186 typedef struct { 187 FT *base; 188 int size; 189 int avl; 190 } FTB; 191 192 extern FTB ftb; 193 194 #define FT_STD(i) ftb.base[i].std 195 #define FT_FORALL(i) ftb.base[i].fall 196 197 #define FT_GETSCLR_SAME(i) ftb.base[i].f.getsclr.same 198 #define FT_GETSCLR_REUSE(i) ftb.base[i].f.getsclr.reuse 199 #define FT_GETSCLR_NOMEM(i) ftb.base[i].f.getsclr.nomem 200 #define FT_GETSCLR_OMEM(i, j) ftb.base[i].f.getsclr.omem[j] 201 #define FT_GETSCLR_OMEMED(i) ftb.base[i].f.getsclr.omemed 202 203 #define FT_BND_LHS(i) ftb.base[i].f.bnd.lhs 204 #define FT_BND_IDX(i) ftb.base[i].f.bnd.idx 205 #define FT_BND_SAME(i) ftb.base[i].f.bnd.same 206 #define FT_BND_CALL(i) ftb.base[i].f.bnd.call 207 208 #define 
FT_ALLOC_SPTR(i) ftb.base[i].f.alloc.sptr 209 #define FT_ALLOC_FREE(i) ftb.base[i].f.alloc.free 210 #define FT_ALLOC_SAME(i) ftb.base[i].f.alloc.same 211 #define FT_ALLOC_OUT(i) ftb.base[i].f.alloc.out 212 #define FT_ALLOC_REUSE(i) ftb.base[i].f.alloc.reuse 213 #define FT_ALLOC_USED(i) ftb.base[i].f.alloc.used 214 #define FT_ALLOC_PTASGN(i) ftb.base[i].f.alloc.ptasgn 215 216 #define FT_SECT_ARR(i) ftb.base[i].f.sect.arr 217 #define FT_SECT_SPTR(i) ftb.base[i].f.sect.sptr 218 #define FT_SECT_ALLOC(i) ftb.base[i].f.sect.alloc 219 #define FT_SECT_FREE(i) ftb.base[i].f.sect.free 220 #define FT_SECT_FLAG(i) ftb.base[i].f.sect.flag 221 #define FT_SECT_SAME(i) ftb.base[i].f.sect.same 222 #define FT_SECT_OUT(i) ftb.base[i].f.sect.out 223 #define FT_SECT_REUSE(i) ftb.base[i].f.sect.reuse 224 225 #define FT_CCOPY_ALLOC(i) ftb.base[i].f.ccopy.alloc 226 #define FT_CCOPY_LHS(i) ftb.base[i].f.ccopy.lhs 227 #define FT_CCOPY_RHS(i) ftb.base[i].f.ccopy.rhs 228 #define FT_CCOPY_TSPTR(i) ftb.base[i].f.ccopy.sptr 229 #define FT_CCOPY_SECTL(i) ftb.base[i].f.ccopy.sectl 230 #define FT_CCOPY_SECTR(i) ftb.base[i].f.ccopy.sectr 231 #define FT_CCOPY_USELHS(i) ftb.base[i].f.ccopy.uselhs 232 #define FT_CCOPY_FREE(i) ftb.base[i].f.ccopy.free 233 #define FT_CCOPY_SAME(i) ftb.base[i].f.ccopy.same 234 #define FT_CCOPY_OUT(i) ftb.base[i].f.ccopy.out 235 #define FT_CCOPY_REUSE(i) ftb.base[i].f.ccopy.reuse 236 #define FT_CCOPY_LHSSEC(i) ftb.base[i].f.ccopy.lhssec 237 #define FT_CCOPY_NOTLHS(i) ftb.base[i].f.ccopy.notlhs 238 239 #define FT_CGATHER_ALLOC(i) ftb.base[i].f.cgather.alloc 240 #define FT_CGATHER_LHS(i) ftb.base[i].f.cgather.lhs 241 #define FT_CGATHER_RHS(i) ftb.base[i].f.cgather.rhs 242 #define FT_CGATHER_VSUB(i) ftb.base[i].f.cgather.vsub 243 #define FT_CGATHER_NVSUB(i) ftb.base[i].f.cgather.nvsub 244 #define FT_CGATHER_MASK(i) ftb.base[i].f.cgather.mask 245 #define FT_CGATHER_SECTVSUB(i) ftb.base[i].f.cgather.sectvsub 246 #define FT_CGATHER_SECTNVSUB(i) ftb.base[i].f.cgather.sectnvsub 
/* cgather member: remaining fields.  For the two-argument forms, i selects
 * the ftb.base entry and j the element of the 7-element array. */
#define FT_CGATHER_SECTM(i) ftb.base[(i)].f.cgather.sectm
#define FT_CGATHER_SECTV(i, j) ftb.base[(i)].f.cgather.sectv[(j)]
#define FT_CGATHER_V(i, j) ftb.base[(i)].f.cgather.v[(j)]
#define FT_CGATHER_PERMUTE(i, j) ftb.base[(i)].f.cgather.permute[(j)]
#define FT_CGATHER_VFLAG(i) ftb.base[(i)].f.cgather.vflag
#define FT_CGATHER_PFLAG(i) ftb.base[(i)].f.cgather.pflag
#define FT_CGATHER_VDIM(i) ftb.base[(i)].f.cgather.vdim
#define FT_CGATHER_PDIM(i) ftb.base[(i)].f.cgather.pdim
#define FT_CGATHER_NVEC(i) ftb.base[(i)].f.cgather.nvec
#define FT_CGATHER_NPER(i) ftb.base[(i)].f.cgather.nper
#define FT_CGATHER_TYPE(i) ftb.base[(i)].f.cgather.type
#define FT_CGATHER_USELHS(i) ftb.base[(i)].f.cgather.uselhs
#define FT_CGATHER_FREE(i) ftb.base[(i)].f.cgather.free
#define FT_CGATHER_SAME(i) ftb.base[(i)].f.cgather.same
#define FT_CGATHER_OUT(i) ftb.base[(i)].f.cgather.out
#define FT_CGATHER_REUSE(i) ftb.base[(i)].f.cgather.reuse
#define FT_CGATHER_INDEXREUSE(i) ftb.base[(i)].f.cgather.indexreuse
#define FT_CGATHER_LHSSEC(i) ftb.base[(i)].f.cgather.lhssec
#define FT_CGATHER_NOTLHS(i) ftb.base[(i)].f.cgather.notlhs

/* shift member */
#define FT_SHIFT_RHS(i) ftb.base[(i)].f.shift.rhs
#define FT_SHIFT_FREE(i) ftb.base[(i)].f.shift.free
#define FT_SHIFT_SAME(i) ftb.base[(i)].f.shift.same
#define FT_SHIFT_OUT(i) ftb.base[(i)].f.shift.out
#define FT_SHIFT_REUSE(i) ftb.base[(i)].f.shift.reuse
#define FT_SHIFT_TYPE(i) ftb.base[(i)].f.shift.type
#define FT_SHIFT_BOUNDARY(i) ftb.base[(i)].f.shift.boundary

/* cstart member: first group of fields */
#define FT_CSTART_COMM(i) ftb.base[(i)].f.cstart.comm
#define FT_CSTART_LHS(i) ftb.base[(i)].f.cstart.lhs
#define FT_CSTART_RHS(i) ftb.base[(i)].f.cstart.rhs
#define FT_CSTART_SECTL(i) ftb.base[(i)].f.cstart.sectl
#define FT_CSTART_SECTR(i) ftb.base[(i)].f.cstart.sectr
#define FT_CSTART_FREE(i) ftb.base[(i)].f.cstart.free
#define FT_CSTART_SAME(i) ftb.base[(i)].f.cstart.same
#define FT_CSTART_OUT(i) ftb.base[(i)].f.cstart.out
#define FT_CSTART_ALLOC(i) ftb.base[i].f.cstart.alloc 284 #define FT_CSTART_REF(i) ftb.base[i].f.cstart.ref 285 #define FT_CSTART_REUSE(i) ftb.base[i].f.cstart.reuse 286 #define FT_CSTART_INVMVD(i) ftb.base[i].f.cstart.invmvd 287 #define FT_CSTART_TYPE(i) ftb.base[i].f.cstart.type 288 #define FT_CSTART_USELHS(i) ftb.base[i].f.cstart.uselhs 289 #define FT_CSTART_USEDSTD(i) ftb.base[i].f.cstart.usedstd 290 291 #define FT_CALL_SPTR(i) ftb.base[i].f.call.sptr 292 #define FT_CALL_NCALL(i) ftb.base[i].f.call.ncall 293 #define FT_CALL_CALL(i) ftb.base[i].f.call.call 294 #define FT_CALL_POS(i) ftb.base[i].f.call.pos 295 296 #define FT_NRT(i) ftb.base[i].f.forall.nrt 297 #define FT_RTL(i) ftb.base[i].f.forall.rtl 298 #define FT_NMCALL(i) ftb.base[i].f.forall.nmcall 299 #define FT_MCALL(i) ftb.base[i].f.forall.mcall 300 #define FT_NSCALL(i) ftb.base[i].f.forall.nscall 301 #define FT_SCALL(i) ftb.base[i].f.forall.scall 302 #define FT_NPCALL(i) ftb.base[i].f.forall.npcall 303 #define FT_PCALL(i) ftb.base[i].f.forall.pcall 304 #define FT_NMGET(i) ftb.base[i].f.forall.nmget 305 #define FT_MGET(i) ftb.base[i].f.forall.mget 306 #define FT_NSGET(i) ftb.base[i].f.forall.nsget 307 #define FT_SGET(i) ftb.base[i].f.forall.sget 308 #define FT_IGNORE(i) ftb.base[i].f.forall.ignore 309 #define FT_CYCLIC(i) ftb.base[i].f.forall.cyclic 310 #define FT_SECTL(i) ftb.base[i].f.forall.sectl 311 #define FT_NFUSE(i, j) ftb.base[i].f.forall.nfuse[j] 312 #define FT_FUSELP(i, j, k) ftb.base[i].f.forall.fuselp[(j)][(k)] 313 #define FT_FUSEDSTD(i, j, k) ftb.base[i].f.forall.fusedstd[(j)][(k)] 314 #define FT_FUSED(i) ftb.base[i].f.forall.fused 315 #define FT_HEADER(i) ftb.base[i].f.forall.header 316 #define FT_BARR1(i) ftb.base[i].f.forall.barr1 317 #define FT_BARR2(i) ftb.base[i].f.forall.barr2 318 #define FT_FG(i) ftb.base[i].f.forall.fg 319 320 typedef struct { 321 int fuse; 322 int bnd; 323 int alloc; 324 int sect; 325 int copysection; 326 int gatherx; 327 int scatterx; 328 int shift; 329 int start; 
330 } OPTSUM; 331 332 extern OPTSUM optsum; 333 334 #define BOGUSFLAG 0x100 335 #define NOTSECTFLAG 0 /* remove this flag, 0x200 */ 336 #define NOREINDEX 0x2000000 337 338 extern void add_loop_hd(int); 339 extern LOGICAL same_forall_size(int, int, int); 340 extern void comm_analyze(void); /* comm.c */ 341 extern void comm_optimize_post(void); 342 extern void comm_invar(void); /* comminvar.c */ 343 extern void comm_generator(void); /* commgen.c */ 344 extern void comm_optimize_pre(void); 345