1 /*$Id: base.h,v 1.40.2.4 2007/12/18 18:41:27 d3g293 Exp $ */ 2 #include "armci.h" 3 #include "gaconfig.h" 4 #include "typesf2c.h" 5 6 #ifdef MSG_COMMS_MPI 7 #include <mpi.h> 8 #include "ga-mpi.h" 9 #endif 10 11 extern int _max_global_array; 12 extern Integer GAme, GAnproc; 13 extern int GA_Default_Proc_Group; 14 extern int** GA_Update_Flags; 15 extern int* GA_Update_Signal; 16 extern short int _ga_irreg_flag; 17 extern Integer GA_Debug_flag; 18 19 #define FNAM 31 /* length of array names */ 20 #define CACHE_SIZE 512 /* size of the cache inside GA DS*/ 21 22 #ifdef __crayx1 23 #define __CRAYX1_PRAGMA _Pragma 24 #else 25 #define __CRAYX1_PRAGMA(_pragf) 26 #endif 27 28 enum data_distribution {REGULAR, BLOCK_CYCLIC, SCALAPACK, TILED, TILED_IRREG}; 29 30 typedef int ARMCI_Datatype; 31 typedef struct { 32 int mirrored; 33 int map_nproc; 34 int actv; 35 int parent; 36 int *map_proc_list; 37 int *inv_map_proc_list; 38 #ifdef MSG_COMMS_MPI 39 ARMCI_Group group; 40 #endif 41 } proc_list_t; 42 43 typedef Integer C_Integer; 44 typedef armci_size_t C_Long; 45 46 typedef struct cache_struct{ 47 int lo[MAXDIM]; 48 int hi[MAXDIM]; 49 void* cache_buf; 50 struct cache_struct *next; 51 } cache_struct_t; 52 53 typedef struct { 54 short int ndim; /* number of dimensions */ 55 short int irreg; /* 0-regular; 1-irregular distribution */ 56 int type; /* data type in array */ 57 int actv; /* activity status, GA is allocated */ 58 int actv_handle; /* handle is created */ 59 C_Long size; /* size of local data in bytes */ 60 int elemsize; /* sizeof(datatype) */ 61 int ghosts; /* flag indicating presence of ghosts */ 62 long lock; /* lock */ 63 long id; /* ID of shmem region / MA handle */ 64 C_Integer dims[MAXDIM]; /* global array dimensions */ 65 C_Integer chunk[MAXDIM]; /* chunking */ 66 int nblock[MAXDIM]; /* number of blocks per dimension in */ 67 /* processor grid */ 68 C_Integer width[MAXDIM]; /* boundary cells per dimension */ 69 C_Integer first[MAXDIM]; /* (Mirrored only) first local element */ 70 C_Integer last[MAXDIM]; /* (Mirrored only) last local element */ 71 C_Long shm_length; /* (Mirrored only) local shmem length */ 72 C_Integer lo[MAXDIM]; /* top/left corner in local patch */ 73 double scale[MAXDIM]; /* nblock/dim (precomputed) */ 74 char **ptr; /* arrays of pointers to remote data */ 75 C_Integer *mapc; /* block distribution map */ 76 char name[FNAM+1]; /* array name */ 77 int p_handle; /* pointer to processor list for array */ 78 double *cache; /* store for frequently accessed ptrs */ 79 int corner_flag; /* flag for updating corner ghost cells */ 80 int distr_type; /* tag for data distribution type */ 81 C_Integer block_dims[MAXDIM];/* array of block dimensions */ 82 C_Integer num_blocks[MAXDIM];/* number of blocks in each dimension */ 83 C_Integer block_total; /* total number of blocks in array */ 84 /* using restricted arrays */ 85 C_Integer *rstrctd_list; /* list of processors with data */ 86 C_Integer num_rstrctd; /* number of processors with data */ 87 C_Integer has_data; /* flag that processor has data */ 88 C_Integer rstrctd_id; /* rank of processor in restricted list */ 89 C_Integer *rank_rstrctd; /* ranks of processors with data */ 90 /* Properties */ 91 int property; /* property type for GA */ 92 Integer *old_mapc; /* copy of original map */ 93 int old_nblock[MAXDIM]; /* copy of original nblock array */ 94 int old_handle; /* original group handle */ 95 int old_lo[MAXDIM]; /* original lo array */ 96 int old_chunk[MAXDIM]; /* original chunk array */ 97 #ifdef ENABLE_CHECKPOINT 98 int record_id; /* record id for writing ga to disk */ 99 #endif 100 /* new */ 101 int read_cache; /* flag for read only pointer in cache */ 102 cache_struct_t *cache_head; /* linked list of cached reads */ 103 int mem_dev_set; /* flag for setting memory device */ 104 char mem_dev[FNAM+1]; /* memory device type */ 105 int overlay; /* GA uses memory from another GA */ 106 107 } global_array_t; 108 109 enum property_type { NO_PROPERTY, 110 READ_ONLY, 111 READ_CACHE /* new */ 112 }; 113 114 extern global_array_t *_ga_main_data_structure; 115 extern proc_list_t *_proc_list_main_data_structure; 116 /*\ 117 *The following statement had to be moved here because of a problem in the c 118 *compiler on SV1. The problem is that when a c file is compiled with a 119 *-htaskprivate option on SV1, all global objects are given task-private status 120 *even static variables are supposed to be initialized and given a task-private 121 *memory/status. Somehow SV1 fails to do this for global variables that are 122 *initialized during declaration. 123 *So to handle that,we cannot initialize global variables to be able to run 124 *on SV1. 125 \*/ 126 extern global_array_t *GA; 127 extern proc_list_t *PGRP_LIST; 128 129 /*\ 130 * Copy of the world communicator used by GA so that applications using MPI 131 * libraries will not use the same communicator that GA uses internally (at 132 * least for the world communictor) 133 \*/ 134 #ifdef MSG_COMMS_MPI 135 extern MPI_Comm GA_MPI_World_comm_dup; 136 #endif 137 138 #define ERR_STR_LEN 256 /* length of string for error reporting */ 139 140 /**************************** MACROS ************************************/ 141 142 143 #define ga_check_handleM(g_a, string) \ 144 {\ 145 if(GA_OFFSET+ (g_a) < 0 || GA_OFFSET+(g_a) >=_max_global_array){ \ 146 char err_string[ERR_STR_LEN]; \ 147 sprintf(err_string, "%s: INVALID ARRAY HANDLE", string); \ 148 pnga_error(err_string, (g_a)); \ 149 } \ 150 if( ! (GA[GA_OFFSET+(g_a)].actv) ){ \ 151 char err_string[ERR_STR_LEN]; \ 152 sprintf(err_string, "%s: ARRAY NOT ACTIVE", string); \ 153 pnga_error(err_string, (g_a)); \ 154 } \ 155 } 156 157 /* this macro finds coordinates of the chunk of array owned by processor proc */ 158 #define ga_ownsM_no_handle(ndim, dims, nblock, mapc, proc, lo, hi) \ 159 { \ 160 Integer _loc, _nb, _d, _index, _dim=ndim,_dimstart=0, _dimpos; \ 161 for(_nb=1, _d=0; _d<_dim; _d++)_nb *= (Integer)nblock[_d]; \ 162 if((Integer)proc > _nb - 1 || proc<0){ \ 163 __CRAYX1_PRAGMA("_CRI novector"); \ 164 for(_d=0; _d<_dim; _d++){ \ 165 lo[_d] = (Integer)0; \ 166 hi[_d] = (Integer)-1;} \ 167 } \ 168 else{ \ 169 _index = proc; \ 170 __CRAYX1_PRAGMA("_CRI novector"); \ 171 for(_d=0; _d<_dim; _d++){ \ 172 _loc = _index% (Integer)nblock[_d]; \ 173 _index /= (Integer)nblock[_d]; \ 174 _dimpos = _loc + _dimstart; /* correction to find place in mapc */\ 175 _dimstart += (Integer)nblock[_d]; \ 176 lo[_d] = (Integer)mapc[_dimpos]; \ 177 if (_loc==nblock[_d]-1) hi[_d]=dims[_d]; \ 178 else hi[_d] = mapc[_dimpos+1]-1; \ 179 } \ 180 } \ 181 } 182 183 /* this macro finds the block indices for a given block */ 184 #define gam_find_block_indices(ga_handle,nblock,index) { \ 185 int _itmp, _i; \ 186 int _ndim = GA[ga_handle].ndim; \ 187 _itmp = nblock; \ 188 index[0] = _itmp%GA[ga_handle].num_blocks[0]; \ 189 for (_i=1; _i<_ndim; _i++) { \ 190 _itmp = (_itmp-index[_i-1])/GA[ga_handle].num_blocks[_i-1]; \ 191 index[_i] = _itmp%GA[ga_handle].num_blocks[_i]; \ 192 } \ 193 } 194 195 /* this macro finds the ScaLAPACK indices for a given processor */ 196 /* gam_find_proc_indices(ga_handle,proc,index) */ 197 #define gam_find_tile_proc_indices(ga_handle,proc,index) { \ 198 Integer _itmp, _i; \ 199 Integer _ndim = GA[ga_handle].ndim; \ 200 _itmp = proc; \ 201 index[0] = _itmp%GA[ga_handle].nblock[0]; \ 202 for (_i=1; _i<_ndim; _i++) { \ 203 _itmp = (_itmp-index[_i-1])/GA[ga_handle].nblock[_i-1]; \ 204 index[_i] = _itmp%GA[ga_handle].nblock[_i]; \ 205 } \ 206 } 207 /* 208 #define gam_find_proc_indices(ga_handle,proc,index) { \ 209 Integer _itmp, _i; \ 210 Integer _ndim = GA[ga_handle].ndim; \ 211 _itmp = proc; \ 212 index[_ndim-1] = _itmp%GA[ga_handle].nblock[_ndim-1]; \ 213 for (_i=_ndim-2; _i>=0; _i--) { \ 214 _itmp = (_itmp-index[_i+1])/GA[ga_handle].nblock[_i+1]; \ 215 index[_i] = _itmp%GA[ga_handle].nblock[_i]; \ 216 } \ 217 } 218 */ 219 #define gam_find_proc_indices(ga_handle,proc,index) { \ 220 Integer _itmp, _i; \ 221 Integer _ndim = GA[ga_handle].ndim; \ 222 _itmp = proc; \ 223 index[0] = _itmp%GA[ga_handle].nblock[0]; \ 224 for (_i=1; _i<_ndim; _i++) { \ 225 _itmp = (_itmp-index[_i-1])/GA[ga_handle].nblock[_i-1]; \ 226 index[_i] = _itmp%GA[ga_handle].nblock[_i]; \ 227 } \ 228 } 229 230 /* this macro finds cordinates of the chunk of array owned by processor proc 231 * ga_handle: global array handle 232 * proc: processor (or block) index 233 * lo: lower indices of elements owned by processor (or block) 234 * hi: upper indices of elements owned by processor (or block) 235 */ 236 #define ga_ownsM(ga_handle, proc, lo, hi) \ 237 { \ 238 if (GA[ga_handle].distr_type == REGULAR) { \ 239 if (GA[ga_handle].num_rstrctd == 0) { \ 240 ga_ownsM_no_handle(GA[ga_handle].ndim, GA[ga_handle].dims, \ 241 GA[ga_handle].nblock, GA[ga_handle].mapc, \ 242 proc,lo, hi ) \ 243 } else { \ 244 if (proc < GA[ga_handle].num_rstrctd) { \ 245 ga_ownsM_no_handle(GA[ga_handle].ndim, GA[ga_handle].dims, \ 246 GA[ga_handle].nblock, GA[ga_handle].mapc, \ 247 proc,lo, hi ) \ 248 } else { \ 249 int _i; \ 250 int _ndim = GA[ga_handle].ndim; \ 251 for (_i=0; _i<_ndim; _i++) { \ 252 lo[_i] = 0; \ 253 hi[_i] = -1; \ 254 } \ 255 } \ 256 } \ 257 } else if (GA[ga_handle].distr_type == BLOCK_CYCLIC || \ 258 GA[ga_handle].distr_type == SCALAPACK || \ 259 GA[ga_handle].distr_type == TILED) { \ 260 int _index[MAXDIM]; \ 261 int _i; \ 262 int _ndim = GA[ga_handle].ndim; \ 263 gam_find_block_indices(ga_handle,proc,_index); \ 264 for (_i=0; _i<_ndim; _i++) { \ 265 lo[_i] = _index[_i]*GA[ga_handle].block_dims[_i]+1; \ 266 hi[_i] = (_index[_i]+1)*GA[ga_handle].block_dims[_i]; \ 267 if (hi[_i] > GA[ga_handle].dims[_i]) hi[_i]=GA[ga_handle].dims[_i]; \ 268 } \ 269 } else if (GA[ga_handle].distr_type == TILED_IRREG) { \ 270 int _index[MAXDIM]; \ 271 int _i; \ 272 int _ndim = GA[ga_handle].ndim; \ 273 int _offset = 0; \ 274 gam_find_block_indices(ga_handle,proc,_index); \ 275 for (_i=0; _i<_ndim; _i++) { \ 276 lo[_i] = GA[ga_handle].mapc[_offset+_index[_i]]; \ 277 if (_index[_i] < GA[ga_handle].num_blocks[_i]-1) { \ 278 hi[_i] = GA[ga_handle].mapc[_offset+_index[_i]+1]-1; \ 279 } else { \ 280 hi[_i] = GA[ga_handle].dims[_i]; \ 281 } \ 282 _offset += GA[ga_handle].num_blocks[_i]; \ 283 } \ 284 } \ 285 } 286 287 /* this macro finds the block index corresponding to a given set of indices */ 288 #define gam_find_block_from_indices(ga_handle,nblock,index) { \ 289 int _ndim = GA[ga_handle].ndim; \ 290 int _i; \ 291 nblock = index[_ndim-1]; \ 292 for (_i=_ndim-2; _i >= 0; _i--) { \ 293 nblock = nblock*GA[ga_handle].num_blocks[_i]+index[_i]; \ 294 } \ 295 } 296 297 /* this macro finds the proc that owns a given set block indices 298 using the ScaLAPACK data distribution */ 299 /* gam_find_proc_from_sl_indices(ga_handle,proc,index) */ 300 #define gam_find_tile_proc_from_indices(ga_handle,proc,index) { \ 301 int _ndim = GA[ga_handle].ndim; \ 302 int _i; \ 303 Integer _index2[MAXDIM]; \ 304 for (_i=0; _i<_ndim; _i++) { \ 305 _index2[_i] = index[_i]%GA[ga_handle].nblock[_i]; \ 306 } \ 307 proc = _index2[_ndim-1]; \ 308 for (_i=_ndim-2; _i >= 0; _i--) { \ 309 proc = proc*GA[ga_handle].nblock[_i]+_index2[_i]; \ 310 } \ 311 } 312 #define gam_find_proc_from_sl_indices(ga_handle,proc,index) { \ 313 int _ndim = GA[ga_handle].ndim; \ 314 int _i; \ 315 Integer _index2[MAXDIM]; \ 316 for (_i=0; _i<_ndim; _i++) { \ 317 _index2[_i] = index[_i]%GA[ga_handle].nblock[_i]; \ 318 } \ 319 proc = _index2[0]; \ 320 for (_i=1; _i < _ndim; _i++) { \ 321 proc = proc*GA[ga_handle].nblock[_i]+_index2[_i]; \ 322 } \ 323 } 324 325 /* this macro computes the strides on both the remote and local 326 processors that map out the data. ld and ldrem are the physical dimensions 327 of the memory on both the local and remote processors. */ 328 /* NEEDS C_INT64 CONVERSION */ 329 #define gam_setstride(ndim, size, ld, ldrem, stride_rem, stride_loc){\ 330 int _i; \ 331 stride_rem[0]= stride_loc[0] = (int)size; \ 332 __CRAYX1_PRAGMA("_CRI novector"); \ 333 for(_i=0;_i<ndim-1;_i++){ \ 334 stride_rem[_i] *= (int)ldrem[_i]; \ 335 stride_loc[_i] *= (int)ld[_i]; \ 336 stride_rem[_i+1] = stride_rem[_i]; \ 337 stride_loc[_i+1] = stride_loc[_i]; \ 338 } \ 339 } 340 341 /* Count total number of elmenents in array based on values of ndim, 342 lo, and hi */ 343 #define gam_CountElems(ndim, lo, hi, pelems){ \ 344 int _d; \ 345 __CRAYX1_PRAGMA("_CRI novector"); \ 346 for(_d=0,*pelems=1; _d< ndim;_d++) *pelems *= hi[_d]-lo[_d]+1; \ 347 } 348 349 /* NEEDS C_INT64 CONVERSION */ 350 #define gam_ComputeCount(ndim, lo, hi, count){ \ 351 int _d; \ 352 __CRAYX1_PRAGMA("_CRI novector"); \ 353 for(_d=0; _d< ndim;_d++) count[_d] = (int)(hi[_d]-lo[_d])+1; \ 354 } 355 356 #define ga_RegionError(ndim, lo, hi, val){ \ 357 int _d, _l; \ 358 const char *str= "cannot locate region: "; \ 359 char err_string[ERR_STR_LEN]; \ 360 sprintf(err_string, "%s", str); \ 361 _d=0; \ 362 _l = strlen(str); \ 363 sprintf(err_string+_l, "%s", GA[val+GA_OFFSET].name); \ 364 _l = strlen(err_string); \ 365 sprintf(err_string+_l, " [%ld:%ld ",(long)lo[_d],(long)hi[_d]); \ 366 _l=strlen(err_string); \ 367 __CRAYX1_PRAGMA("_CRI novector"); \ 368 for(_d=1; _d< ndim; _d++){ \ 369 sprintf(err_string+_l, ",%ld:%ld ",(long)lo[_d],(long)hi[_d]); \ 370 _l=strlen(err_string); \ 371 } \ 372 sprintf(err_string+_l, "%s", "]"); \ 373 _l=strlen(err_string); \ 374 pnga_error(err_string, val); \ 375 } 376 377 /*\ Just return pointer (ptr_loc) to location in memory of element with 378 * subscripts (subscript). 379 \*/ 380 #define gam_Loc_ptr(proc, g_handle, subscript, ptr_loc) \ 381 { \ 382 Integer _offset=0, _d, _w, _factor=1, _last=GA[g_handle].ndim-1; \ 383 Integer _lo[MAXDIM], _hi[MAXDIM], _p_handle, _iproc; \ 384 \ 385 ga_ownsM(g_handle, proc, _lo, _hi); \ 386 _p_handle = GA[g_handle].p_handle; \ 387 _iproc = proc; \ 388 gaCheckSubscriptM(subscript, _lo, _hi, GA[g_handle].ndim); \ 389 __CRAYX1_PRAGMA("_CRI novector"); \ 390 for(_d=0; _d < _last; _d++) { \ 391 _w = (Integer)GA[g_handle].width[_d]; \ 392 _offset += (subscript[_d]-_lo[_d]+_w) * _factor; \ 393 _factor *= _hi[_d] - _lo[_d]+1+2*_w; \ 394 } \ 395 _offset += (subscript[_last]-_lo[_last] \ 396 + (Integer)GA[g_handle].width[_last]) \ 397 * _factor; \ 398 if (_p_handle == 0) { \ 399 _iproc = PGRP_LIST[_p_handle].inv_map_proc_list[_iproc]; \ 400 } \ 401 if (GA[g_handle].num_rstrctd > 0) \ 402 _iproc = GA[g_handle].rstrctd_list[_iproc]; \ 403 *(ptr_loc) = GA[g_handle].ptr[_iproc]+_offset*GA[g_handle].elemsize; \ 404 } 405 406 #define ga_check_regionM(g_a, ilo, ihi, jlo, jhi, string){ \ 407 if (*(ilo) <= 0 || *(ihi) > GA[GA_OFFSET + *(g_a)].dims[0] || \ 408 *(jlo) <= 0 || *(jhi) > GA[GA_OFFSET + *(g_a)].dims[1] || \ 409 *(ihi) < *(ilo) || *(jhi) < *(jlo)){ \ 410 char err_string[ERR_STR_LEN]; \ 411 sprintf(err_string,"%s:req(%ld:%ld,%ld:%ld) out of range (1:%ld,1:%ld)",\ 412 string, (long)*(ilo), (long)*(ihi), (long)*(jlo), (long)*(jhi), \ 413 (long)GA[GA_OFFSET + *(g_a)].dims[0], \ 414 (long)GA[GA_OFFSET + *(g_a)].dims[1]); \ 415 pnga_error(err_string, *(g_a)); \ 416 } \ 417 } 418 419 #define gaCheckSubscriptM(subscr, lo, hi, ndim) \ 420 { \ 421 Integer _d; \ 422 __CRAYX1_PRAGMA("_CRI novector"); \ 423 for(_d=0; _d< ndim; _d++) \ 424 if( subscr[_d]< lo[_d] || subscr[_d]> hi[_d]){ \ 425 char err_string[ERR_STR_LEN]; \ 426 sprintf(err_string,"check subscript failed:%ld not in (%ld:%ld) dim=%d", \ 427 (long)subscr[_d], (long)lo[_d], (long)hi[_d], (int)_d); \ 428 pnga_error(err_string, _d); \ 429 }\ 430 } 431 432 extern void pna_access_block_grid_ptr(Integer g_a, Integer *index, void *ptr, 433 Integer ld); 434