1 /*$Id: base.h,v 1.40.2.4 2007/12/18 18:41:27 d3g293 Exp $ */
2 #include "armci.h"
3 #include "gaconfig.h"
4 #include "typesf2c.h"
5 
6 #ifdef MSG_COMMS_MPI
7 #include <mpi.h>
8 #include "ga-mpi.h"
9 #endif
10 
11 extern int _max_global_array;
12 extern Integer GAme, GAnproc;
13 extern int GA_Default_Proc_Group;
14 extern int** GA_Update_Flags;
15 extern int* GA_Update_Signal;
16 extern short int _ga_irreg_flag;
17 extern Integer GA_Debug_flag;
18 
19 #define FNAM        31              /* length of array names   */
20 #define CACHE_SIZE  512             /* size of the cache inside GA DS*/
21 
22 #ifdef __crayx1
23 #define __CRAYX1_PRAGMA _Pragma
24 #else
25 #define __CRAYX1_PRAGMA(_pragf)
26 #endif
27 
28 enum data_distribution {REGULAR, BLOCK_CYCLIC, SCALAPACK, TILED, TILED_IRREG};
29 
30 typedef int ARMCI_Datatype;
31 typedef struct {
32        int mirrored;
33        int map_nproc;
34        int actv;
35        int parent;
36        int *map_proc_list;
37        int *inv_map_proc_list;
38 #ifdef MSG_COMMS_MPI
39        ARMCI_Group group;
40 #endif
41 } proc_list_t;
42 
43 typedef Integer C_Integer;
44 typedef armci_size_t C_Long;
45 
46 typedef struct cache_struct{
47   int lo[MAXDIM];
48   int hi[MAXDIM];
49   void* cache_buf;
50   struct cache_struct *next;
51 } cache_struct_t;
52 
53 typedef struct {
54        short int  ndim;             /* number of dimensions                 */
55        short int  irreg;            /* 0-regular; 1-irregular distribution  */
56        int  type;                   /* data type in array                   */
57        int  actv;                   /* activity status, GA is allocated     */
58        int  actv_handle;            /* handle is created                    */
59        C_Long   size;               /* size of local data in bytes          */
60        int  elemsize;               /* sizeof(datatype)                     */
61        int  ghosts;                 /* flag indicating presence of ghosts   */
62        long lock;                   /* lock                                 */
63        long id;                     /* ID of shmem region / MA handle       */
64        C_Integer  dims[MAXDIM];     /* global array dimensions              */
65        C_Integer  chunk[MAXDIM];    /* chunking                             */
66        int  nblock[MAXDIM];         /* number of blocks per dimension in    */
67                                     /* processor grid                       */
68        C_Integer  width[MAXDIM];    /* boundary cells per dimension         */
69        C_Integer  first[MAXDIM];    /* (Mirrored only) first local element  */
70        C_Integer  last[MAXDIM];     /* (Mirrored only) last local element   */
71        C_Long  shm_length;          /* (Mirrored only) local shmem length   */
72        C_Integer  lo[MAXDIM];       /* top/left corner in local patch       */
73        double scale[MAXDIM];        /* nblock/dim (precomputed)             */
74        char **ptr;                  /* arrays of pointers to remote data    */
75        C_Integer  *mapc;            /* block distribution map               */
76        char name[FNAM+1];           /* array name                           */
77        int p_handle;                /* pointer to processor list for array  */
78        double *cache;               /* store for frequently accessed ptrs   */
79        int corner_flag;             /* flag for updating corner ghost cells */
80        int distr_type;              /* tag for data distribution type       */
81        C_Integer block_dims[MAXDIM];/* array of block dimensions            */
82        C_Integer num_blocks[MAXDIM];/* number of blocks in each dimension   */
83        C_Integer block_total;       /* total number of blocks in array      */
84                                     /* using restricted arrays              */
85        C_Integer *rstrctd_list;     /* list of processors with data         */
86        C_Integer num_rstrctd;       /* number of processors with data       */
87        C_Integer has_data;          /* flag that processor has data         */
88        C_Integer rstrctd_id;        /* rank of processor in restricted list */
89        C_Integer *rank_rstrctd;     /* ranks of processors with data        */
90                                     /* Properties                           */
91        int property;                /* property type for GA                 */
92        Integer *old_mapc;           /* copy of original map                 */
93        int old_nblock[MAXDIM];      /* copy of original nblock array        */
94        int old_handle;              /* original group handle                */
95        int old_lo[MAXDIM];          /* original lo array                    */
96        int old_chunk[MAXDIM];       /* original chunk array                 */
97 #ifdef ENABLE_CHECKPOINT
98        int record_id;               /* record id for writing ga to disk     */
99 #endif
100        /* new */
101        int read_cache;              /* flag for read only pointer in cache  */
102        cache_struct_t *cache_head;  /* linked list of cached reads          */
103        int mem_dev_set;             /* flag for setting memory device       */
104        char mem_dev[FNAM+1];        /* memory device type                   */
105        int overlay;                 /* GA uses memory from another GA       */
106 
107 } global_array_t;
108 
109 enum property_type { NO_PROPERTY,
110                      READ_ONLY,
111                      READ_CACHE /* new */
112 };
113 
114 extern global_array_t *_ga_main_data_structure;
115 extern proc_list_t *_proc_list_main_data_structure;
116 /*\
117  *The following statement had to be moved here because of a problem in the c
118  *compiler on SV1. The problem is that when a c file is compiled with a
119  *-htaskprivate option on SV1, all global objects are given task-private status
120  *even static variables are supposed to be initialized and given a task-private
121  *memory/status. Somehow SV1 fails to do this for global variables that are
122  *initialized during declaration.
123  *So to handle that,we cannot initialize global variables to be able to run
124  *on SV1.
125 \*/
126 extern global_array_t *GA;
127 extern proc_list_t *PGRP_LIST;
128 
129 /*\
130  * Copy of the world communicator used by GA so that applications using MPI
131  * libraries will not use the same communicator that GA uses internally (at
132  * least for the world communictor)
133 \*/
134 #ifdef MSG_COMMS_MPI
135 extern MPI_Comm GA_MPI_World_comm_dup;
136 #endif
137 
138 #define ERR_STR_LEN 256               /* length of string for error reporting */
139 
140 /**************************** MACROS ************************************/
141 
142 
143 #define ga_check_handleM(g_a, string) \
144 {\
145     if(GA_OFFSET+ (g_a) < 0 || GA_OFFSET+(g_a) >=_max_global_array){   \
146       char err_string[ERR_STR_LEN];                                    \
147       sprintf(err_string, "%s: INVALID ARRAY HANDLE", string);         \
148       pnga_error(err_string, (g_a));                                   \
149     }                                                                  \
150     if( ! (GA[GA_OFFSET+(g_a)].actv) ){                                \
151       char err_string[ERR_STR_LEN];                                    \
152       sprintf(err_string, "%s: ARRAY NOT ACTIVE", string);             \
153       pnga_error(err_string, (g_a));                                   \
154     }                                                                  \
155 }
156 
157 /* this macro finds coordinates of the chunk of array owned by processor proc */
158 #define ga_ownsM_no_handle(ndim, dims, nblock, mapc, proc, lo, hi)             \
159 {                                                                              \
160    Integer _loc, _nb, _d, _index, _dim=ndim,_dimstart=0, _dimpos;              \
161    for(_nb=1, _d=0; _d<_dim; _d++)_nb *= (Integer)nblock[_d];                  \
162    if((Integer)proc > _nb - 1 || proc<0){                                      \
163       __CRAYX1_PRAGMA("_CRI novector");                                        \
164            for(_d=0; _d<_dim; _d++){                                           \
165          lo[_d] = (Integer)0;                                                  \
166          hi[_d] = (Integer)-1;}                                                \
167    }                                                                           \
168    else{                                                                       \
169          _index = proc;                                                        \
170       __CRAYX1_PRAGMA("_CRI novector");                                        \
171          for(_d=0; _d<_dim; _d++){                                             \
172              _loc = _index% (Integer)nblock[_d];                               \
173              _index  /= (Integer)nblock[_d];                                   \
174              _dimpos = _loc + _dimstart; /* correction to find place in mapc */\
175              _dimstart += (Integer)nblock[_d];                                 \
176              lo[_d] = (Integer)mapc[_dimpos];                                  \
177              if (_loc==nblock[_d]-1) hi[_d]=dims[_d];                          \
178              else hi[_d] = mapc[_dimpos+1]-1;                                  \
179          }                                                                     \
180    }                                                                           \
181 }
182 
183 /* this macro finds the block indices for a given block */
184 #define gam_find_block_indices(ga_handle,nblock,index) {                       \
185   int _itmp, _i;                                                               \
186   int _ndim = GA[ga_handle].ndim;                                              \
187   _itmp = nblock;                                                              \
188   index[0] = _itmp%GA[ga_handle].num_blocks[0];                                \
189   for (_i=1; _i<_ndim; _i++) {                                                 \
190     _itmp = (_itmp-index[_i-1])/GA[ga_handle].num_blocks[_i-1];                \
191     index[_i] = _itmp%GA[ga_handle].num_blocks[_i];                            \
192   }                                                                            \
193 }
194 
195 /* this macro finds the ScaLAPACK indices for a given processor */
196 /* gam_find_proc_indices(ga_handle,proc,index) */
197 #define gam_find_tile_proc_indices(ga_handle,proc,index) {                     \
198   Integer _itmp, _i;                                                           \
199   Integer _ndim = GA[ga_handle].ndim;                                          \
200   _itmp = proc;                                                                \
201   index[0] = _itmp%GA[ga_handle].nblock[0];                                    \
202   for (_i=1; _i<_ndim; _i++) {                                                 \
203     _itmp = (_itmp-index[_i-1])/GA[ga_handle].nblock[_i-1];                    \
204     index[_i] = _itmp%GA[ga_handle].nblock[_i];                                \
205   }                                                                            \
206 }
207 /*
208 #define gam_find_proc_indices(ga_handle,proc,index) {                          \
209   Integer _itmp, _i;                                                           \
210   Integer _ndim = GA[ga_handle].ndim;                                          \
211   _itmp = proc;                                                                \
212   index[_ndim-1] = _itmp%GA[ga_handle].nblock[_ndim-1];                        \
213   for (_i=_ndim-2; _i>=0; _i--) {                                              \
214     _itmp = (_itmp-index[_i+1])/GA[ga_handle].nblock[_i+1];                    \
215     index[_i] = _itmp%GA[ga_handle].nblock[_i];                                \
216   }                                                                            \
217 }
218 */
219 #define gam_find_proc_indices(ga_handle,proc,index) {                          \
220   Integer _itmp, _i;                                                           \
221   Integer _ndim = GA[ga_handle].ndim;                                          \
222   _itmp = proc;                                                                \
223   index[0] = _itmp%GA[ga_handle].nblock[0];                        \
224   for (_i=1; _i<_ndim; _i++) {                                              \
225     _itmp = (_itmp-index[_i-1])/GA[ga_handle].nblock[_i-1];                    \
226     index[_i] = _itmp%GA[ga_handle].nblock[_i];                                \
227   }                                                                            \
228 }
229 
230 /* this macro finds cordinates of the chunk of array owned by processor proc
231  * ga_handle: global array handle
232  * proc: processor (or block) index
233  * lo: lower indices of elements owned by processor (or block)
234  * hi: upper indices of elements owned by processor (or block)
235  */
236 #define ga_ownsM(ga_handle, proc, lo, hi)                                      \
237 {                                                                              \
238   if (GA[ga_handle].distr_type == REGULAR) {                                   \
239     if (GA[ga_handle].num_rstrctd == 0) {                                      \
240       ga_ownsM_no_handle(GA[ga_handle].ndim, GA[ga_handle].dims,               \
241                          GA[ga_handle].nblock, GA[ga_handle].mapc,             \
242                          proc,lo, hi )                                         \
243     } else {                                                                   \
244       if (proc < GA[ga_handle].num_rstrctd) {                                  \
245         ga_ownsM_no_handle(GA[ga_handle].ndim, GA[ga_handle].dims,             \
246                            GA[ga_handle].nblock, GA[ga_handle].mapc,           \
247                            proc,lo, hi )                                       \
248       } else {                                                                 \
249         int _i;                                                                \
250         int _ndim = GA[ga_handle].ndim;                                        \
251         for (_i=0; _i<_ndim; _i++) {                                           \
252           lo[_i] = 0;                                                          \
253           hi[_i] = -1;                                                         \
254         }                                                                      \
255       }                                                                        \
256     }                                                                          \
257   } else if (GA[ga_handle].distr_type == BLOCK_CYCLIC ||                       \
258       GA[ga_handle].distr_type == SCALAPACK ||                                 \
259       GA[ga_handle].distr_type == TILED) {                                     \
260     int _index[MAXDIM];                                                        \
261     int _i;                                                                    \
262     int _ndim = GA[ga_handle].ndim;                                            \
263     gam_find_block_indices(ga_handle,proc,_index);                             \
264     for (_i=0; _i<_ndim; _i++) {                                               \
265       lo[_i] = _index[_i]*GA[ga_handle].block_dims[_i]+1;                      \
266       hi[_i] = (_index[_i]+1)*GA[ga_handle].block_dims[_i];                    \
267       if (hi[_i] > GA[ga_handle].dims[_i]) hi[_i]=GA[ga_handle].dims[_i];      \
268     }                                                                          \
269   } else if (GA[ga_handle].distr_type == TILED_IRREG) {                        \
270     int _index[MAXDIM];                                                        \
271     int _i;                                                                    \
272     int _ndim = GA[ga_handle].ndim;                                            \
273     int _offset = 0;                                                           \
274     gam_find_block_indices(ga_handle,proc,_index);                             \
275     for (_i=0; _i<_ndim; _i++) {                                               \
276       lo[_i] = GA[ga_handle].mapc[_offset+_index[_i]];                         \
277       if (_index[_i] < GA[ga_handle].num_blocks[_i]-1) {                       \
278         hi[_i] = GA[ga_handle].mapc[_offset+_index[_i]+1]-1;                   \
279       } else {                                                                 \
280         hi[_i] = GA[ga_handle].dims[_i];                                     \
281       }                                                                        \
282       _offset += GA[ga_handle].num_blocks[_i];                                   \
283     }                                                                          \
284   }                                                                            \
285 }
286 
287 /* this macro finds the block index corresponding to a given set of indices */
288 #define gam_find_block_from_indices(ga_handle,nblock,index) {                  \
289   int _ndim = GA[ga_handle].ndim;                                              \
290   int _i;                                                                      \
291   nblock = index[_ndim-1];                                                     \
292   for (_i=_ndim-2; _i >= 0; _i--) {                                            \
293     nblock  = nblock*GA[ga_handle].num_blocks[_i]+index[_i];                   \
294   }                                                                            \
295 }
296 
297 /* this macro finds the proc that owns a given set block indices
298    using the ScaLAPACK data distribution */
299 /* gam_find_proc_from_sl_indices(ga_handle,proc,index) */
300 #define gam_find_tile_proc_from_indices(ga_handle,proc,index) {                \
301   int _ndim = GA[ga_handle].ndim;                                              \
302   int _i;                                                                      \
303   Integer _index2[MAXDIM];                                                     \
304   for (_i=0; _i<_ndim; _i++) {                                                 \
305     _index2[_i] = index[_i]%GA[ga_handle].nblock[_i];                          \
306   }                                                                            \
307   proc = _index2[_ndim-1];                                                     \
308   for (_i=_ndim-2; _i >= 0; _i--) {                                            \
309     proc = proc*GA[ga_handle].nblock[_i]+_index2[_i];                          \
310   }                                                                            \
311 }
312 #define gam_find_proc_from_sl_indices(ga_handle,proc,index) {                  \
313   int _ndim = GA[ga_handle].ndim;                                              \
314   int _i;                                                                      \
315   Integer _index2[MAXDIM];                                                     \
316   for (_i=0; _i<_ndim; _i++) {                                                 \
317     _index2[_i] = index[_i]%GA[ga_handle].nblock[_i];                          \
318   }                                                                            \
319   proc = _index2[0];                                                           \
320   for (_i=1; _i < _ndim; _i++) {                                               \
321     proc = proc*GA[ga_handle].nblock[_i]+_index2[_i];                          \
322   }                                                                            \
323 }
324 
325 /* this macro computes the strides on both the remote and local
326    processors that map out the data. ld and ldrem are the physical dimensions
327    of the memory on both the local and remote processors. */
328 /* NEEDS C_INT64 CONVERSION */
329 #define gam_setstride(ndim, size, ld, ldrem, stride_rem, stride_loc){\
330   int _i;                                                            \
331   stride_rem[0]= stride_loc[0] = (int)size;                          \
332   __CRAYX1_PRAGMA("_CRI novector");                                  \
333   for(_i=0;_i<ndim-1;_i++){                                          \
334     stride_rem[_i] *= (int)ldrem[_i];                                \
335     stride_loc[_i] *= (int)ld[_i];                                   \
336       stride_rem[_i+1] = stride_rem[_i];                             \
337       stride_loc[_i+1] = stride_loc[_i];                             \
338   }                                                                  \
339 }
340 
341 /* Count total number of elmenents in array based on values of ndim,
342       lo, and hi */
343 #define gam_CountElems(ndim, lo, hi, pelems){                        \
344   int _d;                                                            \
345   __CRAYX1_PRAGMA("_CRI novector");                                         \
346   for(_d=0,*pelems=1; _d< ndim;_d++)  *pelems *= hi[_d]-lo[_d]+1;    \
347 }
348 
349 /* NEEDS C_INT64 CONVERSION */
350 #define gam_ComputeCount(ndim, lo, hi, count){                       \
351   int _d;                                                            \
352   __CRAYX1_PRAGMA("_CRI novector");                                         \
353   for(_d=0; _d< ndim;_d++) count[_d] = (int)(hi[_d]-lo[_d])+1;       \
354 }
355 
356 #define ga_RegionError(ndim, lo, hi, val){                           \
357   int _d, _l;                                                        \
358   const char *str= "cannot locate region: ";                         \
359   char err_string[ERR_STR_LEN];                                      \
360   sprintf(err_string, "%s", str);                                    \
361   _d=0;                                                              \
362   _l = strlen(str);                                                  \
363   sprintf(err_string+_l, "%s", GA[val+GA_OFFSET].name);              \
364   _l = strlen(err_string);                                           \
365   sprintf(err_string+_l, " [%ld:%ld ",(long)lo[_d],(long)hi[_d]);    \
366   _l=strlen(err_string);                                             \
367   __CRAYX1_PRAGMA("_CRI novector");                                  \
368   for(_d=1; _d< ndim; _d++){                                         \
369     sprintf(err_string+_l, ",%ld:%ld ",(long)lo[_d],(long)hi[_d]);   \
370     _l=strlen(err_string);                                           \
371   }                                                                  \
372   sprintf(err_string+_l, "%s", "]");                                 \
373   _l=strlen(err_string);                                             \
374   pnga_error(err_string, val);                                       \
375 }
376 
377 /*\ Just return pointer (ptr_loc) to location in memory of element with
378  *  subscripts (subscript).
379 \*/
380 #define gam_Loc_ptr(proc, g_handle,  subscript, ptr_loc)                      \
381 {                                                                             \
382 Integer _offset=0, _d, _w, _factor=1, _last=GA[g_handle].ndim-1;              \
383 Integer _lo[MAXDIM], _hi[MAXDIM], _p_handle, _iproc;                          \
384                                                                               \
385       ga_ownsM(g_handle, proc, _lo, _hi);                                     \
386       _p_handle = GA[g_handle].p_handle;                                      \
387       _iproc = proc;                                                          \
388       gaCheckSubscriptM(subscript, _lo, _hi, GA[g_handle].ndim);              \
389   __CRAYX1_PRAGMA("_CRI novector");                                           \
390       for(_d=0; _d < _last; _d++)            {                                \
391           _w = (Integer)GA[g_handle].width[_d];                               \
392           _offset += (subscript[_d]-_lo[_d]+_w) * _factor;                    \
393           _factor *= _hi[_d] - _lo[_d]+1+2*_w;                                \
394       }                                                                       \
395       _offset += (subscript[_last]-_lo[_last]                                 \
396                + (Integer)GA[g_handle].width[_last])                          \
397                * _factor;                                                     \
398       if (_p_handle == 0) {                                                   \
399         _iproc = PGRP_LIST[_p_handle].inv_map_proc_list[_iproc];              \
400       }                                                                       \
401       if (GA[g_handle].num_rstrctd > 0)                                       \
402         _iproc = GA[g_handle].rstrctd_list[_iproc];                           \
403       *(ptr_loc) =  GA[g_handle].ptr[_iproc]+_offset*GA[g_handle].elemsize;   \
404 }
405 
406 #define ga_check_regionM(g_a, ilo, ihi, jlo, jhi, string){                     \
407    if (*(ilo) <= 0 || *(ihi) > GA[GA_OFFSET + *(g_a)].dims[0] ||               \
408        *(jlo) <= 0 || *(jhi) > GA[GA_OFFSET + *(g_a)].dims[1] ||               \
409        *(ihi) < *(ilo) ||  *(jhi) < *(jlo)){                                   \
410        char err_string[ERR_STR_LEN];                                           \
411        sprintf(err_string,"%s:req(%ld:%ld,%ld:%ld) out of range (1:%ld,1:%ld)",\
412                string, (long)*(ilo), (long)*(ihi), (long)*(jlo), (long)*(jhi), \
413                (long)GA[GA_OFFSET + *(g_a)].dims[0],                           \
414                (long)GA[GA_OFFSET + *(g_a)].dims[1]);                          \
415        pnga_error(err_string, *(g_a));                                         \
416    }                                                                           \
417 }
418 
419 #define gaCheckSubscriptM(subscr, lo, hi, ndim)                                \
420 {                                                                              \
421 Integer _d;                                                                    \
422   __CRAYX1_PRAGMA("_CRI novector");                                            \
423    for(_d=0; _d<  ndim; _d++)                                                  \
424       if( subscr[_d]<  lo[_d] ||  subscr[_d]>  hi[_d]){                        \
425         char err_string[ERR_STR_LEN];                                          \
426         sprintf(err_string,"check subscript failed:%ld not in (%ld:%ld) dim=%d", \
427                   (long)subscr[_d],  (long)lo[_d],  (long)hi[_d], (int)_d);    \
428           pnga_error(err_string, _d);                                          \
429       }\
430 }
431 
432 extern void pna_access_block_grid_ptr(Integer g_a, Integer *index, void *ptr,
433     Integer ld);
434