1 #if HAVE_CONFIG_H
2 #   include "config.h"
3 #endif
4 
5 /* $Id: memory.c,v 1.56.2.3 2007-04-25 23:49:55 d3p687 Exp $ */
6 #if HAVE_STDIO_H
7 #   include <stdio.h>
8 #endif
9 #if HAVE_ASSERT_H
10 #   include <assert.h>
11 #endif
12 #include "armcip.h"
13 #include "message.h"
14 #include "kr_malloc.h"
15 
16 #define DEBUG_ 0
17 #define USE_MALLOC
18 #define USE_SHMEM_
19 #define SHM_UNIT 1024
20 
21 #if defined(CRAY_SHMEM)
22 extern void armci_shmalloc_exchange_address(void **ptr_arr);
23 extern void armci_shmalloc_exchange_offsets(context_t *);
24 #  if defined(CRAY_XT)
25 #  include <mpp/shmem.h>
26 #    ifdef CATAMOUNT
27 #      include <catamount/data.h>
28 #    endif
29 #  endif
30 #endif
31 
32 static context_t ctx_localmem;
33 #if defined(PORTALS_WITHREG) || defined(PORTALS) || defined(ALLOW_PIN)
34 static context_t ctx_mlocalmem;
35 #endif
36 
37 #if defined(SYSV) || defined(WIN32) || defined(MMAP) || defined(HITACHI)
38 #include "armci_shmem.h"
39 
40 #if !defined(USE_SHMEM) && (defined(HITACHI) || defined(MULTI_CTX))
41 #    define USE_SHMEM
42 #endif
43 
44 #if !(defined(LAPI)||defined(QUADRICS)||defined(SERVER_THREAD)) ||\
45     defined(USE_SHMEM)
46 #define RMA_NEEDS_SHMEM
47 #endif
48 
49 #if defined(DATA_SERVER) && !defined(SERVER_THREAD)
50 extern int _armci_server_started;
51 #endif
52 
53 /****************************************************************************
54  * Memory Allocator called by kr_malloc on SGI Altix to get more core from OS
55  */
56 #ifdef SGIALTIX
57 
58 #include <mpp/shmem.h>
59 #if HAVE_UNISTD_H
60 #   include <unistd.h>
61 #endif
62 
63 #define DEF_UNITS (64)
64 #define MAX_SEGS  512
65 
66 #define _SHMMAX_ALTIX     32*1024  /* 32 MB */
67 #define _SHMMAX_ALTIX_GRP 512*1024 /* 512 MB */
68 
69 static  context_t altix_ctx_shmem;
70 static  context_t altix_ctx_shmem_grp;
71 static  size_t altix_pagesize;
72 extern void armci_memoffset_table_newentry(void *ptr, size_t seg_size);
73 
armci_altix_allocate(size_t bytes)74 void *armci_altix_allocate(size_t bytes)
75 {
76     void *ptr, *sptr;
77     ARMCI_PR_DBG("enter",0);
78     sptr=ptr= shmalloc(bytes);
79     if(sptr == NULL) armci_die("armci_altix_allocate: shmalloc failed\n",
80                                armci_me);
81     armci_memoffset_table_newentry(ptr, bytes);
82 #if 0
83     if(ptr){  /* touch each page to establish ownership */
84        int i;
85        for(i=0; i< bytes/altix_pagesize; i++){
86            *(double*)ptr=0.;
87            ((char*)ptr) += altix_pagesize;
88        }
89     }
90 #endif
91     ARMCI_PR_DBG("exit",0);
92     return sptr;
93 }
94 
armci_altix_shm_init()95 void armci_altix_shm_init()
96 {
97     ARMCI_PR_DBG("enter",0);
98     altix_pagesize = getpagesize();
99     kr_malloc_init(SHM_UNIT, _SHMMAX_ALTIX, 0,
100                    armci_altix_allocate, 0, &altix_ctx_shmem);
101     kr_malloc_init(SHM_UNIT, _SHMMAX_ALTIX_GRP, _SHMMAX_ALTIX_GRP,
102                    armci_altix_allocate, 0, &altix_ctx_shmem_grp);
103     /* allocate a huge segment for groups. When kr_malloc() is called for
104      the first time for this altix_ctx_shmem_grp context with some minimal
105      size of 8 bytes, a huge segment of size (SHM_UNIT*_SHMMAX_ALTIX_GRP)
106      will be created */
107     {
108        void *ptr;
109        ptr=kr_malloc((size_t)8, &altix_ctx_shmem_grp);
110        if(ptr==NULL)
111            armci_die("armci_altix_shm_init(): malloc failed", armci_me);
112     }
113     ARMCI_PR_DBG("exit",0);
114 }
115 
/*
 * Collective allocation on Altix.
 *
 * Every process allocates the maximum of all requested sizes from
 * altix_ctx_shmem, then fills ptr_arr (armci_nproc entries) with its own
 * pointer and, for every other rank, the result of shmem_ptr() on that
 * pointer.
 */
void armci_altix_shm_malloc(void *ptr_arr[], armci_size_t bytes)
{
    long max_bytes = bytes;
    void *local_ptr;
    int rank;

    ARMCI_PR_DBG("enter",0);

    /* agree on one size: the largest request over all processes */
    armci_msg_lgop(&max_bytes,1,"max");
    local_ptr = kr_malloc((size_t)max_bytes, &altix_ctx_shmem);

    bzero(ptr_arr,(armci_nproc)*sizeof(void*));
    ptr_arr[armci_me] = local_ptr;
    if(max_bytes != 0 && local_ptr == NULL)
        armci_die("armci_altix_shm_malloc(): malloc failed", armci_me);

    for(rank = 0; rank < armci_nproc; rank++){
        if(rank == armci_me) continue;      /* own slot already filled */
        ptr_arr[rank] = shmem_ptr(local_ptr, rank);
    }

    ARMCI_PR_DBG("exit",0);
}
131 
132 #ifdef MSG_COMMS_MPI
/*
 * Group flavour of the Altix collective allocation.
 *
 * Allocates the group-wide maximum request from altix_ctx_shmem_grp and
 * fills ptr_arr (one entry per group member, indexed by group rank) with the
 * local pointer and shmem_ptr() translations for the other members.
 */
void armci_altix_shm_malloc_group(void *ptr_arr[], armci_size_t bytes,
                                  ARMCI_Group *group) {
    long max_bytes = bytes;
    void *local_ptr;
    int member, my_rank, nmembers;
    armci_grp_attr_t *grp_attr = ARMCI_Group_getattr(group);
    ARMCI_PR_DBG("enter",0);

    ARMCI_Group_size(group, &nmembers);
    ARMCI_Group_rank(group, &my_rank);

    /* agree on one size: the largest request inside the group */
    armci_msg_group_lgop(&max_bytes,1,"max",group);

    local_ptr = kr_malloc((size_t)max_bytes, &altix_ctx_shmem_grp);
    if(max_bytes != 0 && local_ptr == NULL)
       armci_die("armci_altix_shm_malloc_group(): malloc failed for groups. Increase _SHMMAX_ALTIX_GRP", armci_me);

    bzero(ptr_arr,(nmembers)*sizeof(void*));
    ptr_arr[my_rank] = local_ptr;

    for(member = 0; member < nmembers; member++){
        if(member == my_rank) continue;
        /* shmem_ptr wants the absolute process id, not the group rank */
        ptr_arr[member] = shmem_ptr(local_ptr, ARMCI_Absolute_id(group, member));
    }

    ARMCI_PR_DBG("exit",0);
}
152 #endif
153 
154 #endif /* end ifdef SGIALTIX */
155 /* ------------------ End Altix memory allocator ----------------- */
156 
kr_check_local()157 void kr_check_local()
158 {
159 #if 0
160 kr_malloc_print_stats(&ctx_localmem);
161 #endif
162 kr_malloc_verify(&ctx_localmem);
163 #if defined(PORTALS_WITHREG)
164 kr_malloc_verify(&ctx_mlocalmem);
165 #endif
166 }
167 
armci_print_ptr(void ** ptr_arr,int bytes,int size,void * myptr,int off)168 void  armci_print_ptr(void **ptr_arr, int bytes, int size, void* myptr, int off)
169 {
170 int i;
171 int nproc = armci_clus_info[armci_clus_me].nslave;
172 
173     ARMCI_PR_DBG("enter",0);
174     for(i=0; i< armci_nproc; i++){
175       int j;
176       if(armci_me ==i){
177         printf("%d master =%d nproc=%d off=%d\n",armci_me,
178                armci_master,nproc, off);
179         printf("%d:bytes=%d mptr=%p s=%d ",armci_me, bytes, myptr,size);
180         for(j = 0; j< armci_nproc; j++)printf(" %p",ptr_arr[j]);
181         printf("\n"); fflush(stdout);
182       }
183       armci_msg_barrier();
184     }
185     ARMCI_PR_DBG("exit",0);
186 }
187 
188 
/*\ master exports its address of shmem region at the beginning of that region
\*/
static void armci_master_exp_attached_ptr(void* ptr)
{
    volatile void **header;

    ARMCI_PR_DBG("enter",0);
    if(ptr == NULL) armci_die("armci_master_exp_att_ptr: null ptr",0);

    /* the first pointer-sized slot of the region receives the region's own
     * address as seen by the caller */
    header  = (volatile void**)ptr;
    *header = ptr;

    ARMCI_PR_DBG("exit",0);
}
198 
199 
200 /*\ Collective Memory Allocation on shared memory systems
201 \*/
/*
 * Collective allocation backed by one shared region per cluster node.
 *
 *   ptr_arr - output table with armci_nproc entries; zeroed on entry and
 *             then filled with one address per process (NULL/0 for a
 *             process that requested 0 bytes).
 *   bytes   - number of bytes requested by the calling process.
 *
 * The requests of all processes are gathered into size_arr and summed over
 * the local node.  The node master creates the region (or, when
 * RMA_NEEDS_SHMEM is undefined and the node has one process, takes it from
 * kr_malloc), the other processes of the node attach to it, and each
 * process's block is located at a running offset inside the region.
 * Failure to allocate, create or attach is reported through armci_die.
 */
void armci_shmem_malloc(void *ptr_arr[], armci_size_t bytes)
{
    void *myptr=NULL, *ptr=NULL;
    long idlist[SHMIDLEN];
    long size=0, offset=0;
    long *size_arr;
    void **ptr_ref_arr;
    int  i,cn, len;
    int  nproc = armci_clus_info[armci_clus_me].nslave; /* processes on this node */
    ARMCI_PR_DBG("enter",0);
    bzero((char*)ptr_arr,armci_nproc*sizeof(void*));

    /* allocate work arrays */
    size_arr = (long*)calloc(armci_nproc,sizeof(long));
    if(!size_arr)armci_die("armci_malloc:calloc failed",armci_nproc);

    /* allocate arrays for cluster address translations:
     * one slot per cluster node with a data server, otherwise one slot per
     * process on the local node */
#   if defined(DATA_SERVER)
       len = armci_nclus;
#   else
       len = nproc;
#   endif

    ptr_ref_arr = calloc(len,sizeof(void*)); /* must be zero */
    if(!ptr_ref_arr)armci_die("armci_malloc:calloc 2 failed",len);

    /* combine all memory requests into size_arr: only our own slot is
     * non-zero before the "+" reduction */
    size_arr[armci_me] = bytes;
    armci_msg_lgop(size_arr, armci_nproc, "+");

    /* determine aggregate request size on the cluster node
     * (indexing assumes the node's ranks are armci_master..armci_master+nproc-1) */
    for(i=0, size=0; i< nproc; i++) size += size_arr[i+armci_master];

    /* master process creates shmem region and then others attach to it */
    if(armci_me == armci_master ){

       /* can malloc if there is no data server process and has 1 process/node*/
#      ifndef RMA_NEEDS_SHMEM
             if(nproc == 1)
                myptr = kr_malloc(size, &ctx_localmem);
             else
#      endif
                myptr = Create_Shared_Region(idlist+1,size,idlist);
       if(!myptr && size>0 )armci_die("armci_malloc: could not create", (int)(size>>10));

       /* place its address at beginning of attached region for others to see */
       if(size)armci_master_exp_attached_ptr(myptr);

       if(DEBUG_){
         printf("%d:armci_malloc addr mptr=%p size=%ld\n",armci_me,myptr,size);
         fflush(stdout);
       }
    }

    /* broadcast shmem id to other processes on the same cluster node */
    armci_msg_clus_brdcst(idlist, SHMIDLEN*sizeof(long));

    if(armci_me != armci_master){
       /* idlist[0] and idlist+1 are passed exactly as the master received
        * them from Create_Shared_Region */
       myptr=(double*)Attach_Shared_Region(idlist+1,size,idlist[0]);
       if(!myptr)armci_die("armci_malloc: could not attach", (int)(size>>10));

       /* now every process in a SMP node needs to find out its offset
        * w.r.t. master - this offset is necessary to use memlock table
        */
       if(size) armci_set_mem_offset(myptr);
       if(DEBUG_){
          printf("%d:armci_malloc attached addr mptr=%p ref=%p size=%ld\n",
                 armci_me,myptr, *(void**)myptr,size); fflush(stdout);
       }
    }
#   ifdef HITACHI
        armci_register_shmem(myptr,size,idlist+1,idlist[0],ptr_ref_arr[armci_clus_me]);
#   endif
#   if defined(DATA_SERVER)

       /* get server reference address for every cluster node to perform
        * remote address translation for global address space */
       if(armci_nclus>1){
          if(armci_me == armci_master){

#            ifdef SERVER_THREAD

               /* data server thread runs on master process */
               ptr_ref_arr[armci_clus_me]=myptr;

#            else
               /* ask dataserver process to attach to the region and get
                * ptr*/
               {
                  if(_armci_server_started) {
                     armci_serv_attach_req(idlist, SHMIDLEN*sizeof(long), size,
                                           &ptr, sizeof(void*));
                     ptr_ref_arr[armci_clus_me]= ptr; /* from server*/
                  }
                  else /* server not yet started */
                     ptr_ref_arr[armci_clus_me]=myptr;
               }

               if(DEBUG_){
                 printf("%d:addresses server=%p myptr=%p\n",armci_me,ptr,myptr);
                 fflush(stdout);
               }
#            endif
          }
          /* exchange ref addr of shared memory region on every cluster node*/
          armci_exchange_address(ptr_ref_arr, armci_nclus);
       }else {

          /* single cluster node: ptr_ref_arr has one slot (len == armci_nclus) */
          ptr_ref_arr[armci_master] = myptr;

       }

       /* translate addresses for all cluster nodes */
       for(cn = 0; cn < armci_nclus; cn++){

         int master = armci_clus_info[cn].master;
         offset = 0;

         /* on local cluster node use myptr directly */
         ptr = (armci_clus_me == cn) ? myptr: ptr_ref_arr[cn];

         /* compute addresses pointing to the memory regions on cluster node*/
         for(i=0; i< armci_clus_info[cn].nslave; i++){

           /* NULL if request size is 0*/
           ptr_arr[i+master] = (size_arr[i+master])? ((char*)ptr)+offset : NULL;
           offset += size_arr[i+master];

         }
       }

#   else

      /* compute addresses for local cluster node */
      offset =0;
      for(i=0; i< nproc; i++) {

        ptr_ref_arr[i] = (size_arr[i+armci_master])? ((char*)myptr)+offset : 0L;
        offset += size_arr[i+armci_master];

      }

      /* exchange addresses with all other processes */
      ptr_arr[armci_me] = (char*)ptr_ref_arr[armci_me-armci_master];
      armci_exchange_address(ptr_arr, armci_nproc);

      /* overwrite entries for local cluster node with ptr_ref_arr */
      bcopy((char*)ptr_ref_arr, (char*)(ptr_arr+armci_master), nproc*sizeof(void*));

     /*  armci_print_ptr(ptr_arr, bytes, size, myptr, offset);*/

#   endif

    /* with ALLOW_PIN the barrier below is the else-branch of this if;
     * without ALLOW_PIN the barrier is unconditional */
#ifdef ALLOW_PIN
    if(armci_nclus>1)armci_global_region_exchange(myptr, (long) size_arr[armci_me]);
    else
#endif
    armci_msg_barrier();

    /* free work arrays */
    free(ptr_ref_arr);
    free(size_arr);
    ARMCI_PR_DBG("exit",0);

}
367 
368 /********************************************************************
369  * Non-collective Memory Allocation on shared memory systems
370 \*/
/*
 * Non-collective allocation of `size` bytes on a shared-memory system.
 *
 *   meminfo - output descriptor; receives the local address, the address to
 *             use in ARMCI data-transfer calls, the size, the creator's
 *             rank and the shared-region id list.
 *   size    - number of bytes to allocate.
 *
 * The memory comes from Create_Shared_Region, or from kr_malloc when
 * RMA_NEEDS_SHMEM is undefined and the node has a single process.  A failed
 * non-empty allocation is reported through armci_die.
 */
void armci_shmem_memget(armci_meminfo_t *meminfo, size_t size) {
    void *myptr=NULL;
    void *armci_ptr=NULL; /* legal ARMCI ptr used in ARMCI data xfer ops */
    /* Zero-filled up front: the kr_malloc path never writes idlist, yet it is
     * printed, may be sent to the server, and is always copied to meminfo. */
    long idlist[SHMIDLEN] = {0};

    /* can malloc if there is no data server process & has 1 process/node*/
#ifndef RMA_NEEDS_SHMEM
    if( armci_clus_info[armci_clus_me].nslave == 1)
       myptr = kr_malloc(size, &ctx_localmem);
    else
#endif
       myptr = Create_Shared_Region(idlist+1,size,idlist);

    if(!myptr && size>0 )
       armci_die("armci_shmem_memget: create failed", (int)(size>>10));

    if(DEBUG_)
    {
       printf("%d: armci_shmem_memget: addr=%p size=%ld %ld %ld \n", armci_me,
              myptr, (long)size, idlist[0], idlist[1]);
       fflush(stdout);
    }

    /* by default the ARMCI-visible address is the local one */
    armci_ptr = myptr;

#if defined(DATA_SERVER)

    /* get server reference address to perform
     * remote address translation for global address space */
    if(armci_nclus>1)
    {
#   ifdef SERVER_THREAD

       /* data server thread runs on master process */
       if(armci_me != armci_master) {
          armci_serv_attach_req(idlist, SHMIDLEN*sizeof(long), size,
                                &armci_ptr, sizeof(void*));
       }

#   else
       /* ask dataserver process to attach to region and get ptr*/
       {
          if(_armci_server_started) {
             armci_serv_attach_req(idlist, SHMIDLEN*sizeof(long), size,
                                   &armci_ptr, sizeof(void*));
          }
       }
#   endif
    }
#endif

    /* fill the meminfo structure */
    meminfo->armci_addr = armci_ptr;
    meminfo->addr       = myptr;
    meminfo->size       = size;
    meminfo->cpid       = armci_me;
    bcopy(idlist, meminfo->idlist, SHMIDLEN*sizeof(long));

}
430 
/*
 * Obtain a usable pointer for a segment described by `meminfo`.
 *
 * If the creator (meminfo->cpid) is on the caller's cluster node, the
 * segment is attached locally and the attached address is returned;
 * otherwise the remote address stored in meminfo->armci_addr is returned.
 * A failed attach is reported through armci_die.
 */
void* armci_shmem_memat(armci_meminfo_t *meminfo) {
    long  seg_bytes = (long)  meminfo->size;
    long *seg_ids   = (long*) meminfo->idlist;
    void *attached  = NULL;

    /* creator is on a different node: use the published remote address */
    if(!(SAMECLUSNODE(meminfo->cpid)))
       return meminfo->armci_addr;

    /* Attach to the shared memory segment */
    attached = (double*)Attach_Shared_Region(seg_ids+1,seg_bytes,seg_ids[0]);
    if(!attached)armci_die("ARMCi_Memat: could not attach", (int)(seg_bytes>>10));

    /* CHECK: now every process in a SMP node needs to find out its offset
     * w.r.t. master - this offset is necessary to use memlock table
     */
    if(seg_bytes) armci_set_mem_offset(attached);

    return attached;
}
454 
/*
 * Release the segment described by `meminfo`.  Does nothing unless the
 * caller is the process recorded as its creator (meminfo->cpid).
 */
void armci_shmem_memctl(armci_meminfo_t *meminfo) {
    void *seg;

    /* only the creator can delete the segment */
    if(meminfo->cpid != armci_me) return;

    seg = meminfo->addr;

#ifdef RMA_NEEDS_SHMEM
    Free_Shmem_Ptr(0,0,seg);
#else
    /* mirrors the allocation choice: kr_malloc only on single-process nodes */
    if(armci_clus_info[armci_clus_me].nslave <= 1)
       kr_free(seg, &ctx_localmem);
    else
       Free_Shmem_Ptr(0,0,seg);
#endif
}
470 
471 /****** End: Non-collective memory allocation on shared memory systems *****/
472 
473 #ifdef MSG_COMMS_MPI
474 /********************************************************************
475  * Group Memory Allocation on shared memory systems for ARMCI Groups
476 \*/
/*
 * Group version of the collective shared-memory allocation.
 *
 *   ptr_arr - output table with one entry per group member, indexed by
 *             group rank; zeroed and then filled with one address per
 *             member (NULL/0 for a member that requested 0 bytes).
 *   bytes   - number of bytes requested by the calling process.
 *   group   - the ARMCI group performing the allocation; the caller must be
 *             a member (otherwise armci_die is called).
 *
 * Follows the same scheme as the non-group allocation, using the group's
 * own cluster layout (grp_attr->grp_clus_info) and group ranks throughout.
 */
void armci_shmem_malloc_group(void *ptr_arr[], armci_size_t bytes,
                              ARMCI_Group *group)
{
    void *myptr=NULL, *ptr=NULL;
    long idlist[SHMIDLEN];
    long size=0, offset=0;
    long *size_arr;
    void **ptr_ref_arr;
    int  i,cn, len;
    /* int  nproc = armci_clus_info[armci_clus_me].nslave; ? change ? */
    int grp_me, grp_nproc, grp_nclus, grp_master, grp_clus_nproc, grp_clus_me;
    armci_grp_attr_t *grp_attr=ARMCI_Group_getattr(group);
    ARMCI_PR_DBG("enter",0);

    /* Get the group info: group size & group rank */
    ARMCI_Group_size(group, &grp_nproc);
    ARMCI_Group_rank(group, &grp_me);
    if(grp_me == MPI_UNDEFINED) { /* check if the process is in this group */
       armci_die("armci_malloc_group: process is not a member in this group",
                 armci_me);
    }

    /* group-relative cluster layout: number of nodes in the group, this
     * process's node index, and the master rank / member count on that node */
    grp_nclus      = grp_attr->grp_nclus;
    grp_clus_me    = grp_attr->grp_clus_me;
    grp_master     = grp_attr->grp_clus_info[grp_clus_me].master;
    grp_clus_nproc = grp_attr->grp_clus_info[grp_clus_me].nslave;

    bzero((char*)ptr_arr,grp_nproc*sizeof(void*));

    /* allocate work arrays */
    size_arr = (long*)calloc(grp_nproc,sizeof(long));
    if(!size_arr)armci_die("armci_malloc_group:calloc failed",grp_nproc);

    /* allocate arrays for cluster address translations */
#   if defined(DATA_SERVER)
        len = grp_nclus;
#   else
        len = grp_clus_nproc;
#   endif

    ptr_ref_arr = calloc(len,sizeof(void*)); /* must be zero */
    if(!ptr_ref_arr)armci_die("armci_malloc_group:calloc 2 failed",len);

    /* combine all memory requests into size_arr: only our own slot is
     * non-zero before the "+" reduction */
    size_arr[grp_me] = bytes;
    armci_msg_group_gop_scope(SCOPE_ALL, size_arr, grp_nproc, "+", ARMCI_LONG,
                              group);

    /* determine aggregate request size on the cluster node */
    for(i=0, size=0; i< grp_clus_nproc; i++) size += size_arr[i+grp_master];

    /* master process creates shmem region and then others attach to it */
    if(grp_me == grp_master ){


       /* can malloc if there is no data server process and has 1 process/node*/
       /* note: this test reads the global armci_clus_info, not the group's
        * cluster info */
#     ifndef RMA_NEEDS_SHMEM
       if( armci_clus_info[armci_clus_me].nslave == 1)
         myptr = kr_malloc(size, &ctx_localmem);
       else
#     endif
         myptr = Create_Shared_Region(idlist+1,size,idlist);
       if(!myptr && size>0 )armci_die("armci_malloc_group: could not create", (int)(size>>10));

       /* place its address at beginning of attached region for others to see */
       if(size)armci_master_exp_attached_ptr(myptr);

       /* NOTE(review): with DEBUG_ enabled this dereferences myptr even
        * when size is 0 and myptr may be NULL */
       if(DEBUG_){
         printf("%d:armci_malloc_group addr mptr=%p ref=%p size=%ld %ld %ld \n",armci_me,myptr,*(void**)myptr, size,idlist[0],idlist[1]);
         fflush(stdout);
       }
    }

    /* broadcast shmem id to other processes (in the same group) on the
       same cluster node */
    armci_grp_clus_brdcst(idlist, SHMIDLEN*sizeof(long), grp_master,
                          grp_clus_nproc, group);

    if(grp_me != grp_master){
       myptr=(double*)Attach_Shared_Region(idlist+1,size,idlist[0]);
       if(!myptr)armci_die("armci_malloc_group: could not attach", (int)(size>>10));

       /* now every process in a SMP node needs to find out its offset
        * w.r.t. master - this offset is necessary to use memlock table
        */
       if(size) armci_set_mem_offset(myptr);
       if(DEBUG_){
          printf("%d:armci_malloc_group attached addr mptr=%p ref=%p size=%ld\n",
                 armci_me,myptr, *(void**)myptr,size); fflush(stdout);
       }
    }
    /* NOTE(review): ptr_ref_arr has `len` entries (grp_nclus or
     * grp_clus_nproc) but is indexed here with the global armci_clus_me
     * rather than grp_clus_me - confirm the index is always in range */
#   ifdef HITACHI
    armci_register_shmem_grp(myptr,size,idlist+1,idlist[0],ptr_ref_arr[armci_clus_me],group);
#   endif

#   if defined(DATA_SERVER)

    /* get server reference address for every cluster node in the group
     * to perform remote address translation for global address space */
    if(grp_nclus>1){
       if(grp_me == grp_master){

#            ifdef SERVER_THREAD

          /* data server thread runs on master process */
          /* if the group's node master is not the node's global master, the
           * server must attach to the region and report its own address */
          if(ARMCI_Absolute_id(group,grp_master)!=armci_master){
            /*printf("\n%d: grp_master=%d %ld %ld \n",armci_me,ARMCI_Absolute_id(group,grp_master),idlist[0],idlist[1]);*/
            armci_serv_attach_req(idlist, SHMIDLEN*sizeof(long), size,
                                  &ptr, sizeof(void*));
            ptr_ref_arr[grp_clus_me]= ptr; /* from server*/
          }
          else
            ptr_ref_arr[grp_clus_me]=myptr;

#            else
          /* ask data server process to attach to the region and get ptr */
          {
             if(_armci_server_started) {
                armci_serv_attach_req(idlist, SHMIDLEN*sizeof(long), size,
                                      &ptr, sizeof(void*));
                ptr_ref_arr[grp_clus_me]= ptr; /* from server*/
             }
             else /* server not yet started */
                ptr_ref_arr[grp_clus_me]=myptr;
          }

          if(DEBUG_){
             printf("%d:addresses server=%p myptr=%p\n",grp_me,ptr,myptr);
               fflush(stdout);
          }
#            endif
       }
       /* exchange ref addr of shared memory region on every cluster node*/
       /* the pointers are reduced as arrays of int ("ratio" ints per
        * pointer); ptr_ref_arr was calloc'ed and only node masters wrote
        * their own slot, so the "+" reduction presumably leaves each slot
        * holding that node's address */
       {
          int ratio = sizeof(void*)/sizeof(int);
          if(DEBUG_)printf("%d: exchanging %ld ratio=%d\n",armci_me,
                           (long)ptr_arr[grp_me], ratio);
          armci_msg_group_gop_scope(SCOPE_ALL, ptr_ref_arr, grp_nclus*ratio,
                                    "+", ARMCI_INT, group);
       }
    }else {

       /* single node in the group: ptr_ref_arr has one slot (len == grp_nclus) */
       ptr_ref_arr[grp_master] = myptr;

    }

    /* translate addresses for all cluster nodes */
    for(cn = 0; cn < grp_nclus; cn++){

       int master = grp_attr->grp_clus_info[cn].master;
       offset = 0;

       /* on local cluster node use myptr directly */
       ptr = (grp_clus_me == cn) ? myptr: ptr_ref_arr[cn];

       /* compute addresses pointing to the memory regions on cluster node*/
       for(i=0; i< grp_attr->grp_clus_info[cn].nslave; i++){

          /* NULL if request size is 0*/
          ptr_arr[i+master] =(size_arr[i+master])? ((char*)ptr)+offset: NULL;
            offset += size_arr[i+master];
       }
    }

#   else

    /* compute addresses for local cluster node */
    offset =0;
    for(i=0; i< grp_clus_nproc; i++) {

       ptr_ref_arr[i] = (size_arr[i+grp_master])? ((char*)myptr)+offset : 0L;
       offset += size_arr[i+grp_master];

    }

    /* exchange addresses with all other processes */
    ptr_arr[grp_me] = (char*)ptr_ref_arr[grp_me-grp_master];
    armci_exchange_address_grp(ptr_arr, grp_nproc, group);

    /* overwrite entries for local cluster node with ptr_ref_arr */
    bcopy((char*)ptr_ref_arr, (char*)(ptr_arr+grp_master), grp_clus_nproc*sizeof(void*));

#   endif

    /*  armci_print_ptr(ptr_arr, bytes, size, myptr, offset);*/

    /* the pinned-region exchange is compiled out (#if 0), so the group
     * barrier below always runs */
#ifdef ALLOW_PIN
#if 0
    /* ????? check ??????  */
    armci_die("armci_malloc_group: Not yet implemented", 0);
    if(grp_nclus>1)armci_global_region_exchange(myptr, (long) size_arr[grp_me]);
    else
#endif
#endif
       armci_msg_group_barrier(group);

    /* free work arrays */
    free(ptr_ref_arr);
    free(size_arr);
    ARMCI_PR_DBG("exit",0);
}
678 #endif /* ifdef MSG_COMMS_MPI */
679 
680 #else
681 
/* Placeholder for builds without shared-memory support: always reports an
 * error through armci_die. */
void armci_shmem_malloc(void* ptr_arr[], int bytes)
{
  (void)ptr_arr;
  (void)bytes;
  armci_die("armci_shmem_malloc should never be called on this system",0);
}
/* Placeholder for builds without shared-memory support: always reports an
 * error through armci_die. */
void armci_shmem_memget(armci_meminfo_t *meminfo, size_t size) {
  (void)meminfo;
  (void)size;
  armci_die("armci_shmem_memget should never be called on this system",0);
}
/* Placeholder for builds without shared-memory support: always reports an
 * error through armci_die.  Returns NULL should armci_die ever return. */
void* armci_shmem_memat(armci_meminfo_t *meminfo) {
  armci_die("armci_shmem_memat should never be called on this system",0);
  /* non-void function: give the fall-through path a defined result */
  return NULL;
}
/* Placeholder for builds without shared-memory support: always reports an
 * error through armci_die. */
void armci_shmem_memctl(armci_meminfo_t *meminfo) {
  (void)meminfo;
  armci_die("armci_shmem_memctl should never be called on this system",0);
}
695 # ifdef MSG_COMMS_MPI
armci_shmem_malloc_group(void * ptr_arr[],armci_size_t bytes,ARMCI_Group * group)696   void armci_shmem_malloc_group(void *ptr_arr[], armci_size_t bytes,
697                                 ARMCI_Group *group) {
698       armci_die("armci_shmem_malloc_group should never be called on this system",0);
699   }
700 # endif
701 
702 #endif
703 
704 
705 #ifdef ALLOW_PIN
/*
 * Allocator callback for the local kr_malloc context when ALLOW_PIN is
 * defined: mallocs `size` bytes and registers the new block with
 * armci_region_register_loc.
 *
 * Returns the block, or NULL when malloc fails; a failed allocation is not
 * registered.
 */
void *reg_malloc(size_t size)
{
    char *ptr;

    ARMCI_PR_DBG("enter",0);
    ptr = malloc(size);
    /* never hand a NULL region to the registration layer */
    if(ptr) armci_region_register_loc(ptr,size);
    ARMCI_PR_DBG("exit",0);
    return(ptr);
}
722 #endif
723 
724 
725 /* public constructor to initialize the kr_malloc context */
armci_krmalloc_init_localmem()726 void armci_krmalloc_init_localmem() {
727 #if defined(ALLOW_PIN)
728     kr_malloc_init(0, 0, 0, reg_malloc, 0, &ctx_localmem);
729     kr_malloc_init(0, 0, 0, malloc, 0, &ctx_mlocalmem);
730     ctx_mlocalmem.ctx_type = KR_CTX_LOCALMEM;
731 #elif defined(CRAY_SHMEM) && defined(CRAY_XT)
732 #   ifdef CATAMOUNT
733     int units_avail = (cnos_shmem_size() - 1024 * 1024) / SHM_UNIT;
734 #   else
735     extern size_t get_xt_heapsize();
736     int units_avail = (get_xt_heapsize() - 1024 * 1024) / SHM_UNIT;
737 #   endif
738 
739     if(DEBUG_)
740     {
741        fprintf(stderr,"%d:krmalloc_init_localmem: symheap=%llu,units(%d)=%d\n",
742                armci_me, SHM_UNIT*units_avail, SHM_UNIT, units_avail);
743     }
744     kr_malloc_init(SHM_UNIT, units_avail, units_avail, shmalloc, 0,
745                    &ctx_localmem);
746     armci_shmalloc_exchange_offsets(&ctx_localmem);
747 #else
748 
749     kr_malloc_init(0, 0, 0, malloc, 0, &ctx_localmem);
750 
751 #endif
752 
753     ctx_localmem.ctx_type = KR_CTX_LOCALMEM;
754 }
755 
756 /**
757  * Local Memory Allocation and Free
758  */
/*
 * Allocate `bytes` of local memory from kr_malloc: from ctx_mlocalmem when
 * PORTALS is defined, otherwise from ctx_localmem.  Returns whatever
 * kr_malloc returns.
 */
void *PARMCI_Malloc_local(armci_size_t bytes) {
#if defined(PORTALS)
    void *block;

    ARMCI_PR_DBG("enter",0);
    block = kr_malloc((size_t)bytes, &ctx_mlocalmem);
    ARMCI_PR_DBG("exit",0);
    return block;
#else
    ARMCI_PR_DBG("enter",0);
    ARMCI_PR_DBG("exit",0);
    return (void *)kr_malloc((size_t)bytes, &ctx_localmem);
#endif
}
773 
PARMCI_Free_local(void * ptr)774 int PARMCI_Free_local(void *ptr) {
775     ARMCI_PR_DBG("enter",0);
776 #if defined(PORTALS)
777     kr_free((char *)ptr, &ctx_mlocalmem);
778 #else
779     kr_free((char *)ptr, &ctx_localmem);
780 #endif
781     ARMCI_PR_DBG("exit",0);
782     return 0;
783 }
784 
785 #ifdef REGION_ALLOC
786 static  context_t ctx_region_shmem;
787 static  long *reg_pids=NULL;
788 
armci_region_shm_malloc(void * ptr_arr[],size_t bytes)789 void armci_region_shm_malloc(void *ptr_arr[], size_t bytes)
790 {
791     long size=bytes;
792     void *ptr;
793     int i, peers=armci_clus_last-armci_clus_first+1;
794     extern void* armci_region_getcore(size_t);
795     extern int armci_region_register(int p, void **pinout, long pid, size_t bytes);
796 
797     if(!reg_pids){
798        kr_malloc_init(0,0,500*1024*1024, armci_region_getcore, 0,
799                       &ctx_region_shmem);
800        reg_pids = (long*)calloc(peers,sizeof(long));
801        reg_pids[armci_me -armci_clus_first] = getpid();
802        armci_msg_gop_scope(SCOPE_NODE,reg_pids, peers,"+",ARMCI_LONG);
803     }
804 
805     ptr=kr_malloc((size_t)size, &ctx_region_shmem);
806     if(bytes) if(!ptr) armci_die("armci_region_shm_malloc: failed",bytes);
807 
808     bzero((char*)ptr_arr,armci_nproc*sizeof(void*));
809     ptr_arr[armci_me] = ptr;
810 
811     /* now combine individual addresses into a single array */
812     armci_exchange_address(ptr_arr, armci_nproc);
813 
814     for(i=0; i<peers; i++)
815        if(i+armci_clus_first == armci_me) continue;
816        else if(ptr_arr[i+armci_clus_first])armci_region_register(i,ptr_arr+i+armci_clus_first,reg_pids[i], bytes);
817 }
818 
819 #ifdef MSG_COMMS_MPI
/*
 * Group version of the REGION_ALLOC collective allocation.
 *
 *   ptr_arr - output table with one entry per group member (group rank
 *             order); zeroed, then filled by armci_exchange_address_grp.
 *   bytes   - number of bytes requested by the calling process.
 *   group   - the ARMCI group performing the allocation.
 *
 * NOTE(review): ctx_region_shmem and reg_pids are only initialized in
 * armci_region_shm_malloc; this routine appears to rely on that having run
 * first - confirm against callers.
 */
void armci_region_shm_malloc_grp(void *ptr_arr[], size_t bytes, ARMCI_Group *group)
{
    long nbytes = bytes;
    void *mine;
    armci_grp_attr_t *attr = ARMCI_Group_getattr(group);
    int my_rank, nmembers;
    int node_idx     = attr->grp_clus_me;
    int node_members = attr->grp_clus_info[node_idx].nslave;
    int node_first   = attr->grp_clus_info[node_idx].master;
    int k;
    extern void* armci_region_getcore(size_t);
    extern int armci_region_register(int p, void **pinout, long pid, size_t bytes);

    ARMCI_Group_rank(group, &my_rank);
    ARMCI_Group_size(group, &nmembers);

    mine = kr_malloc((size_t)nbytes, &ctx_region_shmem);
    if(bytes) if(!mine) armci_die("armci_region_shm_malloc_grp: failed",bytes);

    bzero((char*)ptr_arr,nmembers*sizeof(void*));
    ptr_arr[my_rank] = mine;

    /* now combine individual addresses into a single array */
    armci_exchange_address_grp(ptr_arr, nmembers, group);

    /* register the blocks of the other group members on this node */
    for(k = 0; k < node_members; k++){
       int grp_rank = node_first + k;
       int p;

       if(grp_rank == my_rank) continue;
       if(!ptr_arr[grp_rank]) continue;

       /* get global peer id within SMP */
       p = (ARMCI_Absolute_id(group, grp_rank) - armci_clus_first);
       armci_region_register(p, ptr_arr+grp_rank, reg_pids[p], bytes);
    }
}
852 #endif /* ifdef MSG_COMMS_MPI */
853 #endif
854 
855 
856 /*\ Collective Memory Allocation
857  *  returns array of pointers to blocks of memory allocated by everybody
858  *  Note: as the same shared memory region can be mapped at different locations
859  *        in each process address space, the array might hold different values
860  *        on every process. However, the addresses are legitimate
861  *        and can be used in the ARMCI data transfer operations.
862  *        ptr_arr[nproc]
863 \*/
/*
 * Collective allocation entry point; selects the allocation scheme at
 * compile time and run time.
 *
 *   ptr_arr - output table filled with the addresses of the allocated
 *             blocks (see the note on address validity above).
 *   bytes   - number of bytes requested by the calling process.
 *
 * Returns 0 on every path that returns; allocation failures in this
 * function are reported through armci_die.
 */
int PARMCI_Malloc(void *ptr_arr[], armci_size_t bytes)
{
    void *ptr;
    ARMCI_PR_DBG("enter",0);
    if(DEBUG_){
       fprintf(stderr,"%d bytes in armci_malloc %d\n",armci_me, (int)bytes);
       fflush(stderr);
       armci_msg_barrier();
    }

    /* REGION_ALLOC builds delegate everything to the region allocator */
#ifdef REGION_ALLOC
    armci_region_shm_malloc(ptr_arr, bytes);
#else
#  ifdef USE_MALLOC
    /* single-process run: allocate locally, nothing to exchange */
    if(armci_nproc == 1) {
      ptr = kr_malloc((size_t) bytes, &ctx_localmem);
      if(bytes) if(!ptr) armci_die("armci_malloc:malloc 1 failed",(int)bytes);
      ptr_arr[armci_me] = ptr;
      ARMCI_PR_DBG("exit",0);
      return (0);
    }
#  endif

    /* exactly one of the two `if` lines below is compiled; the `else` that
     * follows the #endif pairs with whichever one it is */
#  ifdef SGIALTIX
    if( ARMCI_Uses_shm() ) armci_altix_shm_malloc(ptr_arr,bytes);
#  else
    if( ARMCI_Uses_shm() ) armci_shmem_malloc(ptr_arr,bytes);
#  endif
    else {
      /* on distributed-memory systems just malloc & collect all addresses */
      ptr = kr_malloc(bytes, &ctx_localmem);
      if(bytes) if(!ptr) armci_die("armci_malloc:malloc 2 failed",bytes);

      bzero((char*)ptr_arr,armci_nproc*sizeof(void*));
      ptr_arr[armci_me] = ptr;

#  if defined(CRAY_SHMEM)
      armci_shmalloc_exchange_address(ptr_arr);
#  else

      /* now combine individual addresses into a single array */
      armci_exchange_address(ptr_arr, armci_nproc);
#  endif
#  ifdef ALLOW_PIN
      armci_global_region_exchange(ptr, (long) bytes);
#  endif
    }
#endif
    ARMCI_PR_DBG("exit",0);
    return(0);
}
915 
916 /*\
917  * Just a wrapper on PARMCI_Malloc to keep old code from breaking
918 \*/
int PARMCI_Malloc_memdev(void *ptr_arr[], armci_size_t bytes, const char *device)
{
  /* the device name is accepted but not used: the request is forwarded
   * unchanged to PARMCI_Malloc */
  (void)device;

  return PARMCI_Malloc(ptr_arr, bytes);
}
923 
924 
925 
/*\ In the shared-memory path, memory is released only by the process whose
 *  rank equals armci_master; the other processes release nothing.
 *  With a data server, malloc cannot be used.
\*/
PARMCI_Free(void * ptr)929 int PARMCI_Free(void *ptr)
930 {
931     ARMCI_PR_DBG("enter",0);
932     if(!ptr)return 1;
933 
934 #ifndef SGIALTIX
935 #  ifdef REGION_ALLOC
936      kr_free(ptr, &ctx_region_shmem);
937 #  else
938 
939 #    if (defined(SYSV) || defined(WIN32) || defined(MMAP)) && !defined(NO_SHM)
940 #       ifdef USE_MALLOC
941           if(armci_nproc > 1)
942 #       endif
943              if(ARMCI_Uses_shm()){
944                 if(armci_me==armci_master){
945 #               ifdef RMA_NEEDS_SHMEM
946                    Free_Shmem_Ptr(0,0,ptr);
947 #               else
948                    if(armci_clus_info[armci_clus_me].nslave>1)
949                       Free_Shmem_Ptr(0,0,ptr);
950                    else kr_free(ptr, &ctx_localmem);
951 #               endif
952                 }
953                 ptr = NULL;
954                 return 0;
955              }
956 #    endif
957 	  kr_free(ptr, &ctx_localmem);
958 #  endif /* REGION_ALLOC */
959 #else
960      /* Altix */
961      if( ARMCI_Uses_shm() ) kr_free(ptr, &altix_ctx_shmem);
962      else kr_free(ptr, &ctx_localmem);
963 #endif
964 
965      ptr = NULL;
966     ARMCI_PR_DBG("exit",0);
967      return 0;
968 }
969 
/* Counterpart of PARMCI_Malloc_memdev: forwards to PARMCI_Free and hands
 * back its status unchanged. */
int PARMCI_Free_memdev(void *ptr)
{
  int status = PARMCI_Free(ptr);

  return status;
}
974 
975 
ARMCI_Uses_shm()976 int ARMCI_Uses_shm()
977 {
978     int uses=0;
979 
980 #if (defined(SYSV) || defined(WIN32) || defined(MMAP) ||defined(HITACHI)) && !defined(NO_SHM)
981 #   ifdef RMA_NEEDS_SHMEM
982       if(armci_nproc >1) uses= 1; /* always unless serial mode */
983 #   else
984       if(armci_nproc != armci_nclus)uses= 1; /* only when > 1 node used */
985 #   endif
986 #endif
987     if(DEBUG_) fprintf(stderr,"%d:uses shmem %d\n",armci_me, uses);
988     return uses;
989 }
990 #ifdef MSG_COMMS_MPI
991 
/* Group flavour of ARMCI_Uses_shm: returns 1 when shared memory is used
 * for the given group, 0 otherwise. */
int ARMCI_Uses_shm_grp(ARMCI_Group *group)
{
    int shm_in_use = 0;
    int my_grp_rank;
    int grp_size;

    ARMCI_PR_DBG("enter",0);

    ARMCI_Group_size(group, &grp_size);
    ARMCI_Group_rank(group, &my_grp_rank);

#if (defined(SYSV) || defined(WIN32) || defined(MMAP) ||defined(HITACHI)) && !defined(NO_SHM)
#   ifdef RMA_NEEDS_SHMEM
    /* always, except for a one-process group */
    if (grp_size > 1) {
        shm_in_use = 1;
    }
#   else
    /* The group-local test (group size against the group's own cluster
     * count) is disabled; the global process and cluster counts decide. */
    if (armci_nproc != armci_nclus) {
        shm_in_use = 1;
    }
#   endif
#endif
    if (DEBUG_) {
        fprintf(stderr,"%d (grp_id=%d):uses shmem %d\n",armci_me, my_grp_rank, shm_in_use);
    }
    ARMCI_PR_DBG("exit",0);
    return shm_in_use;
}
1018 
1019 /*\ ************** Begin Group Collective Memory Allocation ******************
1020  *  returns array of pointers to blocks of memory allocated by everybody
1021  *  Note: as the same shared memory region can be mapped at different locations
1022  *        in each process address space, the array might hold different values
1023  *        on every process. However, the addresses are legitimate
1024  *        and can be used in the ARMCI data transfer operations.
1025  *        ptr_arr[nproc]
1026 \*/
int ARMCI_Malloc_group(void *ptr_arr[], armci_size_t bytes,
                       ARMCI_Group *group)
{
    /*
     * Collective allocation over the processes of `group`.
     *   ptr_arr - receives the block addresses, indexed by group rank
     *   bytes   - size requested by the calling process
     *   group   - group performing the allocation
     * Returns 0; allocation failures and calls from a process outside the
     * group end in armci_die.
     */
    void *ptr;
    int grp_me, grp_nproc;
    ARMCI_PR_DBG("enter",0);
    ARMCI_Group_size(group, &grp_nproc);
    ARMCI_Group_rank(group, &grp_me);
    if(DEBUG_)fprintf(stderr,"%d (grp_id=%d) bytes in armci_malloc_group %d\n",
                      armci_me, grp_me, (int)bytes);

    /* grp_me indexes ptr_arr below, so reject a caller that has no rank in
     * the group (same test as in ARMCI_Free_group) */
    if(grp_me == MPI_UNDEFINED) {
       armci_die("armci_malloc_group: process is not a member in this group",
                 armci_me);
    }

#ifdef REGION_ALLOC
    armci_region_shm_malloc_grp(ptr_arr, bytes, group);
#else
#ifdef USE_MALLOC
    /* one-process group: no exchange needed, only our own slot is written */
    if(grp_nproc == 1) {
       ptr = kr_malloc((size_t) bytes, &ctx_localmem);
       if(bytes && !ptr)
          armci_die("armci_malloc_group:malloc 1 failed",(int)bytes);
       ptr_arr[grp_me] = ptr;
       ARMCI_PR_DBG("exit",0);
       return (0);
    }
#endif

    if( ARMCI_Uses_shm_grp(group) ) {
#      ifdef SGIALTIX
          armci_altix_shm_malloc_group(ptr_arr,bytes,group);
#      else
          armci_shmem_malloc_group(ptr_arr,bytes,group);
#      endif
    }
    else {
       /* on distributed-memory systems just malloc & collect all addresses */
       ptr = kr_malloc(bytes, &ctx_localmem);
       if(bytes && !ptr)
          armci_die("armci_malloc_group:malloc 2 failed",(int)bytes);

       /* clear every slot, then publish only our own entry */
       bzero((char*)ptr_arr,grp_nproc*sizeof(void*));
       ptr_arr[grp_me] = ptr;

       /* now combine individual addresses into a single array */
#if defined(CRAY_SHMEM)
       armci_shmalloc_exchange_address_grp(ptr_arr, group);
#else
       armci_exchange_address_grp(ptr_arr, grp_nproc, group);
#endif

       /* NOTE: unlike PARMCI_Malloc, no armci_global_region_exchange is
        * done here under ALLOW_PIN; that step is not implemented for
        * groups yet. */
    }
#endif
    ARMCI_PR_DBG("exit",0);
    return(0);
}
1084 /*\
1085  * Just a wrapper on ARMCI_Malloc_group to keep old code from breaking
1086 \*/
int ARMCI_Malloc_group_memdev(void *ptr_arr[], armci_size_t bytes,
                       ARMCI_Group *group, const char *device)
{
  /* the device name is accepted but not used: the request is forwarded
   * unchanged to ARMCI_Malloc_group */
  (void)device;

  return ARMCI_Malloc_group(ptr_arr, bytes, group);
}
1092 
1093 
/*\ In the shared-memory path, memory is released only by the group's master
 *  process on the caller's cluster node; the other processes release nothing.
 *  With a data server, malloc cannot be used.
 \*/
int ARMCI_Free_group(void *ptr, ARMCI_Group *group)
{
    /*
     * Releases a block obtained from ARMCI_Malloc_group.
     * Returns 1 when ptr is NULL (nothing is released), 0 otherwise.
     * A caller that is not a member of `group` ends in armci_die.
     * Every return path emits the matching "exit" debug trace.
     */
    int grp_me, grp_nproc, grp_master, grp_clus_me;
    armci_grp_attr_t *grp_attr=ARMCI_Group_getattr(group);
    ARMCI_PR_DBG("enter",0);

    if(!ptr){
       ARMCI_PR_DBG("exit",0);
       return 1;
    }

    ARMCI_Group_size(group, &grp_nproc);
    ARMCI_Group_rank(group, &grp_me);
    if(grp_me == MPI_UNDEFINED) { /* check if the process is in this group */
       armci_die("armci_free_group: process is not a member in this group",
                 armci_me);
    }
    /* get the group cluster info */
    grp_clus_me    = grp_attr->grp_clus_me;
    grp_master     = grp_attr->grp_clus_info[grp_clus_me].master;

#ifndef SGIALTIX
#ifdef REGION_ALLOC
    kr_free(ptr, &ctx_region_shmem);
#else
#   if (defined(SYSV) || defined(WIN32) || defined(MMAP)) && !defined(NO_SHM)
#      ifdef USE_MALLOC
    /* with USE_MALLOC a one-process group took its block from
     * ctx_localmem, so the shared-memory path is skipped */
    if(grp_nproc > 1)
#      endif
    if(ARMCI_Uses_shm_grp(group)){
       /* only the group master on this node releases the block */
       if(grp_me == grp_master) {
#            ifdef RMA_NEEDS_SHMEM
          Free_Shmem_Ptr(0,0,ptr);
#            else
          if(armci_clus_info[armci_clus_me].nslave>1){
             Free_Shmem_Ptr(0,0,ptr);
          }else{
             kr_free(ptr, &ctx_localmem);
          }
#            endif
       }
       ARMCI_PR_DBG("exit",0);
       return 0;
    }
#   endif
    kr_free(ptr, &ctx_localmem);
#endif /* ifdef REGION_ALLOC */
#else /* SGI Altix */
    if(ARMCI_Uses_shm_grp(group))
       kr_free(ptr, &altix_ctx_shmem_grp);
    else kr_free(ptr, &ctx_localmem);

#endif /* SGIALTIX */

    ARMCI_PR_DBG("exit",0);
    return 0;
}
1150 /* ***************** End Group Collective Memory Allocation ******************/
1151 #endif
1152 
1153 /* ************** Begin Non-Collective Memory Allocation ******************
1154  * Prototype similar to SysV shared memory.
1155  */
1156 
1157 /**
1158  * CHECK: On Altix we are forced to use SysV as shmalloc is collective. We
1159  * may use a preallocated shmalloc memory, however, it may NOT still solve
1160  * our problem...
 * NOTE: "int memflg" option for future optimizations.
1162  */
/*
 * Non-collective allocation of `bytes` bytes, described through *meminfo.
 *   bytes   - requested size; zero is rejected
 *   meminfo - descriptor to fill in; must not be NULL
 *   memflg  - must be 0
 * Invalid arguments and allocation failure end in armci_die.
 */
void PARMCI_Memget(size_t bytes, armci_meminfo_t *meminfo, int memflg) {

    /* argument validation (bytes is unsigned, so only zero is invalid) */
    if (bytes == 0) armci_die("PARMCI_Memget: size must be > 0", (int)bytes);
    if (NULL == meminfo) armci_die("PARMCI_Memget: Invalid arg #2 (NULL ptr)",0);
    if (0 != memflg) armci_die("PARMCI_Memget: Invalid memflg", memflg);

    if (ARMCI_Uses_shm()) {
        /* shared-memory path fills in the descriptor itself */
        armci_shmem_memget(meminfo, bytes);
    } else {
        void *block = kr_malloc(bytes, &ctx_localmem);

        if (bytes && !block) armci_die("PARMCI_Memget failed", (int)bytes);

        /* locally allocated block: the ARMCI address and the local address
         * are the same pointer, and the caller is recorded as creator */
        meminfo->armci_addr = block;
        meminfo->addr       = block;
        meminfo->size       = bytes;
        meminfo->cpid       = armci_me;
    }

    /* Region exchange for pinned memory (ALLOW_PIN) is disabled for now:
     * it may not go through the fast zero-copy path. */

    if (DEBUG_) {
        printf("%d: PARMCI_Memget: addresses server=%p myptr=%p bytes=%ld\n",
               armci_me, meminfo->armci_addr, meminfo->addr, (long)bytes);
        fflush(stdout);
    }
}
1203 
/*
 * Returns an address through which the caller can reach the block
 * described by *meminfo (must not be NULL).
 * `offset` is added only when the block was created by another process
 * and shared memory is in use; in the other cases meminfo->addr is
 * returned as is.
 */
void* PARMCI_Memat(armci_meminfo_t *meminfo, long offset) {
    void *attached;

    if (NULL == meminfo) armci_die("PARMCI_Memat: Invalid arg #1 (NULL ptr)",0);

    /* the creator already holds a usable address */
    if (armci_me == meminfo->cpid) return meminfo->addr;

    if (ARMCI_Uses_shm()) {
        attached = (char*)armci_shmem_memat(meminfo) + offset;
    } else {
        attached = meminfo->addr;
    }

    if (DEBUG_) {
        printf("%d:PARMCI_Memat: attached addr mptr=%p size=%ld\n",
               armci_me, attached, (long)meminfo->size);
        fflush(stdout);
    }

    return attached;
}
1229 
void ARMCI_Memdt(armci_meminfo_t *meminfo, long offset) {
  /*
   * Intentionally a no-op. Reference counting may be needed here in the
   * future, so that a segment is not left with dangling pointers when its
   * creator calls Memctl while other processes are still attached to it.
   */
  (void)meminfo;
  (void)offset;
}
1238 
/*
 * Releases the block described by *meminfo (must not be NULL).
 * Only the creating process (meminfo->cpid == armci_me) actually frees
 * anything; for every caller the two address fields of the descriptor are
 * reset to NULL.
 */
void ARMCI_Memctl(armci_meminfo_t *meminfo) {

    if(meminfo==NULL) armci_die("ARMCI_Memctl: Invalid arg #1 (NULL ptr)",0);

    /* only the creator can delete the segment */
    if(meminfo->cpid == armci_me)
    {
       if( !ARMCI_Uses_shm() )
       {
          /* block came from the local allocator context */
          kr_free(meminfo->addr, &ctx_localmem);
       }
       else
       {
          armci_shmem_memctl(meminfo);
       }
    }

    meminfo->addr       = NULL;
    meminfo->armci_addr = NULL;
    /* if(meminfo->attr!=NULL) free(meminfo->attr); */
}
1261 
1262 /* ***************** End Non-Collective Memory Allocation ******************/
1263 
1264