1 #if HAVE_CONFIG_H
2 # include "config.h"
3 #endif
4
5 /* $Id: memory.c,v 1.56.2.3 2007-04-25 23:49:55 d3p687 Exp $ */
6 #if HAVE_STDIO_H
7 # include <stdio.h>
8 #endif
9 #if HAVE_ASSERT_H
10 # include <assert.h>
11 #endif
12 #include "armcip.h"
13 #include "message.h"
14 #include "kr_malloc.h"
15
16 #define DEBUG_ 0
17 #define USE_MALLOC
18 #define USE_SHMEM_
19 #define SHM_UNIT 1024
20
21 #if defined(CRAY_SHMEM)
22 extern void armci_shmalloc_exchange_address(void **ptr_arr);
23 extern void armci_shmalloc_exchange_offsets(context_t *);
24 # if defined(CRAY_XT)
25 # include <mpp/shmem.h>
26 # ifdef CATAMOUNT
27 # include <catamount/data.h>
28 # endif
29 # endif
30 #endif
31
32 static context_t ctx_localmem;
33 #if defined(PORTALS_WITHREG) || defined(PORTALS) || defined(ALLOW_PIN)
34 static context_t ctx_mlocalmem;
35 #endif
36
37 #if defined(SYSV) || defined(WIN32) || defined(MMAP) || defined(HITACHI)
38 #include "armci_shmem.h"
39
40 #if !defined(USE_SHMEM) && (defined(HITACHI) || defined(MULTI_CTX))
41 # define USE_SHMEM
42 #endif
43
44 #if !(defined(LAPI)||defined(QUADRICS)||defined(SERVER_THREAD)) ||\
45 defined(USE_SHMEM)
46 #define RMA_NEEDS_SHMEM
47 #endif
48
49 #if defined(DATA_SERVER) && !defined(SERVER_THREAD)
50 extern int _armci_server_started;
51 #endif
52
53 /****************************************************************************
54 * Memory Allocator called by kr_malloc on SGI Altix to get more core from OS
55 */
56 #ifdef SGIALTIX
57
58 #include <mpp/shmem.h>
59 #if HAVE_UNISTD_H
60 # include <unistd.h>
61 #endif
62
63 #define DEF_UNITS (64)
64 #define MAX_SEGS 512
65
66 #define _SHMMAX_ALTIX 32*1024 /* 32 MB */
67 #define _SHMMAX_ALTIX_GRP 512*1024 /* 512 MB */
68
69 static context_t altix_ctx_shmem;
70 static context_t altix_ctx_shmem_grp;
71 static size_t altix_pagesize;
72 extern void armci_memoffset_table_newentry(void *ptr, size_t seg_size);
73
74 void *armci_altix_allocate(size_t bytes)
75 {
76 void *ptr, *sptr;
77 ARMCI_PR_DBG("enter",0);
78 sptr=ptr= shmalloc(bytes);
79 if(sptr == NULL) armci_die("armci_altix_allocate: shmalloc failed\n",
80 armci_me);
81 armci_memoffset_table_newentry(ptr, bytes);
82 #if 0
83 if(ptr){ /* touch each page to establish ownership */
84 int i;
85 for(i=0; i< bytes/altix_pagesize; i++){
86 *(double*)ptr=0.;
87           ptr = (char*)ptr + altix_pagesize;
88 }
89 }
90 #endif
91 ARMCI_PR_DBG("exit",0);
92 return sptr;
93 }
94
95 void armci_altix_shm_init()
96 {
97 ARMCI_PR_DBG("enter",0);
98 altix_pagesize = getpagesize();
99 kr_malloc_init(SHM_UNIT, _SHMMAX_ALTIX, 0,
100 armci_altix_allocate, 0, &altix_ctx_shmem);
101 kr_malloc_init(SHM_UNIT, _SHMMAX_ALTIX_GRP, _SHMMAX_ALTIX_GRP,
102 armci_altix_allocate, 0, &altix_ctx_shmem_grp);
103 /* allocate a huge segment for groups. When kr_malloc() is called for
104 the first time for this altix_ctx_shmem_grp context with some minimal
105 size of 8 bytes, a huge segment of size (SHM_UNIT*_SHMMAX_ALTIX_GRP)
106 will be created */
107 {
108 void *ptr;
109 ptr=kr_malloc((size_t)8, &altix_ctx_shmem_grp);
110 if(ptr==NULL)
111 armci_die("armci_altix_shm_init(): malloc failed", armci_me);
112 }
113 ARMCI_PR_DBG("exit",0);
114 }
115
116 void armci_altix_shm_malloc(void *ptr_arr[], armci_size_t bytes)
117 {
118 long size=bytes;
119 void *ptr;
120 int i;
121 ARMCI_PR_DBG("enter",0);
122 armci_msg_lgop(&size,1,"max");
123 ptr=kr_malloc((size_t)size, &altix_ctx_shmem);
124 bzero(ptr_arr,(armci_nproc)*sizeof(void*));
125 ptr_arr[armci_me] = ptr;
126 if(size!=0 && ptr==NULL)
127 armci_die("armci_altix_shm_malloc(): malloc failed", armci_me);
128 for(i=0; i< armci_nproc; i++) if(i!=armci_me) ptr_arr[i]=shmem_ptr(ptr,i);
129 ARMCI_PR_DBG("exit",0);
130 }
131
132 #ifdef MSG_COMMS_MPI
133 void armci_altix_shm_malloc_group(void *ptr_arr[], armci_size_t bytes,
134 ARMCI_Group *group) {
135 long size=bytes;
136 void *ptr;
137 int i,grp_me, grp_nproc;
138 armci_grp_attr_t *grp_attr=ARMCI_Group_getattr(group);
139 ARMCI_PR_DBG("enter",0);
140
141 ARMCI_Group_size(group, &grp_nproc);
142 ARMCI_Group_rank(group, &grp_me);
143 armci_msg_group_lgop(&size,1,"max",group);
144 ptr=kr_malloc((size_t)size, &altix_ctx_shmem_grp);
145 if(size!=0 && ptr==NULL)
146 armci_die("armci_altix_shm_malloc_group(): malloc failed for groups. Increase _SHMMAX_ALTIX_GRP", armci_me);
147 bzero(ptr_arr,(grp_nproc)*sizeof(void*));
148 ptr_arr[grp_me] = ptr;
149 for(i=0; i< grp_nproc; i++) if(i!=grp_me) ptr_arr[i]=shmem_ptr(ptr,ARMCI_Absolute_id(group, i));
150 ARMCI_PR_DBG("exit",0);
151 }
152 #endif
153
154 #endif /* end ifdef SGIALTIX */
155 /* ------------------ End Altix memory allocator ----------------- */
156
157 void kr_check_local()
158 {
159 #if 0
160 kr_malloc_print_stats(&ctx_localmem);
161 #endif
162 kr_malloc_verify(&ctx_localmem);
163 #if defined(PORTALS_WITHREG)
164 kr_malloc_verify(&ctx_mlocalmem);
165 #endif
166 }
167
168 void armci_print_ptr(void **ptr_arr, int bytes, int size, void* myptr, int off)
169 {
170 int i;
171 int nproc = armci_clus_info[armci_clus_me].nslave;
172
173 ARMCI_PR_DBG("enter",0);
174 for(i=0; i< armci_nproc; i++){
175 int j;
176 if(armci_me ==i){
177 printf("%d master =%d nproc=%d off=%d\n",armci_me,
178 armci_master,nproc, off);
179 printf("%d:bytes=%d mptr=%p s=%d ",armci_me, bytes, myptr,size);
180 for(j = 0; j< armci_nproc; j++)printf(" %p",ptr_arr[j]);
181 printf("\n"); fflush(stdout);
182 }
183 armci_msg_barrier();
184 }
185 ARMCI_PR_DBG("exit",0);
186 }
187
188
189 /*\ master exports its address of shmem region at the beginning of that region
190 \*/
191 static void armci_master_exp_attached_ptr(void* ptr)
192 {
193 ARMCI_PR_DBG("enter",0);
194 if(!ptr) armci_die("armci_master_exp_att_ptr: null ptr",0);
195 *(volatile void**)ptr = ptr;
196 ARMCI_PR_DBG("exit",0);
197 }
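
/* Illustrative sketch (not built): how an attaching process can use the
 * pointer exported above.  The master stores its own mapping address at the
 * start of the segment, so a process that mapped the same segment at a
 * different address can read the master's view back and compute the offset
 * between the two mappings.  armci_set_mem_offset() is assumed to perform
 * the equivalent bookkeeping internally.
 */
#if 0
static long example_offset_from_master(void *attached)
{
    void *master_view = *(void**)attached;            /* value written by the master */
    return (long)((char*)master_view - (char*)attached); /* offset between mappings  */
}
#endif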
198
199
200 /*\ Collective Memory Allocation on shared memory systems
201 \*/
202 void armci_shmem_malloc(void *ptr_arr[], armci_size_t bytes)
203 {
204 void *myptr=NULL, *ptr=NULL;
205 long idlist[SHMIDLEN];
206 long size=0, offset=0;
207 long *size_arr;
208 void **ptr_ref_arr;
209 int i,cn, len;
210 int nproc = armci_clus_info[armci_clus_me].nslave;
211 ARMCI_PR_DBG("enter",0);
212 bzero((char*)ptr_arr,armci_nproc*sizeof(void*));
213
214 /* allocate work arrays */
215 size_arr = (long*)calloc(armci_nproc,sizeof(long));
216 if(!size_arr)armci_die("armci_malloc:calloc failed",armci_nproc);
217
218 /* allocate arrays for cluster address translations */
219 # if defined(DATA_SERVER)
220 len = armci_nclus;
221 # else
222 len = nproc;
223 # endif
224
225 ptr_ref_arr = calloc(len,sizeof(void*)); /* must be zero */
226 if(!ptr_ref_arr)armci_die("armci_malloc:calloc 2 failed",len);
227
228 /* combine all memory requests into size_arr */
229 size_arr[armci_me] = bytes;
230 armci_msg_lgop(size_arr, armci_nproc, "+");
231
232 /* determine aggregate request size on the cluster node */
233 for(i=0, size=0; i< nproc; i++) size += size_arr[i+armci_master];
234
235 /* master process creates shmem region and then others attach to it */
236 if(armci_me == armci_master ){
237
238     /* can use plain malloc if there is no data server process and only 1 process per node */
239 # ifndef RMA_NEEDS_SHMEM
240 if(nproc == 1)
241 myptr = kr_malloc(size, &ctx_localmem);
242 else
243 # endif
244 myptr = Create_Shared_Region(idlist+1,size,idlist);
245 if(!myptr && size>0 )armci_die("armci_malloc: could not create", (int)(size>>10));
246
247     /* place its address at beginning of attached region for others to see */
248 if(size)armci_master_exp_attached_ptr(myptr);
249
250 if(DEBUG_){
251 printf("%d:armci_malloc addr mptr=%p size=%ld\n",armci_me,myptr,size);
252 fflush(stdout);
253 }
254 }
255
256 /* broadcast shmem id to other processes on the same cluster node */
257 armci_msg_clus_brdcst(idlist, SHMIDLEN*sizeof(long));
258
259 if(armci_me != armci_master){
260 myptr=(double*)Attach_Shared_Region(idlist+1,size,idlist[0]);
261 if(!myptr)armci_die("armci_malloc: could not attach", (int)(size>>10));
262
263        /* now every process in an SMP node needs to find out its offset
264         * w.r.t. the master - this offset is necessary to use the memlock table
265 */
266 if(size) armci_set_mem_offset(myptr);
267 if(DEBUG_){
268 printf("%d:armci_malloc attached addr mptr=%p ref=%p size=%ld\n",
269 armci_me,myptr, *(void**)myptr,size); fflush(stdout);
270 }
271 }
272 # ifdef HITACHI
273 armci_register_shmem(myptr,size,idlist+1,idlist[0],ptr_ref_arr[armci_clus_me]);
274 # endif
275 # if defined(DATA_SERVER)
276
277 /* get server reference address for every cluster node to perform
278 * remote address translation for global address space */
279 if(armci_nclus>1){
280 if(armci_me == armci_master){
281
282 # ifdef SERVER_THREAD
283
284 /* data server thread runs on master process */
285 ptr_ref_arr[armci_clus_me]=myptr;
286
287 # else
288 /* ask dataserver process to attach to the region and get
289 * ptr*/
290 {
291 if(_armci_server_started) {
292 armci_serv_attach_req(idlist, SHMIDLEN*sizeof(long), size,
293 &ptr, sizeof(void*));
294 ptr_ref_arr[armci_clus_me]= ptr; /* from server*/
295 }
296 else /* server not yet started */
297 ptr_ref_arr[armci_clus_me]=myptr;
298 }
299
300 if(DEBUG_){
301 printf("%d:addresses server=%p myptr=%p\n",armci_me,ptr,myptr);
302 fflush(stdout);
303 }
304 # endif
305 }
306 /* exchange ref addr of shared memory region on every cluster node*/
307 armci_exchange_address(ptr_ref_arr, armci_nclus);
308 }else {
309
310 ptr_ref_arr[armci_master] = myptr;
311
312 }
313
314 /* translate addresses for all cluster nodes */
315 for(cn = 0; cn < armci_nclus; cn++){
316
317 int master = armci_clus_info[cn].master;
318 offset = 0;
319
320 /* on local cluster node use myptr directly */
321 ptr = (armci_clus_me == cn) ? myptr: ptr_ref_arr[cn];
322
323 /* compute addresses pointing to the memory regions on cluster node*/
324 for(i=0; i< armci_clus_info[cn].nslave; i++){
325
326 /* NULL if request size is 0*/
327 ptr_arr[i+master] = (size_arr[i+master])? ((char*)ptr)+offset : NULL;
328 offset += size_arr[i+master];
329
330 }
331 }
332
333 # else
334
335 /* compute addresses for local cluster node */
336 offset =0;
337 for(i=0; i< nproc; i++) {
338
339 ptr_ref_arr[i] = (size_arr[i+armci_master])? ((char*)myptr)+offset : 0L;
340 offset += size_arr[i+armci_master];
341
342 }
343
344   /* exchange addresses with all other processes */
345 ptr_arr[armci_me] = (char*)ptr_ref_arr[armci_me-armci_master];
346 armci_exchange_address(ptr_arr, armci_nproc);
347
348 /* overwrite entries for local cluster node with ptr_ref_arr */
349 bcopy((char*)ptr_ref_arr, (char*)(ptr_arr+armci_master), nproc*sizeof(void*));
350
351 /* armci_print_ptr(ptr_arr, bytes, size, myptr, offset);*/
352
353 # endif
354
355 #ifdef ALLOW_PIN
356 if(armci_nclus>1)armci_global_region_exchange(myptr, (long) size_arr[armci_me]);
357 else
358 #endif
359 armci_msg_barrier();
360
361 /* free work arrays */
362 free(ptr_ref_arr);
363 free(size_arr);
364 ARMCI_PR_DBG("exit",0);
365
366 }
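
/* Worked example (illustrative): with three processes on a node requesting
 * 100, 0 and 200 bytes, size_arr[master..master+2] = {100, 0, 200} and the
 * translation loops above yield
 *    ptr_arr[master+0] = base + 0
 *    ptr_arr[master+1] = NULL            (zero-byte request)
 *    ptr_arr[master+2] = base + 100
 * where "base" is the node segment address: myptr on the local node, or the
 * server/master reference address taken from ptr_ref_arr[] for a remote node.
 */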
367
368 /********************************************************************
369 * Non-collective Memory Allocation on shared memory systems
370 \*/
371 void armci_shmem_memget(armci_meminfo_t *meminfo, size_t size) {
372 void *myptr=NULL;
373     void *armci_ptr=NULL; /* legal ARMCI ptr used in ARMCI data xfer ops */
374 long idlist[SHMIDLEN];
375
376     /* can use plain malloc if there is no data server process & only 1 process per node */
377 #ifndef RMA_NEEDS_SHMEM
378 if( armci_clus_info[armci_clus_me].nslave == 1)
379 myptr = kr_malloc(size, &ctx_localmem);
380 else
381 #endif
382 myptr = Create_Shared_Region(idlist+1,size,idlist);
383
384 if(!myptr && size>0 )
385 armci_die("armci_shmem_memget: create failed", (int)(size>>10));
386
387 if(DEBUG_)
388 {
389 printf("%d: armci_shmem_memget: addr=%p size=%ld %ld %ld \n", armci_me,
390 myptr, (long)size, idlist[0], idlist[1]);
391 fflush(stdout);
392 }
393
394 armci_ptr = myptr;
395
396 #if defined(DATA_SERVER)
397
398 /* get server reference address to perform
399 * remote address translation for global address space */
400 if(armci_nclus>1)
401 {
402 # ifdef SERVER_THREAD
403
404 /* data server thread runs on master process */
405 if(armci_me != armci_master) {
406 armci_serv_attach_req(idlist, SHMIDLEN*sizeof(long), size,
407 &armci_ptr, sizeof(void*));
408 }
409
410 # else
411 /* ask dataserver process to attach to region and get ptr*/
412 {
413 if(_armci_server_started) {
414 armci_serv_attach_req(idlist, SHMIDLEN*sizeof(long), size,
415 &armci_ptr, sizeof(void*));
416 }
417 }
418 # endif
419 }
420 #endif
421
422 /* fill the meminfo structure */
423 meminfo->armci_addr = armci_ptr;
424 meminfo->addr = myptr;
425 meminfo->size = size;
426 meminfo->cpid = armci_me;
427 bcopy(idlist, meminfo->idlist, SHMIDLEN*sizeof(long));
428
429 }
430
431 void* armci_shmem_memat(armci_meminfo_t *meminfo) {
432 void *ptr=NULL;
433 long size = (long) meminfo->size;
434 long *idlist = (long*) meminfo->idlist;
435
436 if(SAMECLUSNODE(meminfo->cpid))
437 {
438 /* Attach to the shared memory segment */
439 ptr=(double*)Attach_Shared_Region(idlist+1,size,idlist[0]);
440        if(!ptr)armci_die("ARMCI_Memat: could not attach", (int)(size>>10));
441
442        /* CHECK: now every process in an SMP node needs to find out its offset
443         * w.r.t. the master - this offset is necessary to use the memlock table
444 */
445 if(size) armci_set_mem_offset(ptr);
446 }
447 else
448 {
449 ptr = meminfo->armci_addr; /* remote address */
450 }
451
452 return ptr;
453 }
454
455 void armci_shmem_memctl(armci_meminfo_t *meminfo) {
456
457 /* only the creator can delete the segment */
458 if(meminfo->cpid == armci_me) {
459 void *ptr = meminfo->addr;
460
461 #ifdef RMA_NEEDS_SHMEM
462 Free_Shmem_Ptr(0,0,ptr);
463 #else
464 if(armci_clus_info[armci_clus_me].nslave>1)
465 Free_Shmem_Ptr(0,0,ptr);
466 else kr_free(ptr, &ctx_localmem);
467 #endif
468 }
469 }
470
471 /****** End: Non-collective memory allocation on shared memory systems *****/
472
473 #ifdef MSG_COMMS_MPI
474 /********************************************************************
475 * Group Memory Allocation on shared memory systems for ARMCI Groups
476 \*/
477 void armci_shmem_malloc_group(void *ptr_arr[], armci_size_t bytes,
478 ARMCI_Group *group)
479 {
480 void *myptr=NULL, *ptr=NULL;
481 long idlist[SHMIDLEN];
482 long size=0, offset=0;
483 long *size_arr;
484 void **ptr_ref_arr;
485 int i,cn, len;
486 /* int nproc = armci_clus_info[armci_clus_me].nslave; ? change ? */
487 int grp_me, grp_nproc, grp_nclus, grp_master, grp_clus_nproc, grp_clus_me;
488 armci_grp_attr_t *grp_attr=ARMCI_Group_getattr(group);
489 ARMCI_PR_DBG("enter",0);
490
491 /* Get the group info: group size & group rank */
492 ARMCI_Group_size(group, &grp_nproc);
493 ARMCI_Group_rank(group, &grp_me);
494 if(grp_me == MPI_UNDEFINED) { /* check if the process is in this group */
495 armci_die("armci_malloc_group: process is not a member in this group",
496 armci_me);
497 }
498
499 grp_nclus = grp_attr->grp_nclus;
500 grp_clus_me = grp_attr->grp_clus_me;
501 grp_master = grp_attr->grp_clus_info[grp_clus_me].master;
502 grp_clus_nproc = grp_attr->grp_clus_info[grp_clus_me].nslave;
503
504 bzero((char*)ptr_arr,grp_nproc*sizeof(void*));
505
506 /* allocate work arrays */
507 size_arr = (long*)calloc(grp_nproc,sizeof(long));
508 if(!size_arr)armci_die("armci_malloc_group:calloc failed",grp_nproc);
509
510 /* allocate arrays for cluster address translations */
511 # if defined(DATA_SERVER)
512 len = grp_nclus;
513 # else
514 len = grp_clus_nproc;
515 # endif
516
517 ptr_ref_arr = calloc(len,sizeof(void*)); /* must be zero */
518 if(!ptr_ref_arr)armci_die("armci_malloc_group:calloc 2 failed",len);
519
520 /* combine all memory requests into size_arr */
521 size_arr[grp_me] = bytes;
522 armci_msg_group_gop_scope(SCOPE_ALL, size_arr, grp_nproc, "+", ARMCI_LONG,
523 group);
524
525 /* determine aggregate request size on the cluster node */
526 for(i=0, size=0; i< grp_clus_nproc; i++) size += size_arr[i+grp_master];
527
528 /* master process creates shmem region and then others attach to it */
529 if(grp_me == grp_master ){
530
531
532     /* can use plain malloc if there is no data server process and only 1 process per node */
533 # ifndef RMA_NEEDS_SHMEM
534 if( armci_clus_info[armci_clus_me].nslave == 1)
535 myptr = kr_malloc(size, &ctx_localmem);
536 else
537 # endif
538 myptr = Create_Shared_Region(idlist+1,size,idlist);
539 if(!myptr && size>0 )armci_die("armci_malloc_group: could not create", (int)(size>>10));
540
541       /* place its address at beginning of attached region for others to see */
542 if(size)armci_master_exp_attached_ptr(myptr);
543
544 if(DEBUG_){
545 printf("%d:armci_malloc_group addr mptr=%p ref=%p size=%ld %ld %ld \n",armci_me,myptr,*(void**)myptr, size,idlist[0],idlist[1]);
546 fflush(stdout);
547 }
548 }
549
550 /* broadcast shmem id to other processes (in the same group) on the
551 same cluster node */
552 armci_grp_clus_brdcst(idlist, SHMIDLEN*sizeof(long), grp_master,
553 grp_clus_nproc, group);
554
555 if(grp_me != grp_master){
556 myptr=(double*)Attach_Shared_Region(idlist+1,size,idlist[0]);
557 if(!myptr)armci_die("armci_malloc_group: could not attach", (int)(size>>10));
558
559        /* now every process in an SMP node needs to find out its offset
560         * w.r.t. the master - this offset is necessary to use the memlock table
561 */
562 if(size) armci_set_mem_offset(myptr);
563 if(DEBUG_){
564 printf("%d:armci_malloc_group attached addr mptr=%p ref=%p size=%ld\n",
565 armci_me,myptr, *(void**)myptr,size); fflush(stdout);
566 }
567 }
568 # ifdef HITACHI
569 armci_register_shmem_grp(myptr,size,idlist+1,idlist[0],ptr_ref_arr[armci_clus_me],group);
570 # endif
571
572 # if defined(DATA_SERVER)
573
574 /* get server reference address for every cluster node in the group
575 * to perform remote address translation for global address space */
576 if(grp_nclus>1){
577 if(grp_me == grp_master){
578
579 # ifdef SERVER_THREAD
580
581 /* data server thread runs on master process */
582 if(ARMCI_Absolute_id(group,grp_master)!=armci_master){
583 /*printf("\n%d: grp_master=%d %ld %ld \n",armci_me,ARMCI_Absolute_id(group,grp_master),idlist[0],idlist[1]);*/
584 armci_serv_attach_req(idlist, SHMIDLEN*sizeof(long), size,
585 &ptr, sizeof(void*));
586 ptr_ref_arr[grp_clus_me]= ptr; /* from server*/
587 }
588 else
589 ptr_ref_arr[grp_clus_me]=myptr;
590
591 # else
592 /* ask data server process to attach to the region and get ptr */
593 {
594 if(_armci_server_started) {
595 armci_serv_attach_req(idlist, SHMIDLEN*sizeof(long), size,
596 &ptr, sizeof(void*));
597 ptr_ref_arr[grp_clus_me]= ptr; /* from server*/
598 }
599 else /* server not yet started */
600 ptr_ref_arr[grp_clus_me]=myptr;
601 }
602
603 if(DEBUG_){
604 printf("%d:addresses server=%p myptr=%p\n",grp_me,ptr,myptr);
605 fflush(stdout);
606 }
607 # endif
608 }
609 /* exchange ref addr of shared memory region on every cluster node*/
610 {
611 int ratio = sizeof(void*)/sizeof(int);
612 if(DEBUG_)printf("%d: exchanging %ld ratio=%d\n",armci_me,
613 (long)ptr_arr[grp_me], ratio);
614 armci_msg_group_gop_scope(SCOPE_ALL, ptr_ref_arr, grp_nclus*ratio,
615 "+", ARMCI_INT, group);
616 }
617 }else {
618
619 ptr_ref_arr[grp_master] = myptr;
620
621 }
622
623 /* translate addresses for all cluster nodes */
624 for(cn = 0; cn < grp_nclus; cn++){
625
626 int master = grp_attr->grp_clus_info[cn].master;
627 offset = 0;
628
629 /* on local cluster node use myptr directly */
630 ptr = (grp_clus_me == cn) ? myptr: ptr_ref_arr[cn];
631
632 /* compute addresses pointing to the memory regions on cluster node*/
633 for(i=0; i< grp_attr->grp_clus_info[cn].nslave; i++){
634
635 /* NULL if request size is 0*/
636 ptr_arr[i+master] =(size_arr[i+master])? ((char*)ptr)+offset: NULL;
637 offset += size_arr[i+master];
638 }
639 }
640
641 # else
642
643 /* compute addresses for local cluster node */
644 offset =0;
645 for(i=0; i< grp_clus_nproc; i++) {
646
647 ptr_ref_arr[i] = (size_arr[i+grp_master])? ((char*)myptr)+offset : 0L;
648 offset += size_arr[i+grp_master];
649
650 }
651
652   /* exchange addresses with all other processes */
653 ptr_arr[grp_me] = (char*)ptr_ref_arr[grp_me-grp_master];
654 armci_exchange_address_grp(ptr_arr, grp_nproc, group);
655
656 /* overwrite entries for local cluster node with ptr_ref_arr */
657 bcopy((char*)ptr_ref_arr, (char*)(ptr_arr+grp_master), grp_clus_nproc*sizeof(void*));
658
659 # endif
660
661 /* armci_print_ptr(ptr_arr, bytes, size, myptr, offset);*/
662
663 #ifdef ALLOW_PIN
664 #if 0
665 /* ????? check ?????? */
666 armci_die("armci_malloc_group: Not yet implemented", 0);
667 if(grp_nclus>1)armci_global_region_exchange(myptr, (long) size_arr[grp_me]);
668 else
669 #endif
670 #endif
671 armci_msg_group_barrier(group);
672
673 /* free work arrays */
674 free(ptr_ref_arr);
675 free(size_arr);
676 ARMCI_PR_DBG("exit",0);
677 }
678 #endif /* ifdef MSG_COMMS_MPI */
679
680 #else
681
682 void armci_shmem_malloc(void* ptr_arr[], int bytes)
683 {
684 armci_die("armci_shmem_malloc should never be called on this system",0);
685 }
686 void armci_shmem_memget(armci_meminfo_t *meminfo, size_t size) {
687 armci_die("armci_shmem_memget should never be called on this system",0);
688 }
689 void* armci_shmem_memat(armci_meminfo_t *meminfo) {
690 armci_die("armci_shmem_memat should never be called on this system",0);
691 }
692 void armci_shmem_memctl(armci_meminfo_t *meminfo) {
693 armci_die("armci_shmem_memctl should never be called on this system",0);
694 }
695 # ifdef MSG_COMMS_MPI
696 void armci_shmem_malloc_group(void *ptr_arr[], armci_size_t bytes,
697 ARMCI_Group *group) {
698 armci_die("armci_shmem_malloc_group should never be called on this system",0);
699 }
700 # endif
701
702 #endif
703
704
705 #ifdef ALLOW_PIN
706 void *reg_malloc(size_t size)
707 {
708 #ifdef PORTALS
709 char *ptr;
710 extern void *shmalloc(size_t);
711 ARMCI_PR_DBG("enter",0);
712 ptr = malloc(size);
713 #else
714 char *ptr;
715 ARMCI_PR_DBG("enter",0);
716 ptr = malloc(size);
717 #endif
718 armci_region_register_loc(ptr,size);
719 ARMCI_PR_DBG("exit",0);
720 return(ptr);
721 }
722 #endif
723
724
725 /* public constructor to initialize the kr_malloc context */
726 void armci_krmalloc_init_localmem() {
727 #if defined(ALLOW_PIN)
728 kr_malloc_init(0, 0, 0, reg_malloc, 0, &ctx_localmem);
729 kr_malloc_init(0, 0, 0, malloc, 0, &ctx_mlocalmem);
730 ctx_mlocalmem.ctx_type = KR_CTX_LOCALMEM;
731 #elif defined(CRAY_SHMEM) && defined(CRAY_XT)
732 # ifdef CATAMOUNT
733 int units_avail = (cnos_shmem_size() - 1024 * 1024) / SHM_UNIT;
734 # else
735 extern size_t get_xt_heapsize();
736 int units_avail = (get_xt_heapsize() - 1024 * 1024) / SHM_UNIT;
737 # endif
738
739 if(DEBUG_)
740 {
741       fprintf(stderr,"%d:krmalloc_init_localmem: symheap=%llu,units(%d)=%d\n",
742               armci_me, (unsigned long long)SHM_UNIT*units_avail, SHM_UNIT, units_avail);
743 }
744 kr_malloc_init(SHM_UNIT, units_avail, units_avail, shmalloc, 0,
745 &ctx_localmem);
746 armci_shmalloc_exchange_offsets(&ctx_localmem);
747 #else
748
749 kr_malloc_init(0, 0, 0, malloc, 0, &ctx_localmem);
750
751 #endif
752
753 ctx_localmem.ctx_type = KR_CTX_LOCALMEM;
754 }
755
756 /**
757 * Local Memory Allocation and Free
758 */
759 void *PARMCI_Malloc_local(armci_size_t bytes) {
760 #if defined(PORTALS)
761 void *rptr;
762 #endif
763 ARMCI_PR_DBG("enter",0);
764 #if defined(PORTALS)
765 rptr=kr_malloc((size_t)bytes, &ctx_mlocalmem);
766 ARMCI_PR_DBG("exit",0);
767 return rptr;
768 #else
769 ARMCI_PR_DBG("exit",0);
770 return (void *)kr_malloc((size_t)bytes, &ctx_localmem);
771 #endif
772 }
773
774 int PARMCI_Free_local(void *ptr) {
775 ARMCI_PR_DBG("enter",0);
776 #if defined(PORTALS)
777 kr_free((char *)ptr, &ctx_mlocalmem);
778 #else
779 kr_free((char *)ptr, &ctx_localmem);
780 #endif
781 ARMCI_PR_DBG("exit",0);
782 return 0;
783 }
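
/* Minimal usage sketch (illustrative, not built): local allocation returns
 * memory that is valid only on the calling process but may still be used as
 * the local side of ARMCI data transfer operations.
 */
#if 0
void example_local_buffer(void)
{
    double *buf = (double*) PARMCI_Malloc_local(100*sizeof(double));
    if(!buf) armci_die("example_local_buffer: allocation failed", 100);
    buf[0] = 1.0;                       /* use the buffer locally */
    PARMCI_Free_local(buf);
}
#endif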
784
785 #ifdef REGION_ALLOC
786 static context_t ctx_region_shmem;
787 static long *reg_pids=NULL;
788
789 void armci_region_shm_malloc(void *ptr_arr[], size_t bytes)
790 {
791 long size=bytes;
792 void *ptr;
793 int i, peers=armci_clus_last-armci_clus_first+1;
794 extern void* armci_region_getcore(size_t);
795 extern int armci_region_register(int p, void **pinout, long pid, size_t bytes);
796
797 if(!reg_pids){
798 kr_malloc_init(0,0,500*1024*1024, armci_region_getcore, 0,
799 &ctx_region_shmem);
800 reg_pids = (long*)calloc(peers,sizeof(long));
801 reg_pids[armci_me -armci_clus_first] = getpid();
802 armci_msg_gop_scope(SCOPE_NODE,reg_pids, peers,"+",ARMCI_LONG);
803 }
804
805 ptr=kr_malloc((size_t)size, &ctx_region_shmem);
806 if(bytes) if(!ptr) armci_die("armci_region_shm_malloc: failed",bytes);
807
808 bzero((char*)ptr_arr,armci_nproc*sizeof(void*));
809 ptr_arr[armci_me] = ptr;
810
811 /* now combine individual addresses into a single array */
812 armci_exchange_address(ptr_arr, armci_nproc);
813
814 for(i=0; i<peers; i++)
815 if(i+armci_clus_first == armci_me) continue;
816 else if(ptr_arr[i+armci_clus_first])armci_region_register(i,ptr_arr+i+armci_clus_first,reg_pids[i], bytes);
817 }
818
819 #ifdef MSG_COMMS_MPI
820 void armci_region_shm_malloc_grp(void *ptr_arr[], size_t bytes, ARMCI_Group *group)
821 {
822 long size=bytes;
823 void *ptr;
824 armci_grp_attr_t *grp_attr=ARMCI_Group_getattr(group);
825 int grp_me, grp_nproc, grp_clus_me=grp_attr->grp_clus_me;
826 int i, peers=grp_attr->grp_clus_info[grp_clus_me].nslave;
827 int grp_clus_first=grp_attr->grp_clus_info[grp_clus_me].master;
828 extern void* armci_region_getcore(size_t);
829 extern int armci_region_register(int p, void **pinout, long pid, size_t bytes);
830
831 ARMCI_Group_rank(group, &grp_me);
832 ARMCI_Group_size(group, &grp_nproc);
833
834 ptr=kr_malloc((size_t)size, &ctx_region_shmem);
835 if(bytes) if(!ptr) armci_die("armci_region_shm_malloc_grp: failed",bytes);
836
837 bzero((char*)ptr_arr,grp_nproc*sizeof(void*));
838 ptr_arr[grp_me] = ptr;
839
840 /* now combine individual addresses into a single array */
841 armci_exchange_address_grp(ptr_arr, grp_nproc, group);
842
843 for(i=0; i<peers; i++)
844 if(i+grp_clus_first == grp_me) continue;
845 else if(ptr_arr[i+grp_clus_first]) {
846 int p;
847 /* get global peer id within SMP */
848 p = (ARMCI_Absolute_id(group, grp_clus_first+i) - armci_clus_first);
849 armci_region_register(p,ptr_arr+i+grp_clus_first,reg_pids[p], bytes);
850 }
851 }
852 #endif /* ifdef MSG_COMMS_MPI */
853 #endif
854
855
856 /*\ Collective Memory Allocation
857 * returns array of pointers to blocks of memory allocated by everybody
858 * Note: as the same shared memory region can be mapped at different locations
859 * in each process address space, the array might hold different values
860 * on every process. However, the addresses are legitimate
861 * and can be used in the ARMCI data transfer operations.
862 * ptr_arr[nproc]
863 \*/
864 int PARMCI_Malloc(void *ptr_arr[], armci_size_t bytes)
865 {
866 void *ptr;
867 ARMCI_PR_DBG("enter",0);
868 if(DEBUG_){
869      fprintf(stderr,"%d: bytes in armci_malloc %d\n",armci_me, (int)bytes);
870 fflush(stderr);
871 armci_msg_barrier();
872 }
873
874 #ifdef REGION_ALLOC
875 armci_region_shm_malloc(ptr_arr, bytes);
876 #else
877 # ifdef USE_MALLOC
878 if(armci_nproc == 1) {
879 ptr = kr_malloc((size_t) bytes, &ctx_localmem);
880 if(bytes) if(!ptr) armci_die("armci_malloc:malloc 1 failed",(int)bytes);
881 ptr_arr[armci_me] = ptr;
882 ARMCI_PR_DBG("exit",0);
883 return (0);
884 }
885 # endif
886
887 # ifdef SGIALTIX
888 if( ARMCI_Uses_shm() ) armci_altix_shm_malloc(ptr_arr,bytes);
889 # else
890 if( ARMCI_Uses_shm() ) armci_shmem_malloc(ptr_arr,bytes);
891 # endif
892 else {
893 /* on distributed-memory systems just malloc & collect all addresses */
894 ptr = kr_malloc(bytes, &ctx_localmem);
895 if(bytes) if(!ptr) armci_die("armci_malloc:malloc 2 failed",bytes);
896
897 bzero((char*)ptr_arr,armci_nproc*sizeof(void*));
898 ptr_arr[armci_me] = ptr;
899
900 # if defined(CRAY_SHMEM)
901 armci_shmalloc_exchange_address(ptr_arr);
902 # else
903
904 /* now combine individual addresses into a single array */
905 armci_exchange_address(ptr_arr, armci_nproc);
906 # endif
907 # ifdef ALLOW_PIN
908 armci_global_region_exchange(ptr, (long) bytes);
909 # endif
910 }
911 #endif
912 ARMCI_PR_DBG("exit",0);
913 return(0);
914 }
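
/* Minimal collective usage sketch (illustrative, not built): every process
 * passes an array of armci_nproc pointer slots; on return ptr_arr[p] is a
 * legitimate ARMCI address of process p's block and can be used as the
 * remote address in ARMCI data transfer calls.  PARMCI_Put and PARMCI_Barrier
 * are assumed to be the put/barrier primitives declared elsewhere in ARMCI.
 */
#if 0
void example_collective_alloc(void)
{
    void **ptr_arr = (void**) malloc(armci_nproc*sizeof(void*));
    double local = 3.14;
    int next = (armci_me+1)%armci_nproc;

    PARMCI_Malloc(ptr_arr, sizeof(double));                   /* collective    */
    PARMCI_Put(&local, ptr_arr[next], sizeof(double), next);  /* one-sided put */
    PARMCI_Barrier();
    PARMCI_Free(ptr_arr[armci_me]);                           /* collective    */
    free(ptr_arr);
}
#endif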
915
916 /*\
917 * Just a wrapper on PARMCI_Malloc to keep old code from breaking
918 \*/
919 int PARMCI_Malloc_memdev(void *ptr_arr[], armci_size_t bytes, const char *device)
920 {
921 return PARMCI_Malloc(ptr_arr,bytes);
922 }
923
924
925
926 /*\ shared memory is released to kr_malloc only on the master process;
927  * with a data server, plain malloc cannot be used
928 \*/
929 int PARMCI_Free(void *ptr)
930 {
931 ARMCI_PR_DBG("enter",0);
932 if(!ptr)return 1;
933
934 #ifndef SGIALTIX
935 # ifdef REGION_ALLOC
936 kr_free(ptr, &ctx_region_shmem);
937 # else
938
939 # if (defined(SYSV) || defined(WIN32) || defined(MMAP)) && !defined(NO_SHM)
940 # ifdef USE_MALLOC
941 if(armci_nproc > 1)
942 # endif
943 if(ARMCI_Uses_shm()){
944 if(armci_me==armci_master){
945 # ifdef RMA_NEEDS_SHMEM
946 Free_Shmem_Ptr(0,0,ptr);
947 # else
948 if(armci_clus_info[armci_clus_me].nslave>1)
949 Free_Shmem_Ptr(0,0,ptr);
950 else kr_free(ptr, &ctx_localmem);
951 # endif
952 }
953 ptr = NULL;
954 return 0;
955 }
956 # endif
957 kr_free(ptr, &ctx_localmem);
958 # endif /* REGION_ALLOC */
959 #else
960 /* Altix */
961 if( ARMCI_Uses_shm() ) kr_free(ptr, &altix_ctx_shmem);
962 else kr_free(ptr, &ctx_localmem);
963 #endif
964
965 ptr = NULL;
966 ARMCI_PR_DBG("exit",0);
967 return 0;
968 }
969
970 int PARMCI_Free_memdev(void *ptr)
971 {
972 return PARMCI_Free(ptr);
973 }
974
975
976 int ARMCI_Uses_shm()
977 {
978 int uses=0;
979
980 #if (defined(SYSV) || defined(WIN32) || defined(MMAP) ||defined(HITACHI)) && !defined(NO_SHM)
981 # ifdef RMA_NEEDS_SHMEM
982 if(armci_nproc >1) uses= 1; /* always unless serial mode */
983 # else
984 if(armci_nproc != armci_nclus)uses= 1; /* only when > 1 node used */
985 # endif
986 #endif
987 if(DEBUG_) fprintf(stderr,"%d:uses shmem %d\n",armci_me, uses);
988 return uses;
989 }
990 #ifdef MSG_COMMS_MPI
991
992 int ARMCI_Uses_shm_grp(ARMCI_Group *group)
993 {
994 int uses=0, grp_me, grp_nproc;
995 /*int grp_nclus;*/
996 /*armci_grp_attr_t *grp_attr=ARMCI_Group_getattr(group);*/
997 ARMCI_PR_DBG("enter",0);
998
999 ARMCI_Group_size(group, &grp_nproc);
1000 ARMCI_Group_rank(group, &grp_me);
1001
1002 #if (defined(SYSV) || defined(WIN32) || defined(MMAP) ||defined(HITACHI)) && !defined(NO_SHM)
1003 # ifdef RMA_NEEDS_SHMEM
1004 if(grp_nproc >1) uses= 1; /* always unless serial mode */
1005 # else
1006 #if 0
1007 grp_nclus = grp_attr->grp_nclus;
1008 if(grp_nproc != grp_nclus)uses= 1; /* only when > 1 node used */
1009 #else
1010 if(armci_nproc != armci_nclus)uses= 1; /* only when > 1 node used */
1011 #endif
1012 # endif
1013 #endif
1014 if(DEBUG_) fprintf(stderr,"%d (grp_id=%d):uses shmem %d\n",armci_me, grp_me, uses);
1015 ARMCI_PR_DBG("exit",0);
1016 return uses;
1017 }
1018
1019 /*\ ************** Begin Group Collective Memory Allocation ******************
1020 * returns array of pointers to blocks of memory allocated by everybody
1021 * Note: as the same shared memory region can be mapped at different locations
1022 * in each process address space, the array might hold different values
1023 * on every process. However, the addresses are legitimate
1024 * and can be used in the ARMCI data transfer operations.
1025 * ptr_arr[nproc]
1026 \*/
1027 int ARMCI_Malloc_group(void *ptr_arr[], armci_size_t bytes,
1028 ARMCI_Group *group)
1029 {
1030 void *ptr;
1031 int grp_me, grp_nproc;
1032 ARMCI_PR_DBG("enter",0);
1033 ARMCI_Group_size(group, &grp_nproc);
1034 ARMCI_Group_rank(group, &grp_me);
1035 if(DEBUG_)fprintf(stderr,"%d (grp_id=%d) bytes in armci_malloc_group %d\n",
1036 armci_me, grp_me, (int)bytes);
1037 #ifdef REGION_ALLOC
1038 armci_region_shm_malloc_grp(ptr_arr, bytes, group);
1039 #else
1040 #ifdef USE_MALLOC
1041 if(grp_nproc == 1) {
1042 ptr = kr_malloc((size_t) bytes, &ctx_localmem);
1043 if(bytes) if(!ptr) armci_die("armci_malloc_group:malloc 1 failed",(int)bytes);
1044 ptr_arr[grp_me] = ptr;
1045 ARMCI_PR_DBG("exit",0);
1046 return (0);
1047 }
1048 #endif
1049
1050 if( ARMCI_Uses_shm_grp(group) ) {
1051 # ifdef SGIALTIX
1052 armci_altix_shm_malloc_group(ptr_arr,bytes,group);
1053 # else
1054 armci_shmem_malloc_group(ptr_arr,bytes,group);
1055 # endif
1056 }
1057 else {
1058 /* on distributed-memory systems just malloc & collect all addresses */
1059 ptr = kr_malloc(bytes, &ctx_localmem);
1060 if(bytes) if(!ptr) armci_die("armci_malloc:malloc 2 failed",bytes);
1061
1062 bzero((char*)ptr_arr,grp_nproc*sizeof(void*));
1063 ptr_arr[grp_me] = ptr;
1064
1065 /* now combine individual addresses into a single array */
1066 #if defined(CRAY_SHMEM)
1067 armci_shmalloc_exchange_address_grp(ptr_arr, group);
1068 #else
1069 armci_exchange_address_grp(ptr_arr, grp_nproc, group);
1070 #endif
1071
1072 # ifdef ALLOW_PIN
1073 # if 0
1074 /* ????? check ?????? */
1075 armci_die("armci_malloc_group: Not yet implemented", 0);
1076 armci_global_region_exchange(ptr, (long) bytes);
1077 # endif
1078 # endif
1079 }
1080 #endif
1081 ARMCI_PR_DBG("exit",0);
1082 return(0);
1083 }
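
/* Minimal group usage sketch (illustrative, not built): given an already
 * created ARMCI_Group (creation not shown), every member passes grp_nproc
 * pointer slots and indexes the result by its group rank.
 */
#if 0
void example_group_alloc(ARMCI_Group *grp)
{
    int grp_me, grp_nproc;
    void **ptr_arr;

    ARMCI_Group_rank(grp, &grp_me);
    ARMCI_Group_size(grp, &grp_nproc);
    ptr_arr = (void**) malloc(grp_nproc*sizeof(void*));
    ARMCI_Malloc_group(ptr_arr, 1024, grp);      /* collective over the group */
    /* ... use ptr_arr[i] as the remote address of group member i ... */
    ARMCI_Free_group(ptr_arr[grp_me], grp);
    free(ptr_arr);
}
#endif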
1084 /*\
1085 * Just a wrapper on ARMCI_Malloc_group to keep old code from breaking
1086 \*/
1087 int ARMCI_Malloc_group_memdev(void *ptr_arr[], armci_size_t bytes,
1088 ARMCI_Group *group, const char *device)
1089 {
1090 return ARMCI_Malloc_group(ptr_arr, bytes, group);
1091 }
1092
1093
1094 /*\ shared memory is released to kr_malloc only on the master process;
1095  * with a data server, plain malloc cannot be used
1096 \*/
1097 int ARMCI_Free_group(void *ptr, ARMCI_Group *group)
1098 {
1099 int grp_me, grp_nproc, grp_master, grp_clus_me;
1100 armci_grp_attr_t *grp_attr=ARMCI_Group_getattr(group);
1101 ARMCI_PR_DBG("enter",0);
1102
1103 if(!ptr)return 1;
1104
1105 ARMCI_Group_size(group, &grp_nproc);
1106 ARMCI_Group_rank(group, &grp_me);
1107 if(grp_me == MPI_UNDEFINED) { /* check if the process is in this group */
1108       armci_die("ARMCI_Free_group: process is not a member in this group",
1109 armci_me);
1110 }
1111 /* get the group cluster info */
1112 grp_clus_me = grp_attr->grp_clus_me;
1113 grp_master = grp_attr->grp_clus_info[grp_clus_me].master;
1114
1115 #ifndef SGIALTIX
1116 #ifdef REGION_ALLOC
1117 kr_free(ptr, &ctx_region_shmem);
1118 #else
1119 # if (defined(SYSV) || defined(WIN32) || defined(MMAP)) && !defined(NO_SHM)
1120 # ifdef USE_MALLOC
1121 if(grp_nproc > 1)
1122 # endif
1123 if(ARMCI_Uses_shm_grp(group)){
1124 if(grp_me == grp_master) {
1125 # ifdef RMA_NEEDS_SHMEM
1126 Free_Shmem_Ptr(0,0,ptr);
1127 # else
1128 if(armci_clus_info[armci_clus_me].nslave>1) Free_Shmem_Ptr(0,0,ptr);
1129 else kr_free(ptr, &ctx_localmem);
1130 # endif
1131 }
1132 ptr = NULL;
1133 ARMCI_PR_DBG("exit",0);
1134 return 0;
1135 }
1136 # endif
1137 kr_free(ptr, &ctx_localmem);
1138 #endif /* ifdef REGION_ALLOC */
1139 #else /* SGI Altix */
1140 if(ARMCI_Uses_shm_grp(group))
1141 kr_free(ptr, &altix_ctx_shmem_grp);
1142 else kr_free(ptr, &ctx_localmem);
1143
1144 #endif /* SGIALTIX */
1145
1146 ptr = NULL;
1147 ARMCI_PR_DBG("exit",0);
1148 return 0;
1149 }
1150 /* ***************** End Group Collective Memory Allocation ******************/
1151 #endif
1152
1153 /* ************** Begin Non-Collective Memory Allocation ******************
1154 * Prototype similar to SysV shared memory.
1155 */
1156
1157 /**
1158  * CHECK: On Altix we are forced to use SysV, as shmalloc is collective. We
1159  * could use a preallocated shmalloc region; however, that may still NOT solve
1160  * our problem...
1161  * NOTE: the "int memflg" option is reserved for future optimizations.
1162 */
1163 void PARMCI_Memget(size_t bytes, armci_meminfo_t *meminfo, int memflg) {
1164
1165 void *myptr=NULL;
1166     void *armci_ptr=NULL; /* legal ARMCI ptr used in ARMCI data xfer ops*/
1167 size_t size = bytes;
1168
1169 if(size<=0) armci_die("PARMCI_Memget: size must be > 0", (int)size);
1170 if(meminfo==NULL) armci_die("PARMCI_Memget: Invalid arg #2 (NULL ptr)",0);
1171 if(memflg!=0) armci_die("PARMCI_Memget: Invalid memflg", memflg);
1172
1173 if( !ARMCI_Uses_shm() )
1174 {
1175 armci_ptr = myptr = kr_malloc(size, &ctx_localmem);
1176 if(size) if(!myptr) armci_die("PARMCI_Memget failed", (int)size);
1177
1178 /* fill the meminfo structure */
1179 meminfo->armci_addr = armci_ptr;
1180 meminfo->addr = myptr;
1181 meminfo->size = size;
1182 meminfo->cpid = armci_me;
1183 /* meminfo->attr = NULL; */
1184 }
1185 else
1186 {
1187 armci_shmem_memget(meminfo, size);
1188 }
1189
1190 #ifdef ALLOW_PIN
1191 # if 0 /* disabled for now. May not go thru' the fast zero-copy path */
1192 if(armci_nclus>1)
1193       armci_global_region_exchange(meminfo->addr, (long)size);
1194 # endif
1195 #endif
1196
1197 if(DEBUG_){
1198 printf("%d: PARMCI_Memget: addresses server=%p myptr=%p bytes=%ld\n",
1199 armci_me, meminfo->armci_addr, meminfo->addr, (long)bytes);
1200 fflush(stdout);
1201 }
1202 }
1203
1204 void* PARMCI_Memat(armci_meminfo_t *meminfo, long offset) {
1205 void *ptr=NULL;
1206
1207 if(meminfo==NULL) armci_die("PARMCI_Memat: Invalid arg #1 (NULL ptr)",0);
1208
1209 if(meminfo->cpid==armci_me) { ptr = meminfo->addr; return ptr; }
1210
1211 if( !ARMCI_Uses_shm())
1212 {
1213 ptr = meminfo->addr;
1214 }
1215 else
1216 {
1217 ptr = armci_shmem_memat(meminfo);
1218 ptr = ((char*)ptr) + offset;
1219 }
1220
1221 if(DEBUG_)
1222 {
1223 printf("%d:PARMCI_Memat: attached addr mptr=%p size=%ld\n",
1224 armci_me, ptr, (long)meminfo->size); fflush(stdout);
1225 }
1226
1227 return ptr;
1228 }
1229
1230 void ARMCI_Memdt(armci_meminfo_t *meminfo, long offset) {
1231 /**
1232    * Do nothing. Maybe we need to add reference counting in the future. This
1233 * is to avoid the case of dangling pointers when the creator of shm
1234 * segment calls Memctl and other processes are still attached to this
1235 * segment
1236 */
1237 }
1238
1239 void ARMCI_Memctl(armci_meminfo_t *meminfo) {
1240
1241    if(meminfo==NULL) armci_die("ARMCI_Memctl: Invalid arg #1 (NULL ptr)",0);
1242
1243 /* only the creator can delete the segment */
1244 if(meminfo->cpid == armci_me)
1245 {
1246 if( !ARMCI_Uses_shm() )
1247 {
1248 void *ptr = meminfo->addr;
1249 kr_free(ptr, &ctx_localmem);
1250 }
1251 else
1252 {
1253 armci_shmem_memctl(meminfo);
1254 }
1255 }
1256
1257 meminfo->addr = NULL;
1258 meminfo->armci_addr = NULL;
1259 /* if(meminfo->attr!=NULL) free(meminfo->attr); */
1260 }
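
/* Minimal non-collective sketch (illustrative, not built): the creator calls
 * PARMCI_Memget and ships the armci_meminfo_t to a peer by some out-of-band
 * means (not shown); the peer attaches with PARMCI_Memat, detaches with
 * ARMCI_Memdt, and only the creator finally releases the segment with
 * ARMCI_Memctl once the peers are done.
 */
#if 0
void example_noncollective(int i_am_creator, armci_meminfo_t *meminfo)
{
    if(i_am_creator) {
        PARMCI_Memget(4096, meminfo, 0);          /* create + fill meminfo       */
        /* ... send *meminfo to interested peers ... */
        ARMCI_Memctl(meminfo);                    /* release when peers detached */
    } else {
        void *remote = PARMCI_Memat(meminfo, 0);  /* attach (or remote address)  */
        /* ... use "remote" in ARMCI data transfer operations ... */
        ARMCI_Memdt(meminfo, 0);                  /* detach (currently a no-op)  */
    }
}
#endif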
1261
1262 /* ***************** End Non-Collective Memory Allocation ******************/
1263
1264