1 #if HAVE_CONFIG_H
2 # include "config.h"
3 #endif
4
/* $Id: regions.c,v 1.14.2.3 2007-10-05 16:51:51 manoj Exp $ interface to keep track of memory regions across the cluster */
/*
 * armci_region_init         - allocates the lists of regions, initialization
 * armci_region_register_shm - registers shared memory on the current node
 * armci_region_register_loc - registers local memory
 * armci_region_clus_record  - stores info on a pinned region on a given cluster node
 * armci_region_clus_found   - returns the index of the registered region that
 *                             contains the address, or -1 if there is none
 * armci_region_loc_found    - same for local memory
 * armci_region_both_found   - returns 1 if local and remote are found, otherwise 0
 *
 */
16
17
/* 7/7/06
 * REGIONS_REQUIRE_MEMHDL was meant for all networks (VIA, InfiniBand, etc.)
 * that associate a handle with remote/local memory, which is required for
 * RDMA. Coincidentally, all of these networks also used a server thread,
 * so server_regions were allocated and enabled when REGIONS_REQUIRE_MEMHDL
 * was defined.
 * With Catamount, we require the Portals memory descriptors to be stored.
 * There is no server, but we still need the server_regions to post matching
 * MDs that accept all incoming requests.
 */
28
29 #include "armcip.h"
30 #if HAVE_STDLIB_H
31 # include <stdlib.h>
32 #endif
33 #if HAVE_STDIO_H
34 # include <stdio.h>
35 #endif
36 #include "copy.h"
37
38 /*this should match similar def in portals.c vapi.c and openib.c */
39 #ifdef PORTALS
40 # define MAX_REGIONS 24
41 #else
42 # define MAX_REGIONS 8
43 #endif
44
/*
 * One registered memory interval.  The lookup routines in this file treat it
 * as half-open: an address p belongs to the region when start <= p < end.
 */
typedef struct {
    void *start;                /* first byte of the region */
    void *end;                  /* one past the last byte (start + size) */
#ifdef REGIONS_REQUIRE_MEMHDL
    ARMCI_MEMHDL_T memhdl;      /* network handle filled in when the region is pinned */
#endif
} armci_region_t;
52
53 typedef struct {
54 long n;
55 armci_region_t list[MAX_REGIONS];
56 } armci_reglist_t;
57
58 static int allow_pin=0;
59 static armci_reglist_t *clus_regions; /* cluster shared/remote memory */
60 #ifdef REGIONS_REQUIRE_MEMHDL
61 static armci_reglist_t *serv_regions; /* server regions */
62 #endif
63 static armci_reglist_t loc_regions_arr; /* local memory */
64 static void *needs_pin_shmptr=NULL, *needs_pin_ptr=NULL;
65 static size_t needs_pin_shmsize=0, needs_pin_size=0;
66
67 #ifdef REGIONS_REQUIRE_MEMHDL
68 extern int armci_pin_contig_hndl(void *ptr, size_t bytes, ARMCI_MEMHDL_T *memhdl);
69 extern int armci_server_pin_contig(void *ptr, size_t bytes, ARMCI_MEMHDL_T *memhdl);
70 #else
71 extern int armci_pin_contig1(void *ptr, size_t bytes);
72 #endif
73 static void **exch_list=(void**)0;
74 static char exch_loc[MAX_REGIONS];
75 static char exch_rem[MAX_REGIONS];
76 void armci_serv_register_req(void *start,long bytes, ARMCI_MEMHDL_T *reg_mem);
77
78
/*
 * Append the interval [start, end) to the region list `reg`.
 *
 * Only the start/end fields of the new entry are written; a memory handle,
 * where one exists, has to be stored in the slot by the caller.
 *
 * Returns 1 when the region was stored and 0 when the list already holds
 * MAX_REGIONS entries (the list is left untouched in that case).
 *
 * With DEBUG_ defined the call aborts through armci_die() when `start`
 * already falls inside a recorded region.
 */
static int armci_region_record(void *start, void *end, armci_reglist_t *reg)
{
    armci_region_t *slot;
#ifdef DEBUG_
    long i;
    /* The check has to inspect the individual entries (the list itself has
     * no start/end) and use the same containment test as the lookups. */
    for(i=0; i<reg->n; i++){
        armci_region_t *r = reg->list+i;
        if(r->start <= start && r->end > start)
            armci_die("armci_region_record: already recorded",(int)i);
    }
#endif
    if(reg->n >= MAX_REGIONS) return 0;

    slot = reg->list + reg->n;
    slot->start = start;
    slot->end   = end;
    reg->n++;
    return 1;
}
94
/*
 * Pin `size` bytes starting at `start` and, on success, append the interval
 * to `reg`.
 *
 * Nothing happens when the list is full or when there is at most one cluster
 * node.  A failed pin is reported on stdout and the region is not recorded.
 * When memory handles are in use the handle is written straight into the
 * slot that the subsequent record call will occupy.
 */
static void armci_region_register(void *start, long size, armci_reglist_t *reg)
{
    int pinned;
#ifdef REGIONS_REQUIRE_MEMHDL
    armci_region_t *slot;
#endif
    ARMCI_PR_DBG("enter",0);

    if(armci_nclus <= 1 || reg->n >= MAX_REGIONS) return;

#ifdef REGIONS_REQUIRE_MEMHDL
    slot = reg->list + reg->n;
#  if defined(PORTALS)
    /* Network-specific exception kept to avoid significant changes in the
     * portals layer: that layer keeps a list of memory descriptors for each
     * allocated region and uses them round robin.  The region index is
     * stored in the handle so the layer can tell which region the memory
     * used by a communication call corresponds to.
     */
    slot->memhdl.regid = (int)reg->n;
#  endif
    pinned = armci_pin_contig_hndl(start, size, &slot->memhdl);
#else
    pinned = armci_pin_contig1(start, size);
#endif

    if(!pinned){
        printf("%d pin failed %p bytes=%ld\n",armci_me,start,size);
        fflush(stdout);
        return;
    }

    (void) armci_region_record(start, (char*)start + size, reg);
}
131
132
armci_region_register_shm(void * start,long size)133 void armci_region_register_shm(void *start, long size)
134 {
135 #ifdef PORTALS
136 armci_reglist_t *reg = clus_regions+armci_clus_me;
137 #endif
138 if(allow_pin)
139 armci_region_register(start, size, clus_regions+armci_clus_me);
140 else{
141 needs_pin_shmptr = start;
142 needs_pin_shmsize= size;
143 }
144
145 #ifdef PORTALS
146 /* we mark the region as local region so that portals layer uses
147 * the md from memhdl instead of any region list*/
148 (reg->list+(reg->n-1))->memhdl.islocal=0;
149 #endif
150
151 #if 0
152 if(allow_pin){
153 printf("\n%d:%d registering shm %p bytes=%ld\n",armci_me,allow_pin,start,size);
154 fflush(stdout);
155 }
156 #endif
157 }
158
159
armci_region_register_loc(void * start,long size)160 void armci_region_register_loc(void *start, long size)
161 {
162 #ifdef PORTALS
163 armci_reglist_t *reg = &loc_regions_arr;
164 #endif
165 if(allow_pin)armci_region_register(start, size, &loc_regions_arr);
166 else{
167 needs_pin_ptr = start;
168 needs_pin_size= size;
169 }
170
171 #ifdef PORTALS
172 {
173 extern int _armci_malloc_local_region;
174 if(_armci_malloc_local_region){
175 (reg->list+(reg->n-1))->memhdl.islocal=1;
176 _armci_malloc_local_region=0;
177 }
178 else
179 (reg->list+(reg->n-1))->memhdl.islocal=0;
180 }
181 #endif
182
183 #ifdef DEBUG_
184 if(allow_pin){
185 printf("\n%d:%d registered local %p bytes=%ld\n",armci_me,allow_pin,start,size);
186 fflush(stdout);
187 }
188 #endif
189 }
190
191
armci_region_clus_record(int node,void * start,long size)192 void armci_region_clus_record(int node, void *start, long size)
193 {
194 if(node > armci_nclus || node <0 )
195 armci_die("armci_region_remote: bad node ",node);
196
197 (void)armci_region_record(start,((char*)start)+size,clus_regions+node);
198 }
199
200
armci_region_init()201 void armci_region_init()
202 {
203 ARMCI_PR_DBG("enter",0);
204 allow_pin =1;
205 clus_regions=(armci_reglist_t*)calloc(armci_nclus,sizeof(armci_reglist_t));
206 if(!clus_regions)armci_die("armci_region_init: calloc failed",armci_nclus);
207 #ifdef REGIONS_REQUIRE_MEMHDL
208 serv_regions=(armci_reglist_t*)calloc(armci_nclus,sizeof(armci_reglist_t));
209 if(!serv_regions)armci_die("armci_region_init: calloc failed",armci_nclus);
210 #endif
211 exch_list = (void**)calloc(2*armci_nclus, sizeof(void*));
212 if(!exch_list) armci_die("armci_region_init: calloc 2 failed",armci_nclus);
213 bzero(exch_loc,sizeof(exch_loc));
214 bzero(exch_rem,sizeof(exch_rem));
215
216 if(needs_pin_ptr) armci_region_register_loc(needs_pin_ptr,needs_pin_size);
217 if(needs_pin_shmptr) armci_region_register_shm(needs_pin_shmptr,needs_pin_shmsize);
218 ARMCI_PR_DBG("exit",0);
219 }
220
armci_region_destroy()221 void armci_region_destroy()
222 {
223 armci_reglist_t *reg = &loc_regions_arr;
224 int i;
225 #ifdef REGIONS_REQUIRE_MEMHDL
226 ARMCI_MEMHDL_T *loc_memhdl;
227 #endif
228 if(!allow_pin) return;
229 for(i=0; i<reg->n; i++){
230 #ifdef REGIONS_REQUIRE_MEMHDL
231 loc_memhdl=&((reg->list+i)->memhdl);
232 armci_network_client_deregister_memory(loc_memhdl);
233 #endif
234 }
235
236 reg=clus_regions+armci_clus_me;
237 for(i=0; i<reg->n; i++){
238 #ifdef REGIONS_REQUIRE_MEMHDL
239 loc_memhdl=&((reg->list+i)->memhdl);
240 armci_network_client_deregister_memory(loc_memhdl);
241 #endif
242 }
243 }
244
/*
 * Server-side teardown: hands the memory handle of every server region of
 * this cluster node to armci_network_server_deregister_memory().
 * Compiles to an empty function without REGIONS_REQUIRE_MEMHDL.
 */
void armci_server_region_destroy()
{
#ifdef REGIONS_REQUIRE_MEMHDL
    armci_reglist_t *mine;
    int k = 0;

    ARMCI_PR_DBG("enter",0);
    mine = serv_regions + armci_clus_me;
    while(k < mine->n){
        armci_network_server_deregister_memory(&mine->list[k].memhdl);
        k++;
    }
    ARMCI_PR_DBG("exit",0);
#endif
}
260
261
armci_region_clus_found(int node,void * start,int size)262 int armci_region_clus_found(int node, void *start, int size)
263 {
264 armci_reglist_t *reg=clus_regions+node;
265 int i,found=-1;
266 if(!allow_pin) return 0;
267 if(node > armci_nclus || node <0 )
268 armci_die("armci_region_clus_found: bad node ",node);
269 for(i=0; i<reg->n; i++)
270 if((reg->list+i)->start <= start && (reg->list+i)->end > start){
271 found=i; break;
272 }
273
274 return(found);
275 }
276
armci_region_loc_found(void * start,int size)277 int armci_region_loc_found(void *start, int size)
278 {
279 armci_reglist_t *reg = &loc_regions_arr;
280 int i,found=-1;
281 ARMCI_PR_DBG("enter",0);
282 if(!allow_pin) return 0;
283 for(i=0; i<reg->n; i++)
284 if((reg->list+i)->start <= start && (reg->list+i)->end > start){
285 found=i;
286 break;
287 }
288 #ifdef DEBUG
289 if(found){
290 printf("%d: found loc %d n=%ld (%p,%p) %p\n",armci_me,found,reg->n,
291 (reg->list)->start,(reg->list)->end, start); fflush(stdout);
292 }
293 #endif
294 ARMCI_PR_DBG("exit",0);
295 return(found);
296 }
297
298 #ifdef REGIONS_REQUIRE_MEMHDL
/*
 * Check that both ends of a transfer lie in registered memory and hand back
 * their memory handles.
 *
 * `loc` is searched first in the local regions and then in this node's
 * shared regions; `rem` is searched in the server regions of `node`.
 * Only the first byte of each buffer is tested; `size` is not consulted.
 *
 * *loc_memhdl receives the handle of the local region.  Under PORTALS an
 * unregistered local address is accepted and *loc_memhdl is set to NULL;
 * otherwise the function returns 0 without touching the outputs.
 * *rem_memhdl is written only when the remote region is found.
 *
 * Returns 1 when the remote region (and, outside PORTALS, the local region)
 * was found, 0 otherwise or when pinning is not enabled.
 */
int armci_region_both_found_hndl(void *loc, void *rem, int size, int node,
                                 ARMCI_MEMHDL_T **loc_memhdl, ARMCI_MEMHDL_T **rem_memhdl)
{
    armci_reglist_t *tab;
    armci_region_t *hit = NULL;
    int k;

    if(!allow_pin) return 0;

    /* private (local) memory first */
    tab = &loc_regions_arr;
    for(k=0; k<tab->n && hit==NULL; k++){
        armci_region_t *cand = tab->list + k;
        if(cand->start <= loc && loc < cand->end) hit = cand;
    }

    /* otherwise it might be shared memory of this node */
    if(hit == NULL){
        tab = clus_regions + armci_clus_me;
        for(k=0; k<tab->n && hit==NULL; k++){
            armci_region_t *cand = tab->list + k;
            if(cand->start <= loc && loc < cand->end) hit = cand;
        }
    }

#ifdef PORTALS
    /* an unregistered local buffer is tolerated; it simply has no handle */
    *loc_memhdl = (hit != NULL) ? &hit->memhdl : NULL;
#else
    if(hit == NULL) return 0;
    *loc_memhdl = &hit->memhdl;
#endif

    /* now the remote side, in the server regions of the target node */
    tab = serv_regions + node;
    for(k=0; k<tab->n; k++){
        armci_region_t *cand = tab->list + k;
        if(cand->start <= rem && rem < cand->end){
            *rem_memhdl = &cand->memhdl;
            return 1;
        }
    }
    return 0;
}
375
376
/*
 * Find the server region of cluster node `node` that contains `rem`.
 *
 * On success stores the region's memory handle in *remhdl and returns 1;
 * otherwise returns 0 and leaves *remhdl untouched.  Only the first byte is
 * tested (`size` is not consulted), and neither allow_pin nor the range of
 * `node` is checked here.
 */
int armci_region_remote_found_hndl(void *rem,int size, int node, ARMCI_MEMHDL_T **remhdl)
{
    armci_reglist_t *srv = serv_regions + node;
    int k;

    for(k=0; k<srv->n; k++){
        armci_region_t *cand = srv->list + k;
        if(cand->start <= rem && rem < cand->end){
            *remhdl = &cand->memhdl;
            return 1;
        }
    }
    return 0;
}
391
/*
 * Find the server region of this cluster node that contains `loc`.
 *
 * On success stores the region's memory handle in *loc_memhdl and returns 1;
 * otherwise returns 0.  When pinning is not enabled a message is printed and
 * 0 is returned.  `node` is not used by the lookup.
 */
int get_armci_region_local_hndl(void *loc, int node,ARMCI_MEMHDL_T **loc_memhdl)
{
    armci_reglist_t *srv;
    int k;

    if(!allow_pin){
        printf("inside get_armci_region_local_hndl : case allow_pin = 0\n");
        return 0;
    }

    srv = serv_regions + armci_clus_me;
    for(k=0; k<srv->n; k++){
        armci_region_t *cand = srv->list + k;
        if(cand->start <= loc && loc < cand->end){
            *loc_memhdl = &cand->memhdl;
            return 1;
        }
    }
    return 0;
}
425
426
armci_region_serv_found(int node,void * start,int size)427 int armci_region_serv_found(int node,void *start,int size)
428 {
429 armci_reglist_t *reg=serv_regions+node;
430 int i,found=-1;
431 if(!allow_pin) return 0;
432 if(node > armci_nclus || node <0 )
433 armci_die("armci_region_serv_found: bad node ",node);
434 for(i=0; i<reg->n; i++)
435 if((reg->list+i)->start <= start && (reg->list+i)->end > start){found=i; break;}
436
437 return(found);
438 }
439
440 #endif
441
442
armci_region_both_found(void * loc,void * rem,int size,int node)443 int armci_region_both_found(void *loc, void *rem, int size, int node)
444 {
445 armci_reglist_t *reg = &loc_regions_arr;
446 int i,found=0;
447 if(!allow_pin) return 0;
448
449 /* first scan for local */
450 for(i=0; i<reg->n; i++)
451 if((reg->list+i)->start <= loc && (reg->list+i)->end > loc){found=1; break;}
452
453 if(!found){ /* might be local shared */
454 reg=clus_regions+armci_clus_me;
455 for(i=0; i<reg->n; i++)
456 if((reg->list+i)->start <= loc && (reg->list+i)->end > loc){found=1; break;}
457 }
458 if(!found) return 0;
459
460 /* now check remote shared */
461 reg=clus_regions+node;
462 for(i=0; i<reg->n; i++)
463 if((reg->list+i)->start <= rem && (reg->list+i)->end > rem){found=2;break;}
464
465 #if 0
466 if(found==2){printf("%d: found both %d\n",armci_me,node); fflush(stdout); }
467 #endif
468 if(found==2) return 1;
469 else return 0;
470 }
471
472
473
armci_region_exchange(void * start,long size)474 void armci_region_exchange(void *start, long size)
475 {
476 int found=0, i;
477 armci_region_t *reg=0;
478 #ifdef REGIONS_REQUIRE_MEMHDL
479 ARMCI_MEMHDL_T *hdlarr;
480
481 hdlarr = calloc(armci_nclus,sizeof(ARMCI_MEMHDL_T));
482 #endif
483 if(!allow_pin)return;
484 if(armci_nclus<=1)return;
485
486 found=armci_region_clus_found(armci_clus_me, start,size);
487 if(found>-1){
488 if(!exch_rem[found]){
489 reg = (clus_regions+armci_clus_me)->list+found;
490 exch_rem[found]=1;
491 }
492 }else{
493 found= armci_region_loc_found(start,size);
494 if(found>-1){
495 if(!exch_loc[found]){
496 reg = (&loc_regions_arr)->list+found;
497 exch_loc[found]=1;
498 }
499 }
500 }
501
502 bzero(exch_list,2*armci_nclus*sizeof(void *));
503 if( reg && (armci_me == armci_master)){
504 exch_list[2*armci_clus_me] = reg->start;
505 exch_list[2*armci_clus_me+1] = reg->end;
506 #ifdef REGIONS_REQUIRE_MEMHDL
507 armci_copy(®->memhdl,&hdlarr[armci_clus_me],sizeof(ARMCI_MEMHDL_T));
508 #endif
509 }
510
511 /* exchange info on new regions with other nodes */
512 armci_exchange_address(exch_list,2*armci_nclus);
513 #ifdef REGIONS_REQUIRE_MEMHDL
514 i = armci_nclus*sizeof(ARMCI_MEMHDL_T)/sizeof(int);
515 armci_msg_gop_scope(SCOPE_ALL,hdlarr,i,"+",ARMCI_INT);
516 #endif
517 for(i=0; i<armci_nclus; i++){
518 armci_reglist_t *r=clus_regions+i;
519 if(i==armci_clus_me) continue;
520 if(exch_list[2*i]){
521 #if 0
522 printf("%d recording clus=%d mem %p-%p n=%d\n",armci_me,i,exch_list[2*i],
523 exch_list[2*i+1],r->n);
524 fflush(stdout);
525 #endif
526
527 #ifdef REGIONS_REQUIRE_MEMHDL
528 armci_copy(&hdlarr[i],&(r->list+r->n)->memhdl,sizeof(ARMCI_MEMHDL_T));
529 #endif
530 armci_region_record(exch_list[2*i],exch_list[2*i+1], r);
531 }
532 }
533 }
534
535 /*\
536 * for server thread to know which region is what in cases where there is one
537 * process per node
538 \*/
armci_global_region_exchange(void * start,long size)539 void armci_global_region_exchange(void *start, long size)
540 {
541 #ifdef REGIONS_REQUIRE_MEMHDL
542 ARMCI_MEMHDL_T *hdlarr;
543
544 hdlarr = calloc(armci_nclus,sizeof(ARMCI_MEMHDL_T));
545 #endif
546 if(!allow_pin)return;
547 if(armci_nclus<=1)return;
548
549 armci_region_exchange(start,size);
550
551 #ifdef REGIONS_REQUIRE_MEMHDL
552 {
553 int foundclus=0, foundserv=0, i,loc=0;
554 armci_reglist_t *reglist=NULL,*clreglist=NULL;
555 armci_region_t *reg=NULL;
556
557 foundclus=armci_region_clus_found(armci_clus_me, start,size);
558 foundserv=armci_region_serv_found(armci_clus_me, start,size);
559 if(foundclus==-1){
560 foundclus = armci_region_loc_found(start,size);
561 loc=1;
562 }
563
564 if(foundclus!=-1 && foundserv==-1){
565 reglist = (serv_regions+armci_clus_me);
566 if(loc)
567 clreglist = &(loc_regions_arr);
568 else
569 clreglist = (clus_regions+armci_clus_me);
570 #if defined(DATA_SERVER) || defined(PORTALS)
571 # if defined(PORTALS)
572 ((reglist->list+reglist->n)->memhdl).regid=(reglist->n);
573 # endif
574 armci_serv_register_req((clreglist->list+foundclus)->start,((char *)(clreglist->list+foundclus)->end-(char *)((clreglist->list+foundclus)->start)),&((reglist->list+reglist->n)->memhdl));
575 #endif
576 (void)armci_region_record((clreglist->list+foundclus)->start,(clreglist->list+foundclus)->end,reglist);
577
578 #ifdef LAPI_RDMA
579 armci_copy(&(clreglist->list+foundclus)->memhdl, &(reglist->list+foundclus)->memhdl, sizeof(ARMCI_MEMHDL_T));
580 #endif
581
582 #if DEBUG
583 printf("\n%d:serv recording st=%p end=%p sz=%d from %d n=%d sz=%d\n",armci_me,(clreglist->list+foundclus)->start,(clreglist->list+foundclus)->end,(clreglist->list+foundclus)->end-(clreglist->list+foundclus)->start,armci_clus_me,reglist->n,sizeof(ARMCI_MEMHDL_T));fflush(stdout);
584 #endif
585 foundserv=armci_region_serv_found(armci_clus_me, start,size);
586 reg = (serv_regions+armci_clus_me)->list+foundserv;
587 }
588 if(reg)
589 armci_copy(®->memhdl,&hdlarr[armci_clus_me],sizeof(ARMCI_MEMHDL_T));
590
591 i = armci_nclus*sizeof(ARMCI_MEMHDL_T)/sizeof(int);
592 armci_msg_gop_scope(SCOPE_ALL,hdlarr,i,"+",ARMCI_INT);
593 for(i=0; i<armci_nclus; i++){
594 armci_reglist_t *r=serv_regions+i;
595 armci_reglist_t *rc=clus_regions+i;
596 if(i==armci_clus_me) continue;
597 if((rc->list+r->n)->start){
598 #if 0
599 printf("\n%d:serv recording %p from %d n=%d \n",armci_me,(rc->list+r->n)->start,i,r->n);fflush(stdout);
600 #endif
601 armci_copy(&hdlarr[i],&(r->list+r->n)->memhdl,sizeof(ARMCI_MEMHDL_T));
602 armci_region_record((rc->list+r->n)->start,(rc->list+r->n)->end,r);
603 }
604 }
605 }
606 #endif
607 }
608