1 #if HAVE_CONFIG_H
2 #   include "config.h"
3 #endif
4 
/* $Id: regions.c,v 1.14.2.3 2007-10-05 16:51:51 manoj Exp $ interface to keep track of memory regions across the cluster */
/*
 * armci_region_init - allocates list of regions, initialization
 * armci_region_register_shm - registers shared memory on the current node
 * armci_region_register_loc - registers local memory
 * armci_region_clus_record  - stores info on pinned region on a given cluster node
 * armci_region_clus_found   - returns the index of the registered region that
 *                             contains the specified address on a cluster node,
 *                             or -1 when no region matches
 * armci_region_loc_found    - same for local memory
 * armci_region_both_found   - returns 1 if local and remote are found, otherwise 0
 *
 */
16 
17 
/* 7/7/06
 * REGIONS_REQUIRE_MEMHDL was for all networks like VIA, InfiniBand, etc.
 * which had a handle associated with remote/local memory required for
 * RDMA. Coincidentally, all these networks also used a server thread,
 * so server_regions were allocated and enabled when REGIONS_REQUIRE_MEMHDL
 * was defined.
 * With Catamount, we require portals memory descriptors to be stored.
 * There is no server, but we still need the server_regions to post match all
 * md to accept all incoming requests.
 */
28 
29 #include "armcip.h"
30 #if HAVE_STDLIB_H
31 #   include <stdlib.h>
32 #endif
33 #if HAVE_STDIO_H
34 #   include <stdio.h>
35 #endif
36 #include "copy.h"
37 
/*
 * Capacity of one region list.
 * This should match the similar definitions in portals.c, vapi.c and openib.c.
 */
#if defined(PORTALS)
#  define MAX_REGIONS 24
#else
#  define MAX_REGIONS 8
#endif

/*
 * One registered memory region, described by the half-open address range
 * [start, end).  When REGIONS_REQUIRE_MEMHDL is defined the entry also
 * carries the network memory handle that belongs to the region.
 */
typedef struct {
  void *start;                  /* first byte of the region */
  void *end;                    /* one past the last byte of the region */
#ifdef REGIONS_REQUIRE_MEMHDL
  ARMCI_MEMHDL_T memhdl;        /* handle associated with the region */
#endif
} armci_region_t;

/*
 * Fixed-capacity list of regions; entries 0 .. n-1 of `list` are in use.
 */
typedef struct {
  long n;                               /* number of recorded regions */
  armci_region_t list[MAX_REGIONS];     /* region storage */
} armci_reglist_t;
57 
58 static int allow_pin=0;
59 static armci_reglist_t  *clus_regions;        /* cluster shared/remote memory */
60 #ifdef REGIONS_REQUIRE_MEMHDL
61 static armci_reglist_t  *serv_regions;        /* server regions */
62 #endif
63 static armci_reglist_t loc_regions_arr;       /* local memory */
64 static void *needs_pin_shmptr=NULL, *needs_pin_ptr=NULL;
65 static size_t  needs_pin_shmsize=0, needs_pin_size=0;
66 
67 #ifdef REGIONS_REQUIRE_MEMHDL
68 extern int armci_pin_contig_hndl(void *ptr, size_t bytes, ARMCI_MEMHDL_T *memhdl);
69 extern int armci_server_pin_contig(void *ptr, size_t bytes, ARMCI_MEMHDL_T *memhdl);
70 #else
71 extern int armci_pin_contig1(void *ptr, size_t bytes);
72 #endif
73 static void **exch_list=(void**)0;
74 static char exch_loc[MAX_REGIONS];
75 static char exch_rem[MAX_REGIONS];
76 void armci_serv_register_req(void *start,long bytes, ARMCI_MEMHDL_T *reg_mem);
77 
78 
/*
 * Append the address range [start, end) to the region list `reg`.
 *
 * Only the start/end fields of the new slot are written here; callers in
 * this file that need a memory handle fill the slot's memhdl before calling.
 *
 * Returns 1 when the range was stored, 0 when the list already holds
 * MAX_REGIONS entries (the list is left untouched in that case).
 *
 * With DEBUG_ defined, recording a start address that already lies inside
 * a recorded region is reported through armci_die().
 */
static int armci_region_record(void *start, void *end, armci_reglist_t *reg)
{
#ifdef DEBUG_
     int i;
     /* The check has to look at the individual entries: the list itself
      * has no start/end members. */
     for(i=0; i<reg->n; i++){
        armci_region_t *r = reg->list+i;
        if(r->start <= start && r->end > start)
           armci_die("armci_region_record: already recorded",i);
     }
#endif
     if(reg->n >= MAX_REGIONS) return 0;

     {
        armci_region_t *slot = reg->list + reg->n;
        slot->start = start;
        slot->end   = end;
     }
     reg->n++;
     return 1;
}
94 
/*
 * Pin `size` bytes starting at `start` and, on success, append the range to
 * the region list `reg`.
 *
 * Nothing is done when the list is full or when there is at most one
 * cluster node (armci_nclus <= 1).  A failed pin is reported on stdout and
 * the region is not recorded.
 */
static void armci_region_register(void *start, long size, armci_reglist_t *reg)
{
    int pinned;

    ARMCI_PR_DBG("enter",0);

    if(reg->n >= MAX_REGIONS || armci_nclus <= 1)
        return;

#ifdef REGIONS_REQUIRE_MEMHDL
    {
        /* slot the region will occupy once it has been recorded */
        armci_region_t *slot = reg->list + reg->n;
#  if defined(PORTALS)
        /* We really shouldn't have network specific ifdef's here, but this
         * is an exception to avoid significant code change in the portals
         * layer.  The ARMCI portals layer maintains a list of memory
         * descriptors for each region allocated and uses them in a round
         * robin fashion.  The region index is stored in the memhdl to
         * identify which memory region the memory used by a communication
         * call corresponds to. */
        slot->memhdl.regid = reg->n;
#  endif
        pinned = armci_pin_contig_hndl(start, size, &slot->memhdl);
    }
#else
    pinned = armci_pin_contig1(start, size);
#endif

    if(!pinned){
        printf("%d pin failed %p bytes=%ld\n",armci_me,start,size);
        fflush(stdout);
        return;
    }

    (void) armci_region_record(start, (char*)start + size, reg);
}
131 
132 
armci_region_register_shm(void * start,long size)133 void armci_region_register_shm(void *start, long size)
134 {
135 #ifdef PORTALS
136 armci_reglist_t *reg = clus_regions+armci_clus_me;
137 #endif
138     if(allow_pin)
139       armci_region_register(start, size, clus_regions+armci_clus_me);
140     else{
141       needs_pin_shmptr = start;
142       needs_pin_shmsize= size;
143     }
144 
145 #ifdef PORTALS
146     /* we mark the region as local region so that portals layer uses
147      * the md from memhdl instead of any region list*/
148     (reg->list+(reg->n-1))->memhdl.islocal=0;
149 #endif
150 
151 #if 0
152      if(allow_pin){
153         printf("\n%d:%d registering shm %p bytes=%ld\n",armci_me,allow_pin,start,size);
154         fflush(stdout);
155      }
156 #endif
157 }
158 
159 
armci_region_register_loc(void * start,long size)160 void armci_region_register_loc(void *start, long size)
161 {
162 #ifdef PORTALS
163      armci_reglist_t *reg = &loc_regions_arr;
164 #endif
165      if(allow_pin)armci_region_register(start, size, &loc_regions_arr);
166      else{
167          needs_pin_ptr = start;
168          needs_pin_size= size;
169      }
170 
171 #ifdef PORTALS
172      {
173         extern int _armci_malloc_local_region;
174         if(_armci_malloc_local_region){
175            (reg->list+(reg->n-1))->memhdl.islocal=1;
176            _armci_malloc_local_region=0;
177         }
178         else
179            (reg->list+(reg->n-1))->memhdl.islocal=0;
180      }
181 #endif
182 
183 #ifdef DEBUG_
184      if(allow_pin){
185         printf("\n%d:%d registered local %p bytes=%ld\n",armci_me,allow_pin,start,size);
186         fflush(stdout);
187      }
188 #endif
189 }
190 
191 
armci_region_clus_record(int node,void * start,long size)192 void armci_region_clus_record(int node, void *start, long size)
193 {
194      if(node > armci_nclus || node <0 )
195                armci_die("armci_region_remote: bad node ",node);
196 
197      (void)armci_region_record(start,((char*)start)+size,clus_regions+node);
198 }
199 
200 
armci_region_init()201 void armci_region_init()
202 {
203     ARMCI_PR_DBG("enter",0);
204     allow_pin =1;
205     clus_regions=(armci_reglist_t*)calloc(armci_nclus,sizeof(armci_reglist_t));
206     if(!clus_regions)armci_die("armci_region_init: calloc failed",armci_nclus);
207 #ifdef REGIONS_REQUIRE_MEMHDL
208     serv_regions=(armci_reglist_t*)calloc(armci_nclus,sizeof(armci_reglist_t));
209     if(!serv_regions)armci_die("armci_region_init: calloc failed",armci_nclus);
210 #endif
211     exch_list = (void**)calloc(2*armci_nclus, sizeof(void*));
212     if(!exch_list) armci_die("armci_region_init: calloc 2 failed",armci_nclus);
213     bzero(exch_loc,sizeof(exch_loc));
214     bzero(exch_rem,sizeof(exch_rem));
215 
216     if(needs_pin_ptr) armci_region_register_loc(needs_pin_ptr,needs_pin_size);
217     if(needs_pin_shmptr) armci_region_register_shm(needs_pin_shmptr,needs_pin_shmsize);
218     ARMCI_PR_DBG("exit",0);
219 }
220 
armci_region_destroy()221 void armci_region_destroy()
222 {
223 armci_reglist_t *reg = &loc_regions_arr;
224 int i;
225 #ifdef REGIONS_REQUIRE_MEMHDL
226 ARMCI_MEMHDL_T *loc_memhdl;
227 #endif
228     if(!allow_pin) return;
229     for(i=0; i<reg->n; i++){
230 #ifdef REGIONS_REQUIRE_MEMHDL
231       loc_memhdl=&((reg->list+i)->memhdl);
232       armci_network_client_deregister_memory(loc_memhdl);
233 #endif
234     }
235 
236     reg=clus_regions+armci_clus_me;
237     for(i=0; i<reg->n; i++){
238 #ifdef REGIONS_REQUIRE_MEMHDL
239       loc_memhdl=&((reg->list+i)->memhdl);
240       armci_network_client_deregister_memory(loc_memhdl);
241 #endif
242     }
243 }
244 
/*
 * Server-side teardown.  When REGIONS_REQUIRE_MEMHDL is defined, the memory
 * handle of every server region of this cluster node is passed to
 * armci_network_server_deregister_memory(); otherwise the function is empty.
 */
void armci_server_region_destroy()
{
#ifdef REGIONS_REQUIRE_MEMHDL
    armci_reglist_t *mine;
    int k = 0;

    ARMCI_PR_DBG("enter",0);

    mine = serv_regions+armci_clus_me;
    while(k < mine->n){
       ARMCI_MEMHDL_T *hdl = &((mine->list+k)->memhdl);
       armci_network_server_deregister_memory(hdl);
       k++;
    }

    ARMCI_PR_DBG("exit",0);
#endif
}
260 
261 
armci_region_clus_found(int node,void * start,int size)262 int armci_region_clus_found(int node, void *start, int size)
263 {
264 armci_reglist_t *reg=clus_regions+node;
265 int i,found=-1;
266     if(!allow_pin) return 0;
267     if(node > armci_nclus || node <0 )
268       armci_die("armci_region_clus_found: bad node ",node);
269     for(i=0; i<reg->n; i++)
270       if((reg->list+i)->start <= start && (reg->list+i)->end > start){
271         found=i; break;
272       }
273 
274     return(found);
275 }
276 
armci_region_loc_found(void * start,int size)277 int armci_region_loc_found(void *start, int size)
278 {
279 armci_reglist_t *reg = &loc_regions_arr;
280 int i,found=-1;
281     ARMCI_PR_DBG("enter",0);
282     if(!allow_pin) return 0;
283     for(i=0; i<reg->n; i++)
284       if((reg->list+i)->start <= start && (reg->list+i)->end > start){
285         found=i;
286         break;
287       }
288 #ifdef DEBUG
289     if(found){
290       printf("%d: found loc %d n=%ld (%p,%p) %p\n",armci_me,found,reg->n,
291                (reg->list)->start,(reg->list)->end, start); fflush(stdout);
292     }
293 #endif
294     ARMCI_PR_DBG("exit",0);
295     return(found);
296 }
297 
298 #ifdef REGIONS_REQUIRE_MEMHDL
/*
 * Check that both ends of a transfer lie in registered memory and hand back
 * the matching memory handles.
 *
 *   loc         local address; searched in the local regions first, then in
 *               the shared regions of this cluster node
 *   rem         remote address; searched in the server regions of `node`
 *   size        not examined (only the addresses are tested)
 *   loc_memhdl  receives the handle of the local region
 *   rem_memhdl  receives the handle of the remote region
 *
 * Returns 1 when the remote region was found (and, except under PORTALS, the
 * local one as well), 0 otherwise.  Under PORTALS a missing local region is
 * not an error: *loc_memhdl is set to NULL and the remote lookup proceeds.
 * Outputs are left untouched for the lookups that did not succeed, and
 * nothing is written while pinning is disabled.
 */
int armci_region_both_found_hndl(void *loc, void *rem, int size, int node,
                        ARMCI_MEMHDL_T **loc_memhdl,ARMCI_MEMHDL_T **rem_memhdl)
{
     armci_reglist_t *rl;
     armci_region_t *hit = NULL;
     int k;

     if(!allow_pin) return 0;

     /* local private memory */
     rl = &loc_regions_arr;
     for(k=0; k<rl->n; k++){
        armci_region_t *r = rl->list+k;
        if(r->start <= loc && r->end > loc){ hit = r; break; }
     }

     /* might be local shared memory */
     if(hit == NULL){
        rl = clus_regions+armci_clus_me;
        for(k=0; k<rl->n; k++){
           armci_region_t *r = rl->list+k;
           if(r->start <= loc && r->end > loc){ hit = r; break; }
        }
     }

#ifdef PORTALS
     *loc_memhdl = (hit != NULL) ? &hit->memhdl : NULL;
#else
     if(hit == NULL) return 0;
     *loc_memhdl = &hit->memhdl;
#endif

     /* now check remote shared memory known to the server of `node` */
     rl = serv_regions+node;
     for(k=0; k<rl->n; k++){
        armci_region_t *r = rl->list+k;
        if(r->start <= rem && r->end > rem){
           *rem_memhdl = &r->memhdl;
           return 1;
        }
     }
     return 0;
}
375 
376 
/*
 * Search the server regions of `node` for the address `rem`.
 * On a hit, *remhdl receives the region's memory handle and 1 is returned;
 * otherwise 0 is returned and *remhdl is left untouched.
 * `size` is not examined.
 */
int armci_region_remote_found_hndl(void *rem,int size, int node, ARMCI_MEMHDL_T **remhdl)
{
    armci_reglist_t *serv = serv_regions+node;
    int k;

    for(k=0; k<serv->n; k++){
      armci_region_t *r = serv->list+k;
      if(r->start <= rem && r->end > rem){
        *remhdl = &r->memhdl;
        return 1;
      }
    }
    return 0;
}
391 
/*
 * Search the server regions of this cluster node for the address `loc`.
 * On a hit, *loc_memhdl receives the region's memory handle and 1 is
 * returned; otherwise 0 is returned and *loc_memhdl is left untouched.
 * While pinning is disabled a message is printed and 0 is returned.
 * `node` is not used.
 */
int get_armci_region_local_hndl(void *loc, int node,ARMCI_MEMHDL_T **loc_memhdl)
{
   armci_reglist_t *serv;
   int k;

   if(!allow_pin){
      printf("inside get_armci_region_local_hndl : case allow_pin = 0\n");
      return 0;
   }

   serv = serv_regions+armci_clus_me;
   for(k=0; k<serv->n; k++){
      armci_region_t *r = serv->list+k;
      if(r->start <= loc && r->end > loc){
         *loc_memhdl = &r->memhdl;
         return 1;
      }
   }
   return 0;
}
425 
426 
armci_region_serv_found(int node,void * start,int size)427 int armci_region_serv_found(int node,void *start,int size)
428 {
429     armci_reglist_t *reg=serv_regions+node;
430     int i,found=-1;
431     if(!allow_pin) return 0;
432     if(node > armci_nclus || node <0 )
433       armci_die("armci_region_serv_found: bad node ",node);
434     for(i=0; i<reg->n; i++)
435         if((reg->list+i)->start <= start && (reg->list+i)->end > start){found=i; break;}
436 
437     return(found);
438 }
439 
440 #endif
441 
442 
armci_region_both_found(void * loc,void * rem,int size,int node)443 int armci_region_both_found(void *loc, void *rem, int size, int node)
444 {
445      armci_reglist_t *reg = &loc_regions_arr;
446      int i,found=0;
447      if(!allow_pin) return 0;
448 
449      /* first scan for local */
450      for(i=0; i<reg->n; i++)
451         if((reg->list+i)->start <= loc && (reg->list+i)->end > loc){found=1; break;}
452 
453      if(!found){ /* might be local shared */
454          reg=clus_regions+armci_clus_me;
455          for(i=0; i<reg->n; i++)
456            if((reg->list+i)->start <= loc && (reg->list+i)->end > loc){found=1; break;}
457      }
458      if(!found) return 0;
459 
460      /* now check remote shared */
461      reg=clus_regions+node;
462      for(i=0; i<reg->n; i++)
463          if((reg->list+i)->start <= rem && (reg->list+i)->end > rem){found=2;break;}
464 
465 #if 0
466      if(found==2){printf("%d: found both %d\n",armci_me,node); fflush(stdout); }
467 #endif
468      if(found==2) return 1;
469      else return 0;
470 }
471 
472 
473 
armci_region_exchange(void * start,long size)474 void armci_region_exchange(void *start, long size)
475 {
476      int found=0, i;
477      armci_region_t *reg=0;
478 #ifdef REGIONS_REQUIRE_MEMHDL
479      ARMCI_MEMHDL_T *hdlarr;
480 
481      hdlarr = calloc(armci_nclus,sizeof(ARMCI_MEMHDL_T));
482 #endif
483      if(!allow_pin)return;
484      if(armci_nclus<=1)return;
485 
486      found=armci_region_clus_found(armci_clus_me, start,size);
487      if(found>-1){
488         if(!exch_rem[found]){
489            reg = (clus_regions+armci_clus_me)->list+found;
490            exch_rem[found]=1;
491         }
492      }else{
493         found= armci_region_loc_found(start,size);
494         if(found>-1){
495              if(!exch_loc[found]){
496                reg =  (&loc_regions_arr)->list+found;
497                exch_loc[found]=1;
498              }
499         }
500      }
501 
502      bzero(exch_list,2*armci_nclus*sizeof(void *));
503      if( reg && (armci_me == armci_master)){
504         exch_list[2*armci_clus_me] = reg->start;
505         exch_list[2*armci_clus_me+1] = reg->end;
506 #ifdef REGIONS_REQUIRE_MEMHDL
507         armci_copy(&reg->memhdl,&hdlarr[armci_clus_me],sizeof(ARMCI_MEMHDL_T));
508 #endif
509      }
510 
511      /* exchange info on new regions with other nodes */
512      armci_exchange_address(exch_list,2*armci_nclus);
513 #ifdef REGIONS_REQUIRE_MEMHDL
514      i = armci_nclus*sizeof(ARMCI_MEMHDL_T)/sizeof(int);
515      armci_msg_gop_scope(SCOPE_ALL,hdlarr,i,"+",ARMCI_INT);
516 #endif
517      for(i=0; i<armci_nclus; i++){
518          armci_reglist_t *r=clus_regions+i;
519          if(i==armci_clus_me) continue;
520          if(exch_list[2*i]){
521 #if 0
522            printf("%d recording clus=%d mem %p-%p n=%d\n",armci_me,i,exch_list[2*i],
523                   exch_list[2*i+1],r->n);
524            fflush(stdout);
525 #endif
526 
527 #ifdef REGIONS_REQUIRE_MEMHDL
528           armci_copy(&hdlarr[i],&(r->list+r->n)->memhdl,sizeof(ARMCI_MEMHDL_T));
529 #endif
530            armci_region_record(exch_list[2*i],exch_list[2*i+1], r);
531          }
532      }
533 }
534 
535 /*\
536  * for server thread to know which region is what in cases where there is one
537  * process per node
538 \*/
armci_global_region_exchange(void * start,long size)539 void armci_global_region_exchange(void *start, long size)
540 {
541 #ifdef REGIONS_REQUIRE_MEMHDL
542     ARMCI_MEMHDL_T *hdlarr;
543 
544     hdlarr = calloc(armci_nclus,sizeof(ARMCI_MEMHDL_T));
545 #endif
546     if(!allow_pin)return;
547     if(armci_nclus<=1)return;
548 
549     armci_region_exchange(start,size);
550 
551 #ifdef REGIONS_REQUIRE_MEMHDL
552     {
553       int foundclus=0, foundserv=0, i,loc=0;
554       armci_reglist_t *reglist=NULL,*clreglist=NULL;
555       armci_region_t *reg=NULL;
556 
557       foundclus=armci_region_clus_found(armci_clus_me, start,size);
558       foundserv=armci_region_serv_found(armci_clus_me, start,size);
559       if(foundclus==-1){
560 	foundclus = armci_region_loc_found(start,size);
561 	loc=1;
562       }
563 
564       if(foundclus!=-1 && foundserv==-1){
565 	reglist = (serv_regions+armci_clus_me);
566 	if(loc)
567 	  clreglist = &(loc_regions_arr);
568 	else
569 	  clreglist = (clus_regions+armci_clus_me);
570 #if defined(DATA_SERVER) || defined(PORTALS)
571 #  if defined(PORTALS)
572         ((reglist->list+reglist->n)->memhdl).regid=(reglist->n);
573 #  endif
574 	armci_serv_register_req((clreglist->list+foundclus)->start,((char *)(clreglist->list+foundclus)->end-(char *)((clreglist->list+foundclus)->start)),&((reglist->list+reglist->n)->memhdl));
575 #endif
576 	(void)armci_region_record((clreglist->list+foundclus)->start,(clreglist->list+foundclus)->end,reglist);
577 
578 #ifdef LAPI_RDMA
579         armci_copy(&(clreglist->list+foundclus)->memhdl, &(reglist->list+foundclus)->memhdl, sizeof(ARMCI_MEMHDL_T));
580 #endif
581 
582 #if DEBUG
583 	printf("\n%d:serv recording st=%p end=%p sz=%d from %d n=%d sz=%d\n",armci_me,(clreglist->list+foundclus)->start,(clreglist->list+foundclus)->end,(clreglist->list+foundclus)->end-(clreglist->list+foundclus)->start,armci_clus_me,reglist->n,sizeof(ARMCI_MEMHDL_T));fflush(stdout);
584 #endif
585 	foundserv=armci_region_serv_found(armci_clus_me, start,size);
586 	reg = (serv_regions+armci_clus_me)->list+foundserv;
587       }
588       if(reg)
589 	armci_copy(&reg->memhdl,&hdlarr[armci_clus_me],sizeof(ARMCI_MEMHDL_T));
590 
591       i = armci_nclus*sizeof(ARMCI_MEMHDL_T)/sizeof(int);
592       armci_msg_gop_scope(SCOPE_ALL,hdlarr,i,"+",ARMCI_INT);
593       for(i=0; i<armci_nclus; i++){
594 	armci_reglist_t *r=serv_regions+i;
595 	armci_reglist_t *rc=clus_regions+i;
596 	if(i==armci_clus_me) continue;
597 	if((rc->list+r->n)->start){
598 #if 0
599 	  printf("\n%d:serv recording %p from %d n=%d \n",armci_me,(rc->list+r->n)->start,i,r->n);fflush(stdout);
600 #endif
601 	  armci_copy(&hdlarr[i],&(r->list+r->n)->memhdl,sizeof(ARMCI_MEMHDL_T));
602 	  armci_region_record((rc->list+r->n)->start,(rc->list+r->n)->end,r);
603 	}
604       }
605     }
606 #endif
607 }
608