1 #if HAVE_CONFIG_H
2 # include "config.h"
3 #endif
4
5 #include "globalp.h"
6 #include "base.h"
7 #include "ga-papi.h"
8 #if HAVE_STDIO_H
9 # include <stdio.h>
10 #endif
11 #define DEBUG 0
12
/* WARNING: MAX_NUM_NB_HDLS must not exceed 256. Handle indices are stored in
 * the 8-bit ihdl_index field of gai_nbhdl_t, which can only hold the values
 * 0-255; a larger table could not be addressed through that field. */
#define MAX_NUM_NB_HDLS 256

/* Number of table entries actually in use. Starts at the compile-time maximum
 * and is reassigned by gai_nb_init(). */
static int nb_max_outstanding = MAX_NUM_NB_HDLS;
18
19 /**
20 * NOTES
21 * The non-blocking GA handle indexes into a list of structs that point to a
22 * linked list of non-blocking ARMCI calls. The first link in the list is
23 * contained in the GA struct. Conversely, each link in the non-blocking list
24 * points to the GA handle that contains the head of the list. When a new GA
25 * non-blocking call is created, the code looks at the list of GA handles and
26 * tries to find one that is not currently being used. If it can't find one, it
27 * calls wait on an existing call and recycles that handle for the new call.
28 *
29 * Similarly, each GA call consists of multiple ARMCI non-blocking calls. The
30 * handles for each of these calls are assembled into a list. If no handle is
31 * available, the ARMCI_Wait function is called on a handle, freeing it for use.
32 * The handle is also removed from the linked list pointed to by the original GA
33 * struct. It is possible in this scheme that a GA struct has a linked list that
 * contains no links. When wait or test is called in this case, the struct is
35 * marked as inactive and then returns without performing any ARMCI operations.
36 */
37
/* Internal view of a GA non-blocking handle. The two bit-fields add up to 32
 * bits; the routines in this file reach them by casting the caller's Integer
 * handle pointer to a pointer to this struct.
 *   ihdl_index: position of the request header in ga_ihdl_array (8 bits)
 *   ga_nbtag:   tag compared against the tag stored in that header (24 bits)
 */
typedef struct {
  unsigned ihdl_index : 8;
  unsigned ga_nbtag : 24;
} gai_nbhdl_t;
44
45
46 /* We create an array of type struct_armci_hdl_t. This list represents the
47 * number of available ARMCI non-blocking calls that are available to create
48 * non-blocking GA calls. Each element in the armci handle linked list is of
49 * type ga_armcihdl_t.
50 * handle: int handle or gai_nbhdl_t struct that represents ARMCI handle for
51 * non-blocking call
52 * next: pointer to next element in list
53 * previous: pointer to previous element in list
54 * ga_hdlarr_index: index that points back to ga_nbhdl_array list.
55 * This can be used to remove this link from GA linked list if
56 * this armci request must be cleared to make room for a new
57 * request.
58 * active: indicates that this represents an outstanding ARMCI non-blocking
59 * request
60 */
61 typedef struct struct_armcihdl_t {
62 armci_hdl_t handle;
63 struct struct_armcihdl_t *next;
64 struct struct_armcihdl_t *previous;
65 int ga_hdlarr_index;
66 int active;
67 } ga_armcihdl_t;
68
69
70 /* We create an array of type ga_nbhdl_array_t. Each of the elements in this
71 * array is the head of the armci handle linked list that is associated with
72 * each GA call.
73 * ahandle: head node in a linked list of ARMCI handles
74 * count: total number of ARMCI handles in linked list
75 * ga_nbtag: unique tag that matches tag in handle (gai_nbhdl_t)
76 * If count is 0 or ahandle is null, there are no outstanding armci calls
77 * associated with this GA handle
78 */
79 typedef struct{
80 ga_armcihdl_t *ahandle;
81 int count;
82 int ga_nbtag;
83 int active;
84 } ga_nbhdl_array_t;
85
86 /**
87 * Array of headers for non-blocking GA calls. The ihdl_index element of the
88 * non-blocking handle indexes into this array. The maximum number of
89 * outstanding non-blocking GA calls is nb_max_outstanding.
90 */
91 static ga_nbhdl_array_t ga_ihdl_array[MAX_NUM_NB_HDLS];
92
93 /**
94 * Array of armci handles. This is used to construct linked lists of ARMCI
95 * non-blocking calls. The maximum number of outstanding ARMCI non-blocking
96 * calls is nb_max_outstanding.
97 */
98 static ga_armcihdl_t armci_ihdl_array[MAX_NUM_NB_HDLS];
99
100 static int lastGAhandle = -1; /* last assigned ga handle */
101 static int lastARMCIhandle = -1; /* last assigned armci handle */
102
/**
 * Hand out the next tag used to identify a non-blocking call. Tags are 24-bit
 * values: the sequence starts at 0 and wraps back to 0 after 16777216 (2^24)
 * calls, so tags eventually repeat.
 *
 * @return the newly assigned tag, in the range 0 .. 2^24-1
 */
static unsigned int ga_nb_tag = (unsigned int)-1; /* wraps to 0 on first use */
unsigned int get_next_tag()
{
  /* unsigned arithmetic wraps, and masking keeps the low 24 bits */
  ga_nb_tag = (ga_nb_tag + 1u) & 0xFFFFFFu;
  return ga_nb_tag;
}
114
115 /**
116 * Initialize some data structures used in the non-blocking function calls
117 */
gai_nb_init()118 void gai_nb_init()
119 {
120 int i;
121 char *value;
122 /* This is a hideous kluge, but some users want to be able to set this
123 * externally. The fact that only integer handles are exchanged between GA and
124 * the underlying runtime make it very difficult to handle in a more elegant
125 * manner. */
126 nb_max_outstanding = MAX_NUM_NB_HDLS; /* default */
127 value = getenv("COMEX_MAX_NB_OUTSTANDING");
128 if (NULL != value) {
129 nb_max_outstanding = atoi(value);
130 }
131 if (nb_max_outstanding <1 || nb_max_outstanding > MAX_NUM_NB_HDLS) {
132 pnga_error("Illegal number of outstanding Non-block requests specified",
133 nb_max_outstanding);
134 }
135 for (i=0; i<nb_max_outstanding; i++) {
136 ga_ihdl_array[i].ahandle = NULL;
137 ga_ihdl_array[i].count = 0;
138 ga_ihdl_array[i].active = 0;
139 ga_ihdl_array[i].ga_nbtag = -1;
140 armci_ihdl_array[i].next = NULL;
141 armci_ihdl_array[i].previous = NULL;
142 armci_ihdl_array[i].active = 0;
143 ARMCI_INIT_HANDLE(&armci_ihdl_array[i].handle);
144 }
145 }
146
147 /**
148 * Called from ga_put/get before every call to a non-blocking armci request.
149 * Find an available armic non-blocking handle. If none is available,
150 * complete an existing outstanding armci request and return the
151 * corresponding handle.
152 */
get_armci_nbhandle(Integer * nbhandle)153 armci_hdl_t* get_armci_nbhandle(Integer *nbhandle)
154 {
155 int i, top, idx, iloc;
156 gai_nbhdl_t *inbhandle = (gai_nbhdl_t *)nbhandle;
157 int index = inbhandle->ihdl_index;
158 ga_armcihdl_t* next = ga_ihdl_array[index].ahandle;
159
160 lastARMCIhandle++;
161 lastARMCIhandle = lastARMCIhandle%nb_max_outstanding;
162 top = lastARMCIhandle+nb_max_outstanding;
163 /* default index if no handles are available */
164 iloc = lastARMCIhandle;
165 for (i=lastARMCIhandle; i<top; i++) {
166 idx = i%nb_max_outstanding;
167 if (armci_ihdl_array[idx].active == 0) {
168 iloc = idx;
169 break;
170 }
171 }
172 /* if selected handle represents an outstanding request, complete it */
173 if (armci_ihdl_array[iloc].active == 1) {
174 int iga_hdl = armci_ihdl_array[iloc].ga_hdlarr_index;
175 ARMCI_Wait(&armci_ihdl_array[iloc].handle);
176 /* clean up linked list that this handle used to be a link in */
177 if (armci_ihdl_array[iloc].previous != NULL) {
178 /* link is not first in linked list */
179 armci_ihdl_array[iloc].previous->next = armci_ihdl_array[iloc].next;
180 if (armci_ihdl_array[iloc].next != NULL) {
181 armci_ihdl_array[iloc].next->previous = armci_ihdl_array[iloc].previous;
182 }
183 } else {
184 /* link is first in linked list. Need to update header */
185 ga_ihdl_array[iga_hdl].ahandle = armci_ihdl_array[iloc].next;
186 if (armci_ihdl_array[iloc].next != NULL) {
187 armci_ihdl_array[iloc].next->previous = NULL;
188 }
189 }
190 ga_ihdl_array[iga_hdl].count--;
191 }
192 /* Initialize armci handle and add this operation to the linked list
193 * corresponding to nbhandle */
194 ARMCI_INIT_HANDLE(&armci_ihdl_array[iloc].handle);
195 armci_ihdl_array[iloc].active = 1;
196 armci_ihdl_array[iloc].previous = NULL;
197 if (ga_ihdl_array[index].ahandle) {
198 ga_ihdl_array[index].ahandle->previous = &armci_ihdl_array[iloc];
199 }
200 armci_ihdl_array[iloc].next = ga_ihdl_array[index].ahandle;
201 ga_ihdl_array[index].ahandle = &armci_ihdl_array[iloc];
202 armci_ihdl_array[iloc].ga_hdlarr_index = index;
203 ga_ihdl_array[index].count++;
204
205 /* reset lastARMCIhandle to iloc */
206 lastARMCIhandle = iloc;
207
208 return &armci_ihdl_array[iloc].handle;
209 }
210
211 /**
212 * the wait routine which is called inside pnga_nbwait. This always returns
213 * zero. The return value is not checked in the code.
214 */
nga_wait_internal(Integer * nbhandle)215 int nga_wait_internal(Integer *nbhandle){
216 gai_nbhdl_t *inbhandle = (gai_nbhdl_t *)nbhandle;
217 int index = inbhandle->ihdl_index;
218 int retval = 0;
219 int tag = inbhandle->ga_nbtag;
220 /* check if tags match. If they don't then this request was already completed
221 * so the handle can be used for another GA non-blocking call. Just return in
222 * this case */
223 if (tag == ga_ihdl_array[index].ga_nbtag) {
224 if (ga_ihdl_array[index].active == 0) {
225 printf("p[%d] nga_wait_internal: GA NB handle inactive\n",GAme);
226 }
227 ga_armcihdl_t* next = ga_ihdl_array[index].ahandle;
228 /* Loop over linked list and complete all remaining armci non-blocking calls */
229 while(next) {
230 ga_armcihdl_t* tmp = next->next;
231 /* Complete the call */
232 ARMCI_Wait(&next->handle);
233 /* reinitialize armci_hlt_t data structure */
234 next->next = NULL;
235 next->previous = NULL;
236 next->active = 0;
237 ARMCI_INIT_HANDLE(&next->handle);
238 next = tmp;
239 }
240 ga_ihdl_array[index].ahandle = NULL;
241 ga_ihdl_array[index].count = 0;
242 ga_ihdl_array[index].active = 0;
243 }
244
245 return(retval);
246 }
247
248
249 /**
250 * the test routine which is called inside nga_nbtest. Return 0 if operation is
251 * completed
252 */
nga_test_internal(Integer * nbhandle)253 int nga_test_internal(Integer *nbhandle)
254 {
255 gai_nbhdl_t *inbhandle = (gai_nbhdl_t *)nbhandle;
256 int index = inbhandle->ihdl_index;
257 int retval = 0;
258 int tag = inbhandle->ga_nbtag;
259
260 /* check if tags match. If they don't then this request was already completed
261 * so the handle can be used for another GA non-blocking call. Just return in
262 * this case */
263 if (tag == ga_ihdl_array[index].ga_nbtag) {
264 ga_armcihdl_t* next = ga_ihdl_array[index].ahandle;
265 /* Loop over linked list and test all remaining armci non-blocking calls */
266 while(next) {
267 int ret = ARMCI_Test(&next->handle);
268 ga_armcihdl_t *tmp = next->next;
269 if (ret == 0) {
270 /* operation is completed so remove it from linked list */
271 if (next->previous != NULL) {
272 /* operation is not first element in list */
273 next->previous->next = next->next;
274 if (next->next != NULL) {
275 next->next->previous = next->previous;
276 }
277 } else {
278 /* operation is first element in list */
279 ga_ihdl_array[index].ahandle = next->next;
280 if (next->next != NULL) {
281 next->next->previous = NULL;
282 }
283 }
284 next->previous = NULL;
285 next->next = NULL;
286 next->active = 0;
287 ga_ihdl_array[index].count--;
288 }
289 next = tmp;
290 }
291 if (ga_ihdl_array[index].count == 0) {
292 ga_ihdl_array[index].ahandle = NULL;
293 ga_ihdl_array[index].active = 0;
294 }
295 if (ga_ihdl_array[index].count > 0) retval = 1;
296 }
297
298 return(retval);
299 }
300
301 /**
302 * Find a free GA non-blocking handle.
303 */
ga_init_nbhandle(Integer * nbhandle)304 void ga_init_nbhandle(Integer *nbhandle)
305 {
306 int i, top, idx, iloc;
307 gai_nbhdl_t *inbhandle = (gai_nbhdl_t *)nbhandle;
308 lastGAhandle++;
309 lastGAhandle = lastGAhandle%nb_max_outstanding;
310 top = lastGAhandle+nb_max_outstanding;
311 /* default index if no handles are available */
312 idx = lastGAhandle;
313 for (i=lastGAhandle; i<top; i++) {
314 iloc = i%nb_max_outstanding;
315 if (ga_ihdl_array[iloc].ahandle == NULL) {
316 idx = iloc;
317 break;
318 }
319 }
320 /* If no free handle is found, clear the oldest handle */
321 if (ga_ihdl_array[idx].ahandle != NULL) {
322 Integer itmp;
323 /* find value of itmp corresponding to oldest handle */
324 gai_nbhdl_t *oldhdl = (gai_nbhdl_t*)&itmp;
325 oldhdl->ihdl_index = idx;
326 oldhdl->ga_nbtag = ga_ihdl_array[idx].ga_nbtag;
327 nga_wait_internal(&itmp);
328 }
329 inbhandle->ihdl_index = idx;
330 inbhandle->ga_nbtag = get_next_tag();
331 ga_ihdl_array[idx].ahandle = NULL;
332 ga_ihdl_array[idx].count = 0;
333 ga_ihdl_array[idx].active = 1;
334 ga_ihdl_array[idx].ga_nbtag = inbhandle->ga_nbtag;
335
336 /* reset lastGAhandle to idx */
337 lastGAhandle = idx;
338 return;
339 }
340