1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4  *                         University Research and Technology
5  *                         Corporation.  All rights reserved.
6  * Copyright (c) 2004-2011 The University of Tennessee and The University
7  *                         of Tennessee Research Foundation.  All rights
8  *                         reserved.
9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10  *                         University of Stuttgart.  All rights reserved.
11  * Copyright (c) 2004-2005 The Regents of the University of California.
12  *                         All rights reserved.
13  * Copyright (c) 2007-2015 Cisco Systems, Inc.  All rights reserved.
14  * Copyright (c) 2006-2007 Voltaire All rights reserved.
15  * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
16  * Copyright (c) 2015-2018 Research Organization for Information Science
17  *                         and Technology (RIST).  All rights reserved.
18  * Copyright (c) 2015      Mellanox Technologies. All rights reserved.
19  * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights
20  *                         reserved.
21  *
22  * $COPYRIGHT$
23  *
24  * Additional copyrights may follow
25  *
26  * $HEADER$
27  */
28 
29 #include "opal_config.h"
30 
31 #include "opal/util/arch.h"
32 #include "opal/mca/pmix/pmix.h"
33 
34 #include "btl_openib.h"
35 #include "btl_openib_proc.h"
36 #include "connect/base.h"
37 #include "connect/connect.h"
38 
39 static void mca_btl_openib_proc_btl_construct(mca_btl_openib_proc_btlptr_t* elem);
40 static void mca_btl_openib_proc_btl_destruct(mca_btl_openib_proc_btlptr_t* elem);
41 
42 OBJ_CLASS_INSTANCE(mca_btl_openib_proc_btlptr_t,
43         opal_list_item_t, mca_btl_openib_proc_btl_construct,
44         mca_btl_openib_proc_btl_destruct);
45 
/*
 * Constructor hook for mca_btl_openib_proc_btlptr_t: a freshly built
 * element does not reference any openib BTL module yet.
 */
static void mca_btl_openib_proc_btl_construct(mca_btl_openib_proc_btlptr_t *elem)
{
    elem->openib_btl = NULL;
}
50 
/*
 * Destructor hook for mca_btl_openib_proc_btlptr_t: only the pointer is
 * cleared; nothing is released through it here.
 */
static void mca_btl_openib_proc_btl_destruct(mca_btl_openib_proc_btlptr_t *elem)
{
    elem->openib_btl = NULL;
}
55 
56 static void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* proc);
57 static void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* proc);
58 
59 OBJ_CLASS_INSTANCE(mca_btl_openib_proc_t,
60         opal_list_item_t, mca_btl_openib_proc_construct,
61         mca_btl_openib_proc_destruct);
62 
/*
 * Constructor hook for mca_btl_openib_proc_t.
 *
 * Builds the embedded lock and BTL list, and leaves the proc with no
 * opal proc attached, no port table and no endpoint table.
 */
void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* ib_proc)
{
    /* embedded objects */
    OBJ_CONSTRUCT(&ib_proc->proc_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&ib_proc->openib_btls, opal_list_t);

    /* peer identity */
    ib_proc->proc_opal = NULL;

    /* per-port modex data */
    ib_proc->proc_port_count = 0;
    ib_proc->proc_ports = NULL;

    /* endpoint table */
    ib_proc->proc_endpoint_count = 0;
    ib_proc->proc_endpoints = NULL;
}
73 
74 /*
75  * Cleanup ib proc instance
76  */
77 
/*
 * Destructor hook for mca_btl_openib_proc_t.
 *
 * Releases everything the proc owns: the endpoint pointer table, every
 * CPC modex blob, every per-port CPC data array, the port table itself,
 * the proc lock and the list of registered BTL modules.
 *
 * NOTE(review): this relies on every proc_ports[] entry carrying an
 * initialized pm_cpc_data pointer and pm_cpc_data_count; confirm that the
 * allocation site zero-fills the table before a partial unpack can fail.
 */
void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* ib_proc)
{
    int i, j;

    /* free(NULL) is a no-op, so no guard is required */
    free(ib_proc->proc_endpoints);
    ib_proc->proc_endpoints = NULL;

    if (NULL != ib_proc->proc_ports) {
        for (i = 0; i < ib_proc->proc_port_count; ++i) {
            opal_btl_openib_connect_base_module_data_t *cpc_data =
                ib_proc->proc_ports[i].pm_cpc_data;

            /* a port whose CPC array was never allocated has nothing
             * to release (and must not be indexed) */
            if (NULL == cpc_data) {
                continue;
            }
            for (j = 0; j < ib_proc->proc_ports[i].pm_cpc_data_count; ++j) {
                free(cpc_data[j].cbm_modex_message);
            }
            /* the array itself is a separate allocation from the blobs */
            free(cpc_data);
            ib_proc->proc_ports[i].pm_cpc_data = NULL;
        }
        free(ib_proc->proc_ports);
        ib_proc->proc_ports = NULL;
    }
    OBJ_DESTRUCT(&ib_proc->proc_lock);

    OPAL_LIST_DESTRUCT(&ib_proc->openib_btls);
}
99 
100 
/*
 * Look for an existing IB process instance based on the associated
 * opal_proc_t instance.
 */
ibproc_lookup_no_lock(opal_proc_t * proc)105 static mca_btl_openib_proc_t* ibproc_lookup_no_lock(opal_proc_t* proc)
106 {
107     mca_btl_openib_proc_t* ib_proc;
108 
109     OPAL_LIST_FOREACH(ib_proc, &mca_btl_openib_component.ib_procs, mca_btl_openib_proc_t) {
110         if(ib_proc->proc_opal == proc) {
111             return ib_proc;
112         }
113     }
114     return NULL;
115 }
116 
ibproc_lookup_and_lock(opal_proc_t * proc)117 static mca_btl_openib_proc_t* ibproc_lookup_and_lock(opal_proc_t* proc)
118 {
119     mca_btl_openib_proc_t* ib_proc;
120 
121     /* get the process from the list */
122     opal_mutex_lock(&mca_btl_openib_component.ib_lock);
123     ib_proc = ibproc_lookup_no_lock(proc);
124     opal_mutex_unlock(&mca_btl_openib_component.ib_lock);
125     if( NULL != ib_proc ){
126         /* if we were able to find it - lock it.
127          * NOTE: we want to lock it outside of list locked region */
128         opal_mutex_lock(&ib_proc->proc_lock);
129     }
130     return ib_proc;
131 }
132 
/*
 * Read one byte from the cursor *src into *value, then advance the
 * cursor by one.
 */
static inline void unpack8(char **src, uint8_t *value)
{
    const char *cursor = *src;

    /* Copy one character */
    *value = (uint8_t) *cursor;
    /* Move the src ahead one */
    *src = *src + 1;
}
140 
/*
 * Create an IB process structure. There is a one-to-one correspondence
 * between an opal_proc_t and a mca_btl_openib_proc_t instance. We
 * cache additional data (specifically the list of
 * mca_btl_openib_endpoint_t instances, and published addresses)
 * associated w/ a given destination on this data structure.
 */
148 
mca_btl_openib_proc_get_locked(opal_proc_t * proc)149 mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc)
150 {
151     mca_btl_openib_proc_t *ib_proc = NULL, *ib_proc_ret = NULL;
152     size_t msg_size;
153     uint32_t size;
154     int rc, i, j;
155     void *message;
156     char *offset;
157     int modex_message_size;
158     mca_btl_openib_modex_message_t dummy;
159     bool is_new = false;
160 
161     /* Check if we have already created a IB proc
162      * structure for this ompi process */
163     ib_proc = ibproc_lookup_and_lock(proc);
164     if (NULL != ib_proc) {
165         /* Gotcha! */
166         return ib_proc;
167     }
168 
169     /* All initialization has to be an atomic operation. we do the following assumption:
170      * - we let all concurent threads to try to do the initialization;
171      * - when one has finished it locks ib_lock and checks if corresponding
172      *   process is still missing;
173      * - if so - new proc is added, otherwise - initialized proc struct is released.
174      */
175 
176     /* First time, gotta create a new IB proc
177      * out of the opal_proc ... */
178     ib_proc = OBJ_NEW(mca_btl_openib_proc_t);
179     if (NULL == ib_proc) {
180       return NULL;
181     }
182 
183     /* Initialize number of peer */
184     ib_proc->proc_endpoint_count = 0;
185     ib_proc->proc_opal = proc;
186 
187     /* query for the peer address info */
188     OPAL_MODEX_RECV(rc, &mca_btl_openib_component.super.btl_version,
189                     &proc->proc_name, &message, &msg_size);
190     if (OPAL_SUCCESS != rc) {
191         BTL_VERBOSE(("[%s:%d] opal_modex_recv failed for peer %s",
192                    __FILE__, __LINE__,
193                    OPAL_NAME_PRINT(proc->proc_name)));
194         goto no_err_exit;
195     }
196     if (0 == msg_size) {
197         goto no_err_exit;
198     }
199 
200     /* Message was packed in btl_openib_component.c; the format is
201        listed in a comment in that file */
202     modex_message_size = ((char *) &(dummy.end)) - ((char*) &dummy);
203 
204     /* Unpack the number of modules in the message */
205     offset = (char *) message;
206     unpack8(&offset, &(ib_proc->proc_port_count));
207     BTL_VERBOSE(("unpack: %d btls", ib_proc->proc_port_count));
208     if (ib_proc->proc_port_count > 0) {
209         ib_proc->proc_ports = (mca_btl_openib_proc_modex_t *)
210             malloc(sizeof(mca_btl_openib_proc_modex_t) *
211                    ib_proc->proc_port_count);
212     } else {
213         ib_proc->proc_ports = NULL;
214     }
215 
216     /* Loop over unpacking all the ports */
217     for (i = 0; i < ib_proc->proc_port_count; i++) {
218 
219         /* Unpack the modex comment message struct */
220         size = modex_message_size;
221         memcpy(&(ib_proc->proc_ports[i].pm_port_info), offset, size);
222 #if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
223         MCA_BTL_OPENIB_MODEX_MSG_NTOH(ib_proc->proc_ports[i].pm_port_info);
224 #endif
225         offset += size;
226         BTL_VERBOSE(("unpacked btl %d: modex message, offset now %d",
227                      i, (int)(offset-((char*)message))));
228 
229         /* Unpack the number of CPCs that follow */
230         unpack8(&offset, &(ib_proc->proc_ports[i].pm_cpc_data_count));
231         BTL_VERBOSE(("unpacked btl %d: number of cpcs to follow %d (offset now %d)",
232                      i, ib_proc->proc_ports[i].pm_cpc_data_count,
233                      (int)(offset-((char*)message))));
234         ib_proc->proc_ports[i].pm_cpc_data = (opal_btl_openib_connect_base_module_data_t *)
235             calloc(ib_proc->proc_ports[i].pm_cpc_data_count,
236                    sizeof(opal_btl_openib_connect_base_module_data_t));
237         if (NULL == ib_proc->proc_ports[i].pm_cpc_data) {
238             goto err_exit;
239         }
240 
241         /* Unpack the CPCs */
242         for (j = 0; j < ib_proc->proc_ports[i].pm_cpc_data_count; ++j) {
243             uint8_t u8;
244             opal_btl_openib_connect_base_module_data_t *cpcd;
245             cpcd = ib_proc->proc_ports[i].pm_cpc_data + j;
246             unpack8(&offset, &u8);
247             BTL_VERBOSE(("unpacked btl %d: cpc %d: index %d (offset now %d)",
248                          i, j, u8, (int)(offset-(char*)message)));
249             cpcd->cbm_component =
250                 opal_btl_openib_connect_base_get_cpc_byindex(u8);
251             BTL_VERBOSE(("unpacked btl %d: cpc %d: component %s",
252                          i, j, cpcd->cbm_component->cbc_name));
253 
254             unpack8(&offset, &cpcd->cbm_priority);
255             unpack8(&offset, &cpcd->cbm_modex_message_len);
256             BTL_VERBOSE(("unpacked btl %d: cpc %d: priority %d, msg len %d (offset now %d)",
257                          i, j, cpcd->cbm_priority,
258                          cpcd->cbm_modex_message_len,
259                          (int)(offset-(char*)message)));
260             if (cpcd->cbm_modex_message_len > 0) {
261                 cpcd->cbm_modex_message = malloc(cpcd->cbm_modex_message_len);
262                 if (NULL == cpcd->cbm_modex_message) {
263                     BTL_ERROR(("Failed to malloc"));
264                     goto err_exit;
265                 }
266                 memcpy(cpcd->cbm_modex_message, offset,
267                        cpcd->cbm_modex_message_len);
268                 offset += cpcd->cbm_modex_message_len;
269                 BTL_VERBOSE(("unpacked btl %d: cpc %d: blob unpacked %d %x (offset now %d)",
270                              i, j,
271                              ((uint32_t*)cpcd->cbm_modex_message)[0],
272                              ((uint32_t*)cpcd->cbm_modex_message)[1],
273                              (int)(offset-((char*)message))));
274             }
275         }
276     }
277 
278     if (0 == ib_proc->proc_port_count) {
279         ib_proc->proc_endpoints = NULL;
280     } else {
281         ib_proc->proc_endpoints = (volatile mca_btl_base_endpoint_t**)
282             malloc(ib_proc->proc_port_count *
283                    sizeof(mca_btl_base_endpoint_t*));
284     }
285     if (NULL == ib_proc->proc_endpoints) {
286         goto err_exit;
287     }
288 
289     BTL_VERBOSE(("unpacking done!"));
290 
291     /* Finally add this process to the initialized procs list */
292     opal_mutex_lock(&mca_btl_openib_component.ib_lock);
293 
294     ib_proc_ret = ibproc_lookup_no_lock(proc);
295     if (NULL == ib_proc_ret) {
296         /* if process can't be found in this list - insert it locked
297          * it is safe to lock ib_proc here because this thread is
298          * the only one who knows about it so far */
299         opal_mutex_lock(&ib_proc->proc_lock);
300         opal_list_append(&mca_btl_openib_component.ib_procs, &ib_proc->super);
301         ib_proc_ret = ib_proc;
302         is_new = true;
303     } else {
304         /* otherwise - release module_proc */
305         OBJ_RELEASE(ib_proc);
306     }
307     opal_mutex_unlock(&mca_btl_openib_component.ib_lock);
308 
309     /* if we haven't insert the process - lock it here so we
310      * won't lock mca_btl_openib_component.ib_lock */
311     if( !is_new ){
312         opal_mutex_lock(&ib_proc_ret->proc_lock);
313     }
314 
315     return ib_proc_ret;
316 
317 err_exit:
318 
319     BTL_ERROR(("%d: error exit from mca_btl_openib_proc_create", OPAL_PROC_MY_NAME.vpid));
320 
321 no_err_exit:
322 
323     OBJ_RELEASE(ib_proc);
324     return NULL;
325 }
326 
/*
 * Detach `endpoint` from the IB proc that corresponds to `proc`.
 *
 * The matching slot in proc_endpoints is set to NULL; the endpoint count
 * is only decremented when the removed slot was the last one in use.
 *
 * Returns OPAL_SUCCESS when the endpoint was found and cleared, and
 * OPAL_ERR_NOT_FOUND when either the proc or the endpoint is unknown.
 * The proc lock taken by the lookup is released on every path.
 */
int mca_btl_openib_proc_remove(opal_proc_t *proc,
                               mca_btl_base_endpoint_t *endpoint)
{
    size_t i;
    int rc = OPAL_ERR_NOT_FOUND;
    mca_btl_openib_proc_t* ib_proc = NULL;

    /* Remove endpoint from the openib BTL version of the proc as
       well */
    ib_proc = ibproc_lookup_and_lock(proc);
    if (NULL == ib_proc) {
        /* nothing was locked, nothing to undo */
        return OPAL_ERR_NOT_FOUND;
    }

    for (i = 0; i < ib_proc->proc_endpoint_count; ++i) {
        if (ib_proc->proc_endpoints[i] == endpoint) {
            ib_proc->proc_endpoints[i] = NULL;
            if (i == ib_proc->proc_endpoint_count - 1) {
                --ib_proc->proc_endpoint_count;
            }
            rc = OPAL_SUCCESS;
            break;
        }
    }

    /* ibproc_lookup_and_lock() returned the proc locked: drop the lock
     * whether or not the endpoint was found */
    opal_mutex_unlock(&ib_proc->proc_lock);
    return rc;
}
351 
352 /*
353  * Note that this routine must be called with the lock on the process
354  * already held.  Insert a btl instance into the proc array and assign
355  * it an address.
356  */
/*
 * Append `module_endpoint` to the endpoint table of `module_proc` and
 * point the endpoint back at the proc.  Always returns OPAL_SUCCESS.
 *
 * Before the endpoint is stored, two of its flags are adjusted from the
 * peer's architecture word:
 *  - nbo is set when this build is little endian and the peer is flagged
 *    big endian;
 *  - use_eager_rdma is cleared when the peer and the local proc disagree
 *    on the OPAL_ARCH_LONGISxx bits.
 *
 * No bounds check is made against the size of proc_endpoints; the caller
 * is responsible for not inserting more endpoints than the table holds.
 */
int mca_btl_openib_proc_insert(mca_btl_openib_proc_t* module_proc,
        mca_btl_base_endpoint_t* module_endpoint)
{
#ifndef WORDS_BIGENDIAN
    /* A little endian host talking to a big endian peer exchanges all
       data in Network Byte Order (big endian), both when sending and
       when receiving.  Big endian hosts always use NBO already, so
       nothing has to be flagged in that case. */
    if (0 != (module_proc->proc_opal->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
        module_endpoint->nbo = true;
    }
#endif

    /* eager rdma requires both sides to agree on the size of a long */
    if ((opal_proc_local_get()->proc_arch & OPAL_ARCH_LONGISxx) !=
        (module_proc->proc_opal->proc_arch & OPAL_ARCH_LONGISxx)) {
        module_endpoint->use_eager_rdma = false;
    }

    /* link both ways: endpoint -> proc, then proc -> endpoint */
    module_endpoint->endpoint_proc = module_proc;
    module_proc->proc_endpoints[module_proc->proc_endpoint_count] = module_endpoint;
    module_proc->proc_endpoint_count++;

    return OPAL_SUCCESS;
}
384 
/*
 * Record that `openib_btl` has touched `ib_proc`.
 *
 * Returns OPAL_ERR_RESOURCE_BUSY when the module is already on the proc's
 * openib_btls list (a normal outcome, not a failure),
 * OPAL_ERR_OUT_OF_RESOURCE when a new list element cannot be created, and
 * OPAL_SUCCESS once the module has been appended.
 */
int mca_btl_openib_proc_reg_btl(mca_btl_openib_proc_t* ib_proc,
                                mca_btl_openib_module_t* openib_btl)
{
    mca_btl_openib_proc_btlptr_t *entry;

    /* already registered? */
    OPAL_LIST_FOREACH(entry, &ib_proc->openib_btls, mca_btl_openib_proc_btlptr_t) {
        if (openib_btl == entry->openib_btl) {
            /* this is normal return meaning that this BTL has already touched this ib_proc */
            return OPAL_ERR_RESOURCE_BUSY;
        }
    }

    entry = OBJ_NEW(mca_btl_openib_proc_btlptr_t);
    if (NULL != entry) {
        entry->openib_btl = openib_btl;
        opal_list_append(&ib_proc->openib_btls, &entry->super);
        return OPAL_SUCCESS;
    }

    return OPAL_ERR_OUT_OF_RESOURCE;
}
405