1 /*
2  * Copyright (c) 2013-2018 Mellanox Technologies, Inc.
3  *                         All rights reserved.
4  * Copyright (c) 2016      Research Organization for Information Science
5  *                         and Technology (RIST). All rights reserved.
6  * Copyright (c) 2017      Cisco Systems, Inc.  All rights reserved
7  * $COPYRIGHT$
8  *
9  * Additional copyrights may follow
10  *
11  * $HEADER$
12  */
13 #ifndef OSHMEM_PROC_PROC_H
14 #define OSHMEM_PROC_PROC_H
15 
16 #include "oshmem_config.h"
17 #include "oshmem/types.h"
18 #include "oshmem/constants.h"
19 
20 #include "opal/class/opal_list.h"
21 #include "opal/util/proc.h"
22 #include "opal/dss/dss_types.h"
23 #include "opal/mca/hwloc/hwloc-internal.h"
24 
25 #include "orte/types.h"
26 #include "orte/runtime/orte_globals.h"
27 
28 #include "ompi/proc/proc.h"
29 #include "ompi/communicator/communicator.h"
30 
31 #include "oshmem/mca/scoll/scoll.h"
32 #include "oshmem/runtime/runtime.h"
33 #include "oshmem/shmem/shmem_api_logger.h"
34 
35 BEGIN_C_DECLS
36 
37 /* ******************************************************************** */
38 
39 struct oshmem_group_t;
40 
41 #define OSHMEM_PE_INVALID   (-1)
42 
/**
 * Per-process OSHMEM data kept inside an ompi_proc_t.
 *
 * This structure is not allocated on its own: OSHMEM_PROC_DATA() below
 * reinterprets the \c padding member of a proc as an oshmem_proc_data_t.
 * Consequently sizeof(oshmem_proc_data_t) must be less than or equal to
 * OMPI_PROC_PADDING_SIZE.
 */
struct oshmem_proc_data_t {
    char * transport_ids;   /**< transport ids; entry 0 is the one returned by
                                 oshmem_get_transport_id() */
    int num_transports;     /**< value returned by oshmem_get_transport_count() */
};

typedef struct oshmem_proc_data_t oshmem_proc_data_t;

#ifdef OMPI_PROC_PADDING_SIZE
/* Compile-time enforcement of the size constraint described above: the array
 * size becomes negative (a hard compile error) if the structure outgrows the
 * padding area.  The check is skipped when OMPI_PROC_PADDING_SIZE is not
 * visible as a preprocessor macro. */
typedef char oshmem_proc_data_fits_in_proc_padding[
    (sizeof(oshmem_proc_data_t) <= OMPI_PROC_PADDING_SIZE) ? 1 : -1];
#endif

/**
 * View the \c padding member of \a proc as an oshmem_proc_data_t.
 *
 * @param proc  Pointer to an object that has a \c padding member
 *
 * @return Pointer of type oshmem_proc_data_t * aliasing (proc)->padding
 */
#define OSHMEM_PROC_DATA(proc) \
    ((oshmem_proc_data_t *)(proc)->padding)
55 
56 /**
57  * Group of Open SHMEM processes structure
58  *
59  * Set of processes used in collective operations.
60  */
61 struct oshmem_group_t {
62     opal_object_t               base;
63     int                         id;             /**< index in global array */
64     int                         my_pe;
65     int                         proc_count;     /**< number of processes in group */
66     int                         is_member;   /* true if my_pe is part of the group, participate in collectives */
67     struct ompi_proc_t          **proc_array; /**< list of pointers to ompi_proc_t structures
68                                                    for each process in the group */
69     opal_list_t                 peer_list;
70 
71     /* Collectives module interface and data */
72     mca_scoll_base_group_scoll_t g_scoll;
73     ompi_communicator_t*         ompi_comm;
74 };
75 typedef struct oshmem_group_t oshmem_group_t;
76 OSHMEM_DECLSPEC OBJ_CLASS_DECLARATION(oshmem_group_t);
77 
78 OSHMEM_DECLSPEC extern oshmem_group_t* oshmem_group_all;
79 OSHMEM_DECLSPEC extern oshmem_group_t* oshmem_group_self;
80 OSHMEM_DECLSPEC extern oshmem_group_t* oshmem_group_null;
81 
82 
83 /* ******************************************************************** */
84 
85 /**
86  * Initialize the OSHMEM process subsystem
87  *
88  * Initialize the Open SHMEM process subsystem.  This function will
89  * query the run-time environment and build a list of the proc
90  * instances in the current pe set.  The local information not
91  * easily determined by the run-time ahead of time (architecture and
92  * hostname) will be published during this call.
93  *
94  * @note While an ompi_proc_t will exist with mostly valid information
95  * for each process in the pe set at the conclusion of this
96  * call, some information will not be immediately available.  This
97  * includes the architecture and hostname, which will be available by
98  * the conclusion of the stage gate.
99  *
 * @retval OSHMEM_SUCCESS System successfully initialized
101  * @retval OSHMEM_ERROR   Initialization failed due to unspecified error
102  */
103 OSHMEM_DECLSPEC int oshmem_proc_init(void);
104 
105 /**
106  * Finalize the OSHMEM Process subsystem
107  *
108  * Finalize the Open SHMEM process subsystem.  This function will
109  * release all memory created during the life of the application,
110  * including all ompi_proc_t structures.
111  *
112  * @retval OSHMEM_SUCCESS  System successfully finalized
113  */
114 OSHMEM_DECLSPEC int oshmem_proc_finalize(void);
115 
116 /**
117  * Returns a pointer to the local process
118  *
119  * Returns a pointer to the local process.  Unlike oshmem_proc_self(),
120  * the reference count on the local proc instance is not modified by
121  * this function.
122  *
123  * @return Pointer to the local process structure
124  */
oshmem_proc_local(void)125 static inline ompi_proc_t *oshmem_proc_local(void)
126 {
127     return (ompi_proc_t *)ompi_proc_local_proc;
128 }
129 
130 /**
131  * Returns the proc instance for a given name
132  *
133  * Returns the proc instance for the specified process name.  The
134  * reference count for the proc instance is not incremented by this
135  * function.
136  *
137  * @param[in] name     The process name to look for
138  *
139  * @return Pointer to the process instance for \c name
140  */
oshmem_proc_for_find(const orte_process_name_t name)141 static inline ompi_proc_t *oshmem_proc_for_find(const orte_process_name_t name)
142 {
143     return (ompi_proc_t *)ompi_proc_for_name(name);
144 }
145 
oshmem_proc_find(int pe)146 static inline ompi_proc_t *oshmem_proc_find(int pe)
147 {
148     orte_process_name_t name;
149 
150     name.jobid = ORTE_PROC_MY_NAME->jobid;
151     name.vpid = pe;
152     return oshmem_proc_for_find(name);
153 }
154 
/* Accessors for the job id / vpid held in the process name of PROC.
 * PROC must be a valid (non-NULL) proc pointer: it is dereferenced. */
#define OSHMEM_PROC_JOBID(PROC)    (((orte_process_name_t*)&((PROC)->super.proc_name))->jobid)
#define OSHMEM_PROC_VPID(PROC)     (((orte_process_name_t*)&((PROC)->super.proc_name))->vpid)

/**
 * Get the PE number of a proc
 *
 * @param[in] proc     Proc to query; may be NULL
 *
 * @return The vpid of the proc's process name converted to int,
 *         or -1 when \c proc is NULL
 */
static inline int oshmem_proc_pe(ompi_proc_t *proc)
{
    if (!proc) {
        return -1;
    }

    return (int) OSHMEM_PROC_VPID(proc);
}
162 
163 /**
164  * Initialize the OSHMEM process predefined groups
165  *
166  * Initialize the Open SHMEM process predefined groups.  This function will
167  * query the run-time environment and build a list of the proc
168  * instances in the current pe set.  The local information not
169  * easily determined by the run-time ahead of time (architecture and
170  * hostname) will be published during this call.
171  *
172  * @note This is primarily used once during SHMEM setup.
173  *
 * @retval OSHMEM_SUCCESS System successfully initialized
175  * @retval OSHMEM_ERROR   Initialization failed due to unspecified error
176  */
177 OSHMEM_DECLSPEC int oshmem_proc_group_init(void);
178 
179 /**
180  * Finalize the OSHMEM process predefined groups
181  *
 * @retval OSHMEM_SUCCESS Predefined groups successfully finalized
 * @retval OSHMEM_ERROR   Finalization failed due to unspecified error
184  */
185 OSHMEM_DECLSPEC int oshmem_proc_group_finalize(void);
186 
187 /**
188  * Release collectives used by the groups. The function
189  * must be called prior to the oshmem_proc_group_finalize()
190  */
191 OSHMEM_DECLSPEC void oshmem_proc_group_finalize_scoll(void);
192 
193 /**
194  * Create processes group.
195  *
196  * Returns the list of known proc instances located in this group.
197  *
198  * @param[in] pe_start     The lowest PE in the active set.
199  * @param[in] pe_stride    The log (base 2) of the stride between consecutive
200  *                         PEs in the active set.
201  * @param[in] pe_size      The number of PEs in the active set.
202  *
 * @return Pointer to the created group, or NULL if there is an
 * internal failure.
205  */
206 OSHMEM_DECLSPEC oshmem_group_t *oshmem_proc_group_create(int pe_start,
207                                                          int pe_stride,
208                                                          int pe_size);
209 
210 /**
211  * same as above but abort on failure
212  */
213 static inline oshmem_group_t *
oshmem_proc_group_create_nofail(int pe_start,int pe_stride,int pe_size)214 oshmem_proc_group_create_nofail(int pe_start, int pe_stride, int pe_size)
215 {
216     oshmem_group_t *group;
217 
218     group = oshmem_proc_group_create(pe_start, pe_stride, pe_size);
219     if (NULL == group) {
220         goto fatal;
221     }
222     return group;
223 
224 fatal:
225     SHMEM_API_ERROR("Failed to create group (%d,%d,%d)",
226                     pe_start, pe_stride, pe_size);
227     oshmem_shmem_abort(-1);
228     return NULL;
229 }
230 
231 
232 /**
233  * Destroy processes group.
234  *
235  */
236 OSHMEM_DECLSPEC void oshmem_proc_group_destroy(oshmem_group_t* group);
237 
oshmem_proc_group_all(int pe)238 static inline ompi_proc_t *oshmem_proc_group_all(int pe)
239 {
240     return oshmem_group_all->proc_array[pe];
241 }
242 
oshmem_proc_group_find(oshmem_group_t * group,int pe)243 static inline ompi_proc_t *oshmem_proc_group_find(oshmem_group_t* group,
244                                                     int pe)
245 {
246     int i = 0;
247     ompi_proc_t* proc = NULL;
248 
249     if (OPAL_LIKELY(group)) {
250         if (OPAL_LIKELY(group == oshmem_group_all)) {
251             /* To improve performance use direct index. It is feature of oshmem_group_all */
252             proc = group->proc_array[pe];
253         } else {
254             for (i = 0; i < group->proc_count; i++) {
255                 if (pe == oshmem_proc_pe(group->proc_array[i])) {
256                     proc = group->proc_array[i];
257                     break;
258                 }
259             }
260         }
261     } else {
262         orte_process_name_t name;
263 
264         name.jobid = ORTE_PROC_MY_NAME->jobid;
265         name.vpid = pe;
266         proc = oshmem_proc_for_find(name);
267     }
268 
269     return proc;
270 }
271 
/**
 * Find the position of a PE inside a group
 *
 * Walks the group's proc_array from the start and stops at the first entry
 * whose oshmem_proc_pe() equals \c pe.
 *
 * @param[in] group    Group to search; may be NULL
 * @param[in] pe       PE number to look for
 *
 * @return Index of the first matching entry in proc_array, or -1 when
 *         \c group is NULL or holds no such PE
 */
static inline int oshmem_proc_group_find_id(oshmem_group_t* group, int pe)
{
    int idx;

    if (!group) {
        return -1;
    }

    for (idx = 0; idx < group->proc_count; idx++) {
        if (oshmem_proc_pe(group->proc_array[idx]) == pe) {
            return idx;
        }
    }

    return -1;
}
288 
/**
 * Report the membership flag of a group
 *
 * @param[in] group    Group to query; dereferenced without a NULL check
 *
 * @return The group's is_member field
 */
static inline int oshmem_proc_group_is_member(oshmem_group_t *group)
{
    int member_flag = group->is_member;

    return member_flag;
}
293 
oshmem_num_procs(void)294 static inline int oshmem_num_procs(void)
295 {
296     return (oshmem_group_all ?
297         oshmem_group_all->proc_count : (int)opal_list_get_size(&ompi_proc_list));
298 }
299 
oshmem_my_proc_id(void)300 static inline int oshmem_my_proc_id(void)
301 {
302     return oshmem_group_self->my_pe;
303 }
304 
oshmem_get_transport_id(int pe)305 static inline int oshmem_get_transport_id(int pe)
306 {
307     ompi_proc_t *proc;
308 
309     proc = oshmem_proc_group_find(oshmem_group_all, pe);
310 
311     return (int) OSHMEM_PROC_DATA(proc)->transport_ids[0];
312 }
313 
oshmem_get_transport_count(int pe)314 static inline int oshmem_get_transport_count(int pe)
315 {
316     ompi_proc_t *proc;
317     proc = oshmem_proc_group_find(oshmem_group_all, pe);
318     return OSHMEM_PROC_DATA(proc)->num_transports;
319 }
320 
321 END_C_DECLS
322 
323 #endif /* OSHMEM_PROC_PROC_H */
324