1 /*
2  * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved.
4  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5  * Copyright (c) 2009 HNR Consulting. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  *
35  */
36 
37 /*
38  * Abstract:
39  *    Implementation of osm_ni_rcv_t.
40  * This object represents the NodeInfo Receiver object.
41  * This object is part of the opensm family of objects.
42  */
43 
44 #if HAVE_CONFIG_H
45 #  include <config.h>
46 #endif				/* HAVE_CONFIG_H */
47 
48 #include <stdlib.h>
49 #include <string.h>
50 #include <iba/ib_types.h>
51 #include <complib/cl_qmap.h>
52 #include <complib/cl_passivelock.h>
53 #include <complib/cl_debug.h>
54 #include <opensm/osm_file_ids.h>
55 #define FILE_ID OSM_FILE_NODE_INFO_RCV_C
56 #include <opensm/osm_madw.h>
57 #include <opensm/osm_log.h>
58 #include <opensm/osm_node.h>
59 #include <opensm/osm_subnet.h>
60 #include <opensm/osm_router.h>
61 #include <opensm/osm_mad_pool.h>
62 #include <opensm/osm_helper.h>
63 #include <opensm/osm_msgdef.h>
64 #include <opensm/osm_opensm.h>
65 #include <opensm/osm_ucast_mgr.h>
66 #include <opensm/osm_db_pack.h>
67 
68 static void report_duplicated_guid(IN osm_sm_t * sm, osm_physp_t * p_physp,
69 				   osm_node_t * p_neighbor_node,
70 				   const uint8_t port_num)
71 {
72 	osm_physp_t *p_old, *p_new;
73 	osm_dr_path_t path;
74 
75 	p_old = p_physp->p_remote_physp;
76 	p_new = osm_node_get_physp_ptr(p_neighbor_node, port_num);
77 
78 	OSM_LOG(sm->p_log, OSM_LOG_SYS | OSM_LOG_ERROR, "ERR 0D01: "
79 		"Found duplicated node GUID.\n"
80 		"Node 0x%" PRIx64 " port %u is reachable from remote node "
81 		"0x%" PRIx64 " port %u and remote node 0x%" PRIx64 " port %u.\n"
82 		"Paths are:\n",
83 		cl_ntoh64(p_physp->p_node->node_info.node_guid),
84 		p_physp->port_num,
85 		p_old ? cl_ntoh64(p_old->p_node->node_info.node_guid) : 0,
86 		p_old ? p_old->port_num : 0,
87 		p_new ? cl_ntoh64(p_new->p_node->node_info.node_guid) : 0,
88 		p_new ? p_new->port_num : 0);
89 
90 	osm_dump_dr_path_v2(sm->p_log, osm_physp_get_dr_path_ptr(p_physp),
91 			    FILE_ID, OSM_LOG_ERROR);
92 
93 	path = *osm_physp_get_dr_path_ptr(p_new);
94 	if (osm_dr_path_extend(&path, port_num))
95 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D05: "
96 			"DR path with hop count %d couldn't be extended\n",
97 			path.hop_count);
98 	osm_dump_dr_path_v2(sm->p_log, &path, FILE_ID, OSM_LOG_ERROR);
99 }
100 
101 static void requery_dup_node_info(IN osm_sm_t * sm, osm_physp_t * p_physp,
102 				  unsigned count)
103 {
104 	osm_madw_context_t context;
105 	osm_dr_path_t path;
106 	cl_status_t status;
107 
108 	if (!p_physp->p_remote_physp) {
109 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0D: "
110 			"DR path couldn't be extended due to NULL remote physp\n");
111 		return;
112 	}
113 
114 	path = *osm_physp_get_dr_path_ptr(p_physp->p_remote_physp);
115 	if (osm_dr_path_extend(&path, p_physp->p_remote_physp->port_num)) {
116 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D08: "
117 			"DR path with hop count %d couldn't be extended\n",
118 			path.hop_count);
119 		return;
120 	}
121 
122 	context.ni_context.node_guid =
123 	    p_physp->p_remote_physp->p_node->node_info.port_guid;
124 	context.ni_context.port_num = p_physp->p_remote_physp->port_num;
125 	context.ni_context.dup_node_guid = p_physp->p_node->node_info.node_guid;
126 	context.ni_context.dup_port_num = p_physp->port_num;
127 	context.ni_context.dup_count = count;
128 
129 	status = osm_req_get(sm, &path, IB_MAD_ATTR_NODE_INFO, 0,
130 			     TRUE, 0, CL_DISP_MSGID_NONE, &context);
131 
132 	if (status != IB_SUCCESS)
133 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: "
134 			"Failure initiating NodeInfo request (%s)\n",
135 			ib_get_err_str(status));
136 }
137 
138 /**********************************************************************
139  The plock must be held before calling this function.
140 **********************************************************************/
141 static void ni_rcv_set_links(IN osm_sm_t * sm, osm_node_t * p_node,
142 			     const uint8_t port_num,
143 			     const osm_ni_context_t * p_ni_context)
144 {
145 	osm_node_t *p_neighbor_node;
146 	osm_physp_t *p_physp, *p_remote_physp;
147 
148 	OSM_LOG_ENTER(sm->p_log);
149 
150 	/*
151 	   A special case exists in which the node we're trying to
152 	   link is our own node.  In this case, the guid value in
153 	   the ni_context will be zero.
154 	 */
155 	if (p_ni_context->node_guid == 0) {
156 		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
157 			"Nothing to link for our own node 0x%" PRIx64 "\n",
158 			cl_ntoh64(osm_node_get_node_guid(p_node)));
159 		goto _exit;
160 	}
161 
162 	p_neighbor_node = osm_get_node_by_guid(sm->p_subn,
163 					       p_ni_context->node_guid);
164 	if (PF(!p_neighbor_node)) {
165 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D10: "
166 			"Unexpected removal of neighbor node 0x%" PRIx64 "\n",
167 			cl_ntoh64(p_ni_context->node_guid));
168 		goto _exit;
169 	}
170 
171 	/* When setting the link, ports on both
172 	   sides of the link should be initialized */
173 	CL_ASSERT(osm_node_link_has_valid_ports(p_node, port_num,
174 						p_neighbor_node,
175 						p_ni_context->port_num));
176 
177 	if (osm_node_link_exists(p_node, port_num,
178 				 p_neighbor_node, p_ni_context->port_num)) {
179 		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Link already exists\n");
180 		goto _exit;
181 	}
182 
183 	p_physp = osm_node_get_physp_ptr(p_node, port_num);
184 	if (!p_physp) {
185 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0E: "
186 			"Failed to find physp for port %d of Node GUID 0x%"
187 			PRIx64 "\n", port_num,
188 			cl_ntoh64(osm_node_get_node_guid(p_node)));
189 		goto _exit;
190 	}
191 
192 	/*
193 	 * If the link went UP, after we already discovered it, we shouldn't
194 	 * set the link between the ports and resweep.
195 	 */
196 	if (osm_physp_get_port_state(p_physp) == IB_LINK_DOWN &&
197 	    p_node->physp_discovered[port_num]) {
198 		/* Link down on another side. Don't create a link*/
199 		p_node->physp_discovered[port_num] = 0;
200 		sm->p_subn->force_heavy_sweep = TRUE;
201 		goto _exit;
202 	}
203 
204 	if (osm_node_has_any_link(p_node, port_num) &&
205 	    sm->p_subn->force_heavy_sweep == FALSE &&
206 	    (!p_ni_context->dup_count ||
207 	     (p_ni_context->dup_node_guid == osm_node_get_node_guid(p_node) &&
208 	      p_ni_context->dup_port_num == port_num))) {
209 		/*
210 		   Uh oh...
211 		   This could be reconnected ports, but also duplicated GUID
212 		   (2 nodes have the same guid) or a 12x link with lane reversal
213 		   that is not configured correctly.
214 		   We will try to recover by querying NodeInfo again.
215 		   In order to catch even fast port moving to new location(s)
216 		   and back we will count up to 5.
217 		   Some crazy reconnections (newly created switch loop right
218 		   before targeted CA) will not be catched this way. So in worst
219 		   case - report GUID duplication and request new discovery.
220 		   When switch node is targeted NodeInfo querying will be done
221 		   in opposite order, this is much stronger check, unfortunately
222 		   it is impossible with CAs.
223 		 */
224 		p_physp = osm_node_get_physp_ptr(p_node, port_num);
225 		if (!p_physp) {
226 			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0F: "
227 				"Failed to find physp for port %d of Node GUID 0x%"
228 				PRIx64 "\n", port_num,
229 				cl_ntoh64(osm_node_get_node_guid(p_node)));
230 			goto _exit;
231 		}
232 
233 		if (p_ni_context->dup_count > 5) {
234 			report_duplicated_guid(sm, p_physp, p_neighbor_node,
235 					       p_ni_context->port_num);
236 			sm->p_subn->force_heavy_sweep = TRUE;
237 		} else if (p_node->sw)
238 			requery_dup_node_info(sm, p_physp->p_remote_physp,
239 					      p_ni_context->dup_count + 1);
240 		else
241 			requery_dup_node_info(sm, p_physp,
242 					      p_ni_context->dup_count + 1);
243 	}
244 
245 	/*
246 	   When there are only two nodes with exact same guids (connected back
247 	   to back) - the previous check for duplicated guid will not catch
248 	   them. But the link will be from the port to itself...
249 	   Enhanced Port 0 is an exception to this
250 	 */
251 	if (osm_node_get_node_guid(p_node) == p_ni_context->node_guid &&
252 	    port_num == p_ni_context->port_num &&
253 	    port_num != 0 && cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) {
254 		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
255 			"Duplicate GUID found by link from a port to itself:"
256 			"node 0x%" PRIx64 ", port number %u\n",
257 			cl_ntoh64(osm_node_get_node_guid(p_node)), port_num);
258 		p_physp = osm_node_get_physp_ptr(p_node, port_num);
259 		if (!p_physp) {
260 			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1D: "
261 				"Failed to find physp for port %d of Node GUID 0x%"
262 				PRIx64 "\n", port_num,
263 				cl_ntoh64(osm_node_get_node_guid(p_node)));
264 			goto _exit;
265 		}
266 
267 		osm_dump_dr_path_v2(sm->p_log, osm_physp_get_dr_path_ptr(p_physp),
268 				    FILE_ID, OSM_LOG_VERBOSE);
269 
270 		if (sm->p_subn->opt.exit_on_fatal == TRUE) {
271 			osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID,
272 				   "Errors on subnet. Duplicate GUID found "
273 				   "by link from a port to itself. "
274 				   "See verbose opensm.log for more details\n");
275 			exit(1);
276 		}
277 	}
278 
279 	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
280 		"Creating new link between:\n\t\t\t\tnode 0x%" PRIx64
281 		", port number %u and\n\t\t\t\tnode 0x%" PRIx64
282 		", port number %u\n",
283 		cl_ntoh64(osm_node_get_node_guid(p_node)), port_num,
284 		cl_ntoh64(p_ni_context->node_guid), p_ni_context->port_num);
285 
286 	if (sm->ucast_mgr.cache_valid)
287 		osm_ucast_cache_check_new_link(&sm->ucast_mgr, p_node, port_num,
288 					       p_neighbor_node,
289 					       p_ni_context->port_num);
290 
291 	p_physp = osm_node_get_physp_ptr(p_node, port_num);
292 	p_remote_physp = osm_node_get_physp_ptr(p_neighbor_node,
293 						p_ni_context->port_num);
294 	if (!p_physp || !p_remote_physp)
295 		goto _exit;
296 
297 	osm_node_link(p_node, port_num, p_neighbor_node, p_ni_context->port_num);
298 
299 	osm_db_neighbor_set(sm->p_subn->p_neighbor,
300 			    cl_ntoh64(osm_physp_get_port_guid(p_physp)),
301 			    port_num,
302 			    cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
303 			    p_ni_context->port_num);
304 	osm_db_neighbor_set(sm->p_subn->p_neighbor,
305 			    cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
306 			    p_ni_context->port_num,
307 			    cl_ntoh64(osm_physp_get_port_guid(p_physp)),
308 			    port_num);
309 
310 _exit:
311 	OSM_LOG_EXIT(sm->p_log);
312 }
313 
314 static void ni_rcv_get_port_info(IN osm_sm_t * sm, IN osm_node_t * node,
315 				 IN const osm_madw_t * madw)
316 {
317 	osm_madw_context_t context;
318 	osm_physp_t *physp;
319 	ib_node_info_t *ni;
320 	unsigned port;
321 	ib_api_status_t status;
322 	int mlnx_epi_supported = 0;
323 
324 	ni = ib_smp_get_payload_ptr(osm_madw_get_smp_ptr(madw));
325 
326 	port = ib_node_info_get_local_port_num(ni);
327 
328 	if (sm->p_subn->opt.fdr10)
329 		mlnx_epi_supported = is_mlnx_ext_port_info_supported(
330 						ib_node_info_get_vendor_id(ni),
331 						ni->device_id);
332 
333 	physp = osm_node_get_physp_ptr(node, port);
334 	if (!physp) {
335 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1E: "
336 			"Failed to find physp for port %d of Node GUID 0x%"
337 			PRIx64 "\n", port,
338 			cl_ntoh64(osm_node_get_node_guid(node)));
339 		return;
340 	}
341 
342 	context.pi_context.node_guid = osm_node_get_node_guid(node);
343 	context.pi_context.port_guid = osm_physp_get_port_guid(physp);
344 	context.pi_context.set_method = FALSE;
345 	context.pi_context.light_sweep = FALSE;
346 	context.pi_context.active_transition = FALSE;
347 	context.pi_context.client_rereg = FALSE;
348 
349 	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(physp),
350 			     IB_MAD_ATTR_PORT_INFO, cl_hton32(port),
351 			     TRUE, 0, CL_DISP_MSGID_NONE, &context);
352 	if (status != IB_SUCCESS)
353 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD02: "
354 			"Failure initiating PortInfo request (%s)\n",
355 			ib_get_err_str(status));
356 	if (mlnx_epi_supported) {
357 		status = osm_req_get(sm,
358 				     osm_physp_get_dr_path_ptr(physp),
359 				     IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
360 				     cl_hton32(port),
361 				     TRUE, 0, CL_DISP_MSGID_NONE, &context);
362 		if (status != IB_SUCCESS)
363 			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0B: "
364 				"Failure initiating MLNX ExtPortInfo request (%s)\n",
365 				ib_get_err_str(status));
366 	}
367 }
368 
369 /**********************************************************************
370  The plock must be held before calling this function.
371 **********************************************************************/
372 void osm_req_get_node_desc(IN osm_sm_t * sm, osm_physp_t * p_physp)
373 {
374 	ib_api_status_t status = IB_SUCCESS;
375 	osm_madw_context_t context;
376 
377 	OSM_LOG_ENTER(sm->p_log);
378 
379 	context.nd_context.node_guid =
380 	    osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp));
381 
382 	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
383 			     IB_MAD_ATTR_NODE_DESC, 0, TRUE, 0,
384 			     CL_DISP_MSGID_NONE, &context);
385 	if (status != IB_SUCCESS)
386 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D03: "
387 			"Failure initiating NodeDescription request (%s)\n",
388 			ib_get_err_str(status));
389 
390 	OSM_LOG_EXIT(sm->p_log);
391 }
392 
393 /**********************************************************************
394  The plock must be held before calling this function.
395 **********************************************************************/
396 static void ni_rcv_get_node_desc(IN osm_sm_t * sm, IN osm_node_t * p_node,
397 				 IN const osm_madw_t * p_madw)
398 {
399 	ib_node_info_t *p_ni;
400 	ib_smp_t *p_smp;
401 	uint8_t port_num;
402 	osm_physp_t *p_physp = NULL;
403 
404 	OSM_LOG_ENTER(sm->p_log);
405 
406 	p_smp = osm_madw_get_smp_ptr(p_madw);
407 	p_ni = ib_smp_get_payload_ptr(p_smp);
408 	port_num = ib_node_info_get_local_port_num(p_ni);
409 
410 	/*
411 	   Request PortInfo & NodeDescription attributes for the port
412 	   that responded to the NodeInfo attribute.
413 	   Because this is a channel adapter or router, we are
414 	   not allowed to request PortInfo for the other ports.
415 	   Set the context union properly, so the recipient
416 	   knows which node & port are relevant.
417 	 */
418 	p_physp = osm_node_get_physp_ptr(p_node, port_num);
419 	if (!p_physp) {
420 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1F: "
421 			"Failed to find physp for port %d of Node GUID 0x%"
422 			PRIx64 "\n", port_num,
423 			cl_ntoh64(osm_node_get_node_guid(p_node)));
424 		return;
425 	}
426 
427 	osm_req_get_node_desc(sm, p_physp);
428 
429 	OSM_LOG_EXIT(sm->p_log);
430 }
431 
432 /**********************************************************************
433  The plock must be held before calling this function.
434 **********************************************************************/
435 static void ni_rcv_process_new_ca_or_router(IN osm_sm_t * sm,
436 					    IN osm_node_t * p_node,
437 					    IN const osm_madw_t * p_madw)
438 {
439 	OSM_LOG_ENTER(sm->p_log);
440 
441 	ni_rcv_get_port_info(sm, p_node, p_madw);
442 
443 	/*
444 	   A node guid of 0 is the corner case that indicates
445 	   we discovered our own node.  Initialize the subnet
446 	   object with the SM's own port guid.
447 	 */
448 	if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
449 		sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
450 
451 	OSM_LOG_EXIT(sm->p_log);
452 }
453 
454 /**********************************************************************
455  The plock must be held before calling this function.
456 **********************************************************************/
457 static void ni_rcv_process_existing_ca_or_router(IN osm_sm_t * sm,
458 						 IN osm_node_t * p_node,
459 						 IN const osm_madw_t * p_madw)
460 {
461 	ib_node_info_t *p_ni;
462 	ib_smp_t *p_smp;
463 	osm_port_t *p_port;
464 	osm_port_t *p_port_check;
465 	uint8_t port_num;
466 	osm_dr_path_t *p_dr_path;
467 	osm_alias_guid_t *p_alias_guid, *p_alias_guid_check;
468 
469 	OSM_LOG_ENTER(sm->p_log);
470 
471 	p_smp = osm_madw_get_smp_ptr(p_madw);
472 	p_ni = ib_smp_get_payload_ptr(p_smp);
473 	port_num = ib_node_info_get_local_port_num(p_ni);
474 
475 	/*
476 	   Determine if we have encountered this node through a
477 	   previously undiscovered port.  If so, build the new
478 	   port object.
479 	 */
480 	p_port = osm_get_port_by_guid(sm->p_subn, p_ni->port_guid);
481 	if (!p_port) {
482 		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
483 			"Creating new port object with GUID 0x%" PRIx64 "\n",
484 			cl_ntoh64(p_ni->port_guid));
485 
486 		osm_node_init_physp(p_node, port_num, p_madw);
487 
488 		p_port = osm_port_new(p_ni, p_node);
489 		if (PF(p_port == NULL)) {
490 			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D04: "
491 				"Unable to create new port object\n");
492 			goto Exit;
493 		}
494 
495 		/*
496 		   Add the new port object to the database.
497 		 */
498 		p_port_check =
499 		    (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
500 						  p_ni->port_guid,
501 						  &p_port->map_item);
502 		if (PF(p_port_check != p_port)) {
503 			/*
504 			   We should never be here!
505 			   Somehow, this port GUID already exists in the table.
506 			 */
507 			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D12: "
508 				"Port 0x%" PRIx64 " already in the database!\n",
509 				cl_ntoh64(p_ni->port_guid));
510 
511 			osm_port_delete(&p_port);
512 			goto Exit;
513 		}
514 
515 		p_alias_guid = osm_alias_guid_new(p_ni->port_guid,
516 						  p_port);
517 		if (PF(!p_alias_guid)) {
518 			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D11: "
519 				"alias guid memory allocation failed"
520 				" for port GUID 0x%" PRIx64 "\n",
521 				cl_ntoh64(p_ni->port_guid));
522 			goto alias_done;
523 		}
524 
525 		/* insert into alias guid table */
526 		p_alias_guid_check =
527 			(osm_alias_guid_t *) cl_qmap_insert(&sm->p_subn->alias_port_guid_tbl,
528 							    p_alias_guid->alias_guid,
529 							    &p_alias_guid->map_item);
530 		if (p_alias_guid_check != p_alias_guid) {
531 			/* alias GUID is a duplicate */
532 			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D13: "
533 				"Duplicate alias port GUID 0x%" PRIx64 "\n",
534 				cl_ntoh64(p_ni->port_guid));
535 			osm_alias_guid_delete(&p_alias_guid);
536 			osm_port_delete(&p_port);
537 			goto Exit;
538 		}
539 
540 alias_done:
541 		/* If we are a master, then this means the port is new on the subnet.
542 		   Mark it as new - need to send trap 64 for these ports.
543 		   The condition that we are master is true, since if we are in discovering
544 		   state (meaning we woke up from standby or we are just initializing),
545 		   then these ports may be new to us, but are not new on the subnet.
546 		   If we are master, then the subnet as we know it is the updated one,
547 		   and any new ports we encounter should cause trap 64. C14-72.1.1 */
548 		if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
549 			p_port->is_new = 1;
550 
551 	} else {
552 		osm_physp_t *p_physp = osm_node_get_physp_ptr(p_node, port_num);
553 
554 		if (PF(p_physp == NULL)) {
555 			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1C: "
556 				"No physical port found for node GUID 0x%"
557 				PRIx64 " port %u. Might be duplicate port GUID\n",
558 				cl_ntoh64(p_node->node_info.node_guid),
559 				port_num);
560 			goto Exit;
561 		}
562 
563 		/*
564 		   Update the DR Path to the port,
565 		   in case the old one is no longer available.
566 		 */
567 		p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
568 
569 		osm_dr_path_init(p_dr_path, p_smp->hop_count,
570 				 p_smp->initial_path);
571 	}
572 
573 	ni_rcv_get_port_info(sm, p_node, p_madw);
574 
575 Exit:
576 	OSM_LOG_EXIT(sm->p_log);
577 }
578 
579 static void ni_rcv_process_switch(IN osm_sm_t * sm, IN osm_node_t * p_node,
580 				  IN const osm_madw_t * p_madw)
581 {
582 	ib_api_status_t status = IB_SUCCESS;
583 	osm_physp_t *p_physp;
584 	osm_madw_context_t context;
585 	osm_dr_path_t *path;
586 	ib_smp_t *p_smp;
587 
588 	OSM_LOG_ENTER(sm->p_log);
589 
590 	p_smp = osm_madw_get_smp_ptr(p_madw);
591 
592 	p_physp = osm_node_get_physp_ptr(p_node, 0);
593 	/* update DR path of already initialized switch port 0 */
594 	path = osm_physp_get_dr_path_ptr(p_physp);
595 	osm_dr_path_init(path, p_smp->hop_count, p_smp->initial_path);
596 
597 	context.si_context.node_guid = osm_node_get_node_guid(p_node);
598 	context.si_context.set_method = FALSE;
599 	context.si_context.light_sweep = FALSE;
600 	context.si_context.lft_top_change = FALSE;
601 
602 	/* Request a SwitchInfo attribute */
603 	status = osm_req_get(sm, path, IB_MAD_ATTR_SWITCH_INFO, 0, TRUE, 0,
604 			     CL_DISP_MSGID_NONE, &context);
605 	if (status != IB_SUCCESS)
606 		/* continue despite error */
607 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D06: "
608 			"Failure initiating SwitchInfo request (%s)\n",
609 			ib_get_err_str(status));
610 
611 	OSM_LOG_EXIT(sm->p_log);
612 }
613 
614 /**********************************************************************
615  The plock must be held before calling this function.
616 **********************************************************************/
617 static void ni_rcv_process_existing_switch(IN osm_sm_t * sm,
618 					   IN osm_node_t * p_node,
619 					   IN const osm_madw_t * p_madw)
620 {
621 	OSM_LOG_ENTER(sm->p_log);
622 
623 	/*
624 	   If this switch has already been probed during this sweep,
625 	   then don't bother reprobing it.
626 	 */
627 	if (p_node->discovery_count == 1)
628 		ni_rcv_process_switch(sm, p_node, p_madw);
629 
630 	OSM_LOG_EXIT(sm->p_log);
631 }
632 
633 /**********************************************************************
634  The plock must be held before calling this function.
635 **********************************************************************/
636 static void ni_rcv_process_new_switch(IN osm_sm_t * sm, IN osm_node_t * p_node,
637 				      IN const osm_madw_t * p_madw)
638 {
639 	OSM_LOG_ENTER(sm->p_log);
640 
641 	ni_rcv_process_switch(sm, p_node, p_madw);
642 
643 	/*
644 	   A node guid of 0 is the corner case that indicates
645 	   we discovered our own node.  Initialize the subnet
646 	   object with the SM's own port guid.
647 	 */
648 	if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
649 		sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
650 
651 	OSM_LOG_EXIT(sm->p_log);
652 }
653 
654 /**********************************************************************
655  The plock must NOT be held before calling this function.
656 **********************************************************************/
657 static void ni_rcv_process_new(IN osm_sm_t * sm, IN const osm_madw_t * p_madw)
658 {
659 	osm_node_t *p_node;
660 	osm_node_t *p_node_check;
661 	osm_port_t *p_port;
662 	osm_port_t *p_port_check;
663 	osm_router_t *p_rtr = NULL;
664 	osm_router_t *p_rtr_check;
665 	cl_qmap_t *p_rtr_guid_tbl;
666 	ib_node_info_t *p_ni;
667 	ib_smp_t *p_smp;
668 	osm_ni_context_t *p_ni_context;
669 	osm_alias_guid_t *p_alias_guid, *p_alias_guid_check;
670 	uint8_t port_num;
671 
672 	OSM_LOG_ENTER(sm->p_log);
673 
674 	p_smp = osm_madw_get_smp_ptr(p_madw);
675 	p_ni = ib_smp_get_payload_ptr(p_smp);
676 	p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
677 	port_num = ib_node_info_get_local_port_num(p_ni);
678 
679 	osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_VERBOSE);
680 
681 	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
682 		"Discovered new %s node,"
683 		"\n\t\t\t\tGUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n",
684 		ib_get_node_type_str(p_ni->node_type),
685 		cl_ntoh64(p_ni->node_guid), cl_ntoh64(p_smp->trans_id));
686 
687 	if (PF(port_num > p_ni->num_ports)) {
688 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0A: "
689 			"New %s node GUID 0x%" PRIx64 "is non-compliant and "
690 			"is being ignored since the "
691 			"local port num %u > num ports %u\n",
692 			ib_get_node_type_str(p_ni->node_type),
693 			cl_ntoh64(p_ni->node_guid), port_num,
694 			p_ni->num_ports);
695 		goto Exit;
696 	}
697 
698 	p_node = osm_node_new(p_madw);
699 	if (PF(p_node == NULL)) {
700 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D07: "
701 			"Unable to create new node object\n");
702 		goto Exit;
703 	}
704 
705 	/*
706 	   Create a new port object to represent this node's physical
707 	   ports in the port table.
708 	 */
709 	p_port = osm_port_new(p_ni, p_node);
710 	if (PF(p_port == NULL)) {
711 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D14: "
712 			"Unable to create new port object\n");
713 		osm_node_delete(&p_node);
714 		goto Exit;
715 	}
716 
717 	/*
718 	   Add the new port object to the database.
719 	 */
720 	p_port_check =
721 	    (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
722 					  p_ni->port_guid, &p_port->map_item);
723 	if (PF(p_port_check != p_port)) {
724 		/*
725 		   We should never be here!
726 		   Somehow, this port GUID already exists in the table.
727 		 */
728 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D15: "
729 			"Duplicate Port GUID 0x%" PRIx64
730 			"! Found by the two directed routes:\n",
731 			cl_ntoh64(p_ni->port_guid));
732 		osm_dump_dr_path_v2(sm->p_log,
733 				    osm_physp_get_dr_path_ptr(p_port->p_physp),
734 				    FILE_ID, OSM_LOG_ERROR);
735 		osm_dump_dr_path_v2(sm->p_log,
736 				    osm_physp_get_dr_path_ptr(p_port_check->
737 							   p_physp),
738 				    FILE_ID, OSM_LOG_ERROR);
739 		osm_port_delete(&p_port);
740 		osm_node_delete(&p_node);
741 		goto Exit;
742 	}
743 
744 	p_alias_guid = osm_alias_guid_new(p_ni->port_guid,
745 					  p_port);
746 	if (PF(!p_alias_guid)) {
747 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D18: "
748 			"alias guid memory allocation failed"
749 			" for port GUID 0x%" PRIx64 "\n",
750 			cl_ntoh64(p_ni->port_guid));
751 		goto alias_done2;
752 	}
753 
754 	/* insert into alias guid table */
755 	p_alias_guid_check =
756 		(osm_alias_guid_t *) cl_qmap_insert(&sm->p_subn->alias_port_guid_tbl,
757 						    p_alias_guid->alias_guid,
758 						    &p_alias_guid->map_item);
759 	if (p_alias_guid_check != p_alias_guid) {
760 		/* alias GUID is a duplicate */
761 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D19: "
762 			"Duplicate alias port GUID 0x%" PRIx64 "\n",
763 			cl_ntoh64(p_ni->port_guid));
764 		osm_alias_guid_delete(&p_alias_guid);
765 	}
766 
767 alias_done2:
768 	/* If we are a master, then this means the port is new on the subnet.
769 	   Mark it as new - need to send trap 64 on these ports.
770 	   The condition that we are master is true, since if we are in discovering
771 	   state (meaning we woke up from standby or we are just initializing),
772 	   then these ports may be new to us, but are not new on the subnet.
773 	   If we are master, then the subnet as we know it is the updated one,
774 	   and any new ports we encounter should cause trap 64. C14-72.1.1 */
775 	if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
776 		p_port->is_new = 1;
777 
778 	/* If there were RouterInfo or other router attribute,
779 	   this would be elsewhere */
780 	if (p_ni->node_type == IB_NODE_TYPE_ROUTER) {
781 		if (PF((p_rtr = osm_router_new(p_port)) == NULL))
782 			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1A: "
783 				"Unable to create new router object\n");
784 		else {
785 			p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl;
786 			p_rtr_check =
787 			    (osm_router_t *) cl_qmap_insert(p_rtr_guid_tbl,
788 							    p_ni->port_guid,
789 							    &p_rtr->map_item);
790 			if (PF(p_rtr_check != p_rtr))
791 				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1B: "
792 					"Unable to add port GUID:0x%016" PRIx64
793 					" to router table\n",
794 					cl_ntoh64(p_ni->port_guid));
795 		}
796 	}
797 
798 	p_node_check =
799 	    (osm_node_t *) cl_qmap_insert(&sm->p_subn->node_guid_tbl,
800 					  p_ni->node_guid, &p_node->map_item);
801 	if (PF(p_node_check != p_node)) {
802 		/*
803 		   This node must have been inserted by another thread.
804 		   This is unexpected, but is not an error.
805 		   We can simply clean-up, since the other thread will
806 		   see this processing through to completion.
807 		 */
808 		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
809 			"Discovery race detected at node 0x%" PRIx64 "\n",
810 			cl_ntoh64(p_ni->node_guid));
811 		osm_node_delete(&p_node);
812 		p_node = p_node_check;
813 		ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
814 		goto Exit;
815 	} else
816 		ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
817 
818 	p_node->discovery_count++;
819 	ni_rcv_get_node_desc(sm, p_node, p_madw);
820 
821 	switch (p_ni->node_type) {
822 	case IB_NODE_TYPE_CA:
823 	case IB_NODE_TYPE_ROUTER:
824 		ni_rcv_process_new_ca_or_router(sm, p_node, p_madw);
825 		break;
826 	case IB_NODE_TYPE_SWITCH:
827 		ni_rcv_process_new_switch(sm, p_node, p_madw);
828 		break;
829 	default:
830 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
831 			"Unknown node type %u with GUID 0x%" PRIx64 "\n",
832 			p_ni->node_type, cl_ntoh64(p_ni->node_guid));
833 		break;
834 	}
835 
836 Exit:
837 	OSM_LOG_EXIT(sm->p_log);
838 }
839 
840 /**********************************************************************
841  The plock must be held before calling this function.
842 **********************************************************************/
843 static void ni_rcv_process_existing(IN osm_sm_t * sm, IN osm_node_t * p_node,
844 				    IN const osm_madw_t * p_madw)
845 {
846 	ib_node_info_t *p_ni;
847 	ib_smp_t *p_smp;
848 	osm_ni_context_t *p_ni_context;
849 	uint8_t port_num;
850 
851 	OSM_LOG_ENTER(sm->p_log);
852 
853 	p_smp = osm_madw_get_smp_ptr(p_madw);
854 	p_ni = ib_smp_get_payload_ptr(p_smp);
855 	p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
856 	port_num = ib_node_info_get_local_port_num(p_ni);
857 
858 	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
859 		"Rediscovered %s node 0x%" PRIx64 " TID 0x%" PRIx64
860 		", discovered %u times already\n",
861 		ib_get_node_type_str(p_ni->node_type),
862 		cl_ntoh64(p_ni->node_guid),
863 		cl_ntoh64(p_smp->trans_id), p_node->discovery_count);
864 
865 	if (PF(port_num > p_ni->num_ports)) {
866 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0C: "
867 			"Existing %s node GUID 0x%" PRIx64 "is non-compliant "
868 			"and is being ignored since the "
869 			"local port num %u > num ports %u\n",
870 			ib_get_node_type_str(p_ni->node_type),
871 			cl_ntoh64(p_ni->node_guid), port_num,
872 			p_ni->num_ports);
873 		goto Exit;
874 	}
875 
876 	/*
877 	   If we haven't already encountered this existing node
878 	   on this particular sweep, then process further.
879 	 */
880 	p_node->discovery_count++;
881 
882 	switch (p_ni->node_type) {
883 	case IB_NODE_TYPE_CA:
884 	case IB_NODE_TYPE_ROUTER:
885 		ni_rcv_process_existing_ca_or_router(sm, p_node, p_madw);
886 		break;
887 
888 	case IB_NODE_TYPE_SWITCH:
889 		ni_rcv_process_existing_switch(sm, p_node, p_madw);
890 		break;
891 
892 	default:
893 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D09: "
894 			"Unknown node type %u with GUID 0x%" PRIx64 "\n",
895 			p_ni->node_type, cl_ntoh64(p_ni->node_guid));
896 		break;
897 	}
898 
899 	if ( p_ni->sys_guid != p_node->node_info.sys_guid) {
900 		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Updated SysImageGUID: 0x%"
901 			PRIx64 " for node 0x%" PRIx64 "\n",
902 			cl_ntoh64(p_ni->sys_guid),
903 			cl_ntoh64(p_ni->node_guid));
904 	}
905 	ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
906 	p_node->node_info = *p_ni;
907 
908 Exit:
909 	OSM_LOG_EXIT(sm->p_log);
910 }
911 
912 void osm_ni_rcv_process(IN void *context, IN void *data)
913 {
914 	osm_sm_t *sm = context;
915 	osm_madw_t *p_madw = data;
916 	ib_node_info_t *p_ni;
917 	ib_smp_t *p_smp;
918 	osm_node_t *p_node;
919 
920 	CL_ASSERT(sm);
921 
922 	OSM_LOG_ENTER(sm->p_log);
923 
924 	CL_ASSERT(p_madw);
925 
926 	p_smp = osm_madw_get_smp_ptr(p_madw);
927 	p_ni = ib_smp_get_payload_ptr(p_smp);
928 
929 	CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_NODE_INFO);
930 
931 	if (PF(p_ni->node_guid == 0)) {
932 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
933 			"Got Zero Node GUID! Found on the directed route:\n");
934 		osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
935 		goto Exit;
936 	}
937 
938 	if (PF(p_ni->port_guid == 0)) {
939 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D17: "
940 			"Got Zero Port GUID! Found on the directed route:\n");
941 		osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
942 		goto Exit;
943 	}
944 
945 	if (ib_smp_get_status(p_smp)) {
946 		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
947 			"MAD status 0x%x received\n",
948 			cl_ntoh16(ib_smp_get_status(p_smp)));
949 		goto Exit;
950 	}
951 
952 	/*
953 	   Determine if this node has already been discovered,
954 	   and process accordingly.
955 	   During processing of this node, hold the shared lock.
956 	 */
957 
958 	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
959 	p_node = osm_get_node_by_guid(sm->p_subn, p_ni->node_guid);
960 
961 	osm_dump_node_info_v2(sm->p_log, p_ni, FILE_ID, OSM_LOG_DEBUG);
962 
963 	if (!p_node)
964 		ni_rcv_process_new(sm, p_madw);
965 	else
966 		ni_rcv_process_existing(sm, p_node, p_madw);
967 
968 	CL_PLOCK_RELEASE(sm->p_lock);
969 
970 Exit:
971 	OSM_LOG_EXIT(sm->p_log);
972 }
973