1 /*
2  * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved.
4  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5  * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
6  * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
7  *
8  * This software is available to you under a choice of one of two
9  * licenses.  You may choose to be licensed under the terms of the GNU
10  * General Public License (GPL) Version 2, available from the file
11  * COPYING in the main directory of this source tree, or the
12  * OpenIB.org BSD license below:
13  *
14  *     Redistribution and use in source and binary forms, with or
15  *     without modification, are permitted provided that the following
16  *     conditions are met:
17  *
18  *      - Redistributions of source code must retain the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer.
21  *
22  *      - Redistributions in binary form must reproduce the above
23  *        copyright notice, this list of conditions and the following
24  *        disclaimer in the documentation and/or other materials
25  *        provided with the distribution.
26  *
27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34  * SOFTWARE.
35  *
36  */
37 
38 /*
39  * Abstract:
40  *    Implementation of osm_drop_mgr_t.
41  * This object represents the Drop Manager object.
42  * This object is part of the opensm family of objects.
43  */
44 
45 #if HAVE_CONFIG_H
46 #  include <config.h>
47 #endif				/* HAVE_CONFIG_H */
48 
49 #include <stdlib.h>
50 #include <string.h>
51 #include <iba/ib_types.h>
52 #include <complib/cl_qmap.h>
53 #include <complib/cl_passivelock.h>
54 #include <complib/cl_debug.h>
55 #include <complib/cl_ptr_vector.h>
56 #include <opensm/osm_file_ids.h>
57 #define FILE_ID OSM_FILE_DROP_MGR_C
58 #include <opensm/osm_sm.h>
59 #include <opensm/osm_router.h>
60 #include <opensm/osm_switch.h>
61 #include <opensm/osm_node.h>
62 #include <opensm/osm_guid.h>
63 #include <opensm/osm_helper.h>
64 #include <opensm/osm_multicast.h>
65 #include <opensm/osm_remote_sm.h>
66 #include <opensm/osm_inform.h>
67 #include <opensm/osm_ucast_mgr.h>
68 
69 static void drop_mgr_remove_router(osm_sm_t * sm, IN const ib_net64_t portguid)
70 {
71 	osm_router_t *p_rtr;
72 	cl_qmap_t *p_rtr_guid_tbl;
73 
74 	p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl;
75 	p_rtr = (osm_router_t *) cl_qmap_remove(p_rtr_guid_tbl, portguid);
76 	if (p_rtr != (osm_router_t *) cl_qmap_end(p_rtr_guid_tbl)) {
77 		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
78 			"Cleaned router for port guid 0x%016" PRIx64 "\n",
79 			cl_ntoh64(portguid));
80 		osm_router_delete(&p_rtr);
81 	}
82 }
83 
84 static void drop_mgr_clean_physp(osm_sm_t * sm, IN osm_physp_t * p_physp)
85 {
86 	osm_physp_t *p_remote_physp;
87 	osm_port_t *p_remote_port;
88 
89 	p_remote_physp = osm_physp_get_remote(p_physp);
90 	if (p_remote_physp) {
91 		p_remote_port = osm_get_port_by_guid(sm->p_subn,
92 						     p_remote_physp->port_guid);
93 
94 		if (p_remote_port) {
95 			/* Let's check if this is a case of link that is lost
96 			   (both ports weren't recognized), or a "hiccup" in the
97 			   subnet - in which case the remote port was
98 			   recognized, and its state is ACTIVE.
99 			   If this is just a "hiccup" - force a heavy sweep in
100 			   the next sweep. We don't want to lose that part of
101 			   the subnet. */
102 			if (p_remote_port->discovery_count &&
103 			    osm_physp_get_port_state(p_remote_physp) ==
104 			    IB_LINK_ACTIVE) {
105 				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
106 					"Forcing new heavy sweep. Remote "
107 					"port 0x%016" PRIx64 " port num: %u "
108 					"was recognized in ACTIVE state\n",
109 					cl_ntoh64(p_remote_physp->port_guid),
110 					p_remote_physp->port_num);
111 				sm->p_subn->force_heavy_sweep = TRUE;
112 			}
113 
114 			/* If the remote node is ca or router - need to remove
115 			   the remote port, since it is no longer reachable.
116 			   This can be done if we reset the discovery count
117 			   of the remote port. */
118 			if (!p_remote_physp->p_node->sw &&
119                             p_remote_physp->port_guid != sm->p_subn->sm_port_guid) {
120 				p_remote_port->discovery_count = 0;
121 				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
122 					"Resetting discovery count of node: "
123 					"0x%016" PRIx64 " port num:%u\n",
124 					cl_ntoh64(osm_node_get_node_guid
125 						  (p_remote_physp->p_node)),
126 					p_remote_physp->port_num);
127 			}
128 		}
129 
130 		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
131 			"Unlinking local node 0x%016" PRIx64 ", port %u"
132 			"\n\t\t\t\tand remote node 0x%016" PRIx64
133 			", port %u\n",
134 			cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)),
135 			p_physp->port_num,
136 			cl_ntoh64(osm_node_get_node_guid
137 				  (p_remote_physp->p_node)),
138 			p_remote_physp->port_num);
139 
140 		if (sm->ucast_mgr.cache_valid)
141 			osm_ucast_cache_add_link(&sm->ucast_mgr, p_physp,
142 						 p_remote_physp);
143 
144 		osm_physp_unlink(p_physp, p_remote_physp);
145 
146 	}
147 
148 	/* Make port as undiscovered */
149 	p_physp->p_node->physp_discovered[p_physp->port_num] = 0;
150 
151 	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
152 		"Clearing node 0x%016" PRIx64 " physical port number %u\n",
153 		cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)),
154 		p_physp->port_num);
155 
156 	osm_physp_destroy(p_physp);
157 }
158 
159 static void drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port)
160 {
161 	ib_net64_t port_guid;
162 	osm_port_t *p_port_check;
163 	cl_qmap_t *p_alias_guid_tbl;
164 	cl_qmap_t *p_sm_guid_tbl;
165 	osm_mcm_port_t *mcm_port;
166 	cl_ptr_vector_t *p_port_lid_tbl;
167 	uint16_t min_lid_ho;
168 	uint16_t max_lid_ho;
169 	uint16_t lid_ho;
170 	osm_node_t *p_node;
171 	osm_remote_sm_t *p_sm;
172 	osm_alias_guid_t *p_alias_guid, *p_alias_guid_check;
173 	osm_guidinfo_work_obj_t *wobj;
174 	cl_list_item_t *item, *next_item;
175 	ib_gid_t port_gid;
176 	ib_mad_notice_attr_t notice;
177 	ib_api_status_t status;
178 
179 	OSM_LOG_ENTER(sm->p_log);
180 
181 	port_guid = osm_port_get_guid(p_port);
182 	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
183 		"Unreachable port 0x%016" PRIx64 "\n", cl_ntoh64(port_guid));
184 
185 	p_port_check =
186 	    (osm_port_t *) cl_qmap_get(&sm->p_subn->port_guid_tbl, port_guid);
187 	if (p_port_check != p_port) {
188 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0101: "
189 			"Port 0x%016" PRIx64 " not in guid table\n",
190 			cl_ntoh64(port_guid));
191 		goto Exit;
192 	}
193 
194 	/* issue a notice - trap 65 (SM_GID_OUT_OF_SERVICE_TRAP) */
195 	/* details of the notice */
196 	notice.generic_type = 0x80 | IB_NOTICE_TYPE_SUBN_MGMT;	/* is generic subn mgt type */
197 	ib_notice_set_prod_type_ho(&notice, 4);	/* A class manager generator */
198 	/* endport ceases to be reachable */
199 	notice.g_or_v.generic.trap_num = CL_HTON16(SM_GID_OUT_OF_SERVICE_TRAP); /* 65 */
200 	/* The sm_base_lid is saved in network order already. */
201 	notice.issuer_lid = sm->p_subn->sm_base_lid;
202 	/* following C14-72.1.2 and table 119 p725 */
203 	/* we need to provide the GID */
204 	port_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix;
205 	port_gid.unicast.interface_id = port_guid;
206 	memcpy(&(notice.data_details.ntc_64_67.gid),
207 	       &(port_gid), sizeof(ib_gid_t));
208 
209 	/* According to page 653 - the issuer gid in this case of trap
210 	   is the SM gid, since the SM is the initiator of this trap. */
211 	notice.issuer_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix;
212 	notice.issuer_gid.unicast.interface_id = sm->p_subn->sm_port_guid;
213 
214 	status = osm_report_notice(sm->p_log, sm->p_subn, &notice);
215 	if (status != IB_SUCCESS) {
216 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0103: "
217 			"Error sending trap reports (%s)\n",
218 			ib_get_err_str(status));
219 	}
220 
221 	next_item = cl_qlist_head(&sm->p_subn->alias_guid_list);
222 	while (next_item != cl_qlist_end(&sm->p_subn->alias_guid_list)) {
223 		item = next_item;
224 		next_item = cl_qlist_next(item);
225 		wobj = cl_item_obj(item, wobj, list_item);
226 		if (wobj->p_port == p_port) {
227 			cl_qlist_remove_item(&sm->p_subn->alias_guid_list,
228 					     &wobj->list_item);
229 			osm_guid_work_obj_delete(wobj);
230 		}
231 	}
232 
233 	while (!cl_is_qlist_empty(&p_port->mcm_list)) {
234 		mcm_port = cl_item_obj(cl_qlist_head(&p_port->mcm_list),
235 				       mcm_port, list_item);
236 		osm_mgrp_delete_port(sm->p_subn, sm->p_log, mcm_port->mgrp,
237 				     p_port);
238 	}
239 
240 	p_alias_guid_tbl = &sm->p_subn->alias_port_guid_tbl;
241 	p_alias_guid_check = (osm_alias_guid_t *) cl_qmap_head(p_alias_guid_tbl);
242 	while (p_alias_guid_check != (osm_alias_guid_t *) cl_qmap_end(p_alias_guid_tbl)) {
243 		if (p_alias_guid_check->p_base_port == p_port)
244 			p_alias_guid = p_alias_guid_check;
245 		else
246 			p_alias_guid = NULL;
247 		p_alias_guid_check = (osm_alias_guid_t *) cl_qmap_next(&p_alias_guid_check->map_item);
248 		if (p_alias_guid) {
249 			cl_qmap_remove_item(p_alias_guid_tbl,
250 					    &p_alias_guid->map_item);
251 			osm_alias_guid_delete(&p_alias_guid);
252 		}
253 	}
254 
255 	cl_qmap_remove(&sm->p_subn->port_guid_tbl, port_guid);
256 
257 	p_sm_guid_tbl = &sm->p_subn->sm_guid_tbl;
258 	p_sm = (osm_remote_sm_t *) cl_qmap_remove(p_sm_guid_tbl, port_guid);
259 	if (p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_guid_tbl)) {
260 		/* need to remove this item */
261 		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
262 			"Cleaned SM for port guid 0x%016" PRIx64 "\n",
263 			cl_ntoh64(port_guid));
264 		free(p_sm);
265 	}
266 
267 	drop_mgr_remove_router(sm, port_guid);
268 
269 	osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
270 
271 	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
272 		"Clearing abandoned LID range [%u,%u]\n",
273 		min_lid_ho, max_lid_ho);
274 
275 	p_port_lid_tbl = &sm->p_subn->port_lid_tbl;
276 	for (lid_ho = min_lid_ho; lid_ho <= max_lid_ho; lid_ho++)
277 		cl_ptr_vector_set(p_port_lid_tbl, lid_ho, NULL);
278 
279 	drop_mgr_clean_physp(sm, p_port->p_physp);
280 
281 	/* Delete event forwarding subscriptions */
282 	if (sm->p_subn->opt.drop_event_subscriptions) {
283 		if (osm_infr_remove_subscriptions(sm->p_subn, sm->p_log, port_guid)
284 		    == CL_SUCCESS)
285 			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
286 			    "Removed event subscriptions for port 0x%016" PRIx64 "\n",
287 			    cl_ntoh64(port_guid));
288 	}
289 
290 	/* initialize the p_node - may need to get node_desc later */
291 	p_node = p_port->p_node;
292 
293 	osm_port_delete(&p_port);
294 
295 	OSM_LOG(sm->p_log, OSM_LOG_INFO,
296 		"Removed port with GUID:0x%016" PRIx64
297 		" LID range [%u, %u] of node:%s\n",
298 		cl_ntoh64(port_gid.unicast.interface_id),
299 		min_lid_ho, max_lid_ho,
300 		p_node ? p_node->print_desc : "UNKNOWN");
301 
302 Exit:
303 	OSM_LOG_EXIT(sm->p_log);
304 }
305 
306 static void drop_mgr_remove_switch(osm_sm_t * sm, IN osm_node_t * p_node)
307 {
308 	osm_switch_t *p_sw;
309 	cl_qmap_t *p_sw_guid_tbl;
310 	ib_net64_t node_guid;
311 
312 	OSM_LOG_ENTER(sm->p_log);
313 
314 	node_guid = osm_node_get_node_guid(p_node);
315 	p_sw_guid_tbl = &sm->p_subn->sw_guid_tbl;
316 
317 	p_sw = (osm_switch_t *) cl_qmap_remove(p_sw_guid_tbl, node_guid);
318 	if (p_sw == (osm_switch_t *) cl_qmap_end(p_sw_guid_tbl)) {
319 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0102: "
320 			"Node 0x%016" PRIx64 " not in switch table\n",
321 			cl_ntoh64(osm_node_get_node_guid(p_node)));
322 	} else {
323 		p_node->sw = NULL;
324 		osm_switch_delete(&p_sw);
325 	}
326 
327 	OSM_LOG_EXIT(sm->p_log);
328 }
329 
330 static boolean_t drop_mgr_process_node(osm_sm_t * sm, IN osm_node_t * p_node)
331 {
332 	osm_physp_t *p_physp;
333 	osm_port_t *p_port;
334 	osm_node_t *p_node_check;
335 	uint32_t port_num;
336 	uint32_t max_ports;
337 	ib_net64_t port_guid;
338 	boolean_t return_val = FALSE;
339 
340 	OSM_LOG_ENTER(sm->p_log);
341 
342 	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
343 		"Unreachable node 0x%016" PRIx64 "\n",
344 		cl_ntoh64(osm_node_get_node_guid(p_node)));
345 
346 	if (sm->ucast_mgr.cache_valid)
347 		osm_ucast_cache_add_node(&sm->ucast_mgr, p_node);
348 
349 	/*
350 	   Delete all the logical and physical port objects
351 	   associated with this node.
352 	 */
353 	max_ports = osm_node_get_num_physp(p_node);
354 	for (port_num = 0; port_num < max_ports; port_num++) {
355 		p_physp = osm_node_get_physp_ptr(p_node, port_num);
356 		if (p_physp) {
357 			port_guid = osm_physp_get_port_guid(p_physp);
358 
359 			p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
360 
361 			if (p_port)
362 				drop_mgr_remove_port(sm, p_port);
363 			else
364 				drop_mgr_clean_physp(sm, p_physp);
365 		}
366 	}
367 
368 	return_val = TRUE;
369 
370 	if (p_node->sw)
371 		drop_mgr_remove_switch(sm, p_node);
372 
373 	p_node_check =
374 	    (osm_node_t *) cl_qmap_remove(&sm->p_subn->node_guid_tbl,
375 					  osm_node_get_node_guid(p_node));
376 	if (p_node_check != p_node) {
377 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0105: "
378 			"Node 0x%016" PRIx64 " not in guid table\n",
379 			cl_ntoh64(osm_node_get_node_guid(p_node)));
380 	}
381 
382 	/* free memory allocated to node */
383 	osm_node_delete(&p_node);
384 
385 	OSM_LOG_EXIT(sm->p_log);
386 	return return_val;
387 }
388 
389 static void drop_mgr_check_switch_node(osm_sm_t * sm, IN osm_node_t * p_node)
390 {
391 	ib_net64_t node_guid;
392 	osm_physp_t *p_physp, *p_remote_physp;
393 	osm_node_t *p_remote_node;
394 	osm_port_t *p_port;
395 	ib_net64_t port_guid;
396 	uint8_t port_num, remote_port_num;
397 
398 	OSM_LOG_ENTER(sm->p_log);
399 
400 	node_guid = osm_node_get_node_guid(p_node);
401 
402 	/* Make sure we have a switch object for this node */
403 	if (!p_node->sw) {
404 		/* We do not have switch info for this node */
405 		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
406 			"Node 0x%016" PRIx64 " no switch in table\n",
407 			cl_ntoh64(node_guid));
408 
409 		drop_mgr_process_node(sm, p_node);
410 		goto Exit;
411 	}
412 
413 	/* Make sure we have a port object for port zero */
414 	p_physp = osm_node_get_physp_ptr(p_node, 0);
415 	if (!p_physp) {
416 		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
417 			"Node 0x%016" PRIx64 " no valid physical port 0\n",
418 			cl_ntoh64(node_guid));
419 
420 		drop_mgr_process_node(sm, p_node);
421 		goto Exit;
422 	}
423 
424 	port_guid = osm_physp_get_port_guid(p_physp);
425 
426 	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
427 
428 	if (!p_port) {
429 		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
430 			"Node 0x%016" PRIx64 " has no port object\n",
431 			cl_ntoh64(node_guid));
432 
433 		drop_mgr_process_node(sm, p_node);
434 		goto Exit;
435 	}
436 
437 	if (!p_node->physp_discovered[0]) {
438 		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
439 			"Node 0x%016" PRIx64 " port has discovery count zero\n",
440 			cl_ntoh64(node_guid));
441 
442 		drop_mgr_process_node(sm, p_node);
443 		goto Exit;
444 	}
445 
446 	/*
447 	 * Unlink all ports that havn't been discovered during the last sweep.
448 	 * Optimization: Skip the check if discovered all the ports of the switch.
449 	 */
450 	if (p_port->discovery_count < p_node->physp_tbl_size) {
451 		for (port_num = 1; port_num < p_node->physp_tbl_size; port_num++) {
452 			if (!p_node->physp_discovered[port_num]) {
453 				p_physp = osm_node_get_physp_ptr(p_node, port_num);
454 				if (!p_physp)
455 					continue;
456 				p_remote_physp = osm_physp_get_remote(p_physp);
457 				if (!p_remote_physp)
458 					continue;
459 
460 				p_remote_node =
461 				    osm_physp_get_node_ptr(p_remote_physp);
462 				remote_port_num =
463 				    osm_physp_get_port_num(p_remote_physp);
464 
465 				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
466 					"Unlinking local node 0x%" PRIx64
467 					", port %u"
468 					"\n\t\t\t\tand remote node 0x%" PRIx64
469 					", port %u due to missing PortInfo\n",
470 					cl_ntoh64(osm_node_get_node_guid
471 						  (p_node)), port_num,
472 					cl_ntoh64(osm_node_get_node_guid
473 						  (p_remote_node)),
474 					remote_port_num);
475 
476 				if (sm->ucast_mgr.cache_valid)
477 					osm_ucast_cache_add_link(&sm->ucast_mgr,
478 								 p_physp,
479 								 p_remote_physp);
480 
481 				osm_node_unlink(p_node, (uint8_t) port_num,
482 						p_remote_node,
483 						(uint8_t) remote_port_num);
484 			}
485 		}
486 	}
487 Exit:
488 	OSM_LOG_EXIT(sm->p_log);
489 	return;
490 }
491 
492 void osm_drop_mgr_process(osm_sm_t * sm)
493 {
494 	cl_qmap_t *p_node_guid_tbl, *p_port_guid_tbl;
495 	osm_port_t *p_port, *p_next_port;
496 	osm_node_t *p_node, *p_next_node;
497 	int max_ports, port_num;
498 	osm_physp_t *p_physp;
499 	ib_net64_t port_guid;
500 
501 	CL_ASSERT(sm);
502 
503 	OSM_LOG_ENTER(sm->p_log);
504 
505 	p_node_guid_tbl = &sm->p_subn->node_guid_tbl;
506 	p_port_guid_tbl = &sm->p_subn->port_guid_tbl;
507 
508 	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
509 
510 	p_next_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl);
511 	while (p_next_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl)) {
512 		p_node = p_next_node;
513 		p_next_node =
514 		    (osm_node_t *) cl_qmap_next(&p_next_node->map_item);
515 
516 		CL_ASSERT(cl_qmap_key(&p_node->map_item) ==
517 			  osm_node_get_node_guid(p_node));
518 
519 		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
520 			"Checking node 0x%016" PRIx64 "\n",
521 			cl_ntoh64(osm_node_get_node_guid(p_node)));
522 
523 		/*
524 		   Check if this node was discovered during the last sweep.
525 		   If not, it is unreachable in the current subnet, and
526 		   should therefore be removed from the subnet object.
527 		 */
528 		if (p_node->discovery_count == 0)
529 			drop_mgr_process_node(sm, p_node);
530 		else {
531 			/*
532 			 * We want to preserve the configured pkey indexes,
533 			 * so if we don't receive GetResp P_KeyTable for some block,
534 			 * do the following:
535 			 *   1. Drop node if the node is sw and got timeout for port 0.
536 			 *   2. Drop node if node is HCA/RTR.
537 			 *   3. Drop only physp if got timeout for sw when the port isn't 0.
538 			 * We'll set error during initialization in order to
539 			 * cause an immediate heavy sweep and try to get the
540 			 * configured P_KeyTable again.
541 			 */
542 			if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH)
543 				port_num = 0;
544 			else
545 				port_num = 1;
546 			max_ports = osm_node_get_num_physp(p_node);
547 			for (; port_num < max_ports; port_num++) {
548 				p_physp = osm_node_get_physp_ptr(p_node, port_num);
549 				if (!p_physp || p_physp->pkeys.rcv_blocks_cnt == 0)
550 					continue;
551 				p_physp->pkeys.rcv_blocks_cnt = 0;
552 				p_physp->need_update = 2;
553 				sm->p_subn->subnet_initialization_error = TRUE;
554 				port_guid = osm_physp_get_port_guid(p_physp);
555 				p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
556 				CL_ASSERT(p_port);
557 				if (p_node->physp_discovered[port_num]) {
558 					p_node->physp_discovered[port_num] = 0;
559 					p_port->discovery_count--;
560 				}
561 			}
562 		}
563 	}
564 
565 	/*
566 	   Go over all the nodes. If the node is a switch - make sure
567 	   there is also a switch record for it, and a portInfo record for
568 	   port zero of of the node.
569 	   If not - this means that there was some error in getting the data
570 	   of this node. Drop the node.
571 	 */
572 	p_next_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl);
573 	while (p_next_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl)) {
574 		p_node = p_next_node;
575 		p_next_node =
576 		    (osm_node_t *) cl_qmap_next(&p_next_node->map_item);
577 
578 		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
579 			"Checking full discovery of node 0x%016" PRIx64 "\n",
580 			cl_ntoh64(osm_node_get_node_guid(p_node)));
581 
582 		if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH)
583 			continue;
584 
585 		/* We are handling a switch node */
586 		drop_mgr_check_switch_node(sm, p_node);
587 	}
588 
589 	p_next_port = (osm_port_t *) cl_qmap_head(p_port_guid_tbl);
590 	while (p_next_port != (osm_port_t *) cl_qmap_end(p_port_guid_tbl)) {
591 		p_port = p_next_port;
592 		p_next_port =
593 		    (osm_port_t *) cl_qmap_next(&p_next_port->map_item);
594 
595 		CL_ASSERT(cl_qmap_key(&p_port->map_item) ==
596 			  osm_port_get_guid(p_port));
597 
598 		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
599 			"Checking port 0x%016" PRIx64 "\n",
600 			cl_ntoh64(osm_port_get_guid(p_port)));
601 
602 		/*
603 		   If the port is unreachable, remove it from the guid table.
604 		 */
605 		if (p_port->discovery_count == 0)
606 			drop_mgr_remove_port(sm, p_port);
607 	}
608 
609 	CL_PLOCK_RELEASE(sm->p_lock);
610 	OSM_LOG_EXIT(sm->p_log);
611 }
612