xref: /illumos-gate/usr/src/uts/common/os/sunmdi.c (revision fa084259)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 #pragma ident	"%Z%%M%	%I%	%E% SMI"
26 
27 /*
28  * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more
29  * detailed discussion of the overall mpxio architecture.
30  *
31  * Default locking order:
32  *
33  * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_phci_mutex))
34  * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_client_mutex))
35  * _NOTE(LOCK_ORDER(mdi_vhci::vh_phci_mutex mdi_phci::ph_mutex))
36  * _NOTE(LOCK_ORDER(mdi_vhci::vh_client_mutex mdi_client::ct_mutex))
37  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
38  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
39  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
40  */
41 
42 #include <sys/note.h>
43 #include <sys/types.h>
44 #include <sys/varargs.h>
45 #include <sys/param.h>
46 #include <sys/errno.h>
47 #include <sys/uio.h>
48 #include <sys/buf.h>
49 #include <sys/modctl.h>
50 #include <sys/open.h>
51 #include <sys/kmem.h>
52 #include <sys/poll.h>
53 #include <sys/conf.h>
54 #include <sys/bootconf.h>
55 #include <sys/cmn_err.h>
56 #include <sys/stat.h>
57 #include <sys/ddi.h>
58 #include <sys/sunddi.h>
59 #include <sys/ddipropdefs.h>
60 #include <sys/sunndi.h>
61 #include <sys/ndi_impldefs.h>
62 #include <sys/promif.h>
63 #include <sys/sunmdi.h>
64 #include <sys/mdi_impldefs.h>
65 #include <sys/taskq.h>
66 #include <sys/epm.h>
67 #include <sys/sunpm.h>
68 #include <sys/modhash.h>
69 #include <sys/disp.h>
70 #include <sys/autoconf.h>
71 #include <sys/sysmacros.h>
72 
#ifdef	DEBUG
#include <sys/debug.h>
int	mdi_debug = 1;
int	mdi_debug_logonly = 0;
/*
 * MDI_DEBUG(level, (args)):
 *	Call i_mdi_log with the parenthesized argument list 'stmnt' when
 *	mdi_debug is at least 'level'.  The body is wrapped in
 *	do { } while (0) so that the macro is a single statement: a bare
 *	'if' here would capture the 'else' of an enclosing if/else and make
 *	DEBUG and non-DEBUG kernels parse the same source differently.
 *	Every use must be followed by a semicolon.
 */
#define	MDI_DEBUG(level, stmnt) \
	do { \
		if (mdi_debug >= (level)) \
			i_mdi_log stmnt; \
	_NOTE(CONSTCOND) } while (0)
static void i_mdi_log(int, dev_info_t *, const char *fmt, ...);
#else	/* !DEBUG */
/* Debug logging compiles away entirely in non-DEBUG kernels. */
#define	MDI_DEBUG(level, stmnt)
#endif	/* DEBUG */
83 
84 extern pri_t	minclsyspri;
85 extern int	modrootloaded;
86 
87 /*
88  * Global mutex:
89  * Protects vHCI list and structure members.
90  */
91 kmutex_t	mdi_mutex;
92 
93 /*
94  * Registered vHCI class driver lists
95  */
96 int		mdi_vhci_count;
97 mdi_vhci_t	*mdi_vhci_head;
98 mdi_vhci_t	*mdi_vhci_tail;
99 
100 /*
101  * Client Hash Table size
102  */
103 static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
104 
105 /*
106  * taskq interface definitions
107  */
108 #define	MDI_TASKQ_N_THREADS	8
109 #define	MDI_TASKQ_PRI		minclsyspri
110 #define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
111 #define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)
112 
113 taskq_t				*mdi_taskq;
114 static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
115 
116 #define	TICKS_PER_SECOND	(drv_usectohz(1000000))
117 
118 /*
119  * The data should be "quiet" for this interval (in seconds) before the
120  * vhci cached data is flushed to the disk.
121  */
122 static int mdi_vhcache_flush_delay = 10;
123 
124 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
125 static int mdi_vhcache_flush_daemon_idle_time = 60;
126 
127 /*
128  * MDI falls back to discovery of all paths when a bus_config_one fails.
129  * The following parameters can be used to tune this operation.
130  *
131  * mdi_path_discovery_boot
132  *	Number of times path discovery will be attempted during early boot.
133  *	Probably there is no reason to ever set this value to greater than one.
134  *
135  * mdi_path_discovery_postboot
136  *	Number of times path discovery will be attempted after early boot.
137  *	Set it to a minimum of two to allow for discovery of iscsi paths which
138  *	may happen very late during booting.
139  *
140  * mdi_path_discovery_interval
141  *	Minimum number of seconds MDI will wait between successive discovery
142  *	of all paths. Set it to -1 to disable discovery of all paths.
143  */
144 static int mdi_path_discovery_boot = 1;
145 static int mdi_path_discovery_postboot = 2;
146 static int mdi_path_discovery_interval = 10;
147 
148 /*
149  * number of seconds the asynchronous configuration thread will sleep idle
150  * before exiting.
151  */
152 static int mdi_async_config_idle_time = 600;
153 
154 static int mdi_bus_config_cache_hash_size = 256;
155 
156 /* turns off multithreaded configuration for certain operations */
157 static int mdi_mtc_off = 0;
158 
159 /*
160  * The "path" to a pathinfo node is identical to the /devices path to a
161  * devinfo node had the device been enumerated under a pHCI instead of
162  * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
163  * This association persists across create/delete of the pathinfo nodes,
164  * but not across reboot.
165  */
166 static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
167 static int		mdi_pathmap_hash_size = 256;
168 static kmutex_t		mdi_pathmap_mutex;
169 static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
170 static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */
171 
172 /*
173  * MDI component property name/value string definitions
174  */
175 const char 		*mdi_component_prop = "mpxio-component";
176 const char		*mdi_component_prop_vhci = "vhci";
177 const char		*mdi_component_prop_phci = "phci";
178 const char		*mdi_component_prop_client = "client";
179 
180 /*
181  * MDI client global unique identifier property name
182  */
183 const char		*mdi_client_guid_prop = "client-guid";
184 
185 /*
186  * MDI client load balancing property name/value string definitions
187  */
188 const char		*mdi_load_balance = "load-balance";
189 const char		*mdi_load_balance_none = "none";
190 const char		*mdi_load_balance_rr = "round-robin";
191 const char		*mdi_load_balance_lba = "logical-block";
192 
193 /*
194  * Obsolete vHCI class definition; to be removed after Leadville update
195  */
196 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
197 
/*
 * cmn_err format used by mdi_vhci_register() when a second vHCI driver
 * tries to register for a class that already has one.  Takes the class
 * name as its single %s argument.  Read-only, hence const.
 */
static const char vhci_greeting[] =
	"\tThere already exists one vHCI driver for class %s\n"
	"\tOnly one vHCI driver for each class is allowed\n";
201 
202 /*
203  * Static function prototypes
204  */
205 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
206 static int		i_mdi_client_offline(dev_info_t *, uint_t);
207 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
208 static void		i_mdi_phci_post_detach(dev_info_t *,
209 			    ddi_detach_cmd_t, int);
210 static int		i_mdi_client_pre_detach(dev_info_t *,
211 			    ddi_detach_cmd_t);
212 static void		i_mdi_client_post_detach(dev_info_t *,
213 			    ddi_detach_cmd_t, int);
214 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
215 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
216 static int 		i_mdi_lba_lb(mdi_client_t *ct,
217 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
218 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
219 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
220 static void		i_mdi_pm_reset_client(mdi_client_t *);
221 static int		i_mdi_power_all_phci(mdi_client_t *);
222 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
223 
224 
225 /*
226  * Internal mdi_pathinfo node functions
227  */
228 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
229 
230 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
231 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
232 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
233 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
234 static void		i_mdi_phci_unlock(mdi_phci_t *);
235 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
236 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
237 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
238 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
239 			    mdi_client_t *);
240 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
241 static void		i_mdi_client_remove_path(mdi_client_t *,
242 			    mdi_pathinfo_t *);
243 
244 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
245 			    mdi_pathinfo_state_t, int);
246 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
247 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
248 			    char **, int);
249 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
250 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
251 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
252 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
253 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
254 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
255 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
256 static void		i_mdi_client_update_state(mdi_client_t *);
257 static int		i_mdi_client_compute_state(mdi_client_t *,
258 			    mdi_phci_t *);
259 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
260 static void		i_mdi_client_unlock(mdi_client_t *);
261 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
262 static mdi_client_t	*i_devi_get_client(dev_info_t *);
263 /*
264  * NOTE: this will be removed once the NWS files are changed to use the new
265  * mdi_{enable,disable}_path interfaces
266  */
267 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
268 				int, int);
269 static mdi_pathinfo_t 	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
270 				mdi_vhci_t *vh, int flags, int op);
271 /*
272  * Failover related function prototypes
273  */
274 static int		i_mdi_failover(void *);
275 
276 /*
277  * misc internal functions
278  */
279 static int		i_mdi_get_hash_key(char *);
280 static int		i_map_nvlist_error_to_mdi(int);
281 static void		i_mdi_report_path_state(mdi_client_t *,
282 			    mdi_pathinfo_t *);
283 
284 static void		setup_vhci_cache(mdi_vhci_t *);
285 static int		destroy_vhci_cache(mdi_vhci_t *);
286 static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
287 static boolean_t	stop_vhcache_flush_thread(void *, int);
288 static void		free_string_array(char **, int);
289 static void		free_vhcache_phci(mdi_vhcache_phci_t *);
290 static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
291 static void		free_vhcache_client(mdi_vhcache_client_t *);
292 static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
293 static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
294 static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
295 static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
296 static void		vhcache_pi_add(mdi_vhci_config_t *,
297 			    struct mdi_pathinfo *);
298 static void		vhcache_pi_remove(mdi_vhci_config_t *,
299 			    struct mdi_pathinfo *);
300 static void		free_phclient_path_list(mdi_phys_path_t *);
301 static void		sort_vhcache_paths(mdi_vhcache_client_t *);
302 static int		flush_vhcache(mdi_vhci_config_t *, int);
303 static void		vhcache_dirty(mdi_vhci_config_t *);
304 static void		free_async_client_config(mdi_async_client_config_t *);
305 static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
306 static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
307 static nvlist_t		*read_on_disk_vhci_cache(char *);
308 extern int		fread_nvlist(char *, nvlist_t **);
309 extern int		fwrite_nvlist(char *, nvlist_t *);
310 
311 /* called once when first vhci registers with mdi */
312 static void
313 i_mdi_init()
314 {
315 	static int initialized = 0;
316 
317 	if (initialized)
318 		return;
319 	initialized = 1;
320 
321 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
322 
323 	/* Create our taskq resources */
324 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
325 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
326 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
327 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
328 
329 	/* Allocate ['path_instance' <-> "path"] maps */
330 	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
331 	mdi_pathmap_bypath = mod_hash_create_strhash(
332 	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
333 	    mod_hash_null_valdtor);
334 	mdi_pathmap_byinstance = mod_hash_create_idhash(
335 	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
336 	    mod_hash_null_valdtor);
337 }
338 
339 /*
340  * mdi_get_component_type():
341  *		Return mpxio component type
342  * Return Values:
343  *		MDI_COMPONENT_NONE
344  *		MDI_COMPONENT_VHCI
345  *		MDI_COMPONENT_PHCI
346  *		MDI_COMPONENT_CLIENT
347  * XXX This doesn't work under multi-level MPxIO and should be
348  *	removed when clients migrate mdi_component_is_*() interfaces.
349  */
350 int
351 mdi_get_component_type(dev_info_t *dip)
352 {
353 	return (DEVI(dip)->devi_mdi_component);
354 }
355 
356 /*
357  * mdi_vhci_register():
358  *		Register a vHCI module with the mpxio framework
359  *		mdi_vhci_register() is called by vHCI drivers to register the
360  *		'class_driver' vHCI driver and its MDI entrypoints with the
361  *		mpxio framework.  The vHCI driver must call this interface as
362  *		part of its attach(9e) handler.
363  *		Competing threads may try to attach mdi_vhci_register() as
364  *		the vHCI drivers are loaded and attached as a result of pHCI
365  *		driver instance registration (mdi_phci_register()) with the
366  *		framework.
367  * Return Values:
368  *		MDI_SUCCESS
369  *		MDI_FAILURE
370  */
371 /*ARGSUSED*/
372 int
373 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
374     int flags)
375 {
376 	mdi_vhci_t		*vh = NULL;
377 
378 	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);
379 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
380 
381 	i_mdi_init();
382 
383 	mutex_enter(&mdi_mutex);
384 	/*
385 	 * Scan for already registered vhci
386 	 */
387 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
388 		if (strcmp(vh->vh_class, class) == 0) {
389 			/*
390 			 * vHCI has already been created.  Check for valid
391 			 * vHCI ops registration.  We only support one vHCI
392 			 * module per class
393 			 */
394 			if (vh->vh_ops != NULL) {
395 				mutex_exit(&mdi_mutex);
396 				cmn_err(CE_NOTE, vhci_greeting, class);
397 				return (MDI_FAILURE);
398 			}
399 			break;
400 		}
401 	}
402 
403 	/*
404 	 * if not yet created, create the vHCI component
405 	 */
406 	if (vh == NULL) {
407 		struct client_hash	*hash = NULL;
408 		char			*load_balance;
409 
410 		/*
411 		 * Allocate and initialize the mdi extensions
412 		 */
413 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
414 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
415 		    KM_SLEEP);
416 		vh->vh_client_table = hash;
417 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
418 		(void) strcpy(vh->vh_class, class);
419 		vh->vh_lb = LOAD_BALANCE_RR;
420 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
421 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
422 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
423 				vh->vh_lb = LOAD_BALANCE_NONE;
424 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
425 				    == 0) {
426 				vh->vh_lb = LOAD_BALANCE_LBA;
427 			}
428 			ddi_prop_free(load_balance);
429 		}
430 
431 		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
432 		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
433 
434 		/*
435 		 * Store the vHCI ops vectors
436 		 */
437 		vh->vh_dip = vdip;
438 		vh->vh_ops = vops;
439 
440 		setup_vhci_cache(vh);
441 
442 		if (mdi_vhci_head == NULL) {
443 			mdi_vhci_head = vh;
444 		}
445 		if (mdi_vhci_tail) {
446 			mdi_vhci_tail->vh_next = vh;
447 		}
448 		mdi_vhci_tail = vh;
449 		mdi_vhci_count++;
450 	}
451 
452 	/*
453 	 * Claim the devfs node as a vhci component
454 	 */
455 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
456 
457 	/*
458 	 * Initialize our back reference from dev_info node
459 	 */
460 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
461 	mutex_exit(&mdi_mutex);
462 	return (MDI_SUCCESS);
463 }
464 
465 /*
466  * mdi_vhci_unregister():
467  *		Unregister a vHCI module from mpxio framework
468  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
469  * 		of a vhci to unregister it from the framework.
470  * Return Values:
471  *		MDI_SUCCESS
472  *		MDI_FAILURE
473  */
474 /*ARGSUSED*/
475 int
476 mdi_vhci_unregister(dev_info_t *vdip, int flags)
477 {
478 	mdi_vhci_t	*found, *vh, *prev = NULL;
479 
480 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
481 
482 	/*
483 	 * Check for invalid VHCI
484 	 */
485 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
486 		return (MDI_FAILURE);
487 
488 	/*
489 	 * Scan the list of registered vHCIs for a match
490 	 */
491 	mutex_enter(&mdi_mutex);
492 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
493 		if (found == vh)
494 			break;
495 		prev = found;
496 	}
497 
498 	if (found == NULL) {
499 		mutex_exit(&mdi_mutex);
500 		return (MDI_FAILURE);
501 	}
502 
503 	/*
504 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
505 	 * should have been unregistered, before a vHCI can be
506 	 * unregistered.
507 	 */
508 	MDI_VHCI_PHCI_LOCK(vh);
509 	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
510 		MDI_VHCI_PHCI_UNLOCK(vh);
511 		mutex_exit(&mdi_mutex);
512 		return (MDI_FAILURE);
513 	}
514 	MDI_VHCI_PHCI_UNLOCK(vh);
515 
516 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
517 		mutex_exit(&mdi_mutex);
518 		return (MDI_FAILURE);
519 	}
520 
521 	/*
522 	 * Remove the vHCI from the global list
523 	 */
524 	if (vh == mdi_vhci_head) {
525 		mdi_vhci_head = vh->vh_next;
526 	} else {
527 		prev->vh_next = vh->vh_next;
528 	}
529 	if (vh == mdi_vhci_tail) {
530 		mdi_vhci_tail = prev;
531 	}
532 	mdi_vhci_count--;
533 	mutex_exit(&mdi_mutex);
534 
535 	vh->vh_ops = NULL;
536 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
537 	DEVI(vdip)->devi_mdi_xhci = NULL;
538 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
539 	kmem_free(vh->vh_client_table,
540 	    mdi_client_table_size * sizeof (struct client_hash));
541 	mutex_destroy(&vh->vh_phci_mutex);
542 	mutex_destroy(&vh->vh_client_mutex);
543 
544 	kmem_free(vh, sizeof (mdi_vhci_t));
545 	return (MDI_SUCCESS);
546 }
547 
548 /*
549  * i_mdi_vhci_class2vhci():
550  *		Look for a matching vHCI module given a vHCI class name
551  * Return Values:
552  *		Handle to a vHCI component
553  *		NULL
554  */
555 static mdi_vhci_t *
556 i_mdi_vhci_class2vhci(char *class)
557 {
558 	mdi_vhci_t	*vh = NULL;
559 
560 	ASSERT(!MUTEX_HELD(&mdi_mutex));
561 
562 	mutex_enter(&mdi_mutex);
563 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
564 		if (strcmp(vh->vh_class, class) == 0) {
565 			break;
566 		}
567 	}
568 	mutex_exit(&mdi_mutex);
569 	return (vh);
570 }
571 
572 /*
573  * i_devi_get_vhci():
574  *		Utility function to get the handle to a vHCI component
575  * Return Values:
576  *		Handle to a vHCI component
577  *		NULL
578  */
579 mdi_vhci_t *
580 i_devi_get_vhci(dev_info_t *vdip)
581 {
582 	mdi_vhci_t	*vh = NULL;
583 	if (MDI_VHCI(vdip)) {
584 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
585 	}
586 	return (vh);
587 }
588 
589 /*
590  * mdi_phci_register():
591  *		Register a pHCI module with mpxio framework
592  *		mdi_phci_register() is called by pHCI drivers to register with
593  *		the mpxio framework and a specific 'class_driver' vHCI.  The
594  *		pHCI driver must call this interface as part of its attach(9e)
595  *		handler.
596  * Return Values:
597  *		MDI_SUCCESS
598  *		MDI_FAILURE
599  */
600 /*ARGSUSED*/
601 int
602 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
603 {
604 	mdi_phci_t		*ph;
605 	mdi_vhci_t		*vh;
606 	char			*data;
607 	char			*pathname;
608 
609 	/*
610 	 * Some subsystems, like fcp, perform pHCI registration from a
611 	 * different thread than the one doing the pHCI attach(9E) - the
612 	 * driver attach code is waiting for this other thread to complete.
613 	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
614 	 * (indicating that some thread has done an ndi_devi_enter of parent)
615 	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
616 	 */
617 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
618 
619 	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
620 	(void) ddi_pathname(pdip, pathname);
621 
622 	/*
623 	 * Check for mpxio-disable property. Enable mpxio if the property is
624 	 * missing or not set to "yes".
625 	 * If the property is set to "yes" then emit a brief message.
626 	 */
627 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
628 	    &data) == DDI_SUCCESS)) {
629 		if (strcmp(data, "yes") == 0) {
630 			MDI_DEBUG(1, (CE_CONT, pdip,
631 			    "?%s (%s%d) multipath capabilities "
632 			    "disabled via %s.conf.\n", pathname,
633 			    ddi_driver_name(pdip), ddi_get_instance(pdip),
634 			    ddi_driver_name(pdip)));
635 			ddi_prop_free(data);
636 			kmem_free(pathname, MAXPATHLEN);
637 			return (MDI_FAILURE);
638 		}
639 		ddi_prop_free(data);
640 	}
641 
642 	kmem_free(pathname, MAXPATHLEN);
643 
644 	/*
645 	 * Search for a matching vHCI
646 	 */
647 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
648 	if (vh == NULL) {
649 		return (MDI_FAILURE);
650 	}
651 
652 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
653 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
654 	ph->ph_dip = pdip;
655 	ph->ph_vhci = vh;
656 	ph->ph_next = NULL;
657 	ph->ph_unstable = 0;
658 	ph->ph_vprivate = 0;
659 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
660 
661 	MDI_PHCI_LOCK(ph);
662 	MDI_PHCI_SET_POWER_UP(ph);
663 	MDI_PHCI_UNLOCK(ph);
664 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
665 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
666 
667 	vhcache_phci_add(vh->vh_config, ph);
668 
669 	MDI_VHCI_PHCI_LOCK(vh);
670 	if (vh->vh_phci_head == NULL) {
671 		vh->vh_phci_head = ph;
672 	}
673 	if (vh->vh_phci_tail) {
674 		vh->vh_phci_tail->ph_next = ph;
675 	}
676 	vh->vh_phci_tail = ph;
677 	vh->vh_phci_count++;
678 	MDI_VHCI_PHCI_UNLOCK(vh);
679 
680 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
681 	return (MDI_SUCCESS);
682 }
683 
684 /*
685  * mdi_phci_unregister():
686  *		Unregister a pHCI module from mpxio framework
687  *		mdi_phci_unregister() is called by the pHCI drivers from their
688  *		detach(9E) handler to unregister their instances from the
689  *		framework.
690  * Return Values:
691  *		MDI_SUCCESS
692  *		MDI_FAILURE
693  */
694 /*ARGSUSED*/
695 int
696 mdi_phci_unregister(dev_info_t *pdip, int flags)
697 {
698 	mdi_vhci_t		*vh;
699 	mdi_phci_t		*ph;
700 	mdi_phci_t		*tmp;
701 	mdi_phci_t		*prev = NULL;
702 
703 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
704 
705 	ph = i_devi_get_phci(pdip);
706 	if (ph == NULL) {
707 		MDI_DEBUG(1, (CE_WARN, pdip,
708 		    "!pHCI unregister: Not a valid pHCI"));
709 		return (MDI_FAILURE);
710 	}
711 
712 	vh = ph->ph_vhci;
713 	ASSERT(vh != NULL);
714 	if (vh == NULL) {
715 		MDI_DEBUG(1, (CE_WARN, pdip,
716 		    "!pHCI unregister: Not a valid vHCI"));
717 		return (MDI_FAILURE);
718 	}
719 
720 	MDI_VHCI_PHCI_LOCK(vh);
721 	tmp = vh->vh_phci_head;
722 	while (tmp) {
723 		if (tmp == ph) {
724 			break;
725 		}
726 		prev = tmp;
727 		tmp = tmp->ph_next;
728 	}
729 
730 	if (ph == vh->vh_phci_head) {
731 		vh->vh_phci_head = ph->ph_next;
732 	} else {
733 		prev->ph_next = ph->ph_next;
734 	}
735 
736 	if (ph == vh->vh_phci_tail) {
737 		vh->vh_phci_tail = prev;
738 	}
739 
740 	vh->vh_phci_count--;
741 	MDI_VHCI_PHCI_UNLOCK(vh);
742 
743 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
744 	    ESC_DDI_INITIATOR_UNREGISTER);
745 	vhcache_phci_remove(vh->vh_config, ph);
746 	cv_destroy(&ph->ph_unstable_cv);
747 	mutex_destroy(&ph->ph_mutex);
748 	kmem_free(ph, sizeof (mdi_phci_t));
749 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
750 	DEVI(pdip)->devi_mdi_xhci = NULL;
751 	return (MDI_SUCCESS);
752 }
753 
754 /*
755  * i_devi_get_phci():
756  * 		Utility function to return the phci extensions.
757  */
758 static mdi_phci_t *
759 i_devi_get_phci(dev_info_t *pdip)
760 {
761 	mdi_phci_t	*ph = NULL;
762 	if (MDI_PHCI(pdip)) {
763 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
764 	}
765 	return (ph);
766 }
767 
768 /*
769  * Single thread mdi entry into devinfo node for modifying its children.
770  * If necessary we perform an ndi_devi_enter of the vHCI before doing
771  * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
772  * for the vHCI and one for the pHCI.
773  */
774 void
775 mdi_devi_enter(dev_info_t *phci_dip, int *circular)
776 {
777 	dev_info_t	*vdip;
778 	int		vcircular, pcircular;
779 
780 	/* Verify calling context */
781 	ASSERT(MDI_PHCI(phci_dip));
782 	vdip = mdi_devi_get_vdip(phci_dip);
783 	ASSERT(vdip);			/* A pHCI always has a vHCI */
784 
785 	/*
786 	 * If pHCI is detaching then the framework has already entered the
787 	 * vHCI on a threads that went down the code path leading to
788 	 * detach_node().  This framework enter of the vHCI during pHCI
789 	 * detach is done to avoid deadlock with vHCI power management
790 	 * operations which enter the vHCI and the enter down the path
791 	 * to the pHCI. If pHCI is detaching then we piggyback this calls
792 	 * enter of the vHCI on frameworks vHCI enter that has already
793 	 * occurred - this is OK because we know that the framework thread
794 	 * doing detach is waiting for our completion.
795 	 *
796 	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
797 	 * race with detach - but we can't do that because the framework has
798 	 * already entered the parent, so we have some complexity instead.
799 	 */
800 	for (;;) {
801 		if (ndi_devi_tryenter(vdip, &vcircular)) {
802 			ASSERT(vcircular != -1);
803 			if (DEVI_IS_DETACHING(phci_dip)) {
804 				ndi_devi_exit(vdip, vcircular);
805 				vcircular = -1;
806 			}
807 			break;
808 		} else if (DEVI_IS_DETACHING(phci_dip)) {
809 			vcircular = -1;
810 			break;
811 		} else {
812 			delay(1);
813 		}
814 	}
815 
816 	ndi_devi_enter(phci_dip, &pcircular);
817 	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
818 }
819 
820 /*
821  * Release mdi_devi_enter or successful mdi_devi_tryenter.
822  */
823 void
824 mdi_devi_exit(dev_info_t *phci_dip, int circular)
825 {
826 	dev_info_t	*vdip;
827 	int		vcircular, pcircular;
828 
829 	/* Verify calling context */
830 	ASSERT(MDI_PHCI(phci_dip));
831 	vdip = mdi_devi_get_vdip(phci_dip);
832 	ASSERT(vdip);			/* A pHCI always has a vHCI */
833 
834 	/* extract two circular recursion values from single int */
835 	pcircular = (short)(circular & 0xFFFF);
836 	vcircular = (short)((circular >> 16) & 0xFFFF);
837 
838 	ndi_devi_exit(phci_dip, pcircular);
839 	if (vcircular != -1)
840 		ndi_devi_exit(vdip, vcircular);
841 }
842 
843 /*
844  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
845  * around a pHCI drivers calls to mdi_pi_online/offline, after holding
846  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
847  * with vHCI power management code during path online/offline.  Each
848  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
849  * occur within the scope of an active mdi_devi_enter that establishes the
850  * circular value.
851  */
852 void
853 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
854 {
855 	int		pcircular;
856 
857 	/* Verify calling context */
858 	ASSERT(MDI_PHCI(phci_dip));
859 
860 	pcircular = (short)(circular & 0xFFFF);
861 	ndi_devi_exit(phci_dip, pcircular);
862 }
863 
864 void
865 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
866 {
867 	int		pcircular;
868 
869 	/* Verify calling context */
870 	ASSERT(MDI_PHCI(phci_dip));
871 
872 	ndi_devi_enter(phci_dip, &pcircular);
873 
874 	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
875 	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
876 }
877 
878 /*
879  * mdi_devi_get_vdip():
880  *		given a pHCI dip return vHCI dip
881  */
882 dev_info_t *
883 mdi_devi_get_vdip(dev_info_t *pdip)
884 {
885 	mdi_phci_t	*ph;
886 
887 	ph = i_devi_get_phci(pdip);
888 	if (ph && ph->ph_vhci)
889 		return (ph->ph_vhci->vh_dip);
890 	return (NULL);
891 }
892 
893 /*
894  * mdi_devi_pdip_entered():
895  *		Return 1 if we are vHCI and have done an ndi_devi_enter
896  *		of a pHCI
897  */
898 int
899 mdi_devi_pdip_entered(dev_info_t *vdip)
900 {
901 	mdi_vhci_t	*vh;
902 	mdi_phci_t	*ph;
903 
904 	vh = i_devi_get_vhci(vdip);
905 	if (vh == NULL)
906 		return (0);
907 
908 	MDI_VHCI_PHCI_LOCK(vh);
909 	ph = vh->vh_phci_head;
910 	while (ph) {
911 		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
912 			MDI_VHCI_PHCI_UNLOCK(vh);
913 			return (1);
914 		}
915 		ph = ph->ph_next;
916 	}
917 	MDI_VHCI_PHCI_UNLOCK(vh);
918 	return (0);
919 }
920 
921 /*
922  * mdi_phci_path2devinfo():
923  * 		Utility function to search for a valid phci device given
924  *		the devfs pathname.
925  */
926 dev_info_t *
927 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
928 {
929 	char		*temp_pathname;
930 	mdi_vhci_t	*vh;
931 	mdi_phci_t	*ph;
932 	dev_info_t 	*pdip = NULL;
933 
934 	vh = i_devi_get_vhci(vdip);
935 	ASSERT(vh != NULL);
936 
937 	if (vh == NULL) {
938 		/*
939 		 * Invalid vHCI component, return failure
940 		 */
941 		return (NULL);
942 	}
943 
944 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
945 	MDI_VHCI_PHCI_LOCK(vh);
946 	ph = vh->vh_phci_head;
947 	while (ph != NULL) {
948 		pdip = ph->ph_dip;
949 		ASSERT(pdip != NULL);
950 		*temp_pathname = '\0';
951 		(void) ddi_pathname(pdip, temp_pathname);
952 		if (strcmp(temp_pathname, pathname) == 0) {
953 			break;
954 		}
955 		ph = ph->ph_next;
956 	}
957 	if (ph == NULL) {
958 		pdip = NULL;
959 	}
960 	MDI_VHCI_PHCI_UNLOCK(vh);
961 	kmem_free(temp_pathname, MAXPATHLEN);
962 	return (pdip);
963 }
964 
965 /*
966  * mdi_phci_get_path_count():
967  * 		get number of path information nodes associated with a given
968  *		pHCI device.
969  */
970 int
971 mdi_phci_get_path_count(dev_info_t *pdip)
972 {
973 	mdi_phci_t	*ph;
974 	int		count = 0;
975 
976 	ph = i_devi_get_phci(pdip);
977 	if (ph != NULL) {
978 		count = ph->ph_path_count;
979 	}
980 	return (count);
981 }
982 
983 /*
984  * i_mdi_phci_lock():
985  *		Lock a pHCI device
986  * Return Values:
987  *		None
988  * Note:
989  *		The default locking order is:
990  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
991  *		But there are number of situations where locks need to be
992  *		grabbed in reverse order.  This routine implements try and lock
993  *		mechanism depending on the requested parameter option.
994  */
995 static void
996 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
997 {
998 	if (pip) {
999 		/* Reverse locking is requested. */
1000 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
1001 			/*
1002 			 * tryenter failed. Try to grab again
1003 			 * after a small delay
1004 			 */
1005 			MDI_PI_HOLD(pip);
1006 			MDI_PI_UNLOCK(pip);
1007 			delay(1);
1008 			MDI_PI_LOCK(pip);
1009 			MDI_PI_RELE(pip);
1010 		}
1011 	} else {
1012 		MDI_PHCI_LOCK(ph);
1013 	}
1014 }
1015 
/*
 * i_mdi_phci_unlock():
 *		Unlock the pHCI component.  Thin wrapper around
 *		MDI_PHCI_UNLOCK(); releases the lock acquired through
 *		i_mdi_phci_lock().
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	MDI_PHCI_UNLOCK(ph);
}
1025 
1026 /*
1027  * i_mdi_devinfo_create():
1028  *		create client device's devinfo node
1029  * Return Values:
1030  *		dev_info
1031  *		NULL
1032  * Notes:
1033  */
1034 static dev_info_t *
1035 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1036 	char **compatible, int ncompatible)
1037 {
1038 	dev_info_t *cdip = NULL;
1039 
1040 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1041 
1042 	/* Verify for duplicate entry */
1043 	cdip = i_mdi_devinfo_find(vh, name, guid);
1044 	ASSERT(cdip == NULL);
1045 	if (cdip) {
1046 		cmn_err(CE_WARN,
1047 		    "i_mdi_devinfo_create: client dip %p already exists",
1048 			(void *)cdip);
1049 	}
1050 
1051 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1052 	if (cdip == NULL)
1053 		goto fail;
1054 
1055 	/*
1056 	 * Create component type and Global unique identifier
1057 	 * properties
1058 	 */
1059 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1060 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1061 		goto fail;
1062 	}
1063 
1064 	/* Decorate the node with compatible property */
1065 	if (compatible &&
1066 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1067 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1068 		goto fail;
1069 	}
1070 
1071 	return (cdip);
1072 
1073 fail:
1074 	if (cdip) {
1075 		(void) ndi_prop_remove_all(cdip);
1076 		(void) ndi_devi_free(cdip);
1077 	}
1078 	return (NULL);
1079 }
1080 
1081 /*
1082  * i_mdi_devinfo_find():
1083  *		Find a matching devinfo node for given client node name
1084  *		and its guid.
1085  * Return Values:
1086  *		Handle to a dev_info node or NULL
1087  */
1088 static dev_info_t *
1089 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1090 {
1091 	char			*data;
1092 	dev_info_t 		*cdip = NULL;
1093 	dev_info_t 		*ndip = NULL;
1094 	int			circular;
1095 
1096 	ndi_devi_enter(vh->vh_dip, &circular);
1097 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1098 	while ((cdip = ndip) != NULL) {
1099 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1100 
1101 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1102 			continue;
1103 		}
1104 
1105 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1106 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1107 		    &data) != DDI_PROP_SUCCESS) {
1108 			continue;
1109 		}
1110 
1111 		if (strcmp(data, guid) != 0) {
1112 			ddi_prop_free(data);
1113 			continue;
1114 		}
1115 		ddi_prop_free(data);
1116 		break;
1117 	}
1118 	ndi_devi_exit(vh->vh_dip, circular);
1119 	return (cdip);
1120 }
1121 
1122 /*
1123  * i_mdi_devinfo_remove():
1124  *		Remove a client device node
1125  */
1126 static int
1127 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1128 {
1129 	int	rv = MDI_SUCCESS;
1130 
1131 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1132 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1133 		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
1134 		if (rv != NDI_SUCCESS) {
1135 			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
1136 			    " failed. cdip = %p\n", (void *)cdip));
1137 		}
1138 		/*
1139 		 * Convert to MDI error code
1140 		 */
1141 		switch (rv) {
1142 		case NDI_SUCCESS:
1143 			rv = MDI_SUCCESS;
1144 			break;
1145 		case NDI_BUSY:
1146 			rv = MDI_BUSY;
1147 			break;
1148 		default:
1149 			rv = MDI_FAILURE;
1150 			break;
1151 		}
1152 	}
1153 	return (rv);
1154 }
1155 
1156 /*
1157  * i_devi_get_client()
1158  *		Utility function to get mpxio component extensions
1159  */
1160 static mdi_client_t *
1161 i_devi_get_client(dev_info_t *cdip)
1162 {
1163 	mdi_client_t	*ct = NULL;
1164 
1165 	if (MDI_CLIENT(cdip)) {
1166 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1167 	}
1168 	return (ct);
1169 }
1170 
1171 /*
1172  * i_mdi_is_child_present():
1173  *		Search for the presence of client device dev_info node
1174  */
1175 static int
1176 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1177 {
1178 	int		rv = MDI_FAILURE;
1179 	struct dev_info	*dip;
1180 	int		circular;
1181 
1182 	ndi_devi_enter(vdip, &circular);
1183 	dip = DEVI(vdip)->devi_child;
1184 	while (dip) {
1185 		if (dip == DEVI(cdip)) {
1186 			rv = MDI_SUCCESS;
1187 			break;
1188 		}
1189 		dip = dip->devi_sibling;
1190 	}
1191 	ndi_devi_exit(vdip, circular);
1192 	return (rv);
1193 }
1194 
1195 
1196 /*
1197  * i_mdi_client_lock():
1198  *		Grab client component lock
1199  * Return Values:
1200  *		None
1201  * Note:
1202  *		The default locking order is:
1203  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1204  *		But there are number of situations where locks need to be
1205  *		grabbed in reverse order.  This routine implements try and lock
1206  *		mechanism depending on the requested parameter option.
1207  */
1208 static void
1209 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1210 {
1211 	if (pip) {
1212 		/*
1213 		 * Reverse locking is requested.
1214 		 */
1215 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1216 			/*
1217 			 * tryenter failed. Try to grab again
1218 			 * after a small delay
1219 			 */
1220 			MDI_PI_HOLD(pip);
1221 			MDI_PI_UNLOCK(pip);
1222 			delay(1);
1223 			MDI_PI_LOCK(pip);
1224 			MDI_PI_RELE(pip);
1225 		}
1226 	} else {
1227 		MDI_CLIENT_LOCK(ct);
1228 	}
1229 }
1230 
/*
 * i_mdi_client_unlock():
 *		Unlock a client component.  Thin wrapper around
 *		MDI_CLIENT_UNLOCK(); releases the lock acquired through
 *		i_mdi_client_lock().
 */
static void
i_mdi_client_unlock(mdi_client_t *ct)
{
	MDI_CLIENT_UNLOCK(ct);
}
1240 
1241 /*
1242  * i_mdi_client_alloc():
1243  * 		Allocate and initialize a client structure.  Caller should
1244  *		hold the vhci client lock.
1245  * Return Values:
1246  *		Handle to a client component
1247  */
1248 /*ARGSUSED*/
1249 static mdi_client_t *
1250 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1251 {
1252 	mdi_client_t	*ct;
1253 
1254 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1255 
1256 	/*
1257 	 * Allocate and initialize a component structure.
1258 	 */
1259 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1260 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1261 	ct->ct_hnext = NULL;
1262 	ct->ct_hprev = NULL;
1263 	ct->ct_dip = NULL;
1264 	ct->ct_vhci = vh;
1265 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1266 	(void) strcpy(ct->ct_drvname, name);
1267 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1268 	(void) strcpy(ct->ct_guid, lguid);
1269 	ct->ct_cprivate = NULL;
1270 	ct->ct_vprivate = NULL;
1271 	ct->ct_flags = 0;
1272 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1273 	MDI_CLIENT_LOCK(ct);
1274 	MDI_CLIENT_SET_OFFLINE(ct);
1275 	MDI_CLIENT_SET_DETACH(ct);
1276 	MDI_CLIENT_SET_POWER_UP(ct);
1277 	MDI_CLIENT_UNLOCK(ct);
1278 	ct->ct_failover_flags = 0;
1279 	ct->ct_failover_status = 0;
1280 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1281 	ct->ct_unstable = 0;
1282 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1283 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1284 	ct->ct_lb = vh->vh_lb;
1285 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1286 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1287 	ct->ct_path_count = 0;
1288 	ct->ct_path_head = NULL;
1289 	ct->ct_path_tail = NULL;
1290 	ct->ct_path_last = NULL;
1291 
1292 	/*
1293 	 * Add this client component to our client hash queue
1294 	 */
1295 	i_mdi_client_enlist_table(vh, ct);
1296 	return (ct);
1297 }
1298 
1299 /*
1300  * i_mdi_client_enlist_table():
1301  *		Attach the client device to the client hash table. Caller
1302  *		should hold the vhci client lock.
1303  */
1304 static void
1305 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1306 {
1307 	int 			index;
1308 	struct client_hash	*head;
1309 
1310 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1311 
1312 	index = i_mdi_get_hash_key(ct->ct_guid);
1313 	head = &vh->vh_client_table[index];
1314 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1315 	head->ct_hash_head = ct;
1316 	head->ct_hash_count++;
1317 	vh->vh_client_count++;
1318 }
1319 
1320 /*
1321  * i_mdi_client_delist_table():
1322  *		Attach the client device to the client hash table.
1323  *		Caller should hold the vhci client lock.
1324  */
1325 static void
1326 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1327 {
1328 	int			index;
1329 	char			*guid;
1330 	struct client_hash 	*head;
1331 	mdi_client_t		*next;
1332 	mdi_client_t		*last;
1333 
1334 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1335 
1336 	guid = ct->ct_guid;
1337 	index = i_mdi_get_hash_key(guid);
1338 	head = &vh->vh_client_table[index];
1339 
1340 	last = NULL;
1341 	next = (mdi_client_t *)head->ct_hash_head;
1342 	while (next != NULL) {
1343 		if (next == ct) {
1344 			break;
1345 		}
1346 		last = next;
1347 		next = next->ct_hnext;
1348 	}
1349 
1350 	if (next) {
1351 		head->ct_hash_count--;
1352 		if (last == NULL) {
1353 			head->ct_hash_head = ct->ct_hnext;
1354 		} else {
1355 			last->ct_hnext = ct->ct_hnext;
1356 		}
1357 		ct->ct_hnext = NULL;
1358 		vh->vh_client_count--;
1359 	}
1360 }
1361 
1362 
1363 /*
1364  * i_mdi_client_free():
1365  *		Free a client component
1366  */
1367 static int
1368 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1369 {
1370 	int		rv = MDI_SUCCESS;
1371 	int		flags = ct->ct_flags;
1372 	dev_info_t	*cdip;
1373 	dev_info_t	*vdip;
1374 
1375 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1376 
1377 	vdip = vh->vh_dip;
1378 	cdip = ct->ct_dip;
1379 
1380 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1381 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1382 	DEVI(cdip)->devi_mdi_client = NULL;
1383 
1384 	/*
1385 	 * Clear out back ref. to dev_info_t node
1386 	 */
1387 	ct->ct_dip = NULL;
1388 
1389 	/*
1390 	 * Remove this client from our hash queue
1391 	 */
1392 	i_mdi_client_delist_table(vh, ct);
1393 
1394 	/*
1395 	 * Uninitialize and free the component
1396 	 */
1397 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1398 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1399 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1400 	cv_destroy(&ct->ct_failover_cv);
1401 	cv_destroy(&ct->ct_unstable_cv);
1402 	cv_destroy(&ct->ct_powerchange_cv);
1403 	mutex_destroy(&ct->ct_mutex);
1404 	kmem_free(ct, sizeof (*ct));
1405 
1406 	if (cdip != NULL) {
1407 		MDI_VHCI_CLIENT_UNLOCK(vh);
1408 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1409 		MDI_VHCI_CLIENT_LOCK(vh);
1410 	}
1411 	return (rv);
1412 }
1413 
1414 /*
1415  * i_mdi_client_find():
1416  * 		Find the client structure corresponding to a given guid
1417  *		Caller should hold the vhci client lock.
1418  */
1419 static mdi_client_t *
1420 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1421 {
1422 	int			index;
1423 	struct client_hash	*head;
1424 	mdi_client_t		*ct;
1425 
1426 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1427 
1428 	index = i_mdi_get_hash_key(guid);
1429 	head = &vh->vh_client_table[index];
1430 
1431 	ct = head->ct_hash_head;
1432 	while (ct != NULL) {
1433 		if (strcmp(ct->ct_guid, guid) == 0 &&
1434 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1435 			break;
1436 		}
1437 		ct = ct->ct_hnext;
1438 	}
1439 	return (ct);
1440 }
1441 
1442 /*
1443  * i_mdi_client_update_state():
1444  *		Compute and update client device state
1445  * Notes:
1446  *		A client device can be in any of three possible states:
1447  *
1448  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1449  *		one online/standby paths. Can tolerate failures.
1450  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1451  *		no alternate paths available as standby. A failure on the online
1452  *		would result in loss of access to device data.
1453  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1454  *		no paths available to access the device.
1455  */
1456 static void
1457 i_mdi_client_update_state(mdi_client_t *ct)
1458 {
1459 	int state;
1460 
1461 	ASSERT(MDI_CLIENT_LOCKED(ct));
1462 	state = i_mdi_client_compute_state(ct, NULL);
1463 	MDI_CLIENT_SET_STATE(ct, state);
1464 }
1465 
1466 /*
1467  * i_mdi_client_compute_state():
1468  *		Compute client device state
1469  *
1470  *		mdi_phci_t *	Pointer to pHCI structure which should
1471  *				while computing the new value.  Used by
1472  *				i_mdi_phci_offline() to find the new
1473  *				client state after DR of a pHCI.
1474  */
1475 static int
1476 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1477 {
1478 	int		state;
1479 	int		online_count = 0;
1480 	int		standby_count = 0;
1481 	mdi_pathinfo_t	*pip, *next;
1482 
1483 	ASSERT(MDI_CLIENT_LOCKED(ct));
1484 	pip = ct->ct_path_head;
1485 	while (pip != NULL) {
1486 		MDI_PI_LOCK(pip);
1487 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1488 		if (MDI_PI(pip)->pi_phci == ph) {
1489 			MDI_PI_UNLOCK(pip);
1490 			pip = next;
1491 			continue;
1492 		}
1493 
1494 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1495 				== MDI_PATHINFO_STATE_ONLINE)
1496 			online_count++;
1497 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1498 				== MDI_PATHINFO_STATE_STANDBY)
1499 			standby_count++;
1500 		MDI_PI_UNLOCK(pip);
1501 		pip = next;
1502 	}
1503 
1504 	if (online_count == 0) {
1505 		if (standby_count == 0) {
1506 			state = MDI_CLIENT_STATE_FAILED;
1507 			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
1508 			    " ct = %p\n", (void *)ct));
1509 		} else if (standby_count == 1) {
1510 			state = MDI_CLIENT_STATE_DEGRADED;
1511 		} else {
1512 			state = MDI_CLIENT_STATE_OPTIMAL;
1513 		}
1514 	} else if (online_count == 1) {
1515 		if (standby_count == 0) {
1516 			state = MDI_CLIENT_STATE_DEGRADED;
1517 		} else {
1518 			state = MDI_CLIENT_STATE_OPTIMAL;
1519 		}
1520 	} else {
1521 		state = MDI_CLIENT_STATE_OPTIMAL;
1522 	}
1523 	return (state);
1524 }
1525 
/*
 * i_mdi_client2devinfo():
 *		Utility function: return the dev_info node recorded in the
 *		client's ct_dip field.  No lock is taken and no hold is
 *		placed on the node here.
 */
dev_info_t *
i_mdi_client2devinfo(mdi_client_t *ct)
{
	return (ct->ct_dip);
}
1535 
1536 /*
1537  * mdi_client_path2_devinfo():
1538  * 		Given the parent devinfo and child devfs pathname, search for
1539  *		a valid devfs node handle.
1540  */
1541 dev_info_t *
1542 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1543 {
1544 	dev_info_t 	*cdip = NULL;
1545 	dev_info_t 	*ndip = NULL;
1546 	char		*temp_pathname;
1547 	int		circular;
1548 
1549 	/*
1550 	 * Allocate temp buffer
1551 	 */
1552 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1553 
1554 	/*
1555 	 * Lock parent against changes
1556 	 */
1557 	ndi_devi_enter(vdip, &circular);
1558 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1559 	while ((cdip = ndip) != NULL) {
1560 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1561 
1562 		*temp_pathname = '\0';
1563 		(void) ddi_pathname(cdip, temp_pathname);
1564 		if (strcmp(temp_pathname, pathname) == 0) {
1565 			break;
1566 		}
1567 	}
1568 	/*
1569 	 * Release devinfo lock
1570 	 */
1571 	ndi_devi_exit(vdip, circular);
1572 
1573 	/*
1574 	 * Free the temp buffer
1575 	 */
1576 	kmem_free(temp_pathname, MAXPATHLEN);
1577 	return (cdip);
1578 }
1579 
1580 /*
1581  * mdi_client_get_path_count():
1582  * 		Utility function to get number of path information nodes
1583  *		associated with a given client device.
1584  */
1585 int
1586 mdi_client_get_path_count(dev_info_t *cdip)
1587 {
1588 	mdi_client_t	*ct;
1589 	int		count = 0;
1590 
1591 	ct = i_devi_get_client(cdip);
1592 	if (ct != NULL) {
1593 		count = ct->ct_path_count;
1594 	}
1595 	return (count);
1596 }
1597 
1598 
1599 /*
1600  * i_mdi_get_hash_key():
1601  * 		Create a hash using strings as keys
1602  *
1603  */
1604 static int
1605 i_mdi_get_hash_key(char *str)
1606 {
1607 	uint32_t	g, hash = 0;
1608 	char		*p;
1609 
1610 	for (p = str; *p != '\0'; p++) {
1611 		g = *p;
1612 		hash += g;
1613 	}
1614 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1615 }
1616 
1617 /*
1618  * mdi_get_lb_policy():
1619  * 		Get current load balancing policy for a given client device
1620  */
1621 client_lb_t
1622 mdi_get_lb_policy(dev_info_t *cdip)
1623 {
1624 	client_lb_t	lb = LOAD_BALANCE_NONE;
1625 	mdi_client_t	*ct;
1626 
1627 	ct = i_devi_get_client(cdip);
1628 	if (ct != NULL) {
1629 		lb = ct->ct_lb;
1630 	}
1631 	return (lb);
1632 }
1633 
1634 /*
1635  * mdi_set_lb_region_size():
1636  * 		Set current region size for the load-balance
1637  */
1638 int
1639 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1640 {
1641 	mdi_client_t	*ct;
1642 	int		rv = MDI_FAILURE;
1643 
1644 	ct = i_devi_get_client(cdip);
1645 	if (ct != NULL && ct->ct_lb_args != NULL) {
1646 		ct->ct_lb_args->region_size = region_size;
1647 		rv = MDI_SUCCESS;
1648 	}
1649 	return (rv);
1650 }
1651 
1652 /*
1653  * mdi_Set_lb_policy():
1654  * 		Set current load balancing policy for a given client device
1655  */
1656 int
1657 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1658 {
1659 	mdi_client_t	*ct;
1660 	int		rv = MDI_FAILURE;
1661 
1662 	ct = i_devi_get_client(cdip);
1663 	if (ct != NULL) {
1664 		ct->ct_lb = lb;
1665 		rv = MDI_SUCCESS;
1666 	}
1667 	return (rv);
1668 }
1669 
1670 /*
1671  * mdi_failover():
1672  *		failover function called by the vHCI drivers to initiate
1673  *		a failover operation.  This is typically due to non-availability
1674  *		of online paths to route I/O requests.  Failover can be
1675  *		triggered through user application also.
1676  *
1677  *		The vHCI driver calls mdi_failover() to initiate a failover
1678  *		operation. mdi_failover() calls back into the vHCI driver's
1679  *		vo_failover() entry point to perform the actual failover
1680  *		operation.  The reason for requiring the vHCI driver to
1681  *		initiate failover by calling mdi_failover(), instead of directly
1682  *		executing vo_failover() itself, is to ensure that the mdi
1683  *		framework can keep track of the client state properly.
1684  *		Additionally, mdi_failover() provides as a convenience the
1685  *		option of performing the failover operation synchronously or
1686  *		asynchronously
1687  *
1688  *		Upon successful completion of the failover operation, the
1689  *		paths that were previously ONLINE will be in the STANDBY state,
1690  *		and the newly activated paths will be in the ONLINE state.
1691  *
1692  *		The flags modifier determines whether the activation is done
1693  *		synchronously: MDI_FAILOVER_SYNC
1694  * Return Values:
1695  *		MDI_SUCCESS
1696  *		MDI_FAILURE
1697  *		MDI_BUSY
1698  */
1699 /*ARGSUSED*/
1700 int
1701 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1702 {
1703 	int			rv;
1704 	mdi_client_t		*ct;
1705 
1706 	ct = i_devi_get_client(cdip);
1707 	ASSERT(ct != NULL);
1708 	if (ct == NULL) {
1709 		/* cdip is not a valid client device. Nothing more to do. */
1710 		return (MDI_FAILURE);
1711 	}
1712 
1713 	MDI_CLIENT_LOCK(ct);
1714 
1715 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1716 		/* A path to the client is being freed */
1717 		MDI_CLIENT_UNLOCK(ct);
1718 		return (MDI_BUSY);
1719 	}
1720 
1721 
1722 	if (MDI_CLIENT_IS_FAILED(ct)) {
1723 		/*
1724 		 * Client is in failed state. Nothing more to do.
1725 		 */
1726 		MDI_CLIENT_UNLOCK(ct);
1727 		return (MDI_FAILURE);
1728 	}
1729 
1730 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1731 		/*
1732 		 * Failover is already in progress; return BUSY
1733 		 */
1734 		MDI_CLIENT_UNLOCK(ct);
1735 		return (MDI_BUSY);
1736 	}
1737 	/*
1738 	 * Make sure that mdi_pathinfo node state changes are processed.
1739 	 * We do not allow failovers to progress while client path state
1740 	 * changes are in progress
1741 	 */
1742 	if (ct->ct_unstable) {
1743 		if (flags == MDI_FAILOVER_ASYNC) {
1744 			MDI_CLIENT_UNLOCK(ct);
1745 			return (MDI_BUSY);
1746 		} else {
1747 			while (ct->ct_unstable)
1748 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1749 		}
1750 	}
1751 
1752 	/*
1753 	 * Client device is in stable state. Before proceeding, perform sanity
1754 	 * checks again.
1755 	 */
1756 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1757 	    (!i_ddi_devi_attached(ct->ct_dip))) {
1758 		/*
1759 		 * Client is in failed state. Nothing more to do.
1760 		 */
1761 		MDI_CLIENT_UNLOCK(ct);
1762 		return (MDI_FAILURE);
1763 	}
1764 
1765 	/*
1766 	 * Set the client state as failover in progress.
1767 	 */
1768 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1769 	ct->ct_failover_flags = flags;
1770 	MDI_CLIENT_UNLOCK(ct);
1771 
1772 	if (flags == MDI_FAILOVER_ASYNC) {
1773 		/*
1774 		 * Submit the initiate failover request via CPR safe
1775 		 * taskq threads.
1776 		 */
1777 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1778 		    ct, KM_SLEEP);
1779 		return (MDI_ACCEPT);
1780 	} else {
1781 		/*
1782 		 * Synchronous failover mode.  Typically invoked from the user
1783 		 * land.
1784 		 */
1785 		rv = i_mdi_failover(ct);
1786 	}
1787 	return (rv);
1788 }
1789 
1790 /*
1791  * i_mdi_failover():
1792  *		internal failover function. Invokes vHCI drivers failover
1793  *		callback function and process the failover status
1794  * Return Values:
1795  *		None
1796  *
1797  * Note: A client device in failover state can not be detached or freed.
1798  */
1799 static int
1800 i_mdi_failover(void *arg)
1801 {
1802 	int		rv = MDI_SUCCESS;
1803 	mdi_client_t	*ct = (mdi_client_t *)arg;
1804 	mdi_vhci_t	*vh = ct->ct_vhci;
1805 
1806 	ASSERT(!MDI_CLIENT_LOCKED(ct));
1807 
1808 	if (vh->vh_ops->vo_failover != NULL) {
1809 		/*
1810 		 * Call vHCI drivers callback routine
1811 		 */
1812 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1813 		    ct->ct_failover_flags);
1814 	}
1815 
1816 	MDI_CLIENT_LOCK(ct);
1817 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1818 
1819 	/*
1820 	 * Save the failover return status
1821 	 */
1822 	ct->ct_failover_status = rv;
1823 
1824 	/*
1825 	 * As a result of failover, client status would have been changed.
1826 	 * Update the client state and wake up anyone waiting on this client
1827 	 * device.
1828 	 */
1829 	i_mdi_client_update_state(ct);
1830 
1831 	cv_broadcast(&ct->ct_failover_cv);
1832 	MDI_CLIENT_UNLOCK(ct);
1833 	return (rv);
1834 }
1835 
1836 /*
1837  * Load balancing is logical block.
1838  * IOs within the range described by region_size
1839  * would go on the same path. This would improve the
1840  * performance by cache-hit on some of the RAID devices.
1841  * Search only for online paths(At some point we
1842  * may want to balance across target ports).
1843  * If no paths are found then default to round-robin.
1844  */
1845 static int
1846 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1847 {
1848 	int		path_index = -1;
1849 	int		online_path_count = 0;
1850 	int		online_nonpref_path_count = 0;
1851 	int 		region_size = ct->ct_lb_args->region_size;
1852 	mdi_pathinfo_t	*pip;
1853 	mdi_pathinfo_t	*next;
1854 	int		preferred, path_cnt;
1855 
1856 	pip = ct->ct_path_head;
1857 	while (pip) {
1858 		MDI_PI_LOCK(pip);
1859 		if (MDI_PI(pip)->pi_state ==
1860 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1861 			online_path_count++;
1862 		} else if (MDI_PI(pip)->pi_state ==
1863 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1864 			online_nonpref_path_count++;
1865 		}
1866 		next = (mdi_pathinfo_t *)
1867 		    MDI_PI(pip)->pi_client_link;
1868 		MDI_PI_UNLOCK(pip);
1869 		pip = next;
1870 	}
1871 	/* if found any online/preferred then use this type */
1872 	if (online_path_count > 0) {
1873 		path_cnt = online_path_count;
1874 		preferred = 1;
1875 	} else if (online_nonpref_path_count > 0) {
1876 		path_cnt = online_nonpref_path_count;
1877 		preferred = 0;
1878 	} else {
1879 		path_cnt = 0;
1880 	}
1881 	if (path_cnt) {
1882 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1883 		pip = ct->ct_path_head;
1884 		while (pip && path_index != -1) {
1885 			MDI_PI_LOCK(pip);
1886 			if (path_index == 0 &&
1887 			    (MDI_PI(pip)->pi_state ==
1888 			    MDI_PATHINFO_STATE_ONLINE) &&
1889 				MDI_PI(pip)->pi_preferred == preferred) {
1890 				MDI_PI_HOLD(pip);
1891 				MDI_PI_UNLOCK(pip);
1892 				*ret_pip = pip;
1893 				return (MDI_SUCCESS);
1894 			}
1895 			path_index --;
1896 			next = (mdi_pathinfo_t *)
1897 			    MDI_PI(pip)->pi_client_link;
1898 			MDI_PI_UNLOCK(pip);
1899 			pip = next;
1900 		}
1901 		if (pip == NULL) {
1902 			MDI_DEBUG(4, (CE_NOTE, NULL,
1903 			    "!lba %llx, no pip !!\n",
1904 				bp->b_lblkno));
1905 		} else {
1906 			MDI_DEBUG(4, (CE_NOTE, NULL,
1907 			    "!lba %llx, no pip for path_index, "
1908 			    "pip %p\n", bp->b_lblkno, (void *)pip));
1909 		}
1910 	}
1911 	return (MDI_FAILURE);
1912 }
1913 
1914 /*
1915  * mdi_select_path():
1916  *		select a path to access a client device.
1917  *
1918  *		mdi_select_path() function is called by the vHCI drivers to
1919  *		select a path to route the I/O request to.  The caller passes
1920  *		the block I/O data transfer structure ("buf") as one of the
1921  *		parameters.  The mpxio framework uses the buf structure
1922  *		contents to maintain per path statistics (total I/O size /
1923  *		count pending).  If more than one online paths are available to
1924  *		select, the framework automatically selects a suitable path
1925  *		for routing I/O request. If a failover operation is active for
1926  *		this client device the call shall be failed with MDI_BUSY error
1927  *		code.
1928  *
1929  *		By default this function returns a suitable path in online
1930  *		state based on the current load balancing policy.  Currently
1931  *		we support LOAD_BALANCE_NONE (Previously selected online path
1932  *		will continue to be used till the path is usable) and
1933  *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
 *		based on the logical block).  The load balancing policy is set
 *		through the vHCI driver's configuration file (driver.conf).
1937  *
1938  *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  The meaning of the "arg" argument depends
1940  *		on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
1941  *		then the argument is the "path instance" of the path to select.
1942  *		If MDI_SELECT_PATH_INSTANCE is not set then the argument is
1943  *		"start_pip". A non NULL "start_pip" is the starting point to
1944  *		walk and find the next appropriate path.  The following values
1945  *		are currently defined: MDI_SELECT_ONLINE_PATH (to select an
1946  *		ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
1947  *		STANDBY path).
1948  *
1949  *		The non-standard behavior is used by the scsi_vhci driver,
1950  *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
1951  *		attach of client devices (to avoid an unnecessary failover
1952  *		when the STANDBY path comes up first), during failover
1953  *		(to activate a STANDBY path as ONLINE).
1954  *
 *		The selected path is returned in a mdi_hold_path() state
1956  *		(pi_ref_cnt). Caller should release the hold by calling
1957  *		mdi_rele_path().
1958  *
1959  * Return Values:
1960  *		MDI_SUCCESS	- Completed successfully
1961  *		MDI_BUSY 	- Client device is busy failing over
1962  *		MDI_NOPATH	- Client device is online, but no valid path are
1963  *				  available to access this client device
1964  *		MDI_FAILURE	- Invalid client device or state
1965  *		MDI_DEVI_ONLINING
1966  *				- Client device (struct dev_info state) is in
1967  *				  onlining state.
1968  */
1969 
1970 /*ARGSUSED*/
1971 int
1972 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
1973     void *arg, mdi_pathinfo_t **ret_pip)
1974 {
1975 	mdi_client_t	*ct;
1976 	mdi_pathinfo_t	*pip;
1977 	mdi_pathinfo_t	*next;
1978 	mdi_pathinfo_t	*head;
1979 	mdi_pathinfo_t	*start;
1980 	client_lb_t	lbp;	/* load balancing policy */
1981 	int		sb = 1;	/* standard behavior */
1982 	int		preferred = 1;	/* preferred path */
1983 	int		cond, cont = 1;
1984 	int		retry = 0;
1985 	mdi_pathinfo_t	*start_pip;	/* request starting pathinfo */
1986 	int		path_instance;	/* request specific path instance */
1987 
1988 	/* determine type of arg based on flags */
1989 	if (flags & MDI_SELECT_PATH_INSTANCE) {
1990 		flags &= ~MDI_SELECT_PATH_INSTANCE;
1991 		path_instance = (int)(intptr_t)arg;
1992 		start_pip = NULL;
1993 	} else {
1994 		path_instance = 0;
1995 		start_pip = (mdi_pathinfo_t *)arg;
1996 	}
1997 
1998 	if (flags != 0) {
1999 		/*
2000 		 * disable default behavior
2001 		 */
2002 		sb = 0;
2003 	}
2004 
2005 	*ret_pip = NULL;
2006 	ct = i_devi_get_client(cdip);
2007 	if (ct == NULL) {
2008 		/* mdi extensions are NULL, Nothing more to do */
2009 		return (MDI_FAILURE);
2010 	}
2011 
2012 	MDI_CLIENT_LOCK(ct);
2013 
2014 	if (sb) {
2015 		if (MDI_CLIENT_IS_FAILED(ct)) {
2016 			/*
2017 			 * Client is not ready to accept any I/O requests.
2018 			 * Fail this request.
2019 			 */
2020 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
2021 			    "client state offline ct = %p\n", (void *)ct));
2022 			MDI_CLIENT_UNLOCK(ct);
2023 			return (MDI_FAILURE);
2024 		}
2025 
2026 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2027 			/*
2028 			 * Check for Failover is in progress. If so tell the
2029 			 * caller that this device is busy.
2030 			 */
2031 			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
2032 			    "client failover in progress ct = %p\n",
2033 			    (void *)ct));
2034 			MDI_CLIENT_UNLOCK(ct);
2035 			return (MDI_BUSY);
2036 		}
2037 
2038 		/*
2039 		 * Check to see whether the client device is attached.
2040 		 * If not so, let the vHCI driver manually select a path
2041 		 * (standby) and let the probe/attach process to continue.
2042 		 */
2043 		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2044 			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining "
2045 			    "ct = %p\n", (void *)ct));
2046 			MDI_CLIENT_UNLOCK(ct);
2047 			return (MDI_DEVI_ONLINING);
2048 		}
2049 	}
2050 
2051 	/*
2052 	 * Cache in the client list head.  If head of the list is NULL
2053 	 * return MDI_NOPATH
2054 	 */
2055 	head = ct->ct_path_head;
2056 	if (head == NULL) {
2057 		MDI_CLIENT_UNLOCK(ct);
2058 		return (MDI_NOPATH);
2059 	}
2060 
2061 	/* Caller is specifying a specific pathinfo path by path_instance */
2062 	if (path_instance) {
2063 		/* search for pathinfo with correct path_instance */
2064 		for (pip = head;
2065 		    pip && (mdi_pi_get_path_instance(pip) != path_instance);
2066 		    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2067 			;
2068 
2069 		/* If path can't be selected then MDI_FAILURE is returned. */
2070 		if (pip == NULL) {
2071 			MDI_CLIENT_UNLOCK(ct);
2072 			return (MDI_FAILURE);
2073 		}
2074 
2075 		/* verify state of path */
2076 		MDI_PI_LOCK(pip);
2077 		if (MDI_PI(pip)->pi_state != MDI_PATHINFO_STATE_ONLINE) {
2078 			MDI_PI_UNLOCK(pip);
2079 			MDI_CLIENT_UNLOCK(ct);
2080 			return (MDI_FAILURE);
2081 		}
2082 
2083 		/*
2084 		 * Return the path in hold state. Caller should release the
2085 		 * lock by calling mdi_rele_path()
2086 		 */
2087 		MDI_PI_HOLD(pip);
2088 		MDI_PI_UNLOCK(pip);
2089 		ct->ct_path_last = pip;
2090 		*ret_pip = pip;
2091 		MDI_CLIENT_UNLOCK(ct);
2092 		return (MDI_SUCCESS);
2093 	}
2094 
2095 	/*
2096 	 * for non default behavior, bypass current
2097 	 * load balancing policy and always use LOAD_BALANCE_RR
2098 	 * except that the start point will be adjusted based
2099 	 * on the provided start_pip
2100 	 */
2101 	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2102 
2103 	switch (lbp) {
2104 	case LOAD_BALANCE_NONE:
2105 		/*
2106 		 * Load balancing is None  or Alternate path mode
2107 		 * Start looking for a online mdi_pathinfo node starting from
2108 		 * last known selected path
2109 		 */
2110 		preferred = 1;
2111 		pip = (mdi_pathinfo_t *)ct->ct_path_last;
2112 		if (pip == NULL) {
2113 			pip = head;
2114 		}
2115 		start = pip;
2116 		do {
2117 			MDI_PI_LOCK(pip);
2118 			/*
2119 			 * No need to explicitly check if the path is disabled.
2120 			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
2122 			 */
2123 			if ((MDI_PI(pip)->pi_state  ==
2124 				MDI_PATHINFO_STATE_ONLINE) &&
2125 				preferred == MDI_PI(pip)->pi_preferred) {
2126 				/*
2127 				 * Return the path in hold state. Caller should
2128 				 * release the lock by calling mdi_rele_path()
2129 				 */
2130 				MDI_PI_HOLD(pip);
2131 				MDI_PI_UNLOCK(pip);
2132 				ct->ct_path_last = pip;
2133 				*ret_pip = pip;
2134 				MDI_CLIENT_UNLOCK(ct);
2135 				return (MDI_SUCCESS);
2136 			}
2137 
2138 			/*
2139 			 * Path is busy.
2140 			 */
2141 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2142 			    MDI_PI_IS_TRANSIENT(pip))
2143 				retry = 1;
2144 			/*
2145 			 * Keep looking for a next available online path
2146 			 */
2147 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2148 			if (next == NULL) {
2149 				next = head;
2150 			}
2151 			MDI_PI_UNLOCK(pip);
2152 			pip = next;
2153 			if (start == pip && preferred) {
2154 				preferred = 0;
2155 			} else if (start == pip && !preferred) {
2156 				cont = 0;
2157 			}
2158 		} while (cont);
2159 		break;
2160 
2161 	case LOAD_BALANCE_LBA:
2162 		/*
2163 		 * Make sure we are looking
2164 		 * for an online path. Otherwise, if it is for a STANDBY
2165 		 * path request, it will go through and fetch an ONLINE
2166 		 * path which is not desirable.
2167 		 */
2168 		if ((ct->ct_lb_args != NULL) &&
2169 			    (ct->ct_lb_args->region_size) && bp &&
2170 				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2171 			if (i_mdi_lba_lb(ct, ret_pip, bp)
2172 				    == MDI_SUCCESS) {
2173 				MDI_CLIENT_UNLOCK(ct);
2174 				return (MDI_SUCCESS);
2175 			}
2176 		}
2177 		/*  FALLTHROUGH */
2178 	case LOAD_BALANCE_RR:
2179 		/*
2180 		 * Load balancing is Round Robin. Start looking for a online
2181 		 * mdi_pathinfo node starting from last known selected path
2182 		 * as the start point.  If override flags are specified,
2183 		 * process accordingly.
2184 		 * If the search is already in effect(start_pip not null),
2185 		 * then lets just use the same path preference to continue the
2186 		 * traversal.
2187 		 */
2188 
2189 		if (start_pip != NULL) {
2190 			preferred = MDI_PI(start_pip)->pi_preferred;
2191 		} else {
2192 			preferred = 1;
2193 		}
2194 
2195 		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2196 		if (start == NULL) {
2197 			pip = head;
2198 		} else {
2199 			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2200 			if (pip == NULL) {
2201 				if (!sb) {
2202 					if (preferred == 0) {
2203 						/*
2204 						 * Looks like we have completed
2205 						 * the traversal as preferred
2206 						 * value is 0. Time to bail out.
2207 						 */
2208 						*ret_pip = NULL;
2209 						MDI_CLIENT_UNLOCK(ct);
2210 						return (MDI_NOPATH);
2211 					} else {
2212 						/*
2213 						 * Looks like we reached the
2214 						 * end of the list. Lets enable
2215 						 * traversal of non preferred
2216 						 * paths.
2217 						 */
2218 						preferred = 0;
2219 					}
2220 				}
2221 				pip = head;
2222 			}
2223 		}
2224 		start = pip;
2225 		do {
2226 			MDI_PI_LOCK(pip);
2227 			if (sb) {
2228 				cond = ((MDI_PI(pip)->pi_state ==
2229 				    MDI_PATHINFO_STATE_ONLINE &&
2230 					MDI_PI(pip)->pi_preferred ==
2231 						preferred) ? 1 : 0);
2232 			} else {
2233 				if (flags == MDI_SELECT_ONLINE_PATH) {
2234 					cond = ((MDI_PI(pip)->pi_state ==
2235 					    MDI_PATHINFO_STATE_ONLINE &&
2236 						MDI_PI(pip)->pi_preferred ==
2237 						preferred) ? 1 : 0);
2238 				} else if (flags == MDI_SELECT_STANDBY_PATH) {
2239 					cond = ((MDI_PI(pip)->pi_state ==
2240 					    MDI_PATHINFO_STATE_STANDBY &&
2241 						MDI_PI(pip)->pi_preferred ==
2242 						preferred) ? 1 : 0);
2243 				} else if (flags == (MDI_SELECT_ONLINE_PATH |
2244 				    MDI_SELECT_STANDBY_PATH)) {
2245 					cond = (((MDI_PI(pip)->pi_state ==
2246 					    MDI_PATHINFO_STATE_ONLINE ||
2247 					    (MDI_PI(pip)->pi_state ==
2248 					    MDI_PATHINFO_STATE_STANDBY)) &&
2249 						MDI_PI(pip)->pi_preferred ==
2250 						preferred) ? 1 : 0);
2251 				} else if (flags ==
2252 					(MDI_SELECT_STANDBY_PATH |
2253 					MDI_SELECT_ONLINE_PATH |
2254 					MDI_SELECT_USER_DISABLE_PATH)) {
2255 					cond = (((MDI_PI(pip)->pi_state ==
2256 					    MDI_PATHINFO_STATE_ONLINE ||
2257 					    (MDI_PI(pip)->pi_state ==
2258 					    MDI_PATHINFO_STATE_STANDBY) ||
2259 						(MDI_PI(pip)->pi_state ==
2260 					    (MDI_PATHINFO_STATE_ONLINE|
2261 					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
2262 						(MDI_PI(pip)->pi_state ==
2263 					    (MDI_PATHINFO_STATE_STANDBY |
2264 					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
2265 						MDI_PI(pip)->pi_preferred ==
2266 						preferred) ? 1 : 0);
2267 				} else {
2268 					cond = 0;
2269 				}
2270 			}
2271 			/*
2272 			 * No need to explicitly check if the path is disabled.
2273 			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
2275 			 */
2276 			if (cond) {
2277 				/*
2278 				 * Return the path in hold state. Caller should
2279 				 * release the lock by calling mdi_rele_path()
2280 				 */
2281 				MDI_PI_HOLD(pip);
2282 				MDI_PI_UNLOCK(pip);
2283 				if (sb)
2284 					ct->ct_path_last = pip;
2285 				*ret_pip = pip;
2286 				MDI_CLIENT_UNLOCK(ct);
2287 				return (MDI_SUCCESS);
2288 			}
2289 			/*
2290 			 * Path is busy.
2291 			 */
2292 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2293 			    MDI_PI_IS_TRANSIENT(pip))
2294 				retry = 1;
2295 
2296 			/*
2297 			 * Keep looking for a next available online path
2298 			 */
2299 do_again:
2300 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2301 			if (next == NULL) {
2302 				if (!sb) {
2303 					if (preferred == 1) {
2304 						/*
2305 						 * Looks like we reached the
2306 						 * end of the list. Lets enable
2307 						 * traversal of non preferred
2308 						 * paths.
2309 						 */
2310 						preferred = 0;
2311 						next = head;
2312 					} else {
2313 						/*
2314 						 * We have done both the passes
2315 						 * Preferred as well as for
2316 						 * Non-preferred. Bail out now.
2317 						 */
2318 						cont = 0;
2319 					}
2320 				} else {
2321 					/*
2322 					 * Standard behavior case.
2323 					 */
2324 					next = head;
2325 				}
2326 			}
2327 			MDI_PI_UNLOCK(pip);
2328 			if (cont == 0) {
2329 				break;
2330 			}
2331 			pip = next;
2332 
2333 			if (!sb) {
2334 				/*
2335 				 * We need to handle the selection of
2336 				 * non-preferred path in the following
2337 				 * case:
2338 				 *
2339 				 * +------+   +------+   +------+   +-----+
2340 				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2341 				 * +------+   +------+   +------+   +-----+
2342 				 *
2343 				 * If we start the search with B, we need to
2344 				 * skip beyond B to pick C which is non -
2345 				 * preferred in the second pass. The following
2346 				 * test, if true, will allow us to skip over
2347 				 * the 'start'(B in the example) to select
2348 				 * other non preferred elements.
2349 				 */
2350 				if ((start_pip != NULL) && (start_pip == pip) &&
2351 				    (MDI_PI(start_pip)->pi_preferred
2352 				    != preferred)) {
2353 					/*
2354 					 * try again after going past the start
2355 					 * pip
2356 					 */
2357 					MDI_PI_LOCK(pip);
2358 					goto do_again;
2359 				}
2360 			} else {
2361 				/*
2362 				 * Standard behavior case
2363 				 */
2364 				if (start == pip && preferred) {
2365 					/* look for nonpreferred paths */
2366 					preferred = 0;
2367 				} else if (start == pip && !preferred) {
2368 					/*
2369 					 * Exit condition
2370 					 */
2371 					cont = 0;
2372 				}
2373 			}
2374 		} while (cont);
2375 		break;
2376 	}
2377 
2378 	MDI_CLIENT_UNLOCK(ct);
2379 	if (retry == 1) {
2380 		return (MDI_BUSY);
2381 	} else {
2382 		return (MDI_NOPATH);
2383 	}
2384 }
2385 
2386 /*
2387  * For a client, return the next available path to any phci
2388  *
2389  * Note:
2390  *		Caller should hold the branch's devinfo node to get a consistent
2391  *		snap shot of the mdi_pathinfo nodes.
2392  *
2393  *		Please note that even the list is stable the mdi_pathinfo
2394  *		node state and properties are volatile.  The caller should lock
2395  *		and unlock the nodes by calling mdi_pi_lock() and
2396  *		mdi_pi_unlock() functions to get a stable properties.
2397  *
2398  *		If there is a need to use the nodes beyond the hold of the
2399  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
2400  *		need to be held against unexpected removal by calling
2401  *		mdi_hold_path() and should be released by calling
2402  *		mdi_rele_path() on completion.
2403  */
2404 mdi_pathinfo_t *
2405 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2406 {
2407 	mdi_client_t *ct;
2408 
2409 	if (!MDI_CLIENT(ct_dip))
2410 		return (NULL);
2411 
2412 	/*
2413 	 * Walk through client link
2414 	 */
2415 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2416 	ASSERT(ct != NULL);
2417 
2418 	if (pip == NULL)
2419 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2420 
2421 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2422 }
2423 
2424 /*
2425  * For a phci, return the next available path to any client
2426  * Note: ditto mdi_get_next_phci_path()
2427  */
2428 mdi_pathinfo_t *
2429 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2430 {
2431 	mdi_phci_t *ph;
2432 
2433 	if (!MDI_PHCI(ph_dip))
2434 		return (NULL);
2435 
2436 	/*
2437 	 * Walk through pHCI link
2438 	 */
2439 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2440 	ASSERT(ph != NULL);
2441 
2442 	if (pip == NULL)
2443 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2444 
2445 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2446 }
2447 
2448 /*
2449  * mdi_hold_path():
2450  *		Hold the mdi_pathinfo node against unwanted unexpected free.
2451  * Return Values:
2452  *		None
2453  */
2454 void
2455 mdi_hold_path(mdi_pathinfo_t *pip)
2456 {
2457 	if (pip) {
2458 		MDI_PI_LOCK(pip);
2459 		MDI_PI_HOLD(pip);
2460 		MDI_PI_UNLOCK(pip);
2461 	}
2462 }
2463 
2464 
2465 /*
2466  * mdi_rele_path():
2467  *		Release the mdi_pathinfo node which was selected
2468  *		through mdi_select_path() mechanism or manually held by
2469  *		calling mdi_hold_path().
2470  * Return Values:
2471  *		None
2472  */
2473 void
2474 mdi_rele_path(mdi_pathinfo_t *pip)
2475 {
2476 	if (pip) {
2477 		MDI_PI_LOCK(pip);
2478 		MDI_PI_RELE(pip);
2479 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2480 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2481 		}
2482 		MDI_PI_UNLOCK(pip);
2483 	}
2484 }
2485 
2486 /*
2487  * mdi_pi_lock():
2488  * 		Lock the mdi_pathinfo node.
2489  * Note:
2490  *		The caller should release the lock by calling mdi_pi_unlock()
2491  */
2492 void
2493 mdi_pi_lock(mdi_pathinfo_t *pip)
2494 {
2495 	ASSERT(pip != NULL);
2496 	if (pip) {
2497 		MDI_PI_LOCK(pip);
2498 	}
2499 }
2500 
2501 
2502 /*
2503  * mdi_pi_unlock():
2504  * 		Unlock the mdi_pathinfo node.
2505  * Note:
2506  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2507  */
2508 void
2509 mdi_pi_unlock(mdi_pathinfo_t *pip)
2510 {
2511 	ASSERT(pip != NULL);
2512 	if (pip) {
2513 		MDI_PI_UNLOCK(pip);
2514 	}
2515 }
2516 
2517 /*
2518  * mdi_pi_find():
2519  *		Search the list of mdi_pathinfo nodes attached to the
2520  *		pHCI/Client device node whose path address matches "paddr".
2521  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2522  *		found.
2523  * Return Values:
2524  *		mdi_pathinfo node handle
2525  *		NULL
2526  * Notes:
2527  *		Caller need not hold any locks to call this function.
2528  */
2529 mdi_pathinfo_t *
2530 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2531 {
2532 	mdi_phci_t		*ph;
2533 	mdi_vhci_t		*vh;
2534 	mdi_client_t		*ct;
2535 	mdi_pathinfo_t		*pip = NULL;
2536 
2537 	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s",
2538 	    caddr ? caddr : "NULL", paddr ? paddr : "NULL"));
2539 	if ((pdip == NULL) || (paddr == NULL)) {
2540 		return (NULL);
2541 	}
2542 	ph = i_devi_get_phci(pdip);
2543 	if (ph == NULL) {
2544 		/*
2545 		 * Invalid pHCI device, Nothing more to do.
2546 		 */
2547 		MDI_DEBUG(2, (CE_WARN, pdip,
2548 		    "!mdi_pi_find: invalid phci"));
2549 		return (NULL);
2550 	}
2551 
2552 	vh = ph->ph_vhci;
2553 	if (vh == NULL) {
2554 		/*
2555 		 * Invalid vHCI device, Nothing more to do.
2556 		 */
2557 		MDI_DEBUG(2, (CE_WARN, pdip,
2558 		    "!mdi_pi_find: invalid vhci"));
2559 		return (NULL);
2560 	}
2561 
2562 	/*
2563 	 * Look for pathinfo node identified by paddr.
2564 	 */
2565 	if (caddr == NULL) {
2566 		/*
2567 		 * Find a mdi_pathinfo node under pHCI list for a matching
2568 		 * unit address.
2569 		 */
2570 		MDI_PHCI_LOCK(ph);
2571 		if (MDI_PHCI_IS_OFFLINE(ph)) {
2572 			MDI_DEBUG(2, (CE_WARN, pdip,
2573 			    "!mdi_pi_find: offline phci %p", (void *)ph));
2574 			MDI_PHCI_UNLOCK(ph);
2575 			return (NULL);
2576 		}
2577 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2578 
2579 		while (pip != NULL) {
2580 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2581 				break;
2582 			}
2583 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2584 		}
2585 		MDI_PHCI_UNLOCK(ph);
2586 		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p",
2587 		    (void *)pip));
2588 		return (pip);
2589 	}
2590 
2591 	/*
2592 	 * XXX - Is the rest of the code in this function really necessary?
2593 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
2594 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2595 	 * whether the search is based on the pathinfo nodes attached to
2596 	 * the pHCI or the client node, the result will be the same.
2597 	 */
2598 
2599 	/*
2600 	 * Find the client device corresponding to 'caddr'
2601 	 */
2602 	MDI_VHCI_CLIENT_LOCK(vh);
2603 
2604 	/*
2605 	 * XXX - Passing NULL to the following function works as long as the
2606 	 * the client addresses (caddr) are unique per vhci basis.
2607 	 */
2608 	ct = i_mdi_client_find(vh, NULL, caddr);
2609 	if (ct == NULL) {
2610 		/*
2611 		 * Client not found, Obviously mdi_pathinfo node has not been
2612 		 * created yet.
2613 		 */
2614 		MDI_VHCI_CLIENT_UNLOCK(vh);
2615 		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not "
2616 		    "found for caddr %s", caddr ? caddr : "NULL"));
2617 		return (NULL);
2618 	}
2619 
2620 	/*
2621 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2622 	 * pHCI and paddr
2623 	 */
2624 	MDI_CLIENT_LOCK(ct);
2625 
2626 	/*
2627 	 * Release the global mutex as it is no more needed. Note: We always
2628 	 * respect the locking order while acquiring.
2629 	 */
2630 	MDI_VHCI_CLIENT_UNLOCK(vh);
2631 
2632 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2633 	while (pip != NULL) {
2634 		/*
2635 		 * Compare the unit address
2636 		 */
2637 		if ((MDI_PI(pip)->pi_phci == ph) &&
2638 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2639 			break;
2640 		}
2641 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2642 	}
2643 	MDI_CLIENT_UNLOCK(ct);
2644 	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip));
2645 	return (pip);
2646 }
2647 
2648 /*
2649  * mdi_pi_alloc():
2650  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2651  *		The mdi_pathinfo node returned by this function identifies a
2652  *		unique device path is capable of having properties attached
2653  *		and passed to mdi_pi_online() to fully attach and online the
2654  *		path and client device node.
2655  *		The mdi_pathinfo node returned by this function must be
2656  *		destroyed using mdi_pi_free() if the path is no longer
2657  *		operational or if the caller fails to attach a client device
2658  *		node when calling mdi_pi_online(). The framework will not free
2659  *		the resources allocated.
2660  *		This function can be called from both interrupt and kernel
2661  *		contexts.  DDI_NOSLEEP flag should be used while calling
2662  *		from interrupt contexts.
2663  * Return Values:
2664  *		MDI_SUCCESS
2665  *		MDI_FAILURE
2666  *		MDI_NOMEM
2667  */
2668 /*ARGSUSED*/
2669 int
2670 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2671     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2672 {
2673 	mdi_vhci_t	*vh;
2674 	mdi_phci_t	*ph;
2675 	mdi_client_t	*ct;
2676 	mdi_pathinfo_t	*pip = NULL;
2677 	dev_info_t	*cdip;
2678 	int		rv = MDI_NOMEM;
2679 	int		path_allocated = 0;
2680 
2681 	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s",
2682 	    cname ? cname : "NULL", caddr ? caddr : "NULL",
2683 	    paddr ? paddr : "NULL"));
2684 
2685 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2686 	    ret_pip == NULL) {
2687 		/* Nothing more to do */
2688 		return (MDI_FAILURE);
2689 	}
2690 
2691 	*ret_pip = NULL;
2692 
2693 	/* No allocations on detaching pHCI */
2694 	if (DEVI_IS_DETACHING(pdip)) {
2695 		/* Invalid pHCI device, return failure */
2696 		MDI_DEBUG(1, (CE_WARN, pdip,
2697 		    "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip));
2698 		return (MDI_FAILURE);
2699 	}
2700 
2701 	ph = i_devi_get_phci(pdip);
2702 	ASSERT(ph != NULL);
2703 	if (ph == NULL) {
2704 		/* Invalid pHCI device, return failure */
2705 		MDI_DEBUG(1, (CE_WARN, pdip,
2706 		    "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip));
2707 		return (MDI_FAILURE);
2708 	}
2709 
2710 	MDI_PHCI_LOCK(ph);
2711 	vh = ph->ph_vhci;
2712 	if (vh == NULL) {
2713 		/* Invalid vHCI device, return failure */
2714 		MDI_DEBUG(1, (CE_WARN, pdip,
2715 		    "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip));
2716 		MDI_PHCI_UNLOCK(ph);
2717 		return (MDI_FAILURE);
2718 	}
2719 
2720 	if (MDI_PHCI_IS_READY(ph) == 0) {
2721 		/*
2722 		 * Do not allow new node creation when pHCI is in
2723 		 * offline/suspended states
2724 		 */
2725 		MDI_DEBUG(1, (CE_WARN, pdip,
2726 		    "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph));
2727 		MDI_PHCI_UNLOCK(ph);
2728 		return (MDI_BUSY);
2729 	}
2730 	MDI_PHCI_UNSTABLE(ph);
2731 	MDI_PHCI_UNLOCK(ph);
2732 
2733 	/* look for a matching client, create one if not found */
2734 	MDI_VHCI_CLIENT_LOCK(vh);
2735 	ct = i_mdi_client_find(vh, cname, caddr);
2736 	if (ct == NULL) {
2737 		ct = i_mdi_client_alloc(vh, cname, caddr);
2738 		ASSERT(ct != NULL);
2739 	}
2740 
2741 	if (ct->ct_dip == NULL) {
2742 		/*
2743 		 * Allocate a devinfo node
2744 		 */
2745 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2746 		    compatible, ncompatible);
2747 		if (ct->ct_dip == NULL) {
2748 			(void) i_mdi_client_free(vh, ct);
2749 			goto fail;
2750 		}
2751 	}
2752 	cdip = ct->ct_dip;
2753 
2754 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2755 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2756 
2757 	MDI_CLIENT_LOCK(ct);
2758 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2759 	while (pip != NULL) {
2760 		/*
2761 		 * Compare the unit address
2762 		 */
2763 		if ((MDI_PI(pip)->pi_phci == ph) &&
2764 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2765 			break;
2766 		}
2767 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2768 	}
2769 	MDI_CLIENT_UNLOCK(ct);
2770 
2771 	if (pip == NULL) {
2772 		/*
2773 		 * This is a new path for this client device.  Allocate and
2774 		 * initialize a new pathinfo node
2775 		 */
2776 		pip = i_mdi_pi_alloc(ph, paddr, ct);
2777 		ASSERT(pip != NULL);
2778 		path_allocated = 1;
2779 	}
2780 	rv = MDI_SUCCESS;
2781 
2782 fail:
2783 	/*
2784 	 * Release the global mutex.
2785 	 */
2786 	MDI_VHCI_CLIENT_UNLOCK(vh);
2787 
2788 	/*
2789 	 * Mark the pHCI as stable
2790 	 */
2791 	MDI_PHCI_LOCK(ph);
2792 	MDI_PHCI_STABLE(ph);
2793 	MDI_PHCI_UNLOCK(ph);
2794 	*ret_pip = pip;
2795 
2796 	MDI_DEBUG(2, (CE_NOTE, pdip,
2797 	    "!mdi_pi_alloc_compatible: alloc %p", (void *)pip));
2798 
2799 	if (path_allocated)
2800 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2801 
2802 	return (rv);
2803 }
2804 
2805 /*ARGSUSED*/
2806 int
2807 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2808     int flags, mdi_pathinfo_t **ret_pip)
2809 {
2810 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2811 	    flags, ret_pip));
2812 }
2813 
2814 /*
2815  * i_mdi_pi_alloc():
2816  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2817  * Return Values:
2818  *		mdi_pathinfo
2819  */
2820 /*ARGSUSED*/
2821 static mdi_pathinfo_t *
2822 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2823 {
2824 	mdi_pathinfo_t	*pip;
2825 	int		ct_circular;
2826 	int		ph_circular;
2827 	static char	path[MAXPATHLEN];
2828 	char		*path_persistent;
2829 	int		path_instance;
2830 	mod_hash_val_t	hv;
2831 
2832 	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2833 
2834 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2835 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2836 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2837 	    MDI_PATHINFO_STATE_TRANSIENT;
2838 
2839 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2840 		MDI_PI_SET_USER_DISABLE(pip);
2841 
2842 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2843 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2844 
2845 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2846 		MDI_PI_SET_DRV_DISABLE(pip);
2847 
2848 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2849 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2850 	MDI_PI(pip)->pi_client = ct;
2851 	MDI_PI(pip)->pi_phci = ph;
2852 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2853 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2854 
2855         /*
2856 	 * We form the "path" to the pathinfo node, and see if we have
2857 	 * already allocated a 'path_instance' for that "path".  If so,
2858 	 * we use the already allocated 'path_instance'.  If not, we
2859 	 * allocate a new 'path_instance' and associate it with a copy of
2860 	 * the "path" string (which is never freed). The association
2861 	 * between a 'path_instance' this "path" string persists until
2862 	 * reboot.
2863 	 */
2864         mutex_enter(&mdi_pathmap_mutex);
2865 	(void) ddi_pathname(ph->ph_dip, path);
2866 	(void) sprintf(path + strlen(path), "/%s@%s",
2867 	    ddi_node_name(ct->ct_dip), MDI_PI(pip)->pi_addr);
2868         if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2869                 path_instance = (uint_t)(intptr_t)hv;
2870         } else {
2871 		/* allocate a new 'path_instance' and persistent "path" */
2872 		path_instance = mdi_pathmap_instance++;
2873 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
2874                 (void) mod_hash_insert(mdi_pathmap_bypath,
2875                     (mod_hash_key_t)path_persistent,
2876                     (mod_hash_val_t)(intptr_t)path_instance);
2877 		(void) mod_hash_insert(mdi_pathmap_byinstance,
2878 		    (mod_hash_key_t)(intptr_t)path_instance,
2879 		    (mod_hash_val_t)path_persistent);
2880         }
2881         mutex_exit(&mdi_pathmap_mutex);
2882 	MDI_PI(pip)->pi_path_instance = path_instance;
2883 
2884 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2885 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
2886 	MDI_PI(pip)->pi_pprivate = NULL;
2887 	MDI_PI(pip)->pi_cprivate = NULL;
2888 	MDI_PI(pip)->pi_vprivate = NULL;
2889 	MDI_PI(pip)->pi_client_link = NULL;
2890 	MDI_PI(pip)->pi_phci_link = NULL;
2891 	MDI_PI(pip)->pi_ref_cnt = 0;
2892 	MDI_PI(pip)->pi_kstats = NULL;
2893 	MDI_PI(pip)->pi_preferred = 1;
2894 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
2895 
2896 	/*
2897 	 * Lock both dev_info nodes against changes in parallel.
2898 	 *
2899 	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
2900 	 * This atypical operation is done to synchronize pathinfo nodes
2901 	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
2902 	 * the pathinfo nodes are children of the Client.
2903 	 */
2904 	ndi_devi_enter(ct->ct_dip, &ct_circular);
2905 	ndi_devi_enter(ph->ph_dip, &ph_circular);
2906 
2907 	i_mdi_phci_add_path(ph, pip);
2908 	i_mdi_client_add_path(ct, pip);
2909 
2910 	ndi_devi_exit(ph->ph_dip, ph_circular);
2911 	ndi_devi_exit(ct->ct_dip, ct_circular);
2912 
2913 	return (pip);
2914 }
2915 
2916 /*
2917  * mdi_pi_pathname_by_instance():
2918  *	Lookup of "path" by 'path_instance'. Return "path".
2919  *	NOTE: returned "path" remains valid forever (until reboot).
2920  */
2921 char *
2922 mdi_pi_pathname_by_instance(int path_instance)
2923 {
2924 	char		*path;
2925 	mod_hash_val_t	hv;
2926 
2927 	/* mdi_pathmap lookup of "path" by 'path_instance' */
2928 	mutex_enter(&mdi_pathmap_mutex);
2929 	if (mod_hash_find(mdi_pathmap_byinstance,
2930 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
2931 		path = (char *)hv;
2932 	else
2933 		path = NULL;
2934 	mutex_exit(&mdi_pathmap_mutex);
2935 	return (path);
2936 }
2937 
2938 /*
2939  * i_mdi_phci_add_path():
2940  * 		Add a mdi_pathinfo node to pHCI list.
2941  * Notes:
2942  *		Caller should per-pHCI mutex
2943  */
2944 static void
2945 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
2946 {
2947 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
2948 
2949 	MDI_PHCI_LOCK(ph);
2950 	if (ph->ph_path_head == NULL) {
2951 		ph->ph_path_head = pip;
2952 	} else {
2953 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
2954 	}
2955 	ph->ph_path_tail = pip;
2956 	ph->ph_path_count++;
2957 	MDI_PHCI_UNLOCK(ph);
2958 }
2959 
2960 /*
2961  * i_mdi_client_add_path():
2962  *		Add mdi_pathinfo node to client list
2963  */
2964 static void
2965 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
2966 {
2967 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
2968 
2969 	MDI_CLIENT_LOCK(ct);
2970 	if (ct->ct_path_head == NULL) {
2971 		ct->ct_path_head = pip;
2972 	} else {
2973 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
2974 	}
2975 	ct->ct_path_tail = pip;
2976 	ct->ct_path_count++;
2977 	MDI_CLIENT_UNLOCK(ct);
2978 }
2979 
2980 /*
2981  * mdi_pi_free():
2982  *		Free the mdi_pathinfo node and also client device node if this
2983  *		is the last path to the device
2984  * Return Values:
2985  *		MDI_SUCCESS
2986  *		MDI_FAILURE
2987  *		MDI_BUSY
2988  */
2989 /*ARGSUSED*/
2990 int
2991 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
2992 {
2993 	int		rv = MDI_FAILURE;
2994 	mdi_vhci_t	*vh;
2995 	mdi_phci_t	*ph;
2996 	mdi_client_t	*ct;
2997 	int		(*f)();
2998 	int		client_held = 0;
2999 
3000 	MDI_PI_LOCK(pip);
3001 	ph = MDI_PI(pip)->pi_phci;
3002 	ASSERT(ph != NULL);
3003 	if (ph == NULL) {
3004 		/*
3005 		 * Invalid pHCI device, return failure
3006 		 */
3007 		MDI_DEBUG(1, (CE_WARN, NULL,
3008 		    "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip));
3009 		MDI_PI_UNLOCK(pip);
3010 		return (MDI_FAILURE);
3011 	}
3012 
3013 	vh = ph->ph_vhci;
3014 	ASSERT(vh != NULL);
3015 	if (vh == NULL) {
3016 		/* Invalid pHCI device, return failure */
3017 		MDI_DEBUG(1, (CE_WARN, NULL,
3018 		    "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip));
3019 		MDI_PI_UNLOCK(pip);
3020 		return (MDI_FAILURE);
3021 	}
3022 
3023 	ct = MDI_PI(pip)->pi_client;
3024 	ASSERT(ct != NULL);
3025 	if (ct == NULL) {
3026 		/*
3027 		 * Invalid Client device, return failure
3028 		 */
3029 		MDI_DEBUG(1, (CE_WARN, NULL,
3030 		    "!mdi_pi_free: invalid client pip=%p", (void *)pip));
3031 		MDI_PI_UNLOCK(pip);
3032 		return (MDI_FAILURE);
3033 	}
3034 
3035 	/*
3036 	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
3037 	 * if the node state is either offline or init and the reference count
3038 	 * is zero.
3039 	 */
3040 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3041 	    MDI_PI_IS_INITING(pip))) {
3042 		/*
3043 		 * Node is busy
3044 		 */
3045 		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
3046 		    "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip));
3047 		MDI_PI_UNLOCK(pip);
3048 		return (MDI_BUSY);
3049 	}
3050 
3051 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3052 		/*
3053 		 * Give a chance for pending I/Os to complete.
3054 		 */
3055 		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: "
3056 		    "%d cmds still pending on path: %p\n",
3057 		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3058 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3059 		    &MDI_PI(pip)->pi_mutex,
3060 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3061 			/*
3062 			 * The timeout time reached without ref_cnt being zero
3063 			 * being signaled.
3064 			 */
3065 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
3066 			    "!mdi_pi_free: "
3067 			    "Timeout reached on path %p without the cond\n",
3068 			    (void *)pip));
3069 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
3070 			    "!mdi_pi_free: "
3071 			    "%d cmds still pending on path: %p\n",
3072 			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3073 			MDI_PI_UNLOCK(pip);
3074 			return (MDI_BUSY);
3075 		}
3076 	}
3077 	if (MDI_PI(pip)->pi_pm_held) {
3078 		client_held = 1;
3079 	}
3080 	MDI_PI_UNLOCK(pip);
3081 
3082 	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3083 
3084 	MDI_CLIENT_LOCK(ct);
3085 
3086 	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3087 	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3088 
3089 	/*
3090 	 * Wait till failover is complete before removing this node.
3091 	 */
3092 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3093 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3094 
3095 	MDI_CLIENT_UNLOCK(ct);
3096 	MDI_VHCI_CLIENT_LOCK(vh);
3097 	MDI_CLIENT_LOCK(ct);
3098 	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3099 
3100 	if (!MDI_PI_IS_INITING(pip)) {
3101 		f = vh->vh_ops->vo_pi_uninit;
3102 		if (f != NULL) {
3103 			rv = (*f)(vh->vh_dip, pip, 0);
3104 		}
3105 	}
3106 	/*
3107 	 * If vo_pi_uninit() completed successfully.
3108 	 */
3109 	if (rv == MDI_SUCCESS) {
3110 		if (client_held) {
3111 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
3112 			    "i_mdi_pm_rele_client\n"));
3113 			i_mdi_pm_rele_client(ct, 1);
3114 		}
3115 		i_mdi_pi_free(ph, pip, ct);
3116 		if (ct->ct_path_count == 0) {
3117 			/*
3118 			 * Client lost its last path.
3119 			 * Clean up the client device
3120 			 */
3121 			MDI_CLIENT_UNLOCK(ct);
3122 			(void) i_mdi_client_free(ct->ct_vhci, ct);
3123 			MDI_VHCI_CLIENT_UNLOCK(vh);
3124 			return (rv);
3125 		}
3126 	}
3127 	MDI_CLIENT_UNLOCK(ct);
3128 	MDI_VHCI_CLIENT_UNLOCK(vh);
3129 
3130 	if (rv == MDI_FAILURE)
3131 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3132 
3133 	return (rv);
3134 }
3135 
3136 /*
3137  * i_mdi_pi_free():
3138  *		Free the mdi_pathinfo node
3139  */
3140 static void
3141 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3142 {
3143 	int	ct_circular;
3144 	int	ph_circular;
3145 
3146 	ASSERT(MDI_CLIENT_LOCKED(ct));
3147 
3148 	/*
3149 	 * remove any per-path kstats
3150 	 */
3151 	i_mdi_pi_kstat_destroy(pip);
3152 
3153 	/* See comments in i_mdi_pi_alloc() */
3154 	ndi_devi_enter(ct->ct_dip, &ct_circular);
3155 	ndi_devi_enter(ph->ph_dip, &ph_circular);
3156 
3157 	i_mdi_client_remove_path(ct, pip);
3158 	i_mdi_phci_remove_path(ph, pip);
3159 
3160 	ndi_devi_exit(ph->ph_dip, ph_circular);
3161 	ndi_devi_exit(ct->ct_dip, ct_circular);
3162 
3163 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
3164 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
3165 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3166 	if (MDI_PI(pip)->pi_addr) {
3167 		kmem_free(MDI_PI(pip)->pi_addr,
3168 		    strlen(MDI_PI(pip)->pi_addr) + 1);
3169 		MDI_PI(pip)->pi_addr = NULL;
3170 	}
3171 
3172 	if (MDI_PI(pip)->pi_prop) {
3173 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
3174 		MDI_PI(pip)->pi_prop = NULL;
3175 	}
3176 	kmem_free(pip, sizeof (struct mdi_pathinfo));
3177 }
3178 
3179 
3180 /*
3181  * i_mdi_phci_remove_path():
3182  * 		Remove a mdi_pathinfo node from pHCI list.
3183  * Notes:
3184  *		Caller should hold per-pHCI mutex
3185  */
3186 static void
3187 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3188 {
3189 	mdi_pathinfo_t	*prev = NULL;
3190 	mdi_pathinfo_t	*path = NULL;
3191 
3192 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3193 
3194 	MDI_PHCI_LOCK(ph);
3195 	path = ph->ph_path_head;
3196 	while (path != NULL) {
3197 		if (path == pip) {
3198 			break;
3199 		}
3200 		prev = path;
3201 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3202 	}
3203 
3204 	if (path) {
3205 		ph->ph_path_count--;
3206 		if (prev) {
3207 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3208 		} else {
3209 			ph->ph_path_head =
3210 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3211 		}
3212 		if (ph->ph_path_tail == path) {
3213 			ph->ph_path_tail = prev;
3214 		}
3215 	}
3216 
3217 	/*
3218 	 * Clear the pHCI link
3219 	 */
3220 	MDI_PI(pip)->pi_phci_link = NULL;
3221 	MDI_PI(pip)->pi_phci = NULL;
3222 	MDI_PHCI_UNLOCK(ph);
3223 }
3224 
3225 /*
3226  * i_mdi_client_remove_path():
3227  * 		Remove a mdi_pathinfo node from client path list.
3228  */
3229 static void
3230 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3231 {
3232 	mdi_pathinfo_t	*prev = NULL;
3233 	mdi_pathinfo_t	*path;
3234 
3235 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3236 
3237 	ASSERT(MDI_CLIENT_LOCKED(ct));
3238 	path = ct->ct_path_head;
3239 	while (path != NULL) {
3240 		if (path == pip) {
3241 			break;
3242 		}
3243 		prev = path;
3244 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3245 	}
3246 
3247 	if (path) {
3248 		ct->ct_path_count--;
3249 		if (prev) {
3250 			MDI_PI(prev)->pi_client_link =
3251 			    MDI_PI(path)->pi_client_link;
3252 		} else {
3253 			ct->ct_path_head =
3254 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3255 		}
3256 		if (ct->ct_path_tail == path) {
3257 			ct->ct_path_tail = prev;
3258 		}
3259 		if (ct->ct_path_last == path) {
3260 			ct->ct_path_last = ct->ct_path_head;
3261 		}
3262 	}
3263 	MDI_PI(pip)->pi_client_link = NULL;
3264 	MDI_PI(pip)->pi_client = NULL;
3265 }
3266 
/*
 * i_mdi_pi_state_change():
 *		Common worker that moves a mdi_pathinfo node to the requested
 *		state (online, standby, fault or offline).  It marks the path
 *		transient, invokes the vHCI driver's vo_pi_state_change()
 *		entry point when one is registered and, on success, updates
 *		the client state, onlining or offlining the client dev_info
 *		node where the code below calls for it.
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY	(pHCI not ready, or an NDI_BUSY result from
 *				ndi_devi_online()/ndi_devi_offline())
 *		Any other failure code returned by the vHCI's
 *		vo_pi_state_change() callback is passed back unchanged.
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client pip=%p",
		    (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* The pip mutex is dropped across the vo_pi_init() callback. */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    (void *)vh, (void *)pip));
				/* pip mutex is not held here */
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p",
		    (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/* Undone by MDI_PHCI_STABLE() at state_change_exit. */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Don't offline the client dev_info node unless we have
		 * no available paths left at all.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (ct->ct_path_count == 1)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/*
				 * Neither the client lock nor the pip mutex
				 * is held here; only the pHCI unstable mark
				 * is left to undo.
				 */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/*
	 * Call the vHCI's state change entry point, if registered, with
	 * neither the client lock nor the pip mutex held.
	 */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (CE_WARN, ct->ct_dip,
		    "!vo_pi_state_change: failed rv = %x", rv));
	}
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			/* roll back to the state saved before the change */
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			/*
			 * From here on rv holds an NDI_* code until it is
			 * converted back to an MDI_* code further down.
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip, 0);
					MDI_CLIENT_LOCK(ct);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}
3586 
3587 /*
3588  * mdi_pi_online():
3589  *		Place the path_info node in the online state.  The path is
3590  *		now available to be selected by mdi_select_path() for
3591  *		transporting I/O requests to client devices.
3592  * Return Values:
3593  *		MDI_SUCCESS
3594  *		MDI_FAILURE
3595  */
3596 int
3597 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3598 {
3599 	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
3600 	int		client_held = 0;
3601 	int		rv;
3602 	int		se_flag;
3603 	int		kmem_flag;
3604 
3605 	ASSERT(ct != NULL);
3606 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3607 	if (rv != MDI_SUCCESS)
3608 		return (rv);
3609 
3610 	MDI_PI_LOCK(pip);
3611 	if (MDI_PI(pip)->pi_pm_held == 0) {
3612 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3613 		    "i_mdi_pm_hold_pip %p\n", (void *)pip));
3614 		i_mdi_pm_hold_pip(pip);
3615 		client_held = 1;
3616 	}
3617 	MDI_PI_UNLOCK(pip);
3618 
3619 	if (client_held) {
3620 		MDI_CLIENT_LOCK(ct);
3621 		if (ct->ct_power_cnt == 0) {
3622 			rv = i_mdi_power_all_phci(ct);
3623 		}
3624 
3625 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
3626 		    "i_mdi_pm_hold_client %p\n", (void *)ct));
3627 		i_mdi_pm_hold_client(ct, 1);
3628 		MDI_CLIENT_UNLOCK(ct);
3629 	}
3630 
3631 	/* determine interrupt context */
3632 	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
3633 	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
3634 
3635 	/* A new path is online.  Invalidate DINFOCACHE snap shot. */
3636 	i_ddi_di_cache_invalidate(kmem_flag);
3637 
3638 	return (rv);
3639 }
3640 
3641 /*
3642  * mdi_pi_standby():
3643  *		Place the mdi_pathinfo node in standby state
3644  *
3645  * Return Values:
3646  *		MDI_SUCCESS
3647  *		MDI_FAILURE
3648  */
3649 int
3650 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3651 {
3652 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3653 }
3654 
3655 /*
3656  * mdi_pi_fault():
3657  *		Place the mdi_pathinfo node in fault'ed state
3658  * Return Values:
3659  *		MDI_SUCCESS
3660  *		MDI_FAILURE
3661  */
3662 int
3663 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3664 {
3665 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3666 }
3667 
3668 /*
3669  * mdi_pi_offline():
3670  *		Offline a mdi_pathinfo node.
3671  * Return Values:
3672  *		MDI_SUCCESS
3673  *		MDI_FAILURE
3674  */
3675 int
3676 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3677 {
3678 	int	ret, client_held = 0;
3679 	mdi_client_t	*ct;
3680 	int		se_flag;
3681 	int		kmem_flag;
3682 
3683 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3684 
3685 	if (ret == MDI_SUCCESS) {
3686 		MDI_PI_LOCK(pip);
3687 		if (MDI_PI(pip)->pi_pm_held) {
3688 			client_held = 1;
3689 		}
3690 		MDI_PI_UNLOCK(pip);
3691 
3692 		if (client_held) {
3693 			ct = MDI_PI(pip)->pi_client;
3694 			MDI_CLIENT_LOCK(ct);
3695 			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip,
3696 			    "mdi_pi_offline i_mdi_pm_rele_client\n"));
3697 			i_mdi_pm_rele_client(ct, 1);
3698 			MDI_CLIENT_UNLOCK(ct);
3699 		}
3700 
3701 		/* determine interrupt context */
3702 		se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
3703 		kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
3704 
3705 		/* pathinfo is offlined. update DINFOCACHE. */
3706 		i_ddi_di_cache_invalidate(kmem_flag);
3707 	}
3708 
3709 	return (ret);
3710 }
3711 
3712 /*
3713  * i_mdi_pi_offline():
3714  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
3715  */
3716 static int
3717 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3718 {
3719 	dev_info_t	*vdip = NULL;
3720 	mdi_vhci_t	*vh = NULL;
3721 	mdi_client_t	*ct = NULL;
3722 	int		(*f)();
3723 	int		rv;
3724 
3725 	MDI_PI_LOCK(pip);
3726 	ct = MDI_PI(pip)->pi_client;
3727 	ASSERT(ct != NULL);
3728 
3729 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3730 		/*
3731 		 * Give a chance for pending I/Os to complete.
3732 		 */
3733 		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: "
3734 		    "%d cmds still pending on path: %p\n",
3735 		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3736 		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
3737 		    &MDI_PI(pip)->pi_mutex,
3738 		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
3739 			/*
3740 			 * The timeout time reached without ref_cnt being zero
3741 			 * being signaled.
3742 			 */
3743 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: "
3744 			    "Timeout reached on path %p without the cond\n",
3745 			    (void *)pip));
3746 			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: "
3747 			    "%d cmds still pending on path: %p\n",
3748 			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
3749 		}
3750 	}
3751 	vh = ct->ct_vhci;
3752 	vdip = vh->vh_dip;
3753 
3754 	/*
3755 	 * Notify vHCI that has registered this event
3756 	 */
3757 	ASSERT(vh->vh_ops);
3758 	f = vh->vh_ops->vo_pi_state_change;
3759 
3760 	if (f != NULL) {
3761 		MDI_PI_UNLOCK(pip);
3762 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3763 		    flags)) != MDI_SUCCESS) {
3764 			MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
3765 			    "!vo_path_offline failed "
3766 			    "vdip %p, pip %p", (void *)vdip, (void *)pip));
3767 		}
3768 		MDI_PI_LOCK(pip);
3769 	}
3770 
3771 	/*
3772 	 * Set the mdi_pathinfo node state and clear the transient condition
3773 	 */
3774 	MDI_PI_SET_OFFLINE(pip);
3775 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3776 	MDI_PI_UNLOCK(pip);
3777 
3778 	MDI_CLIENT_LOCK(ct);
3779 	if (rv == MDI_SUCCESS) {
3780 		if (ct->ct_unstable == 0) {
3781 			dev_info_t	*cdip = ct->ct_dip;
3782 
3783 			/*
3784 			 * Onlining the mdi_pathinfo node will impact the
3785 			 * client state Update the client and dev_info node
3786 			 * state accordingly
3787 			 */
3788 			i_mdi_client_update_state(ct);
3789 			rv = NDI_SUCCESS;
3790 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3791 				if (cdip &&
3792 				    (i_ddi_node_state(cdip) >=
3793 				    DS_INITIALIZED)) {
3794 					MDI_CLIENT_UNLOCK(ct);
3795 					rv = ndi_devi_offline(cdip, 0);
3796 					MDI_CLIENT_LOCK(ct);
3797 					if (rv != NDI_SUCCESS) {
3798 						/*
3799 						 * ndi_devi_offline failed.
3800 						 * Reset client flags to
3801 						 * online.
3802 						 */
3803 						MDI_DEBUG(4, (CE_WARN, cdip,
3804 						    "!ndi_devi_offline: failed "
3805 						    " Error: %x", rv));
3806 						MDI_CLIENT_SET_ONLINE(ct);
3807 					}
3808 				}
3809 			}
3810 			/*
3811 			 * Convert to MDI error code
3812 			 */
3813 			switch (rv) {
3814 			case NDI_SUCCESS:
3815 				rv = MDI_SUCCESS;
3816 				break;
3817 			case NDI_BUSY:
3818 				rv = MDI_BUSY;
3819 				break;
3820 			default:
3821 				rv = MDI_FAILURE;
3822 				break;
3823 			}
3824 		}
3825 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3826 		i_mdi_report_path_state(ct, pip);
3827 	}
3828 
3829 	MDI_CLIENT_UNLOCK(ct);
3830 
3831 	/*
3832 	 * Change in the mdi_pathinfo node state will impact the client state
3833 	 */
3834 	MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p",
3835 	    (void *)ct, (void *)pip));
3836 	return (rv);
3837 }
3838 
3839 
3840 /*
3841  * mdi_pi_get_addr():
3842  *		Get the unit address associated with a mdi_pathinfo node
3843  *
3844  * Return Values:
3845  *		char *
3846  */
3847 char *
3848 mdi_pi_get_addr(mdi_pathinfo_t *pip)
3849 {
3850 	if (pip == NULL)
3851 		return (NULL);
3852 
3853 	return (MDI_PI(pip)->pi_addr);
3854 }
3855 
3856 /*
3857  * mdi_pi_get_path_instance():
3858  *		Get the 'path_instance' of a mdi_pathinfo node
3859  *
3860  * Return Values:
3861  *		path_instance
3862  */
3863 int
3864 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
3865 {
3866 	if (pip == NULL)
3867 		return (0);
3868 
3869 	return (MDI_PI(pip)->pi_path_instance);
3870 }
3871 
3872 /*
3873  * mdi_pi_pathname():
3874  *		Return pointer to path to pathinfo node.
3875  */
3876 char *
3877 mdi_pi_pathname(mdi_pathinfo_t *pip)
3878 {
3879 	if (pip == NULL)
3880 		return (NULL);
3881 	return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
3882 }
3883 
3884 /*
3885  * mdi_pi_get_client():
3886  *		Get the client devinfo associated with a mdi_pathinfo node
3887  *
3888  * Return Values:
3889  *		Handle to client device dev_info node
3890  */
3891 dev_info_t *
3892 mdi_pi_get_client(mdi_pathinfo_t *pip)
3893 {
3894 	dev_info_t	*dip = NULL;
3895 	if (pip) {
3896 		dip = MDI_PI(pip)->pi_client->ct_dip;
3897 	}
3898 	return (dip);
3899 }
3900 
3901 /*
3902  * mdi_pi_get_phci():
3903  *		Get the pHCI devinfo associated with the mdi_pathinfo node
3904  * Return Values:
3905  *		Handle to dev_info node
3906  */
3907 dev_info_t *
3908 mdi_pi_get_phci(mdi_pathinfo_t *pip)
3909 {
3910 	dev_info_t	*dip = NULL;
3911 	if (pip) {
3912 		dip = MDI_PI(pip)->pi_phci->ph_dip;
3913 	}
3914 	return (dip);
3915 }
3916 
3917 /*
3918  * mdi_pi_get_client_private():
3919  *		Get the client private information associated with the
3920  *		mdi_pathinfo node
3921  */
3922 void *
3923 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
3924 {
3925 	void *cprivate = NULL;
3926 	if (pip) {
3927 		cprivate = MDI_PI(pip)->pi_cprivate;
3928 	}
3929 	return (cprivate);
3930 }
3931 
3932 /*
3933  * mdi_pi_set_client_private():
3934  *		Set the client private information in the mdi_pathinfo node
3935  */
3936 void
3937 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
3938 {
3939 	if (pip) {
3940 		MDI_PI(pip)->pi_cprivate = priv;
3941 	}
3942 }
3943 
3944 /*
3945  * mdi_pi_get_phci_private():
3946  *		Get the pHCI private information associated with the
3947  *		mdi_pathinfo node
3948  */
3949 caddr_t
3950 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
3951 {
3952 	caddr_t	pprivate = NULL;
3953 	if (pip) {
3954 		pprivate = MDI_PI(pip)->pi_pprivate;
3955 	}
3956 	return (pprivate);
3957 }
3958 
3959 /*
3960  * mdi_pi_set_phci_private():
3961  *		Set the pHCI private information in the mdi_pathinfo node
3962  */
3963 void
3964 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
3965 {
3966 	if (pip) {
3967 		MDI_PI(pip)->pi_pprivate = priv;
3968 	}
3969 }
3970 
3971 /*
3972  * mdi_pi_get_state():
3973  *		Get the mdi_pathinfo node state. Transient states are internal
3974  *		and not provided to the users
3975  */
3976 mdi_pathinfo_state_t
3977 mdi_pi_get_state(mdi_pathinfo_t *pip)
3978 {
3979 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
3980 
3981 	if (pip) {
3982 		if (MDI_PI_IS_TRANSIENT(pip)) {
3983 			/*
3984 			 * mdi_pathinfo is in state transition.  Return the
3985 			 * last good state.
3986 			 */
3987 			state = MDI_PI_OLD_STATE(pip);
3988 		} else {
3989 			state = MDI_PI_STATE(pip);
3990 		}
3991 	}
3992 	return (state);
3993 }
3994 
3995 /*
3996  * Note that the following function needs to be the new interface for
3997  * mdi_pi_get_state when mpxio gets integrated to ON.
3998  */
3999 int
4000 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4001 		uint32_t *ext_state)
4002 {
4003 	*state = MDI_PATHINFO_STATE_INIT;
4004 
4005 	if (pip) {
4006 		if (MDI_PI_IS_TRANSIENT(pip)) {
4007 			/*
4008 			 * mdi_pathinfo is in state transition.  Return the
4009 			 * last good state.
4010 			 */
4011 			*state = MDI_PI_OLD_STATE(pip);
4012 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
4013 		} else {
4014 			*state = MDI_PI_STATE(pip);
4015 			*ext_state = MDI_PI_EXT_STATE(pip);
4016 		}
4017 	}
4018 	return (MDI_SUCCESS);
4019 }
4020 
4021 /*
4022  * mdi_pi_get_preferred:
4023  *	Get the preferred path flag
4024  */
4025 int
4026 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4027 {
4028 	if (pip) {
4029 		return (MDI_PI(pip)->pi_preferred);
4030 	}
4031 	return (0);
4032 }
4033 
4034 /*
4035  * mdi_pi_set_preferred:
4036  *	Set the preferred path flag
4037  */
4038 void
4039 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4040 {
4041 	if (pip) {
4042 		MDI_PI(pip)->pi_preferred = preferred;
4043 	}
4044 }
4045 
4046 /*
4047  * mdi_pi_set_state():
4048  *		Set the mdi_pathinfo node state
4049  */
4050 void
4051 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4052 {
4053 	uint32_t	ext_state;
4054 
4055 	if (pip) {
4056 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4057 		MDI_PI(pip)->pi_state = state;
4058 		MDI_PI(pip)->pi_state |= ext_state;
4059 	}
4060 }
4061 
4062 /*
4063  * Property functions:
4064  */
4065 int
4066 i_map_nvlist_error_to_mdi(int val)
4067 {
4068 	int rv;
4069 
4070 	switch (val) {
4071 	case 0:
4072 		rv = DDI_PROP_SUCCESS;
4073 		break;
4074 	case EINVAL:
4075 	case ENOTSUP:
4076 		rv = DDI_PROP_INVAL_ARG;
4077 		break;
4078 	case ENOMEM:
4079 		rv = DDI_PROP_NO_MEMORY;
4080 		break;
4081 	default:
4082 		rv = DDI_PROP_NOT_FOUND;
4083 		break;
4084 	}
4085 	return (rv);
4086 }
4087 
4088 /*
4089  * mdi_pi_get_next_prop():
4090  * 		Property walk function.  The caller should hold mdi_pi_lock()
4091  *		and release by calling mdi_pi_unlock() at the end of walk to
4092  *		get a consistent value.
4093  */
4094 nvpair_t *
4095 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4096 {
4097 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4098 		return (NULL);
4099 	}
4100 	ASSERT(MDI_PI_LOCKED(pip));
4101 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4102 }
4103 
4104 /*
4105  * mdi_prop_remove():
4106  * 		Remove the named property from the named list.
4107  */
4108 int
4109 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4110 {
4111 	if (pip == NULL) {
4112 		return (DDI_PROP_NOT_FOUND);
4113 	}
4114 	ASSERT(!MDI_PI_LOCKED(pip));
4115 	MDI_PI_LOCK(pip);
4116 	if (MDI_PI(pip)->pi_prop == NULL) {
4117 		MDI_PI_UNLOCK(pip);
4118 		return (DDI_PROP_NOT_FOUND);
4119 	}
4120 	if (name) {
4121 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4122 	} else {
4123 		char		nvp_name[MAXNAMELEN];
4124 		nvpair_t	*nvp;
4125 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4126 		while (nvp) {
4127 			nvpair_t	*next;
4128 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4129 			(void) snprintf(nvp_name, MAXNAMELEN, "%s",
4130 			    nvpair_name(nvp));
4131 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4132 			    nvp_name);
4133 			nvp = next;
4134 		}
4135 	}
4136 	MDI_PI_UNLOCK(pip);
4137 	return (DDI_PROP_SUCCESS);
4138 }
4139 
4140 /*
4141  * mdi_prop_size():
4142  * 		Get buffer size needed to pack the property data.
4143  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
4144  *		buffer size.
4145  */
4146 int
4147 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4148 {
4149 	int	rv;
4150 	size_t	bufsize;
4151 
4152 	*buflenp = 0;
4153 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4154 		return (DDI_PROP_NOT_FOUND);
4155 	}
4156 	ASSERT(MDI_PI_LOCKED(pip));
4157 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
4158 	    &bufsize, NV_ENCODE_NATIVE);
4159 	*buflenp = bufsize;
4160 	return (i_map_nvlist_error_to_mdi(rv));
4161 }
4162 
4163 /*
4164  * mdi_prop_pack():
4165  * 		pack the property list.  The caller should hold the
4166  *		mdi_pathinfo_t node to get a consistent data
4167  */
4168 int
4169 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4170 {
4171 	int	rv;
4172 	size_t	bufsize;
4173 
4174 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4175 		return (DDI_PROP_NOT_FOUND);
4176 	}
4177 
4178 	ASSERT(MDI_PI_LOCKED(pip));
4179 
4180 	bufsize = buflen;
4181 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4182 	    NV_ENCODE_NATIVE, KM_SLEEP);
4183 
4184 	return (i_map_nvlist_error_to_mdi(rv));
4185 }
4186 
4187 /*
4188  * mdi_prop_update_byte():
4189  *		Create/Update a byte property
4190  */
4191 int
4192 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4193 {
4194 	int rv;
4195 
4196 	if (pip == NULL) {
4197 		return (DDI_PROP_INVAL_ARG);
4198 	}
4199 	ASSERT(!MDI_PI_LOCKED(pip));
4200 	MDI_PI_LOCK(pip);
4201 	if (MDI_PI(pip)->pi_prop == NULL) {
4202 		MDI_PI_UNLOCK(pip);
4203 		return (DDI_PROP_NOT_FOUND);
4204 	}
4205 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4206 	MDI_PI_UNLOCK(pip);
4207 	return (i_map_nvlist_error_to_mdi(rv));
4208 }
4209 
4210 /*
4211  * mdi_prop_update_byte_array():
4212  *		Create/Update a byte array property
4213  */
4214 int
4215 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4216     uint_t nelements)
4217 {
4218 	int rv;
4219 
4220 	if (pip == NULL) {
4221 		return (DDI_PROP_INVAL_ARG);
4222 	}
4223 	ASSERT(!MDI_PI_LOCKED(pip));
4224 	MDI_PI_LOCK(pip);
4225 	if (MDI_PI(pip)->pi_prop == NULL) {
4226 		MDI_PI_UNLOCK(pip);
4227 		return (DDI_PROP_NOT_FOUND);
4228 	}
4229 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4230 	MDI_PI_UNLOCK(pip);
4231 	return (i_map_nvlist_error_to_mdi(rv));
4232 }
4233 
4234 /*
4235  * mdi_prop_update_int():
4236  *		Create/Update a 32 bit integer property
4237  */
4238 int
4239 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4240 {
4241 	int rv;
4242 
4243 	if (pip == NULL) {
4244 		return (DDI_PROP_INVAL_ARG);
4245 	}
4246 	ASSERT(!MDI_PI_LOCKED(pip));
4247 	MDI_PI_LOCK(pip);
4248 	if (MDI_PI(pip)->pi_prop == NULL) {
4249 		MDI_PI_UNLOCK(pip);
4250 		return (DDI_PROP_NOT_FOUND);
4251 	}
4252 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4253 	MDI_PI_UNLOCK(pip);
4254 	return (i_map_nvlist_error_to_mdi(rv));
4255 }
4256 
4257 /*
4258  * mdi_prop_update_int64():
4259  *		Create/Update a 64 bit integer property
4260  */
4261 int
4262 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4263 {
4264 	int rv;
4265 
4266 	if (pip == NULL) {
4267 		return (DDI_PROP_INVAL_ARG);
4268 	}
4269 	ASSERT(!MDI_PI_LOCKED(pip));
4270 	MDI_PI_LOCK(pip);
4271 	if (MDI_PI(pip)->pi_prop == NULL) {
4272 		MDI_PI_UNLOCK(pip);
4273 		return (DDI_PROP_NOT_FOUND);
4274 	}
4275 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4276 	MDI_PI_UNLOCK(pip);
4277 	return (i_map_nvlist_error_to_mdi(rv));
4278 }
4279 
4280 /*
4281  * mdi_prop_update_int_array():
4282  *		Create/Update a int array property
4283  */
4284 int
4285 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4286 	    uint_t nelements)
4287 {
4288 	int rv;
4289 
4290 	if (pip == NULL) {
4291 		return (DDI_PROP_INVAL_ARG);
4292 	}
4293 	ASSERT(!MDI_PI_LOCKED(pip));
4294 	MDI_PI_LOCK(pip);
4295 	if (MDI_PI(pip)->pi_prop == NULL) {
4296 		MDI_PI_UNLOCK(pip);
4297 		return (DDI_PROP_NOT_FOUND);
4298 	}
4299 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4300 	    nelements);
4301 	MDI_PI_UNLOCK(pip);
4302 	return (i_map_nvlist_error_to_mdi(rv));
4303 }
4304 
4305 /*
4306  * mdi_prop_update_string():
4307  *		Create/Update a string property
4308  */
4309 int
4310 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4311 {
4312 	int rv;
4313 
4314 	if (pip == NULL) {
4315 		return (DDI_PROP_INVAL_ARG);
4316 	}
4317 	ASSERT(!MDI_PI_LOCKED(pip));
4318 	MDI_PI_LOCK(pip);
4319 	if (MDI_PI(pip)->pi_prop == NULL) {
4320 		MDI_PI_UNLOCK(pip);
4321 		return (DDI_PROP_NOT_FOUND);
4322 	}
4323 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4324 	MDI_PI_UNLOCK(pip);
4325 	return (i_map_nvlist_error_to_mdi(rv));
4326 }
4327 
4328 /*
4329  * mdi_prop_update_string_array():
4330  *		Create/Update a string array property
4331  */
4332 int
4333 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4334     uint_t nelements)
4335 {
4336 	int rv;
4337 
4338 	if (pip == NULL) {
4339 		return (DDI_PROP_INVAL_ARG);
4340 	}
4341 	ASSERT(!MDI_PI_LOCKED(pip));
4342 	MDI_PI_LOCK(pip);
4343 	if (MDI_PI(pip)->pi_prop == NULL) {
4344 		MDI_PI_UNLOCK(pip);
4345 		return (DDI_PROP_NOT_FOUND);
4346 	}
4347 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4348 	    nelements);
4349 	MDI_PI_UNLOCK(pip);
4350 	return (i_map_nvlist_error_to_mdi(rv));
4351 }
4352 
4353 /*
4354  * mdi_prop_lookup_byte():
4355  * 		Look for byte property identified by name.  The data returned
4356  *		is the actual property and valid as long as mdi_pathinfo_t node
4357  *		is alive.
4358  */
4359 int
4360 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4361 {
4362 	int rv;
4363 
4364 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4365 		return (DDI_PROP_NOT_FOUND);
4366 	}
4367 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4368 	return (i_map_nvlist_error_to_mdi(rv));
4369 }
4370 
4371 
4372 /*
4373  * mdi_prop_lookup_byte_array():
4374  * 		Look for byte array property identified by name.  The data
4375  *		returned is the actual property and valid as long as
4376  *		mdi_pathinfo_t node is alive.
4377  */
4378 int
4379 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4380     uint_t *nelements)
4381 {
4382 	int rv;
4383 
4384 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4385 		return (DDI_PROP_NOT_FOUND);
4386 	}
4387 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4388 	    nelements);
4389 	return (i_map_nvlist_error_to_mdi(rv));
4390 }
4391 
4392 /*
4393  * mdi_prop_lookup_int():
4394  * 		Look for int property identified by name.  The data returned
4395  *		is the actual property and valid as long as mdi_pathinfo_t
4396  *		node is alive.
4397  */
4398 int
4399 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4400 {
4401 	int rv;
4402 
4403 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4404 		return (DDI_PROP_NOT_FOUND);
4405 	}
4406 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4407 	return (i_map_nvlist_error_to_mdi(rv));
4408 }
4409 
4410 /*
4411  * mdi_prop_lookup_int64():
4412  * 		Look for int64 property identified by name.  The data returned
4413  *		is the actual property and valid as long as mdi_pathinfo_t node
4414  *		is alive.
4415  */
4416 int
4417 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4418 {
4419 	int rv;
4420 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4421 		return (DDI_PROP_NOT_FOUND);
4422 	}
4423 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4424 	return (i_map_nvlist_error_to_mdi(rv));
4425 }
4426 
4427 /*
4428  * mdi_prop_lookup_int_array():
4429  * 		Look for int array property identified by name.  The data
4430  *		returned is the actual property and valid as long as
4431  *		mdi_pathinfo_t node is alive.
4432  */
4433 int
4434 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4435     uint_t *nelements)
4436 {
4437 	int rv;
4438 
4439 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4440 		return (DDI_PROP_NOT_FOUND);
4441 	}
4442 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4443 	    (int32_t **)data, nelements);
4444 	return (i_map_nvlist_error_to_mdi(rv));
4445 }
4446 
4447 /*
4448  * mdi_prop_lookup_string():
4449  * 		Look for string property identified by name.  The data
4450  *		returned is the actual property and valid as long as
4451  *		mdi_pathinfo_t node is alive.
4452  */
4453 int
4454 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4455 {
4456 	int rv;
4457 
4458 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4459 		return (DDI_PROP_NOT_FOUND);
4460 	}
4461 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4462 	return (i_map_nvlist_error_to_mdi(rv));
4463 }
4464 
4465 /*
4466  * mdi_prop_lookup_string_array():
4467  * 		Look for string array property identified by name.  The data
4468  *		returned is the actual property and valid as long as
4469  *		mdi_pathinfo_t node is alive.
4470  */
4471 int
4472 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4473     uint_t *nelements)
4474 {
4475 	int rv;
4476 
4477 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4478 		return (DDI_PROP_NOT_FOUND);
4479 	}
4480 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4481 	    nelements);
4482 	return (i_map_nvlist_error_to_mdi(rv));
4483 }
4484 
/*
 * mdi_prop_free():
 *		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
 *		functions return the pointer to actual property data and not a
 *		copy of it.  So the data returned is valid as long as
 *		mdi_pathinfo_t node is valid.
 *
 *		Because no copy was handed out there is nothing to release
 *		here: the argument is ignored and the call always reports
 *		DDI_PROP_SUCCESS.
 */
/*ARGSUSED*/
int
mdi_prop_free(void *data)
{
	return (DDI_PROP_SUCCESS);
}
4498 
4499 /*ARGSUSED*/
4500 static void
4501 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4502 {
4503 	char		*phci_path, *ct_path;
4504 	char		*ct_status;
4505 	char		*status;
4506 	dev_info_t	*dip = ct->ct_dip;
4507 	char		lb_buf[64];
4508 
4509 	ASSERT(MDI_CLIENT_LOCKED(ct));
4510 	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
4511 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4512 		return;
4513 	}
4514 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4515 		ct_status = "optimal";
4516 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4517 		ct_status = "degraded";
4518 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4519 		ct_status = "failed";
4520 	} else {
4521 		ct_status = "unknown";
4522 	}
4523 
4524 	if (MDI_PI_IS_OFFLINE(pip)) {
4525 		status = "offline";
4526 	} else if (MDI_PI_IS_ONLINE(pip)) {
4527 		status = "online";
4528 	} else if (MDI_PI_IS_STANDBY(pip)) {
4529 		status = "standby";
4530 	} else if (MDI_PI_IS_FAULT(pip)) {
4531 		status = "faulted";
4532 	} else {
4533 		status = "unknown";
4534 	}
4535 
4536 	if (ct->ct_lb == LOAD_BALANCE_LBA) {
4537 		(void) snprintf(lb_buf, sizeof (lb_buf),
4538 		    "%s, region-size: %d", mdi_load_balance_lba,
4539 			ct->ct_lb_args->region_size);
4540 	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4541 		(void) snprintf(lb_buf, sizeof (lb_buf),
4542 		    "%s", mdi_load_balance_none);
4543 	} else {
4544 		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4545 		    mdi_load_balance_rr);
4546 	}
4547 
4548 	if (dip) {
4549 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4550 		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4551 		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
4552 		    "path %s (%s%d) to target address: %s is %s"
4553 		    " Load balancing: %s\n",
4554 		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
4555 		    ddi_get_instance(dip), ct_status,
4556 		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
4557 		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
4558 		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
4559 		    MDI_PI(pip)->pi_addr, status, lb_buf);
4560 		kmem_free(phci_path, MAXPATHLEN);
4561 		kmem_free(ct_path, MAXPATHLEN);
4562 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4563 	}
4564 }
4565 
#ifdef	DEBUG
/*
 * i_mdi_log():
 *		Debug-only message helper.  The caller's message is formatted
 *		into a local buffer, prefixed with "mdi: " and, when a dip is
 *		supplied, with the node name and instance, then handed to
 *		cmn_err().
 *
 *		A leading '!', '?' or '^' in the formatted message selects
 *		log-only, boot-only or console-only delivery respectively and
 *		is moved to the front of the final message.  Setting
 *		mdi_debug_logonly forces log-only delivery.  CE_NOTE is
 *		demoted to CE_CONT.
 */
/*PRINTFLIKE3*/
static void
i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
{
	char		name[MAXNAMELEN];
	char		buf[MAXNAMELEN];
	char		*bp = buf;
	char		dest = '\0';	/* '!', '?', '^' or none */
	va_list		ap;

	if (dip == NULL) {
		name[0] = 0;
	} else {
		(void) snprintf(name, sizeof (name), "%s%d: ",
		    ddi_node_name(dip), ddi_get_instance(dip));
	}

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
	va_end(ap);

	/* Peel off a destination prefix, if the message carries one. */
	switch (buf[0]) {
	case '!':
	case '?':
	case '^':
		dest = buf[0];
		bp = &buf[1];
		break;
	default:
		break;
	}

	if (mdi_debug_logonly)
		dest = '!';

	if (level == CE_NOTE)
		level = CE_CONT;

	/* Destination prefixes are only applied to the known levels. */
	if ((level != CE_CONT) && (level != CE_WARN) && (level != CE_PANIC)) {
		cmn_err(level, "mdi: %s%s", name, bp);
		return;
	}

	switch (dest) {
	case '?':
		cmn_err(level, "?mdi: %s%s", name, bp);
		break;
	case '^':
		cmn_err(level, "^mdi: %s%s", name, bp);
		break;
	case '!':
		cmn_err(level, "!mdi: %s%s", name, bp);
		break;
	default:
		cmn_err(level, "mdi: %s%s", name, bp);
		break;
	}
}
#endif	/* DEBUG */
4641 
4642 void
4643 i_mdi_client_online(dev_info_t *ct_dip)
4644 {
4645 	mdi_client_t	*ct;
4646 
4647 	/*
4648 	 * Client online notification. Mark client state as online
4649 	 * restore our binding with dev_info node
4650 	 */
4651 	ct = i_devi_get_client(ct_dip);
4652 	ASSERT(ct != NULL);
4653 	MDI_CLIENT_LOCK(ct);
4654 	MDI_CLIENT_SET_ONLINE(ct);
4655 	/* catch for any memory leaks */
4656 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
4657 	ct->ct_dip = ct_dip;
4658 
4659 	if (ct->ct_power_cnt == 0)
4660 		(void) i_mdi_power_all_phci(ct);
4661 
4662 	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
4663 	    "i_mdi_pm_hold_client %p\n", (void *)ct));
4664 	i_mdi_pm_hold_client(ct, 1);
4665 
4666 	MDI_CLIENT_UNLOCK(ct);
4667 }
4668 
4669 void
4670 i_mdi_phci_online(dev_info_t *ph_dip)
4671 {
4672 	mdi_phci_t	*ph;
4673 
4674 	/* pHCI online notification. Mark state accordingly */
4675 	ph = i_devi_get_phci(ph_dip);
4676 	ASSERT(ph != NULL);
4677 	MDI_PHCI_LOCK(ph);
4678 	MDI_PHCI_SET_ONLINE(ph);
4679 	MDI_PHCI_UNLOCK(ph);
4680 }
4681 
4682 /*
4683  * mdi_devi_online():
4684  * 		Online notification from NDI framework on pHCI/client
4685  *		device online.
4686  * Return Values:
4687  *		NDI_SUCCESS
4688  *		MDI_FAILURE
4689  */
4690 /*ARGSUSED*/
4691 int
4692 mdi_devi_online(dev_info_t *dip, uint_t flags)
4693 {
4694 	if (MDI_PHCI(dip)) {
4695 		i_mdi_phci_online(dip);
4696 	}
4697 
4698 	if (MDI_CLIENT(dip)) {
4699 		i_mdi_client_online(dip);
4700 	}
4701 	return (NDI_SUCCESS);
4702 }
4703 
4704 /*
4705  * mdi_devi_offline():
4706  * 		Offline notification from NDI framework on pHCI/Client device
4707  *		offline.
4708  *
4709  * Return Values:
4710  *		NDI_SUCCESS
4711  *		NDI_FAILURE
4712  */
4713 /*ARGSUSED*/
4714 int
4715 mdi_devi_offline(dev_info_t *dip, uint_t flags)
4716 {
4717 	int		rv = NDI_SUCCESS;
4718 
4719 	if (MDI_CLIENT(dip)) {
4720 		rv = i_mdi_client_offline(dip, flags);
4721 		if (rv != NDI_SUCCESS)
4722 			return (rv);
4723 	}
4724 
4725 	if (MDI_PHCI(dip)) {
4726 		rv = i_mdi_phci_offline(dip, flags);
4727 
4728 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
4729 			/* set client back online */
4730 			i_mdi_client_online(dip);
4731 		}
4732 	}
4733 
4734 	return (rv);
4735 }
4736 
/*
 * i_mdi_phci_offline():
 *		Offline the pHCI bound to dip together with all of its
 *		mdi_pathinfo nodes.  The work proceeds in phases:
 *
 *		1. Refuse (NDI_BUSY) if the pHCI is unstable, or if any
 *		   client reached through it is unstable or failing over.
 *		2. For every path whose removal would leave its client in
 *		   the FAILED state, offline the client dev_info node first.
 *		   If that fails, walk the paths visited so far, online or
 *		   offline their clients according to the current client
 *		   state, and return NDI_BUSY.
 *		3. Mark the pHCI offline and every path as offlining, pause
 *		   for one tick, then offline each path.  If a path is not
 *		   offline afterwards the pHCI is marked online again and
 *		   NDI_BUSY is returned.
 *
 *		The pHCI lock is dropped around the ndi_devi_offline(),
 *		ndi_devi_online() and delay() calls.
 *
 * Return Values:
 *		NDI_SUCCESS	pHCI offlined, already offline, or dip has no
 *				pHCI
 *		NDI_BUSY	see above
 */
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n",
	    (void *)dip, (void *)ph));
	if (ph == NULL) {
		/* nothing registered for this dip; rv is still NDI_SUCCESS */
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined",
		    (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (CE_WARN, dip,
		    "!One or more target devices are in transient "
		    "state. This device can not be removed at "
		    "this moment. Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		/* capture the successor before any lock is dropped below */
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* drop both locks before calling into the NDI */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (CE_WARN, dip,
				    "!pHCI device (%s%d) is Busy. %s",
				    ddi_driver_name(dip), ddi_get_instance(dip),
				    "This device can not be removed at "
				    "this moment. Please try again later."));
				/* remember where to stop the rollback walk */
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	if (failed_pip) {
		/*
		 * Rollback: revisit every path that precedes failed_pip
		 * and online or offline its client dev_info node according
		 * to the client's current state.  The pHCI lock is held on
		 * entry (re-taken in the failure branch above).
		 */
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			/* no client dip (or other state): just release locks */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay(1);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		/* result ignored; the path state is re-checked just below */
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		/* NOTE(review): ct is assigned here but not read again */
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			/* undo the offline marking made above */
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}
4936 
4937 void
4938 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
4939 {
4940 	mdi_phci_t	*ph;
4941 	mdi_client_t	*ct;
4942 	mdi_pathinfo_t	*pip;
4943 	mdi_pathinfo_t	*next;
4944 	dev_info_t	*cdip;
4945 
4946 	if (!MDI_PHCI(dip))
4947 		return;
4948 
4949 	ph = i_devi_get_phci(dip);
4950 	if (ph == NULL) {
4951 		return;
4952 	}
4953 
4954 	MDI_PHCI_LOCK(ph);
4955 
4956 	if (MDI_PHCI_IS_OFFLINE(ph)) {
4957 		/* has no last path */
4958 		MDI_PHCI_UNLOCK(ph);
4959 		return;
4960 	}
4961 
4962 	pip = ph->ph_path_head;
4963 	while (pip != NULL) {
4964 		MDI_PI_LOCK(pip);
4965 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
4966 
4967 		ct = MDI_PI(pip)->pi_client;
4968 		i_mdi_client_lock(ct, pip);
4969 		MDI_PI_UNLOCK(pip);
4970 
4971 		cdip = ct->ct_dip;
4972 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
4973 		    (i_mdi_client_compute_state(ct, ph) ==
4974 		    MDI_CLIENT_STATE_FAILED)) {
4975 			/* Last path. Mark client dip as retiring */
4976 			i_mdi_client_unlock(ct);
4977 			MDI_PHCI_UNLOCK(ph);
4978 			(void) e_ddi_mark_retiring(cdip, cons_array);
4979 			MDI_PHCI_LOCK(ph);
4980 			pip = next;
4981 		} else {
4982 			i_mdi_client_unlock(ct);
4983 			pip = next;
4984 		}
4985 	}
4986 
4987 	MDI_PHCI_UNLOCK(ph);
4988 
4989 	return;
4990 }
4991 
4992 void
4993 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
4994 {
4995 	mdi_phci_t	*ph;
4996 	mdi_client_t	*ct;
4997 	mdi_pathinfo_t	*pip;
4998 	mdi_pathinfo_t	*next;
4999 	dev_info_t	*cdip;
5000 
5001 	if (!MDI_PHCI(dip))
5002 		return;
5003 
5004 	ph = i_devi_get_phci(dip);
5005 	if (ph == NULL)
5006 		return;
5007 
5008 	MDI_PHCI_LOCK(ph);
5009 
5010 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5011 		MDI_PHCI_UNLOCK(ph);
5012 		/* not last path */
5013 		return;
5014 	}
5015 
5016 	if (ph->ph_unstable) {
5017 		MDI_PHCI_UNLOCK(ph);
5018 		/* can't check for constraints */
5019 		*constraint = 0;
5020 		return;
5021 	}
5022 
5023 	pip = ph->ph_path_head;
5024 	while (pip != NULL) {
5025 		MDI_PI_LOCK(pip);
5026 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5027 
5028 		/*
5029 		 * The mdi_pathinfo state is OK. Check the client state.
5030 		 * If failover in progress fail the pHCI from offlining
5031 		 */
5032 		ct = MDI_PI(pip)->pi_client;
5033 		i_mdi_client_lock(ct, pip);
5034 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5035 		    (ct->ct_unstable)) {
5036 			/*
5037 			 * Failover is in progress, can't check for constraints
5038 			 */
5039 			MDI_PI_UNLOCK(pip);
5040 			i_mdi_client_unlock(ct);
5041 			MDI_PHCI_UNLOCK(ph);
5042 			*constraint = 0;
5043 			return;
5044 		}
5045 		MDI_PI_UNLOCK(pip);
5046 
5047 		/*
5048 		 * Check to see of we are retiring the last path of this
5049 		 * client device...
5050 		 */
5051 		cdip = ct->ct_dip;
5052 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5053 		    (i_mdi_client_compute_state(ct, ph) ==
5054 		    MDI_CLIENT_STATE_FAILED)) {
5055 			i_mdi_client_unlock(ct);
5056 			MDI_PHCI_UNLOCK(ph);
5057 			(void) e_ddi_retire_notify(cdip, constraint);
5058 			MDI_PHCI_LOCK(ph);
5059 			pip = next;
5060 		} else {
5061 			i_mdi_client_unlock(ct);
5062 			pip = next;
5063 		}
5064 	}
5065 
5066 	MDI_PHCI_UNLOCK(ph);
5067 
5068 	return;
5069 }
5070 
5071 /*
5072  * offline the path(s) hanging off the PHCI. If the
5073  * last path to any client, check that constraints
5074  * have been applied.
5075  */
5076 void
5077 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only)
5078 {
5079 	mdi_phci_t	*ph;
5080 	mdi_client_t	*ct;
5081 	mdi_pathinfo_t	*pip;
5082 	mdi_pathinfo_t	*next;
5083 	dev_info_t	*cdip;
5084 	int		unstable = 0;
5085 	int		constraint;
5086 
5087 	if (!MDI_PHCI(dip))
5088 		return;
5089 
5090 	ph = i_devi_get_phci(dip);
5091 	if (ph == NULL) {
5092 		/* no last path and no pips */
5093 		return;
5094 	}
5095 
5096 	MDI_PHCI_LOCK(ph);
5097 
5098 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5099 		MDI_PHCI_UNLOCK(ph);
5100 		/* no last path and no pips */
5101 		return;
5102 	}
5103 
5104 	/*
5105 	 * Check to see if the pHCI can be offlined
5106 	 */
5107 	if (ph->ph_unstable) {
5108 		unstable = 1;
5109 	}
5110 
5111 	pip = ph->ph_path_head;
5112 	while (pip != NULL) {
5113 		MDI_PI_LOCK(pip);
5114 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5115 
5116 		/*
5117 		 * if failover in progress fail the pHCI from offlining
5118 		 */
5119 		ct = MDI_PI(pip)->pi_client;
5120 		i_mdi_client_lock(ct, pip);
5121 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5122 		    (ct->ct_unstable)) {
5123 			unstable = 1;
5124 		}
5125 		MDI_PI_UNLOCK(pip);
5126 
5127 		/*
5128 		 * Check to see of we are removing the last path of this
5129 		 * client device...
5130 		 */
5131 		cdip = ct->ct_dip;
5132 		if (!phci_only && cdip &&
5133 		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5134 		    (i_mdi_client_compute_state(ct, ph) ==
5135 		    MDI_CLIENT_STATE_FAILED)) {
5136 			i_mdi_client_unlock(ct);
5137 			MDI_PHCI_UNLOCK(ph);
5138 			/*
5139 			 * We don't retire clients we just retire the
5140 			 * path to a client. If it is the last path
5141 			 * to a client, constraints are checked and
5142 			 * if we pass the last path is offlined. MPXIO will
5143 			 * then fail all I/Os to the client. Since we don't
5144 			 * want to retire the client on a path error
5145 			 * set constraint = 0 so that the client dip
5146 			 * is not retired.
5147 			 */
5148 			constraint = 0;
5149 			(void) e_ddi_retire_finalize(cdip, &constraint);
5150 			MDI_PHCI_LOCK(ph);
5151 			pip = next;
5152 		} else {
5153 			i_mdi_client_unlock(ct);
5154 			pip = next;
5155 		}
5156 	}
5157 
5158 	/*
5159 	 * Cannot offline pip(s)
5160 	 */
5161 	if (unstable) {
5162 		cmn_err(CE_WARN, "PHCI in transient state, cannot "
5163 		    "retire, dip = %p", (void *)dip);
5164 		MDI_PHCI_UNLOCK(ph);
5165 		return;
5166 	}
5167 
5168 	/*
5169 	 * Mark the pHCI as offline
5170 	 */
5171 	MDI_PHCI_SET_OFFLINE(ph);
5172 
5173 	/*
5174 	 * Mark the child mdi_pathinfo nodes as transient
5175 	 */
5176 	pip = ph->ph_path_head;
5177 	while (pip != NULL) {
5178 		MDI_PI_LOCK(pip);
5179 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5180 		MDI_PI_SET_OFFLINING(pip);
5181 		MDI_PI_UNLOCK(pip);
5182 		pip = next;
5183 	}
5184 	MDI_PHCI_UNLOCK(ph);
5185 	/*
5186 	 * Give a chance for any pending commands to execute
5187 	 */
5188 	delay(1);
5189 	MDI_PHCI_LOCK(ph);
5190 	pip = ph->ph_path_head;
5191 	while (pip != NULL) {
5192 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5193 		(void) i_mdi_pi_offline(pip, 0);
5194 		MDI_PI_LOCK(pip);
5195 		ct = MDI_PI(pip)->pi_client;
5196 		if (!MDI_PI_IS_OFFLINE(pip)) {
5197 			cmn_err(CE_WARN, "PHCI busy, cannot offline path: "
5198 			    "PHCI dip = %p", (void *)dip);
5199 			MDI_PI_UNLOCK(pip);
5200 			MDI_PHCI_SET_ONLINE(ph);
5201 			MDI_PHCI_UNLOCK(ph);
5202 			return;
5203 		}
5204 		MDI_PI_UNLOCK(pip);
5205 		pip = next;
5206 	}
5207 	MDI_PHCI_UNLOCK(ph);
5208 
5209 	return;
5210 }
5211 
/*
 * mdi_phci_unretire():
 *		Undo a retire of the pHCI bound to dip by marking the pHCI
 *		online again.  The caller is expected to pass a pHCI dip
 *		(asserted).
 */
void
mdi_phci_unretire(dev_info_t *dip)
{
	ASSERT(MDI_PHCI(dip));

	/*
	 * Online the phci
	 */
	i_mdi_phci_online(dip);
}
5222 
5223 /*ARGSUSED*/
5224 static int
5225 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
5226 {
5227 	int		rv = NDI_SUCCESS;
5228 	mdi_client_t	*ct;
5229 
5230 	/*
5231 	 * Client component to go offline.  Make sure that we are
5232 	 * not in failing over state and update client state
5233 	 * accordingly
5234 	 */
5235 	ct = i_devi_get_client(dip);
5236 	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n",
5237 	    (void *)dip, (void *)ct));
5238 	if (ct != NULL) {
5239 		MDI_CLIENT_LOCK(ct);
5240 		if (ct->ct_unstable) {
5241 			/*
5242 			 * One or more paths are in transient state,
5243 			 * Dont allow offline of a client device
5244 			 */
5245 			MDI_DEBUG(1, (CE_WARN, dip,
5246 			    "!One or more paths to this device is "
5247 			    "in transient state. This device can not "
5248 			    "be removed at this moment. "
5249 			    "Please try again later."));
5250 			MDI_CLIENT_UNLOCK(ct);
5251 			return (NDI_BUSY);
5252 		}
5253 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
5254 			/*
5255 			 * Failover is in progress, Dont allow DR of
5256 			 * a client device
5257 			 */
5258 			MDI_DEBUG(1, (CE_WARN, dip,
5259 			    "!Client device (%s%d) is Busy. %s",
5260 			    ddi_driver_name(dip), ddi_get_instance(dip),
5261 			    "This device can not be removed at "
5262 			    "this moment. Please try again later."));
5263 			MDI_CLIENT_UNLOCK(ct);
5264 			return (NDI_BUSY);
5265 		}
5266 		MDI_CLIENT_SET_OFFLINE(ct);
5267 
5268 		/*
5269 		 * Unbind our relationship with the dev_info node
5270 		 */
5271 		if (flags & NDI_DEVI_REMOVE) {
5272 			ct->ct_dip = NULL;
5273 		}
5274 		MDI_CLIENT_UNLOCK(ct);
5275 	}
5276 	return (rv);
5277 }
5278 
5279 /*
5280  * mdi_pre_attach():
5281  *		Pre attach() notification handler
5282  */
5283 /*ARGSUSED*/
5284 int
5285 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5286 {
5287 	/* don't support old DDI_PM_RESUME */
5288 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5289 	    (cmd == DDI_PM_RESUME))
5290 		return (DDI_FAILURE);
5291 
5292 	return (DDI_SUCCESS);
5293 }
5294 
5295 /*
5296  * mdi_post_attach():
5297  *		Post attach() notification handler
5298  */
5299 /*ARGSUSED*/
5300 void
5301 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5302 {
5303 	mdi_phci_t	*ph;
5304 	mdi_client_t	*ct;
5305 	mdi_vhci_t	*vh;
5306 
5307 	if (MDI_PHCI(dip)) {
5308 		ph = i_devi_get_phci(dip);
5309 		ASSERT(ph != NULL);
5310 
5311 		MDI_PHCI_LOCK(ph);
5312 		switch (cmd) {
5313 		case DDI_ATTACH:
5314 			MDI_DEBUG(2, (CE_NOTE, dip,
5315 			    "!pHCI post_attach: called %p\n", (void *)ph));
5316 			if (error == DDI_SUCCESS) {
5317 				MDI_PHCI_SET_ATTACH(ph);
5318 			} else {
5319 				MDI_DEBUG(1, (CE_NOTE, dip,
5320 				    "!pHCI post_attach: failed error=%d\n",
5321 				    error));
5322 				MDI_PHCI_SET_DETACH(ph);
5323 			}
5324 			break;
5325 
5326 		case DDI_RESUME:
5327 			MDI_DEBUG(2, (CE_NOTE, dip,
5328 			    "!pHCI post_resume: called %p\n", (void *)ph));
5329 			if (error == DDI_SUCCESS) {
5330 				MDI_PHCI_SET_RESUME(ph);
5331 			} else {
5332 				MDI_DEBUG(1, (CE_NOTE, dip,
5333 				    "!pHCI post_resume: failed error=%d\n",
5334 				    error));
5335 				MDI_PHCI_SET_SUSPEND(ph);
5336 			}
5337 			break;
5338 		}
5339 		MDI_PHCI_UNLOCK(ph);
5340 	}
5341 
5342 	if (MDI_CLIENT(dip)) {
5343 		ct = i_devi_get_client(dip);
5344 		ASSERT(ct != NULL);
5345 
5346 		MDI_CLIENT_LOCK(ct);
5347 		switch (cmd) {
5348 		case DDI_ATTACH:
5349 			MDI_DEBUG(2, (CE_NOTE, dip,
5350 			    "!Client post_attach: called %p\n", (void *)ct));
5351 			if (error != DDI_SUCCESS) {
5352 				MDI_DEBUG(1, (CE_NOTE, dip,
5353 				    "!Client post_attach: failed error=%d\n",
5354 				    error));
5355 				MDI_CLIENT_SET_DETACH(ct);
5356 				MDI_DEBUG(4, (CE_WARN, dip,
5357 				    "mdi_post_attach i_mdi_pm_reset_client\n"));
5358 				i_mdi_pm_reset_client(ct);
5359 				break;
5360 			}
5361 
5362 			/*
5363 			 * Client device has successfully attached, inform
5364 			 * the vhci.
5365 			 */
5366 			vh = ct->ct_vhci;
5367 			if (vh->vh_ops->vo_client_attached)
5368 				(*vh->vh_ops->vo_client_attached)(dip);
5369 
5370 			MDI_CLIENT_SET_ATTACH(ct);
5371 			break;
5372 
5373 		case DDI_RESUME:
5374 			MDI_DEBUG(2, (CE_NOTE, dip,
5375 			    "!Client post_attach: called %p\n", (void *)ct));
5376 			if (error == DDI_SUCCESS) {
5377 				MDI_CLIENT_SET_RESUME(ct);
5378 			} else {
5379 				MDI_DEBUG(1, (CE_NOTE, dip,
5380 				    "!Client post_resume: failed error=%d\n",
5381 				    error));
5382 				MDI_CLIENT_SET_SUSPEND(ct);
5383 			}
5384 			break;
5385 		}
5386 		MDI_CLIENT_UNLOCK(ct);
5387 	}
5388 }
5389 
5390 /*
5391  * mdi_pre_detach():
5392  *		Pre detach notification handler
5393  */
5394 /*ARGSUSED*/
5395 int
5396 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5397 {
5398 	int rv = DDI_SUCCESS;
5399 
5400 	if (MDI_CLIENT(dip)) {
5401 		(void) i_mdi_client_pre_detach(dip, cmd);
5402 	}
5403 
5404 	if (MDI_PHCI(dip)) {
5405 		rv = i_mdi_phci_pre_detach(dip, cmd);
5406 	}
5407 
5408 	return (rv);
5409 }
5410 
/*
 * i_mdi_phci_pre_detach():
 *		pHCI side of the pre detach notification.
 *
 *		DDI_DETACH	allowed only when the pHCI is already
 *				offline; the pHCI is then marked detached.
 *		DDI_SUSPEND	every client reachable through this pHCI
 *				that is neither detached nor suspended is
 *				suspended first.  If one of them fails, the
 *				clients on the paths visited before it that
 *				report suspended are resumed again and the
 *				failure is returned; otherwise the pHCI is
 *				marked suspended.
 *		other		rejected.
 *
 *		The pHCI lock is held for the whole switch, including the
 *		devi_detach()/devi_attach() calls made on client nodes.
 *
 * Return Values:
 *		DDI_SUCCESS	(also when dip has no pHCI structure)
 *		DDI_FAILURE, or the failing devi_detach() status
 */
/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*failed_pip = NULL;
	mdi_pathinfo_t	*next;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_detach: called %p\n", (void *)ph));
		if (!MDI_PHCI_IS_OFFLINE(ph)) {
			/*
			 * mdi_pathinfo nodes are still attached to
			 * this pHCI. Fail the detach for this pHCI.
			 */
			MDI_DEBUG(2, (CE_WARN, dip,
			    "!pHCI pre_detach: "
			    "mdi_pathinfo nodes are still attached "
			    "%p\n", (void *)ph));
			rv = DDI_FAILURE;
			break;
		}
		MDI_PHCI_SET_DETACH(ph);
		break;

	case DDI_SUSPEND:
		/*
		 * pHCI is getting suspended.  Since mpxio client
		 * devices may not be suspended at this point, to avoid
		 * a potential stack overflow, it is important to suspend
		 * client devices before pHCI can be suspended.
		 */

		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_suspend: called %p\n", (void *)ph));
		/*
		 * Suspend all the client devices accessible through this pHCI
		 */
		pip = ph->ph_path_head;
		while (pip != NULL && rv == DDI_SUCCESS) {
			dev_info_t *cdip;
			MDI_PI_LOCK(pip);
			next =
			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			MDI_PI_UNLOCK(pip);
			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
				/* client lock dropped before suspending it */
				i_mdi_client_unlock(ct);
				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
				    DDI_SUCCESS) {
					/*
					 * Suspend of one of the client
					 * device has failed.
					 */
					MDI_DEBUG(1, (CE_WARN, dip,
					    "!Suspend of device (%s%d) failed.",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip)));
					/* rollback below stops at this path */
					failed_pip = pip;
					/* leaves the while loop only */
					break;
				}
			} else {
				i_mdi_client_unlock(ct);
			}
			pip = next;
		}

		if (rv == DDI_SUCCESS) {
			/*
			 * Suspend of client devices is complete. Proceed
			 * with pHCI suspend.
			 */
			MDI_PHCI_SET_SUSPEND(ph);
		} else {
			/*
			 * Revert back all the suspended client device states
			 * to converse.
			 */
			pip = ph->ph_path_head;
			while (pip != failed_pip) {
				dev_info_t *cdip;
				MDI_PI_LOCK(pip);
				next =
				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
				ct = MDI_PI(pip)->pi_client;
				i_mdi_client_lock(ct, pip);
				cdip = ct->ct_dip;
				MDI_PI_UNLOCK(pip);
				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
					i_mdi_client_unlock(ct);
					/* resume status is not checked */
					(void) devi_attach(cdip, DDI_RESUME);
				} else {
					i_mdi_client_unlock(ct);
				}
				pip = next;
			}
		}
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}
5531 
5532 /*ARGSUSED*/
5533 static int
5534 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5535 {
5536 	int		rv = DDI_SUCCESS;
5537 	mdi_client_t	*ct;
5538 
5539 	ct = i_devi_get_client(dip);
5540 	if (ct == NULL) {
5541 		return (rv);
5542 	}
5543 
5544 	MDI_CLIENT_LOCK(ct);
5545 	switch (cmd) {
5546 	case DDI_DETACH:
5547 		MDI_DEBUG(2, (CE_NOTE, dip,
5548 		    "!Client pre_detach: called %p\n", (void *)ct));
5549 		MDI_CLIENT_SET_DETACH(ct);
5550 		break;
5551 
5552 	case DDI_SUSPEND:
5553 		MDI_DEBUG(2, (CE_NOTE, dip,
5554 		    "!Client pre_suspend: called %p\n", (void *)ct));
5555 		MDI_CLIENT_SET_SUSPEND(ct);
5556 		break;
5557 
5558 	default:
5559 		rv = DDI_FAILURE;
5560 		break;
5561 	}
5562 	MDI_CLIENT_UNLOCK(ct);
5563 	return (rv);
5564 }
5565 
5566 /*
5567  * mdi_post_detach():
5568  *		Post detach notification handler
5569  */
5570 /*ARGSUSED*/
5571 void
5572 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5573 {
5574 	/*
5575 	 * Detach/Suspend of mpxio component failed. Update our state
5576 	 * too
5577 	 */
5578 	if (MDI_PHCI(dip))
5579 		i_mdi_phci_post_detach(dip, cmd, error);
5580 
5581 	if (MDI_CLIENT(dip))
5582 		i_mdi_client_post_detach(dip, cmd, error);
5583 }
5584 
5585 /*ARGSUSED*/
5586 static void
5587 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5588 {
5589 	mdi_phci_t	*ph;
5590 
5591 	/*
5592 	 * Detach/Suspend of phci component failed. Update our state
5593 	 * too
5594 	 */
5595 	ph = i_devi_get_phci(dip);
5596 	if (ph == NULL) {
5597 		return;
5598 	}
5599 
5600 	MDI_PHCI_LOCK(ph);
5601 	/*
5602 	 * Detach of pHCI failed. Restore back converse
5603 	 * state
5604 	 */
5605 	switch (cmd) {
5606 	case DDI_DETACH:
5607 		MDI_DEBUG(2, (CE_NOTE, dip,
5608 		    "!pHCI post_detach: called %p\n", (void *)ph));
5609 		if (error != DDI_SUCCESS)
5610 			MDI_PHCI_SET_ATTACH(ph);
5611 		break;
5612 
5613 	case DDI_SUSPEND:
5614 		MDI_DEBUG(2, (CE_NOTE, dip,
5615 		    "!pHCI post_suspend: called %p\n", (void *)ph));
5616 		if (error != DDI_SUCCESS)
5617 			MDI_PHCI_SET_RESUME(ph);
5618 		break;
5619 	}
5620 	MDI_PHCI_UNLOCK(ph);
5621 }
5622 
5623 /*ARGSUSED*/
5624 static void
5625 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5626 {
5627 	mdi_client_t	*ct;
5628 
5629 	ct = i_devi_get_client(dip);
5630 	if (ct == NULL) {
5631 		return;
5632 	}
5633 	MDI_CLIENT_LOCK(ct);
5634 	/*
5635 	 * Detach of Client failed. Restore back converse
5636 	 * state
5637 	 */
5638 	switch (cmd) {
5639 	case DDI_DETACH:
5640 		MDI_DEBUG(2, (CE_NOTE, dip,
5641 		    "!Client post_detach: called %p\n", (void *)ct));
5642 		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
5643 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5644 			    "i_mdi_pm_rele_client\n"));
5645 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
5646 		} else {
5647 			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
5648 			    "i_mdi_pm_reset_client\n"));
5649 			i_mdi_pm_reset_client(ct);
5650 		}
5651 		if (error != DDI_SUCCESS)
5652 			MDI_CLIENT_SET_ATTACH(ct);
5653 		break;
5654 
5655 	case DDI_SUSPEND:
5656 		MDI_DEBUG(2, (CE_NOTE, dip,
5657 		    "!Client post_suspend: called %p\n", (void *)ct));
5658 		if (error != DDI_SUCCESS)
5659 			MDI_CLIENT_SET_RESUME(ct);
5660 		break;
5661 	}
5662 	MDI_CLIENT_UNLOCK(ct);
5663 }
5664 
5665 int
5666 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
5667 {
5668 	return (MDI_PI(pip)->pi_kstats ? 1 : 0);
5669 }
5670 
5671 /*
5672  * create and install per-path (client - pHCI) statistics
5673  * I/O stats supported: nread, nwritten, reads, and writes
5674  * Error stats - hard errors, soft errors, & transport errors
5675  */
5676 int
5677 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
5678 {
5679 	kstat_t			*kiosp, *kerrsp;
5680 	struct pi_errs		*nsp;
5681 	struct mdi_pi_kstats	*mdi_statp;
5682 
5683 	if (MDI_PI(pip)->pi_kstats != NULL)
5684 		return (MDI_SUCCESS);
5685 
5686 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
5687 	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
5688 		return (MDI_FAILURE);
5689 	}
5690 
5691 	(void) strcat(ksname, ",err");
5692 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
5693 	    KSTAT_TYPE_NAMED,
5694 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
5695 	if (kerrsp == NULL) {
5696 		kstat_delete(kiosp);
5697 		return (MDI_FAILURE);
5698 	}
5699 
5700 	nsp = (struct pi_errs *)kerrsp->ks_data;
5701 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
5702 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
5703 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
5704 	    KSTAT_DATA_UINT32);
5705 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
5706 	    KSTAT_DATA_UINT32);
5707 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
5708 	    KSTAT_DATA_UINT32);
5709 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
5710 	    KSTAT_DATA_UINT32);
5711 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
5712 	    KSTAT_DATA_UINT32);
5713 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
5714 	    KSTAT_DATA_UINT32);
5715 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
5716 	    KSTAT_DATA_UINT32);
5717 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
5718 
5719 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
5720 	mdi_statp->pi_kstat_ref = 1;
5721 	mdi_statp->pi_kstat_iostats = kiosp;
5722 	mdi_statp->pi_kstat_errstats = kerrsp;
5723 	kstat_install(kiosp);
5724 	kstat_install(kerrsp);
5725 	MDI_PI(pip)->pi_kstats = mdi_statp;
5726 	return (MDI_SUCCESS);
5727 }
5728 
5729 /*
5730  * destroy per-path properties
5731  */
5732 static void
5733 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
5734 {
5735 
5736 	struct mdi_pi_kstats *mdi_statp;
5737 
5738 	if (MDI_PI(pip)->pi_kstats == NULL)
5739 		return;
5740 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
5741 		return;
5742 
5743 	MDI_PI(pip)->pi_kstats = NULL;
5744 
5745 	/*
5746 	 * the kstat may be shared between multiple pathinfo nodes
5747 	 * decrement this pathinfo's usage, removing the kstats
5748 	 * themselves when the last pathinfo reference is removed.
5749 	 */
5750 	ASSERT(mdi_statp->pi_kstat_ref > 0);
5751 	if (--mdi_statp->pi_kstat_ref != 0)
5752 		return;
5753 
5754 	kstat_delete(mdi_statp->pi_kstat_iostats);
5755 	kstat_delete(mdi_statp->pi_kstat_errstats);
5756 	kmem_free(mdi_statp, sizeof (*mdi_statp));
5757 }
5758 
5759 /*
5760  * update I/O paths KSTATS
5761  */
5762 void
5763 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
5764 {
5765 	kstat_t *iostatp;
5766 	size_t xfer_cnt;
5767 
5768 	ASSERT(pip != NULL);
5769 
5770 	/*
5771 	 * I/O can be driven across a path prior to having path
5772 	 * statistics available, i.e. probe(9e).
5773 	 */
5774 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
5775 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
5776 		xfer_cnt = bp->b_bcount - bp->b_resid;
5777 		if (bp->b_flags & B_READ) {
5778 			KSTAT_IO_PTR(iostatp)->reads++;
5779 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
5780 		} else {
5781 			KSTAT_IO_PTR(iostatp)->writes++;
5782 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
5783 		}
5784 	}
5785 }
5786 
5787 /*
5788  * Enable the path(specific client/target/initiator)
5789  * Enabling a path means that MPxIO may select the enabled path for routing
5790  * future I/O requests, subject to other path state constraints.
5791  */
5792 int
5793 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
5794 {
5795 	mdi_phci_t	*ph;
5796 
5797 	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
5798 	if (ph == NULL) {
5799 		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
5800 			" failed. pip: %p ph = NULL\n", (void *)pip));
5801 		return (MDI_FAILURE);
5802 	}
5803 
5804 	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
5805 		MDI_ENABLE_OP);
5806 	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
5807 		" Returning success pip = %p. ph = %p\n",
5808 		(void *)pip, (void *)ph));
5809 	return (MDI_SUCCESS);
5810 
5811 }
5812 
5813 /*
5814  * Disable the path (specific client/target/initiator)
5815  * Disabling a path means that MPxIO will not select the disabled path for
5816  * routing any new I/O requests.
5817  */
5818 int
5819 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
5820 {
5821 	mdi_phci_t	*ph;
5822 
5823 	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
5824 	if (ph == NULL) {
5825 		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
5826 			" failed. pip: %p ph = NULL\n", (void *)pip));
5827 		return (MDI_FAILURE);
5828 	}
5829 
5830 	(void) i_mdi_enable_disable_path(pip,
5831 			ph->ph_vhci, flags, MDI_DISABLE_OP);
5832 	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
5833 		"Returning success pip = %p. ph = %p",
5834 		(void *)pip, (void *)ph));
5835 	return (MDI_SUCCESS);
5836 }
5837 
5838 /*
5839  * disable the path to a particular pHCI (pHCI specified in the phci_path
5840  * argument) for a particular client (specified in the client_path argument).
5841  * Disabling a path means that MPxIO will not select the disabled path for
5842  * routing any new I/O requests.
5843  * NOTE: this will be removed once the NWS files are changed to use the new
5844  * mdi_{enable,disable}_path interfaces
5845  */
5846 int
5847 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5848 {
5849 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
5850 }
5851 
5852 /*
5853  * Enable the path to a particular pHCI (pHCI specified in the phci_path
5854  * argument) for a particular client (specified in the client_path argument).
5855  * Enabling a path means that MPxIO may select the enabled path for routing
5856  * future I/O requests, subject to other path state constraints.
5857  * NOTE: this will be removed once the NWS files are changed to use the new
5858  * mdi_{enable,disable}_path interfaces
5859  */
5860 
5861 int
5862 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
5863 {
5864 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
5865 }
5866 
5867 /*
5868  * Common routine for doing enable/disable.
5869  */
5870 static mdi_pathinfo_t *
5871 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
5872 		int op)
5873 {
5874 	int		sync_flag = 0;
5875 	int		rv;
5876 	mdi_pathinfo_t 	*next;
5877 	int		(*f)() = NULL;
5878 
5879 	f = vh->vh_ops->vo_pi_state_change;
5880 
5881 	sync_flag = (flags << 8) & 0xf00;
5882 
5883 	/*
5884 	 * Do a callback into the mdi consumer to let it
5885 	 * know that path is about to get enabled/disabled.
5886 	 */
5887 	if (f != NULL) {
5888 		rv = (*f)(vh->vh_dip, pip, 0,
5889 			MDI_PI_EXT_STATE(pip),
5890 			MDI_EXT_STATE_CHANGE | sync_flag |
5891 			op | MDI_BEFORE_STATE_CHANGE);
5892 		if (rv != MDI_SUCCESS) {
5893 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5894 			"!vo_pi_state_change: failed rv = %x", rv));
5895 		}
5896 	}
5897 	MDI_PI_LOCK(pip);
5898 	next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5899 
5900 	switch (flags) {
5901 		case USER_DISABLE:
5902 			if (op == MDI_DISABLE_OP) {
5903 				MDI_PI_SET_USER_DISABLE(pip);
5904 			} else {
5905 				MDI_PI_SET_USER_ENABLE(pip);
5906 			}
5907 			break;
5908 		case DRIVER_DISABLE:
5909 			if (op == MDI_DISABLE_OP) {
5910 				MDI_PI_SET_DRV_DISABLE(pip);
5911 			} else {
5912 				MDI_PI_SET_DRV_ENABLE(pip);
5913 			}
5914 			break;
5915 		case DRIVER_DISABLE_TRANSIENT:
5916 			if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
5917 				MDI_PI_SET_DRV_DISABLE_TRANS(pip);
5918 			} else {
5919 				MDI_PI_SET_DRV_ENABLE_TRANS(pip);
5920 			}
5921 			break;
5922 	}
5923 	MDI_PI_UNLOCK(pip);
5924 	/*
5925 	 * Do a callback into the mdi consumer to let it
5926 	 * know that path is now enabled/disabled.
5927 	 */
5928 	if (f != NULL) {
5929 		rv = (*f)(vh->vh_dip, pip, 0,
5930 			MDI_PI_EXT_STATE(pip),
5931 			MDI_EXT_STATE_CHANGE | sync_flag |
5932 			op | MDI_AFTER_STATE_CHANGE);
5933 		if (rv != MDI_SUCCESS) {
5934 			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
5935 			"!vo_pi_state_change: failed rv = %x", rv));
5936 		}
5937 	}
5938 	return (next);
5939 }
5940 
5941 /*
5942  * Common routine for doing enable/disable.
5943  * NOTE: this will be removed once the NWS files are changed to use the new
5944  * mdi_{enable,disable}_path has been putback
5945  */
5946 int
5947 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
5948 {
5949 
5950 	mdi_phci_t	*ph;
5951 	mdi_vhci_t	*vh = NULL;
5952 	mdi_client_t	*ct;
5953 	mdi_pathinfo_t	*next, *pip;
5954 	int		found_it;
5955 
5956 	ph = i_devi_get_phci(pdip);
5957 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
5958 		"Op = %d pdip = %p cdip = %p\n", op, (void *)pdip,
5959 		(void *)cdip));
5960 	if (ph == NULL) {
5961 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
5962 			"Op %d failed. ph = NULL\n", op));
5963 		return (MDI_FAILURE);
5964 	}
5965 
5966 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
5967 		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
5968 			"Op Invalid operation = %d\n", op));
5969 		return (MDI_FAILURE);
5970 	}
5971 
5972 	vh = ph->ph_vhci;
5973 
5974 	if (cdip == NULL) {
5975 		/*
5976 		 * Need to mark the Phci as enabled/disabled.
5977 		 */
5978 		MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
5979 		"Op %d for the phci\n", op));
5980 		MDI_PHCI_LOCK(ph);
5981 		switch (flags) {
5982 			case USER_DISABLE:
5983 				if (op == MDI_DISABLE_OP) {
5984 					MDI_PHCI_SET_USER_DISABLE(ph);
5985 				} else {
5986 					MDI_PHCI_SET_USER_ENABLE(ph);
5987 				}
5988 				break;
5989 			case DRIVER_DISABLE:
5990 				if (op == MDI_DISABLE_OP) {
5991 					MDI_PHCI_SET_DRV_DISABLE(ph);
5992 				} else {
5993 					MDI_PHCI_SET_DRV_ENABLE(ph);
5994 				}
5995 				break;
5996 			case DRIVER_DISABLE_TRANSIENT:
5997 				if (op == MDI_DISABLE_OP) {
5998 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
5999 				} else {
6000 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6001 				}
6002 				break;
6003 			default:
6004 				MDI_PHCI_UNLOCK(ph);
6005 				MDI_DEBUG(1, (CE_NOTE, NULL,
6006 				"!i_mdi_pi_enable_disable:"
6007 				" Invalid flag argument= %d\n", flags));
6008 		}
6009 
6010 		/*
6011 		 * Phci has been disabled. Now try to enable/disable
6012 		 * path info's to each client.
6013 		 */
6014 		pip = ph->ph_path_head;
6015 		while (pip != NULL) {
6016 			pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6017 		}
6018 		MDI_PHCI_UNLOCK(ph);
6019 	} else {
6020 
6021 		/*
6022 		 * Disable a specific client.
6023 		 */
6024 		ct = i_devi_get_client(cdip);
6025 		if (ct == NULL) {
6026 			MDI_DEBUG(1, (CE_NOTE, NULL,
6027 			"!i_mdi_pi_enable_disable:"
6028 			" failed. ct = NULL operation = %d\n", op));
6029 			return (MDI_FAILURE);
6030 		}
6031 
6032 		MDI_CLIENT_LOCK(ct);
6033 		pip = ct->ct_path_head;
6034 		found_it = 0;
6035 		while (pip != NULL) {
6036 			MDI_PI_LOCK(pip);
6037 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6038 			if (MDI_PI(pip)->pi_phci == ph) {
6039 				MDI_PI_UNLOCK(pip);
6040 				found_it = 1;
6041 				break;
6042 			}
6043 			MDI_PI_UNLOCK(pip);
6044 			pip = next;
6045 		}
6046 
6047 
6048 		MDI_CLIENT_UNLOCK(ct);
6049 		if (found_it == 0) {
6050 			MDI_DEBUG(1, (CE_NOTE, NULL,
6051 			"!i_mdi_pi_enable_disable:"
6052 			" failed. Could not find corresponding pip\n"));
6053 			return (MDI_FAILURE);
6054 		}
6055 
6056 		(void) i_mdi_enable_disable_path(pip, vh, flags, op);
6057 	}
6058 
6059 	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
6060 		"Op %d Returning success pdip = %p cdip = %p\n",
6061 		op, (void *)pdip, (void *)cdip));
6062 	return (MDI_SUCCESS);
6063 }
6064 
6065 /*
6066  * Ensure phci powered up
6067  */
6068 static void
6069 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
6070 {
6071 	dev_info_t	*ph_dip;
6072 
6073 	ASSERT(pip != NULL);
6074 	ASSERT(MDI_PI_LOCKED(pip));
6075 
6076 	if (MDI_PI(pip)->pi_pm_held) {
6077 		return;
6078 	}
6079 
6080 	ph_dip = mdi_pi_get_phci(pip);
6081 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n",
6082 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip));
6083 	if (ph_dip == NULL) {
6084 		return;
6085 	}
6086 
6087 	MDI_PI_UNLOCK(pip);
6088 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
6089 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6090 
6091 	pm_hold_power(ph_dip);
6092 
6093 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
6094 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6095 	MDI_PI_LOCK(pip);
6096 
6097 	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
6098 	if (DEVI(ph_dip)->devi_pm_info)
6099 		MDI_PI(pip)->pi_pm_held = 1;
6100 }
6101 
6102 /*
6103  * Allow phci powered down
6104  */
6105 static void
6106 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
6107 {
6108 	dev_info_t	*ph_dip = NULL;
6109 
6110 	ASSERT(pip != NULL);
6111 	ASSERT(MDI_PI_LOCKED(pip));
6112 
6113 	if (MDI_PI(pip)->pi_pm_held == 0) {
6114 		return;
6115 	}
6116 
6117 	ph_dip = mdi_pi_get_phci(pip);
6118 	ASSERT(ph_dip != NULL);
6119 
6120 	MDI_PI_UNLOCK(pip);
6121 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n",
6122 	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip));
6123 
6124 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
6125 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6126 	pm_rele_power(ph_dip);
6127 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
6128 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6129 
6130 	MDI_PI_LOCK(pip);
6131 	MDI_PI(pip)->pi_pm_held = 0;
6132 }
6133 
6134 static void
6135 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6136 {
6137 	ASSERT(MDI_CLIENT_LOCKED(ct));
6138 
6139 	ct->ct_power_cnt += incr;
6140 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p "
6141 	    "ct_power_cnt = %d incr = %d\n", (void *)ct,
6142 	    ct->ct_power_cnt, incr));
6143 	ASSERT(ct->ct_power_cnt >= 0);
6144 }
6145 
6146 static void
6147 i_mdi_rele_all_phci(mdi_client_t *ct)
6148 {
6149 	mdi_pathinfo_t  *pip;
6150 
6151 	ASSERT(MDI_CLIENT_LOCKED(ct));
6152 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6153 	while (pip != NULL) {
6154 		mdi_hold_path(pip);
6155 		MDI_PI_LOCK(pip);
6156 		i_mdi_pm_rele_pip(pip);
6157 		MDI_PI_UNLOCK(pip);
6158 		mdi_rele_path(pip);
6159 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6160 	}
6161 }
6162 
6163 static void
6164 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6165 {
6166 	ASSERT(MDI_CLIENT_LOCKED(ct));
6167 
6168 	if (i_ddi_devi_attached(ct->ct_dip)) {
6169 		ct->ct_power_cnt -= decr;
6170 		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p "
6171 		    "ct_power_cnt = %d decr = %d\n",
6172 		    (void *)ct, ct->ct_power_cnt, decr));
6173 	}
6174 
6175 	ASSERT(ct->ct_power_cnt >= 0);
6176 	if (ct->ct_power_cnt == 0) {
6177 		i_mdi_rele_all_phci(ct);
6178 		return;
6179 	}
6180 }
6181 
6182 static void
6183 i_mdi_pm_reset_client(mdi_client_t *ct)
6184 {
6185 	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p "
6186 	    "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt));
6187 	ASSERT(MDI_CLIENT_LOCKED(ct));
6188 	ct->ct_power_cnt = 0;
6189 	i_mdi_rele_all_phci(ct);
6190 	ct->ct_powercnt_config = 0;
6191 	ct->ct_powercnt_unconfig = 0;
6192 	ct->ct_powercnt_reset = 1;
6193 }
6194 
6195 static int
6196 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6197 {
6198 	int		ret;
6199 	dev_info_t	*ph_dip;
6200 
6201 	MDI_PI_LOCK(pip);
6202 	i_mdi_pm_hold_pip(pip);
6203 
6204 	ph_dip = mdi_pi_get_phci(pip);
6205 	MDI_PI_UNLOCK(pip);
6206 
6207 	/* bring all components of phci to full power */
6208 	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
6209 	    "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip),
6210 	    ddi_get_instance(ph_dip), (void *)pip));
6211 
6212 	ret = pm_powerup(ph_dip);
6213 
6214 	if (ret == DDI_FAILURE) {
6215 		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
6216 		    "pm_powerup FAILED for %s%d %p\n",
6217 		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip),
6218 		    (void *)pip));
6219 
6220 		MDI_PI_LOCK(pip);
6221 		i_mdi_pm_rele_pip(pip);
6222 		MDI_PI_UNLOCK(pip);
6223 		return (MDI_FAILURE);
6224 	}
6225 
6226 	return (MDI_SUCCESS);
6227 }
6228 
6229 static int
6230 i_mdi_power_all_phci(mdi_client_t *ct)
6231 {
6232 	mdi_pathinfo_t  *pip;
6233 	int		succeeded = 0;
6234 
6235 	ASSERT(MDI_CLIENT_LOCKED(ct));
6236 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6237 	while (pip != NULL) {
6238 		/*
6239 		 * Don't power if MDI_PATHINFO_STATE_FAULT
6240 		 * or MDI_PATHINFO_STATE_OFFLINE.
6241 		 */
6242 		if (MDI_PI_IS_INIT(pip) ||
6243 		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
6244 			mdi_hold_path(pip);
6245 			MDI_CLIENT_UNLOCK(ct);
6246 			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
6247 				succeeded = 1;
6248 
6249 			ASSERT(ct == MDI_PI(pip)->pi_client);
6250 			MDI_CLIENT_LOCK(ct);
6251 			mdi_rele_path(pip);
6252 		}
6253 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6254 	}
6255 
6256 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
6257 }
6258 
/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 *
 *		Only BUS_POWER_PRE_NOTIFICATION, BUS_POWER_POST_NOTIFICATION
 *		and BUS_POWER_HAS_CHANGED are handled here.
 *		BUS_POWER_NOINVOL is rejected and every other op is handed to
 *		pm_busop_bus_power().
 *
 *		Power level changes are serialized per client: the
 *		pre-notification waits for and then sets the client's power
 *		transition state, and the post-notification clears it and
 *		wakes any waiters.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	BUS_POWER_NOINVOL, no client bound to the
 *				child, or powering up the pHCI(s) failed
 *		(for ops that are passed through, whatever
 *		pm_busop_bus_power() returns)
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		/* arg describes the child whose power level is changing */
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		/* arg describes the child whose power level has changed */
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		/* no holds outstanding, so the pHCI(s) may be powered down */
		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the client's new power state if the change worked */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		/* the client was successfully powered down */
		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* transition is over: let the next power change proceed */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* level went up: power the pHCI(s) if needed and hold them */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* level dropped to 0 from a level other than -1: release */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
6416 
6417 static int
6418 i_mdi_pm_pre_config_one(dev_info_t *child)
6419 {
6420 	int		ret = MDI_SUCCESS;
6421 	mdi_client_t	*ct;
6422 
6423 	ct = i_devi_get_client(child);
6424 	if (ct == NULL)
6425 		return (MDI_FAILURE);
6426 
6427 	MDI_CLIENT_LOCK(ct);
6428 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6429 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6430 
6431 	if (!MDI_CLIENT_IS_FAILED(ct)) {
6432 		MDI_CLIENT_UNLOCK(ct);
6433 		MDI_DEBUG(4, (CE_NOTE, child,
6434 		    "i_mdi_pm_pre_config_one already configured\n"));
6435 		return (MDI_SUCCESS);
6436 	}
6437 
6438 	if (ct->ct_powercnt_config) {
6439 		MDI_CLIENT_UNLOCK(ct);
6440 		MDI_DEBUG(4, (CE_NOTE, child,
6441 		    "i_mdi_pm_pre_config_one ALREADY held\n"));
6442 		return (MDI_SUCCESS);
6443 	}
6444 
6445 	if (ct->ct_power_cnt == 0) {
6446 		ret = i_mdi_power_all_phci(ct);
6447 	}
6448 	MDI_DEBUG(4, (CE_NOTE, child,
6449 	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
6450 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6451 	ct->ct_powercnt_config = 1;
6452 	ct->ct_powercnt_reset = 0;
6453 	MDI_CLIENT_UNLOCK(ct);
6454 	return (ret);
6455 }
6456 
6457 static int
6458 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6459 {
6460 	int			ret = MDI_SUCCESS;
6461 	dev_info_t		*cdip;
6462 	int			circ;
6463 
6464 	ASSERT(MDI_VHCI(vdip));
6465 
6466 	/* ndi_devi_config_one */
6467 	if (child) {
6468 		ASSERT(DEVI_BUSY_OWNED(vdip));
6469 		return (i_mdi_pm_pre_config_one(child));
6470 	}
6471 
6472 	/* devi_config_common */
6473 	ndi_devi_enter(vdip, &circ);
6474 	cdip = ddi_get_child(vdip);
6475 	while (cdip) {
6476 		dev_info_t *next = ddi_get_next_sibling(cdip);
6477 
6478 		ret = i_mdi_pm_pre_config_one(cdip);
6479 		if (ret != MDI_SUCCESS)
6480 			break;
6481 		cdip = next;
6482 	}
6483 	ndi_devi_exit(vdip, circ);
6484 	return (ret);
6485 }
6486 
6487 static int
6488 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
6489 {
6490 	int		ret = MDI_SUCCESS;
6491 	mdi_client_t	*ct;
6492 
6493 	ct = i_devi_get_client(child);
6494 	if (ct == NULL)
6495 		return (MDI_FAILURE);
6496 
6497 	MDI_CLIENT_LOCK(ct);
6498 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6499 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6500 
6501 	if (!i_ddi_devi_attached(ct->ct_dip)) {
6502 		MDI_DEBUG(4, (CE_NOTE, child,
6503 		    "i_mdi_pm_pre_unconfig node detached already\n"));
6504 		MDI_CLIENT_UNLOCK(ct);
6505 		return (MDI_SUCCESS);
6506 	}
6507 
6508 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6509 	    (flags & NDI_AUTODETACH)) {
6510 		MDI_DEBUG(4, (CE_NOTE, child,
6511 		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
6512 		MDI_CLIENT_UNLOCK(ct);
6513 		return (MDI_FAILURE);
6514 	}
6515 
6516 	if (ct->ct_powercnt_unconfig) {
6517 		MDI_DEBUG(4, (CE_NOTE, child,
6518 		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
6519 		MDI_CLIENT_UNLOCK(ct);
6520 		*held = 1;
6521 		return (MDI_SUCCESS);
6522 	}
6523 
6524 	if (ct->ct_power_cnt == 0) {
6525 		ret = i_mdi_power_all_phci(ct);
6526 	}
6527 	MDI_DEBUG(4, (CE_NOTE, child,
6528 	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
6529 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6530 	ct->ct_powercnt_unconfig = 1;
6531 	ct->ct_powercnt_reset = 0;
6532 	MDI_CLIENT_UNLOCK(ct);
6533 	if (ret == MDI_SUCCESS)
6534 		*held = 1;
6535 	return (ret);
6536 }
6537 
6538 static int
6539 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6540     int flags)
6541 {
6542 	int			ret = MDI_SUCCESS;
6543 	dev_info_t		*cdip;
6544 	int			circ;
6545 
6546 	ASSERT(MDI_VHCI(vdip));
6547 	*held = 0;
6548 
6549 	/* ndi_devi_unconfig_one */
6550 	if (child) {
6551 		ASSERT(DEVI_BUSY_OWNED(vdip));
6552 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6553 	}
6554 
6555 	/* devi_unconfig_common */
6556 	ndi_devi_enter(vdip, &circ);
6557 	cdip = ddi_get_child(vdip);
6558 	while (cdip) {
6559 		dev_info_t *next = ddi_get_next_sibling(cdip);
6560 
6561 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6562 		cdip = next;
6563 	}
6564 	ndi_devi_exit(vdip, circ);
6565 
6566 	if (*held)
6567 		ret = MDI_SUCCESS;
6568 
6569 	return (ret);
6570 }
6571 
6572 static void
6573 i_mdi_pm_post_config_one(dev_info_t *child)
6574 {
6575 	mdi_client_t	*ct;
6576 
6577 	ct = i_devi_get_client(child);
6578 	if (ct == NULL)
6579 		return;
6580 
6581 	MDI_CLIENT_LOCK(ct);
6582 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6583 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6584 
6585 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6586 		MDI_DEBUG(4, (CE_NOTE, child,
6587 		    "i_mdi_pm_post_config_one NOT configured\n"));
6588 		MDI_CLIENT_UNLOCK(ct);
6589 		return;
6590 	}
6591 
6592 	/* client has not been updated */
6593 	if (MDI_CLIENT_IS_FAILED(ct)) {
6594 		MDI_DEBUG(4, (CE_NOTE, child,
6595 		    "i_mdi_pm_post_config_one NOT configured\n"));
6596 		MDI_CLIENT_UNLOCK(ct);
6597 		return;
6598 	}
6599 
6600 	/* another thread might have powered it down or detached it */
6601 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6602 	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
6603 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6604 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6605 		MDI_DEBUG(4, (CE_NOTE, child,
6606 		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
6607 		i_mdi_pm_reset_client(ct);
6608 	} else {
6609 		mdi_pathinfo_t  *pip, *next;
6610 		int	valid_path_count = 0;
6611 
6612 		MDI_DEBUG(4, (CE_NOTE, child,
6613 		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
6614 		pip = ct->ct_path_head;
6615 		while (pip != NULL) {
6616 			MDI_PI_LOCK(pip);
6617 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6618 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6619 				valid_path_count ++;
6620 			MDI_PI_UNLOCK(pip);
6621 			pip = next;
6622 		}
6623 		i_mdi_pm_rele_client(ct, valid_path_count);
6624 	}
6625 	ct->ct_powercnt_config = 0;
6626 	MDI_CLIENT_UNLOCK(ct);
6627 }
6628 
6629 static void
6630 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
6631 {
6632 	int		circ;
6633 	dev_info_t	*cdip;
6634 
6635 	ASSERT(MDI_VHCI(vdip));
6636 
6637 	/* ndi_devi_config_one */
6638 	if (child) {
6639 		ASSERT(DEVI_BUSY_OWNED(vdip));
6640 		i_mdi_pm_post_config_one(child);
6641 		return;
6642 	}
6643 
6644 	/* devi_config_common */
6645 	ndi_devi_enter(vdip, &circ);
6646 	cdip = ddi_get_child(vdip);
6647 	while (cdip) {
6648 		dev_info_t *next = ddi_get_next_sibling(cdip);
6649 
6650 		i_mdi_pm_post_config_one(cdip);
6651 		cdip = next;
6652 	}
6653 	ndi_devi_exit(vdip, circ);
6654 }
6655 
6656 static void
6657 i_mdi_pm_post_unconfig_one(dev_info_t *child)
6658 {
6659 	mdi_client_t	*ct;
6660 
6661 	ct = i_devi_get_client(child);
6662 	if (ct == NULL)
6663 		return;
6664 
6665 	MDI_CLIENT_LOCK(ct);
6666 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6667 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6668 
6669 	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
6670 		MDI_DEBUG(4, (CE_NOTE, child,
6671 		    "i_mdi_pm_post_unconfig NOT held\n"));
6672 		MDI_CLIENT_UNLOCK(ct);
6673 		return;
6674 	}
6675 
6676 	/* failure detaching or another thread just attached it */
6677 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6678 	    i_ddi_devi_attached(ct->ct_dip)) ||
6679 	    (!i_ddi_devi_attached(ct->ct_dip) &&
6680 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
6681 		MDI_DEBUG(4, (CE_NOTE, child,
6682 		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
6683 		i_mdi_pm_reset_client(ct);
6684 	} else {
6685 		mdi_pathinfo_t  *pip, *next;
6686 		int	valid_path_count = 0;
6687 
6688 		MDI_DEBUG(4, (CE_NOTE, child,
6689 		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
6690 		pip = ct->ct_path_head;
6691 		while (pip != NULL) {
6692 			MDI_PI_LOCK(pip);
6693 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6694 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
6695 				valid_path_count ++;
6696 			MDI_PI_UNLOCK(pip);
6697 			pip = next;
6698 		}
6699 		i_mdi_pm_rele_client(ct, valid_path_count);
6700 		ct->ct_powercnt_unconfig = 0;
6701 	}
6702 
6703 	MDI_CLIENT_UNLOCK(ct);
6704 }
6705 
6706 static void
6707 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
6708 {
6709 	int			circ;
6710 	dev_info_t		*cdip;
6711 
6712 	ASSERT(MDI_VHCI(vdip));
6713 
6714 	if (!held) {
6715 		MDI_DEBUG(4, (CE_NOTE, vdip,
6716 		    "i_mdi_pm_post_unconfig held = %d\n", held));
6717 		return;
6718 	}
6719 
6720 	if (child) {
6721 		ASSERT(DEVI_BUSY_OWNED(vdip));
6722 		i_mdi_pm_post_unconfig_one(child);
6723 		return;
6724 	}
6725 
6726 	ndi_devi_enter(vdip, &circ);
6727 	cdip = ddi_get_child(vdip);
6728 	while (cdip) {
6729 		dev_info_t *next = ddi_get_next_sibling(cdip);
6730 
6731 		i_mdi_pm_post_unconfig_one(cdip);
6732 		cdip = next;
6733 	}
6734 	ndi_devi_exit(vdip, circ);
6735 }
6736 
6737 int
6738 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
6739 {
6740 	int			circ, ret = MDI_SUCCESS;
6741 	dev_info_t		*client_dip = NULL;
6742 	mdi_client_t		*ct;
6743 
6744 	/*
6745 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
6746 	 * Power up pHCI for the named client device.
6747 	 * Note: Before the client is enumerated under vhci by phci,
6748 	 * client_dip can be NULL. Then proceed to power up all the
6749 	 * pHCIs.
6750 	 */
6751 	if (devnm != NULL) {
6752 		ndi_devi_enter(vdip, &circ);
6753 		client_dip = ndi_devi_findchild(vdip, devnm);
6754 	}
6755 
6756 	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n",
6757 	    op, devnm ? devnm : "NULL", (void *)client_dip));
6758 
6759 	switch (op) {
6760 	case MDI_PM_PRE_CONFIG:
6761 		ret = i_mdi_pm_pre_config(vdip, client_dip);
6762 		break;
6763 
6764 	case MDI_PM_PRE_UNCONFIG:
6765 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
6766 		    flags);
6767 		break;
6768 
6769 	case MDI_PM_POST_CONFIG:
6770 		i_mdi_pm_post_config(vdip, client_dip);
6771 		break;
6772 
6773 	case MDI_PM_POST_UNCONFIG:
6774 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
6775 		break;
6776 
6777 	case MDI_PM_HOLD_POWER:
6778 	case MDI_PM_RELE_POWER:
6779 		ASSERT(args);
6780 
6781 		client_dip = (dev_info_t *)args;
6782 		ASSERT(MDI_CLIENT(client_dip));
6783 
6784 		ct = i_devi_get_client(client_dip);
6785 		MDI_CLIENT_LOCK(ct);
6786 
6787 		if (op == MDI_PM_HOLD_POWER) {
6788 			if (ct->ct_power_cnt == 0) {
6789 				(void) i_mdi_power_all_phci(ct);
6790 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6791 				    "mdi_power i_mdi_pm_hold_client\n"));
6792 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6793 			}
6794 		} else {
6795 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6796 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6797 				    "mdi_power i_mdi_pm_rele_client\n"));
6798 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6799 			} else {
6800 				MDI_DEBUG(4, (CE_NOTE, client_dip,
6801 				    "mdi_power i_mdi_pm_reset_client\n"));
6802 				i_mdi_pm_reset_client(ct);
6803 			}
6804 		}
6805 
6806 		MDI_CLIENT_UNLOCK(ct);
6807 		break;
6808 
6809 	default:
6810 		break;
6811 	}
6812 
6813 	if (devnm)
6814 		ndi_devi_exit(vdip, circ);
6815 
6816 	return (ret);
6817 }
6818 
6819 int
6820 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
6821 {
6822 	mdi_vhci_t *vhci;
6823 
6824 	if (!MDI_VHCI(dip))
6825 		return (MDI_FAILURE);
6826 
6827 	if (mdi_class) {
6828 		vhci = DEVI(dip)->devi_mdi_xhci;
6829 		ASSERT(vhci);
6830 		*mdi_class = vhci->vh_class;
6831 	}
6832 
6833 	return (MDI_SUCCESS);
6834 }
6835 
6836 int
6837 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
6838 {
6839 	mdi_phci_t *phci;
6840 
6841 	if (!MDI_PHCI(dip))
6842 		return (MDI_FAILURE);
6843 
6844 	if (mdi_class) {
6845 		phci = DEVI(dip)->devi_mdi_xhci;
6846 		ASSERT(phci);
6847 		*mdi_class = phci->ph_vhci->vh_class;
6848 	}
6849 
6850 	return (MDI_SUCCESS);
6851 }
6852 
6853 int
6854 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
6855 {
6856 	mdi_client_t *client;
6857 
6858 	if (!MDI_CLIENT(dip))
6859 		return (MDI_FAILURE);
6860 
6861 	if (mdi_class) {
6862 		client = DEVI(dip)->devi_mdi_client;
6863 		ASSERT(client);
6864 		*mdi_class = client->ct_vhci->vh_class;
6865 	}
6866 
6867 	return (MDI_SUCCESS);
6868 }
6869 
6870 void *
6871 mdi_client_get_vhci_private(dev_info_t *dip)
6872 {
6873 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6874 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6875 		mdi_client_t	*ct;
6876 		ct = i_devi_get_client(dip);
6877 		return (ct->ct_vprivate);
6878 	}
6879 	return (NULL);
6880 }
6881 
6882 void
6883 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
6884 {
6885 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
6886 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
6887 		mdi_client_t	*ct;
6888 		ct = i_devi_get_client(dip);
6889 		ct->ct_vprivate = data;
6890 	}
6891 }
6892 /*
6893  * mdi_pi_get_vhci_private():
6894  *		Get the vhci private information associated with the
6895  *		mdi_pathinfo node
6896  */
6897 void *
6898 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
6899 {
6900 	caddr_t	vprivate = NULL;
6901 	if (pip) {
6902 		vprivate = MDI_PI(pip)->pi_vprivate;
6903 	}
6904 	return (vprivate);
6905 }
6906 
6907 /*
6908  * mdi_pi_set_vhci_private():
6909  *		Set the vhci private information in the mdi_pathinfo node
6910  */
6911 void
6912 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
6913 {
6914 	if (pip) {
6915 		MDI_PI(pip)->pi_vprivate = priv;
6916 	}
6917 }
6918 
6919 /*
6920  * mdi_phci_get_vhci_private():
6921  *		Get the vhci private information associated with the
6922  *		mdi_phci node
6923  */
6924 void *
6925 mdi_phci_get_vhci_private(dev_info_t *dip)
6926 {
6927 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6928 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6929 		mdi_phci_t	*ph;
6930 		ph = i_devi_get_phci(dip);
6931 		return (ph->ph_vprivate);
6932 	}
6933 	return (NULL);
6934 }
6935 
6936 /*
6937  * mdi_phci_set_vhci_private():
6938  *		Set the vhci private information in the mdi_phci node
6939  */
6940 void
6941 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
6942 {
6943 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
6944 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
6945 		mdi_phci_t	*ph;
6946 		ph = i_devi_get_phci(dip);
6947 		ph->ph_vprivate = priv;
6948 	}
6949 }
6950 
/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
/* number of entries in vhci_class_list[] */
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 * The array is indexed in parallel with vhci_class_list[];
 * setup_vhci_cache() takes the nvlist for its class and clears the slot.
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/*
 * nvpair names in vhci cache nvlist, plus the only cache format version
 * that mainnvl_to_vhcache() accepts.
 */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
6971 
6972 /*
6973  * Given vhci class name, return its on-disk vhci cache filename.
6974  * Memory for the returned filename which includes the full path is allocated
6975  * by this function.
6976  */
6977 static char *
6978 vhclass2vhcache_filename(char *vhclass)
6979 {
6980 	char *filename;
6981 	int len;
6982 	static char *fmt = "/etc/devices/mdi_%s_cache";
6983 
6984 	/*
6985 	 * fmt contains the on-disk vhci cache file name format;
6986 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
6987 	 */
6988 
6989 	/* the -1 below is to account for "%s" in the format string */
6990 	len = strlen(fmt) + strlen(vhclass) - 1;
6991 	filename = kmem_alloc(len, KM_SLEEP);
6992 	(void) snprintf(filename, len, fmt, vhclass);
6993 	ASSERT(len == (strlen(filename) + 1));
6994 	return (filename);
6995 }
6996 
6997 /*
6998  * initialize the vhci cache related data structures and read the on-disk
6999  * vhci cached data into memory.
7000  */
7001 static void
7002 setup_vhci_cache(mdi_vhci_t *vh)
7003 {
7004 	mdi_vhci_config_t *vhc;
7005 	mdi_vhci_cache_t *vhcache;
7006 	int i;
7007 	nvlist_t *nvl = NULL;
7008 
7009 	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
7010 	vh->vh_config = vhc;
7011 	vhcache = &vhc->vhc_vhcache;
7012 
7013 	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
7014 
7015 	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
7016 	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
7017 
7018 	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
7019 
7020 	/*
7021 	 * Create string hash; same as mod_hash_create_strhash() except that
7022 	 * we use NULL key destructor.
7023 	 */
7024 	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
7025 	    mdi_bus_config_cache_hash_size,
7026 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
7027 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
7028 
7029 	/*
7030 	 * The on-disk vhci cache is read during booting prior to the
7031 	 * lights-out period by mdi_read_devices_files().
7032 	 */
7033 	for (i = 0; i < N_VHCI_CLASSES; i++) {
7034 		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
7035 			nvl = vhcache_nvl[i];
7036 			vhcache_nvl[i] = NULL;
7037 			break;
7038 		}
7039 	}
7040 
7041 	/*
7042 	 * this is to cover the case of some one manually causing unloading
7043 	 * (or detaching) and reloading (or attaching) of a vhci driver.
7044 	 */
7045 	if (nvl == NULL && modrootloaded)
7046 		nvl = read_on_disk_vhci_cache(vh->vh_class);
7047 
7048 	if (nvl != NULL) {
7049 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7050 		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
7051 			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
7052 		else  {
7053 			cmn_err(CE_WARN,
7054 			    "%s: data file corrupted, will recreate\n",
7055 			    vhc->vhc_vhcache_filename);
7056 		}
7057 		rw_exit(&vhcache->vhcache_lock);
7058 		nvlist_free(nvl);
7059 	}
7060 
7061 	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
7062 	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
7063 
7064 	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
7065 	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
7066 }
7067 
7068 /*
7069  * free all vhci cache related resources
7070  */
7071 static int
7072 destroy_vhci_cache(mdi_vhci_t *vh)
7073 {
7074 	mdi_vhci_config_t *vhc = vh->vh_config;
7075 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7076 	mdi_vhcache_phci_t *cphci, *cphci_next;
7077 	mdi_vhcache_client_t *cct, *cct_next;
7078 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7079 
7080 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7081 		return (MDI_FAILURE);
7082 
7083 	kmem_free(vhc->vhc_vhcache_filename,
7084 	    strlen(vhc->vhc_vhcache_filename) + 1);
7085 
7086 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7087 
7088 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7089 	    cphci = cphci_next) {
7090 		cphci_next = cphci->cphci_next;
7091 		free_vhcache_phci(cphci);
7092 	}
7093 
7094 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7095 		cct_next = cct->cct_next;
7096 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7097 			cpi_next = cpi->cpi_next;
7098 			free_vhcache_pathinfo(cpi);
7099 		}
7100 		free_vhcache_client(cct);
7101 	}
7102 
7103 	rw_destroy(&vhcache->vhcache_lock);
7104 
7105 	mutex_destroy(&vhc->vhc_lock);
7106 	cv_destroy(&vhc->vhc_cv);
7107 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
7108 	return (MDI_SUCCESS);
7109 }
7110 
7111 /*
7112  * Stop all vhci cache related async threads and free their resources.
7113  */
7114 static int
7115 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
7116 {
7117 	mdi_async_client_config_t *acc, *acc_next;
7118 
7119 	mutex_enter(&vhc->vhc_lock);
7120 	vhc->vhc_flags |= MDI_VHC_EXIT;
7121 	ASSERT(vhc->vhc_acc_thrcount >= 0);
7122 	cv_broadcast(&vhc->vhc_cv);
7123 
7124 	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
7125 	    vhc->vhc_acc_thrcount != 0) {
7126 		mutex_exit(&vhc->vhc_lock);
7127 		delay(1);
7128 		mutex_enter(&vhc->vhc_lock);
7129 	}
7130 
7131 	vhc->vhc_flags &= ~MDI_VHC_EXIT;
7132 
7133 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
7134 		acc_next = acc->acc_next;
7135 		free_async_client_config(acc);
7136 	}
7137 	vhc->vhc_acc_list_head = NULL;
7138 	vhc->vhc_acc_list_tail = NULL;
7139 	vhc->vhc_acc_count = 0;
7140 
7141 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7142 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7143 		mutex_exit(&vhc->vhc_lock);
7144 		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
7145 			vhcache_dirty(vhc);
7146 			return (MDI_FAILURE);
7147 		}
7148 	} else
7149 		mutex_exit(&vhc->vhc_lock);
7150 
7151 	if (callb_delete(vhc->vhc_cbid) != 0)
7152 		return (MDI_FAILURE);
7153 
7154 	return (MDI_SUCCESS);
7155 }
7156 
7157 /*
7158  * Stop vhci cache flush thread
7159  */
7160 /* ARGSUSED */
7161 static boolean_t
7162 stop_vhcache_flush_thread(void *arg, int code)
7163 {
7164 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7165 
7166 	mutex_enter(&vhc->vhc_lock);
7167 	vhc->vhc_flags |= MDI_VHC_EXIT;
7168 	cv_broadcast(&vhc->vhc_cv);
7169 
7170 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7171 		mutex_exit(&vhc->vhc_lock);
7172 		delay(1);
7173 		mutex_enter(&vhc->vhc_lock);
7174 	}
7175 
7176 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7177 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7178 		mutex_exit(&vhc->vhc_lock);
7179 		(void) flush_vhcache(vhc, 1);
7180 	} else
7181 		mutex_exit(&vhc->vhc_lock);
7182 
7183 	return (B_TRUE);
7184 }
7185 
7186 /*
7187  * Enqueue the vhcache phci (cphci) at the tail of the list
7188  */
7189 static void
7190 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7191 {
7192 	cphci->cphci_next = NULL;
7193 	if (vhcache->vhcache_phci_head == NULL)
7194 		vhcache->vhcache_phci_head = cphci;
7195 	else
7196 		vhcache->vhcache_phci_tail->cphci_next = cphci;
7197 	vhcache->vhcache_phci_tail = cphci;
7198 }
7199 
7200 /*
7201  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7202  */
7203 static void
7204 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7205     mdi_vhcache_pathinfo_t *cpi)
7206 {
7207 	cpi->cpi_next = NULL;
7208 	if (cct->cct_cpi_head == NULL)
7209 		cct->cct_cpi_head = cpi;
7210 	else
7211 		cct->cct_cpi_tail->cpi_next = cpi;
7212 	cct->cct_cpi_tail = cpi;
7213 }
7214 
7215 /*
7216  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7217  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7218  * flag set come at the beginning of the list. All cpis which have this
7219  * flag set come at the end of the list.
7220  */
7221 static void
7222 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7223     mdi_vhcache_pathinfo_t *newcpi)
7224 {
7225 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7226 
7227 	if (cct->cct_cpi_head == NULL ||
7228 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7229 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
7230 	else {
7231 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7232 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7233 		    prev_cpi = cpi, cpi = cpi->cpi_next)
7234 			;
7235 
7236 		if (prev_cpi == NULL)
7237 			cct->cct_cpi_head = newcpi;
7238 		else
7239 			prev_cpi->cpi_next = newcpi;
7240 
7241 		newcpi->cpi_next = cpi;
7242 
7243 		if (cpi == NULL)
7244 			cct->cct_cpi_tail = newcpi;
7245 	}
7246 }
7247 
7248 /*
7249  * Enqueue the vhcache client (cct) at the tail of the list
7250  */
7251 static void
7252 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7253     mdi_vhcache_client_t *cct)
7254 {
7255 	cct->cct_next = NULL;
7256 	if (vhcache->vhcache_client_head == NULL)
7257 		vhcache->vhcache_client_head = cct;
7258 	else
7259 		vhcache->vhcache_client_tail->cct_next = cct;
7260 	vhcache->vhcache_client_tail = cct;
7261 }
7262 
/*
 * Free an array of nelem kmem-allocated strings together with the array
 * itself.  Each non-NULL string is assumed to have been allocated with
 * exactly strlen() + 1 bytes.  A NULL array is ignored.
 */
static void
free_string_array(char **str, int nelem)
{
	int idx;

	if (str == NULL)
		return;

	for (idx = 0; idx < nelem; idx++) {
		if (str[idx] != NULL)
			kmem_free(str[idx], strlen(str[idx]) + 1);
	}

	kmem_free(str, sizeof (char *) * nelem);
}
7276 
7277 static void
7278 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
7279 {
7280 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
7281 	kmem_free(cphci, sizeof (*cphci));
7282 }
7283 
7284 static void
7285 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
7286 {
7287 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
7288 	kmem_free(cpi, sizeof (*cpi));
7289 }
7290 
7291 static void
7292 free_vhcache_client(mdi_vhcache_client_t *cct)
7293 {
7294 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
7295 	kmem_free(cct, sizeof (*cct));
7296 }
7297 
7298 static char *
7299 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7300 {
7301 	char *name_addr;
7302 	int len;
7303 
7304 	len = strlen(ct_name) + strlen(ct_addr) + 2;
7305 	name_addr = kmem_alloc(len, KM_SLEEP);
7306 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7307 
7308 	if (ret_len)
7309 		*ret_len = len;
7310 	return (name_addr);
7311 }
7312 
7313 /*
7314  * Copy the contents of paddrnvl to vhci cache.
7315  * paddrnvl nvlist contains path information for a vhci client.
7316  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7317  */
7318 static void
7319 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7320     mdi_vhcache_client_t *cct)
7321 {
7322 	nvpair_t *nvp = NULL;
7323 	mdi_vhcache_pathinfo_t *cpi;
7324 	uint_t nelem;
7325 	uint32_t *val;
7326 
7327 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7328 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7329 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7330 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7331 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
7332 		ASSERT(nelem == 2);
7333 		cpi->cpi_cphci = cphci_list[val[0]];
7334 		cpi->cpi_flags = val[1];
7335 		enqueue_tail_vhcache_pathinfo(cct, cpi);
7336 	}
7337 }
7338 
7339 /*
7340  * Copy the contents of caddrmapnvl to vhci cache.
7341  * caddrmapnvl nvlist contains vhci client address to phci client address
7342  * mappings. See the comment in mainnvl_to_vhcache() for the format of
7343  * this nvlist.
7344  */
7345 static void
7346 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7347     mdi_vhcache_phci_t *cphci_list[])
7348 {
7349 	nvpair_t *nvp = NULL;
7350 	nvlist_t *paddrnvl;
7351 	mdi_vhcache_client_t *cct;
7352 
7353 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7354 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7355 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7356 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7357 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
7358 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7359 		/* the client must contain at least one path */
7360 		ASSERT(cct->cct_cpi_head != NULL);
7361 
7362 		enqueue_vhcache_client(vhcache, cct);
7363 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7364 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7365 	}
7366 }
7367 
7368 /*
7369  * Copy the contents of the main nvlist to vhci cache.
7370  *
7371  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7372  * The nvlist contains the mappings between the vhci client addresses and
7373  * their corresponding phci client addresses.
7374  *
7375  * The structure of the nvlist is as follows:
7376  *
7377  * Main nvlist:
7378  *	NAME		TYPE		DATA
7379  *	version		int32		version number
7380  *	phcis		string array	array of phci paths
7381  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
7382  *
7383  * structure of c2paddrs_nvl:
7384  *	NAME		TYPE		DATA
7385  *	caddr1		nvlist_t	paddrs_nvl1
7386  *	caddr2		nvlist_t	paddrs_nvl2
7387  *	...
7388  * where caddr1, caddr2, ... are vhci client name and addresses in the
7389  * form of "<clientname>@<clientaddress>".
7390  * (for example: "ssd@2000002037cd9f72");
7391  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7392  *
7393  * structure of paddrs_nvl:
7394  *	NAME		TYPE		DATA
7395  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
7396  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
7397  *	...
7398  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7399  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7400  * phci-ids are integers that identify PHCIs to which the
7401  * the bus specific address belongs to. These integers are used as an index
7402  * into to the phcis string array in the main nvlist to get the PHCI path.
7403  */
7404 static int
7405 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7406 {
7407 	char **phcis, **phci_namep;
7408 	uint_t nphcis;
7409 	mdi_vhcache_phci_t *cphci, **cphci_list;
7410 	nvlist_t *caddrmapnvl;
7411 	int32_t ver;
7412 	int i;
7413 	size_t cphci_list_size;
7414 
7415 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7416 
7417 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7418 	    ver != MDI_VHCI_CACHE_VERSION)
7419 		return (MDI_FAILURE);
7420 
7421 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7422 	    &nphcis) != 0)
7423 		return (MDI_SUCCESS);
7424 
7425 	ASSERT(nphcis > 0);
7426 
7427 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7428 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7429 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7430 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7431 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7432 		enqueue_vhcache_phci(vhcache, cphci);
7433 		cphci_list[i] = cphci;
7434 	}
7435 
7436 	ASSERT(vhcache->vhcache_phci_head != NULL);
7437 
7438 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7439 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7440 
7441 	kmem_free(cphci_list, cphci_list_size);
7442 	return (MDI_SUCCESS);
7443 }
7444 
7445 /*
7446  * Build paddrnvl for the specified client using the information in the
7447  * vhci cache and add it to the caddrmapnnvl.
7448  * Returns 0 on success, errno on failure.
7449  */
7450 static int
7451 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7452     nvlist_t *caddrmapnvl)
7453 {
7454 	mdi_vhcache_pathinfo_t *cpi;
7455 	nvlist_t *nvl;
7456 	int err;
7457 	uint32_t val[2];
7458 
7459 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7460 
7461 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7462 		return (err);
7463 
7464 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7465 		val[0] = cpi->cpi_cphci->cphci_id;
7466 		val[1] = cpi->cpi_flags;
7467 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7468 		    != 0)
7469 			goto out;
7470 	}
7471 
7472 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7473 out:
7474 	nvlist_free(nvl);
7475 	return (err);
7476 }
7477 
7478 /*
7479  * Build caddrmapnvl using the information in the vhci cache
7480  * and add it to the mainnvl.
7481  * Returns 0 on success, errno on failure.
7482  */
7483 static int
7484 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7485 {
7486 	mdi_vhcache_client_t *cct;
7487 	nvlist_t *nvl;
7488 	int err;
7489 
7490 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7491 
7492 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7493 		return (err);
7494 
7495 	for (cct = vhcache->vhcache_client_head; cct != NULL;
7496 	    cct = cct->cct_next) {
7497 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7498 			goto out;
7499 	}
7500 
7501 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7502 out:
7503 	nvlist_free(nvl);
7504 	return (err);
7505 }
7506 
7507 /*
7508  * Build nvlist using the information in the vhci cache.
7509  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7510  * Returns nvl on success, NULL on failure.
7511  */
7512 static nvlist_t *
7513 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7514 {
7515 	mdi_vhcache_phci_t *cphci;
7516 	uint_t phci_count;
7517 	char **phcis;
7518 	nvlist_t *nvl;
7519 	int err, i;
7520 
7521 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7522 		nvl = NULL;
7523 		goto out;
7524 	}
7525 
7526 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
7527 	    MDI_VHCI_CACHE_VERSION)) != 0)
7528 		goto out;
7529 
7530 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7531 	if (vhcache->vhcache_phci_head == NULL) {
7532 		rw_exit(&vhcache->vhcache_lock);
7533 		return (nvl);
7534 	}
7535 
7536 	phci_count = 0;
7537 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7538 	    cphci = cphci->cphci_next)
7539 		cphci->cphci_id = phci_count++;
7540 
7541 	/* build phci pathname list */
7542 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
7543 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
7544 	    cphci = cphci->cphci_next, i++)
7545 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
7546 
7547 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
7548 	    phci_count);
7549 	free_string_array(phcis, phci_count);
7550 
7551 	if (err == 0 &&
7552 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
7553 		rw_exit(&vhcache->vhcache_lock);
7554 		return (nvl);
7555 	}
7556 
7557 	rw_exit(&vhcache->vhcache_lock);
7558 out:
7559 	if (nvl)
7560 		nvlist_free(nvl);
7561 	return (NULL);
7562 }
7563 
7564 /*
7565  * Lookup vhcache phci structure for the specified phci path.
7566  */
7567 static mdi_vhcache_phci_t *
7568 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
7569 {
7570 	mdi_vhcache_phci_t *cphci;
7571 
7572 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7573 
7574 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7575 	    cphci = cphci->cphci_next) {
7576 		if (strcmp(cphci->cphci_path, phci_path) == 0)
7577 			return (cphci);
7578 	}
7579 
7580 	return (NULL);
7581 }
7582 
7583 /*
7584  * Lookup vhcache phci structure for the specified phci.
7585  */
7586 static mdi_vhcache_phci_t *
7587 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
7588 {
7589 	mdi_vhcache_phci_t *cphci;
7590 
7591 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7592 
7593 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7594 	    cphci = cphci->cphci_next) {
7595 		if (cphci->cphci_phci == ph)
7596 			return (cphci);
7597 	}
7598 
7599 	return (NULL);
7600 }
7601 
7602 /*
7603  * Add the specified phci to the vhci cache if not already present.
7604  */
7605 static void
7606 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7607 {
7608 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7609 	mdi_vhcache_phci_t *cphci;
7610 	char *pathname;
7611 	int cache_updated;
7612 
7613 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7614 
7615 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7616 	(void) ddi_pathname(ph->ph_dip, pathname);
7617 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
7618 	    != NULL) {
7619 		cphci->cphci_phci = ph;
7620 		cache_updated = 0;
7621 	} else {
7622 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
7623 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
7624 		cphci->cphci_phci = ph;
7625 		enqueue_vhcache_phci(vhcache, cphci);
7626 		cache_updated = 1;
7627 	}
7628 
7629 	rw_exit(&vhcache->vhcache_lock);
7630 
7631 	/*
7632 	 * Since a new phci has been added, reset
7633 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
7634 	 * during next vhcache_discover_paths().
7635 	 */
7636 	mutex_enter(&vhc->vhc_lock);
7637 	vhc->vhc_path_discovery_cutoff_time = 0;
7638 	mutex_exit(&vhc->vhc_lock);
7639 
7640 	kmem_free(pathname, MAXPATHLEN);
7641 	if (cache_updated)
7642 		vhcache_dirty(vhc);
7643 }
7644 
7645 /*
7646  * Remove the reference to the specified phci from the vhci cache.
7647  */
7648 static void
7649 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
7650 {
7651 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7652 	mdi_vhcache_phci_t *cphci;
7653 
7654 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7655 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
7656 		/* do not remove the actual mdi_vhcache_phci structure */
7657 		cphci->cphci_phci = NULL;
7658 	}
7659 	rw_exit(&vhcache->vhcache_lock);
7660 }
7661 
7662 static void
7663 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
7664     mdi_vhcache_lookup_token_t *src)
7665 {
7666 	if (src == NULL) {
7667 		dst->lt_cct = NULL;
7668 		dst->lt_cct_lookup_time = 0;
7669 	} else {
7670 		dst->lt_cct = src->lt_cct;
7671 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
7672 	}
7673 }
7674 
7675 /*
7676  * Look up vhcache client for the specified client.
7677  */
7678 static mdi_vhcache_client_t *
7679 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
7680     mdi_vhcache_lookup_token_t *token)
7681 {
7682 	mod_hash_val_t hv;
7683 	char *name_addr;
7684 	int len;
7685 
7686 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7687 
7688 	/*
7689 	 * If no vhcache clean occurred since the last lookup, we can
7690 	 * simply return the cct from the last lookup operation.
7691 	 * It works because ccts are never freed except during the vhcache
7692 	 * cleanup operation.
7693 	 */
7694 	if (token != NULL &&
7695 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
7696 		return (token->lt_cct);
7697 
7698 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
7699 	if (mod_hash_find(vhcache->vhcache_client_hash,
7700 	    (mod_hash_key_t)name_addr, &hv) == 0) {
7701 		if (token) {
7702 			token->lt_cct = (mdi_vhcache_client_t *)hv;
7703 			token->lt_cct_lookup_time = lbolt64;
7704 		}
7705 	} else {
7706 		if (token) {
7707 			token->lt_cct = NULL;
7708 			token->lt_cct_lookup_time = 0;
7709 		}
7710 		hv = NULL;
7711 	}
7712 	kmem_free(name_addr, len);
7713 	return ((mdi_vhcache_client_t *)hv);
7714 }
7715 
7716 /*
7717  * Add the specified path to the vhci cache if not already present.
7718  * Also add the vhcache client for the client corresponding to this path
7719  * if it doesn't already exist.
7720  */
7721 static void
7722 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7723 {
7724 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7725 	mdi_vhcache_client_t *cct;
7726 	mdi_vhcache_pathinfo_t *cpi;
7727 	mdi_phci_t *ph = pip->pi_phci;
7728 	mdi_client_t *ct = pip->pi_client;
7729 	int cache_updated = 0;
7730 
7731 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7732 
7733 	/* if vhcache client for this pip doesn't already exist, add it */
7734 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7735 	    NULL)) == NULL) {
7736 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7737 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
7738 		    ct->ct_guid, NULL);
7739 		enqueue_vhcache_client(vhcache, cct);
7740 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7741 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7742 		cache_updated = 1;
7743 	}
7744 
7745 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7746 		if (cpi->cpi_cphci->cphci_phci == ph &&
7747 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
7748 			cpi->cpi_pip = pip;
7749 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
7750 				cpi->cpi_flags &=
7751 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
7752 				sort_vhcache_paths(cct);
7753 				cache_updated = 1;
7754 			}
7755 			break;
7756 		}
7757 	}
7758 
7759 	if (cpi == NULL) {
7760 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7761 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
7762 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
7763 		ASSERT(cpi->cpi_cphci != NULL);
7764 		cpi->cpi_pip = pip;
7765 		enqueue_vhcache_pathinfo(cct, cpi);
7766 		cache_updated = 1;
7767 	}
7768 
7769 	rw_exit(&vhcache->vhcache_lock);
7770 
7771 	if (cache_updated)
7772 		vhcache_dirty(vhc);
7773 }
7774 
7775 /*
7776  * Remove the reference to the specified path from the vhci cache.
7777  */
7778 static void
7779 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
7780 {
7781 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7782 	mdi_client_t *ct = pip->pi_client;
7783 	mdi_vhcache_client_t *cct;
7784 	mdi_vhcache_pathinfo_t *cpi;
7785 
7786 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7787 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
7788 	    NULL)) != NULL) {
7789 		for (cpi = cct->cct_cpi_head; cpi != NULL;
7790 		    cpi = cpi->cpi_next) {
7791 			if (cpi->cpi_pip == pip) {
7792 				cpi->cpi_pip = NULL;
7793 				break;
7794 			}
7795 		}
7796 	}
7797 	rw_exit(&vhcache->vhcache_lock);
7798 }
7799 
7800 /*
7801  * Flush the vhci cache to disk.
7802  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
7803  */
7804 static int
7805 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
7806 {
7807 	nvlist_t *nvl;
7808 	int err;
7809 	int rv;
7810 
7811 	/*
7812 	 * It is possible that the system may shutdown before
7813 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
7814 	 * flushing the cache in this case do not check for
7815 	 * i_ddi_io_initialized when force flag is set.
7816 	 */
7817 	if (force_flag == 0 && !i_ddi_io_initialized())
7818 		return (MDI_FAILURE);
7819 
7820 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
7821 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
7822 		nvlist_free(nvl);
7823 	} else
7824 		err = EFAULT;
7825 
7826 	rv = MDI_SUCCESS;
7827 	mutex_enter(&vhc->vhc_lock);
7828 	if (err != 0) {
7829 		if (err == EROFS) {
7830 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
7831 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
7832 			    MDI_VHC_VHCACHE_DIRTY);
7833 		} else {
7834 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
7835 				cmn_err(CE_CONT, "%s: update failed\n",
7836 				    vhc->vhc_vhcache_filename);
7837 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
7838 			}
7839 			rv = MDI_FAILURE;
7840 		}
7841 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
7842 		cmn_err(CE_CONT,
7843 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
7844 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
7845 	}
7846 	mutex_exit(&vhc->vhc_lock);
7847 
7848 	return (rv);
7849 }
7850 
7851 /*
7852  * Call flush_vhcache() to flush the vhci cache at the scheduled time.
7853  * Exits itself if left idle for the idle timeout period.
7854  */
7855 static void
7856 vhcache_flush_thread(void *arg)
7857 {
7858 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7859 	clock_t idle_time, quit_at_ticks;
7860 	callb_cpr_t cprinfo;
7861 
7862 	/* number of seconds to sleep idle before exiting */
7863 	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
7864 
7865 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
7866 	    "mdi_vhcache_flush");
7867 	mutex_enter(&vhc->vhc_lock);
7868 	for (; ; ) {
7869 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7870 		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
7871 			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
7872 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
7873 				(void) cv_timedwait(&vhc->vhc_cv,
7874 				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
7875 				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7876 			} else {
7877 				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7878 				mutex_exit(&vhc->vhc_lock);
7879 
7880 				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
7881 					vhcache_dirty(vhc);
7882 
7883 				mutex_enter(&vhc->vhc_lock);
7884 			}
7885 		}
7886 
7887 		quit_at_ticks = ddi_get_lbolt() + idle_time;
7888 
7889 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
7890 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
7891 		    ddi_get_lbolt() < quit_at_ticks) {
7892 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
7893 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
7894 			    quit_at_ticks);
7895 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
7896 		}
7897 
7898 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
7899 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
7900 			goto out;
7901 	}
7902 
7903 out:
7904 	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
7905 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
7906 	CALLB_CPR_EXIT(&cprinfo);
7907 }
7908 
7909 /*
7910  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
7911  */
7912 static void
7913 vhcache_dirty(mdi_vhci_config_t *vhc)
7914 {
7915 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7916 	int create_thread;
7917 
7918 	rw_enter(&vhcache->vhcache_lock, RW_READER);
7919 	/* do not flush cache until the cache is fully built */
7920 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
7921 		rw_exit(&vhcache->vhcache_lock);
7922 		return;
7923 	}
7924 	rw_exit(&vhcache->vhcache_lock);
7925 
7926 	mutex_enter(&vhc->vhc_lock);
7927 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
7928 		mutex_exit(&vhc->vhc_lock);
7929 		return;
7930 	}
7931 
7932 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
7933 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
7934 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
7935 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7936 		cv_broadcast(&vhc->vhc_cv);
7937 		create_thread = 0;
7938 	} else {
7939 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
7940 		create_thread = 1;
7941 	}
7942 	mutex_exit(&vhc->vhc_lock);
7943 
7944 	if (create_thread)
7945 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
7946 		    0, &p0, TS_RUN, minclsyspri);
7947 }
7948 
7949 /*
7950  * phci bus config structure - one for for each phci bus config operation that
7951  * we initiate on behalf of a vhci.
7952  */
7953 typedef struct mdi_phci_bus_config_s {
7954 	char *phbc_phci_path;
7955 	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
7956 	struct mdi_phci_bus_config_s *phbc_next;
7957 } mdi_phci_bus_config_t;
7958 
7959 /* vhci bus config structure - one for each vhci bus config operation */
7960 typedef struct mdi_vhci_bus_config_s {
7961 	ddi_bus_config_op_t vhbc_op;	/* bus config op */
7962 	major_t vhbc_op_major;		/* bus config op major */
7963 	uint_t vhbc_op_flags;		/* bus config op flags */
7964 	kmutex_t vhbc_lock;
7965 	kcondvar_t vhbc_cv;
7966 	int vhbc_thr_count;
7967 } mdi_vhci_bus_config_t;
7968 
7969 /*
7970  * bus config the specified phci
7971  */
7972 static void
7973 bus_config_phci(void *arg)
7974 {
7975 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
7976 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
7977 	dev_info_t *ph_dip;
7978 
7979 	/*
7980 	 * first configure all path components upto phci and then configure
7981 	 * the phci children.
7982 	 */
7983 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
7984 	    != NULL) {
7985 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
7986 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
7987 			(void) ndi_devi_config_driver(ph_dip,
7988 			    vhbc->vhbc_op_flags,
7989 			    vhbc->vhbc_op_major);
7990 		} else
7991 			(void) ndi_devi_config(ph_dip,
7992 			    vhbc->vhbc_op_flags);
7993 
7994 		/* release the hold that e_ddi_hold_devi_by_path() placed */
7995 		ndi_rele_devi(ph_dip);
7996 	}
7997 
7998 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
7999 	kmem_free(phbc, sizeof (*phbc));
8000 
8001 	mutex_enter(&vhbc->vhbc_lock);
8002 	vhbc->vhbc_thr_count--;
8003 	if (vhbc->vhbc_thr_count == 0)
8004 		cv_broadcast(&vhbc->vhbc_cv);
8005 	mutex_exit(&vhbc->vhbc_lock);
8006 }
8007 
8008 /*
8009  * Bus config all phcis associated with the vhci in parallel.
8010  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
8011  */
8012 static void
8013 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
8014     ddi_bus_config_op_t op, major_t maj)
8015 {
8016 	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
8017 	mdi_vhci_bus_config_t *vhbc;
8018 	mdi_vhcache_phci_t *cphci;
8019 
8020 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8021 	if (vhcache->vhcache_phci_head == NULL) {
8022 		rw_exit(&vhcache->vhcache_lock);
8023 		return;
8024 	}
8025 
8026 	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
8027 
8028 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8029 	    cphci = cphci->cphci_next) {
8030 		/* skip phcis that haven't attached before root is available */
8031 		if (!modrootloaded && (cphci->cphci_phci == NULL))
8032 			continue;
8033 		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
8034 		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
8035 		    KM_SLEEP);
8036 		phbc->phbc_vhbusconfig = vhbc;
8037 		phbc->phbc_next = phbc_head;
8038 		phbc_head = phbc;
8039 		vhbc->vhbc_thr_count++;
8040 	}
8041 	rw_exit(&vhcache->vhcache_lock);
8042 
8043 	vhbc->vhbc_op = op;
8044 	vhbc->vhbc_op_major = maj;
8045 	vhbc->vhbc_op_flags = NDI_NO_EVENT |
8046 	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
8047 	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
8048 	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
8049 
8050 	/* now create threads to initiate bus config on all phcis in parallel */
8051 	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
8052 		phbc_next = phbc->phbc_next;
8053 		if (mdi_mtc_off)
8054 			bus_config_phci((void *)phbc);
8055 		else
8056 			(void) thread_create(NULL, 0, bus_config_phci, phbc,
8057 			    0, &p0, TS_RUN, minclsyspri);
8058 	}
8059 
8060 	mutex_enter(&vhbc->vhbc_lock);
8061 	/* wait until all threads exit */
8062 	while (vhbc->vhbc_thr_count > 0)
8063 		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
8064 	mutex_exit(&vhbc->vhbc_lock);
8065 
8066 	mutex_destroy(&vhbc->vhbc_lock);
8067 	cv_destroy(&vhbc->vhbc_cv);
8068 	kmem_free(vhbc, sizeof (*vhbc));
8069 }
8070 
8071 /*
8072  * Single threaded version of bus_config_all_phcis()
8073  */
8074 static void
8075 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8076     ddi_bus_config_op_t op, major_t maj)
8077 {
8078 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8079 
8080 	single_threaded_vhconfig_enter(vhc);
8081 	bus_config_all_phcis(vhcache, flags, op, maj);
8082 	single_threaded_vhconfig_exit(vhc);
8083 }
8084 
8085 /*
8086  * Perform BUS_CONFIG_ONE on the specified child of the phci.
8087  * The path includes the child component in addition to the phci path.
8088  */
8089 static int
8090 bus_config_one_phci_child(char *path)
8091 {
8092 	dev_info_t *ph_dip, *child;
8093 	char *devnm;
8094 	int rv = MDI_FAILURE;
8095 
8096 	/* extract the child component of the phci */
8097 	devnm = strrchr(path, '/');
8098 	*devnm++ = '\0';
8099 
8100 	/*
8101 	 * first configure all path components upto phci and then
8102 	 * configure the phci child.
8103 	 */
8104 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8105 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8106 		    NDI_SUCCESS) {
8107 			/*
8108 			 * release the hold that ndi_devi_config_one() placed
8109 			 */
8110 			ndi_rele_devi(child);
8111 			rv = MDI_SUCCESS;
8112 		}
8113 
8114 		/* release the hold that e_ddi_hold_devi_by_path() placed */
8115 		ndi_rele_devi(ph_dip);
8116 	}
8117 
8118 	devnm--;
8119 	*devnm = '/';
8120 	return (rv);
8121 }
8122 
8123 /*
8124  * Build a list of phci client paths for the specified vhci client.
8125  * The list includes only those phci client paths which aren't configured yet.
8126  */
8127 static mdi_phys_path_t *
8128 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8129 {
8130 	mdi_vhcache_pathinfo_t *cpi;
8131 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8132 	int config_path, len;
8133 
8134 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8135 		/*
8136 		 * include only those paths that aren't configured.
8137 		 */
8138 		config_path = 0;
8139 		if (cpi->cpi_pip == NULL)
8140 			config_path = 1;
8141 		else {
8142 			MDI_PI_LOCK(cpi->cpi_pip);
8143 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
8144 				config_path = 1;
8145 			MDI_PI_UNLOCK(cpi->cpi_pip);
8146 		}
8147 
8148 		if (config_path) {
8149 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8150 			len = strlen(cpi->cpi_cphci->cphci_path) +
8151 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8152 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
8153 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
8154 			    cpi->cpi_cphci->cphci_path, ct_name,
8155 			    cpi->cpi_addr);
8156 			pp->phys_path_next = NULL;
8157 
8158 			if (pp_head == NULL)
8159 				pp_head = pp;
8160 			else
8161 				pp_tail->phys_path_next = pp;
8162 			pp_tail = pp;
8163 		}
8164 	}
8165 
8166 	return (pp_head);
8167 }
8168 
8169 /*
8170  * Free the memory allocated for phci client path list.
8171  */
8172 static void
8173 free_phclient_path_list(mdi_phys_path_t *pp_head)
8174 {
8175 	mdi_phys_path_t *pp, *pp_next;
8176 
8177 	for (pp = pp_head; pp != NULL; pp = pp_next) {
8178 		pp_next = pp->phys_path_next;
8179 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8180 		kmem_free(pp, sizeof (*pp));
8181 	}
8182 }
8183 
8184 /*
8185  * Allocated async client structure and initialize with the specified values.
8186  */
8187 static mdi_async_client_config_t *
8188 alloc_async_client_config(char *ct_name, char *ct_addr,
8189     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8190 {
8191 	mdi_async_client_config_t *acc;
8192 
8193 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8194 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8195 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8196 	acc->acc_phclient_path_list_head = pp_head;
8197 	init_vhcache_lookup_token(&acc->acc_token, tok);
8198 	acc->acc_next = NULL;
8199 	return (acc);
8200 }
8201 
8202 /*
8203  * Free the memory allocated for the async client structure and their members.
8204  */
8205 static void
8206 free_async_client_config(mdi_async_client_config_t *acc)
8207 {
8208 	if (acc->acc_phclient_path_list_head)
8209 		free_phclient_path_list(acc->acc_phclient_path_list_head);
8210 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8211 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8212 	kmem_free(acc, sizeof (*acc));
8213 }
8214 
8215 /*
8216  * Sort vhcache pathinfos (cpis) of the specified client.
8217  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8218  * flag set come at the beginning of the list. All cpis which have this
8219  * flag set come at the end of the list.
8220  */
8221 static void
8222 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8223 {
8224 	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8225 
8226 	cpi_head = cct->cct_cpi_head;
8227 	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8228 	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8229 		cpi_next = cpi->cpi_next;
8230 		enqueue_vhcache_pathinfo(cct, cpi);
8231 	}
8232 }
8233 
8234 /*
8235  * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
8236  * every vhcache pathinfo of the specified client. If not adjust the flag
8237  * setting appropriately.
8238  *
8239  * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
8240  * on-disk vhci cache. So every time this flag is updated the cache must be
8241  * flushed.
8242  */
8243 static void
8244 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8245     mdi_vhcache_lookup_token_t *tok)
8246 {
8247 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8248 	mdi_vhcache_client_t *cct;
8249 	mdi_vhcache_pathinfo_t *cpi;
8250 
8251 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8252 	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
8253 	    == NULL) {
8254 		rw_exit(&vhcache->vhcache_lock);
8255 		return;
8256 	}
8257 
8258 	/*
8259 	 * to avoid unnecessary on-disk cache updates, first check if an
8260 	 * update is really needed. If no update is needed simply return.
8261 	 */
8262 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8263 		if ((cpi->cpi_pip != NULL &&
8264 		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
8265 		    (cpi->cpi_pip == NULL &&
8266 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
8267 			break;
8268 		}
8269 	}
8270 	if (cpi == NULL) {
8271 		rw_exit(&vhcache->vhcache_lock);
8272 		return;
8273 	}
8274 
8275 	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
8276 		rw_exit(&vhcache->vhcache_lock);
8277 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8278 		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
8279 		    tok)) == NULL) {
8280 			rw_exit(&vhcache->vhcache_lock);
8281 			return;
8282 		}
8283 	}
8284 
8285 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8286 		if (cpi->cpi_pip != NULL)
8287 			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8288 		else
8289 			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8290 	}
8291 	sort_vhcache_paths(cct);
8292 
8293 	rw_exit(&vhcache->vhcache_lock);
8294 	vhcache_dirty(vhc);
8295 }
8296 
8297 /*
8298  * Configure all specified paths of the client.
8299  */
8300 static void
8301 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8302     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8303 {
8304 	mdi_phys_path_t *pp;
8305 
8306 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8307 		(void) bus_config_one_phci_child(pp->phys_path);
8308 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8309 }
8310 
8311 /*
8312  * Dequeue elements from vhci async client config list and bus configure
8313  * their corresponding phci clients.
8314  */
8315 static void
8316 config_client_paths_thread(void *arg)
8317 {
8318 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8319 	mdi_async_client_config_t *acc;
8320 	clock_t quit_at_ticks;
8321 	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
8322 	callb_cpr_t cprinfo;
8323 
8324 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8325 	    "mdi_config_client_paths");
8326 
8327 	for (; ; ) {
8328 		quit_at_ticks = ddi_get_lbolt() + idle_time;
8329 
8330 		mutex_enter(&vhc->vhc_lock);
8331 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8332 		    vhc->vhc_acc_list_head == NULL &&
8333 		    ddi_get_lbolt() < quit_at_ticks) {
8334 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
8335 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8336 			    quit_at_ticks);
8337 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8338 		}
8339 
8340 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8341 		    vhc->vhc_acc_list_head == NULL)
8342 			goto out;
8343 
8344 		acc = vhc->vhc_acc_list_head;
8345 		vhc->vhc_acc_list_head = acc->acc_next;
8346 		if (vhc->vhc_acc_list_head == NULL)
8347 			vhc->vhc_acc_list_tail = NULL;
8348 		vhc->vhc_acc_count--;
8349 		mutex_exit(&vhc->vhc_lock);
8350 
8351 		config_client_paths_sync(vhc, acc->acc_ct_name,
8352 		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
8353 		    &acc->acc_token);
8354 
8355 		free_async_client_config(acc);
8356 	}
8357 
8358 out:
8359 	vhc->vhc_acc_thrcount--;
8360 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8361 	CALLB_CPR_EXIT(&cprinfo);
8362 }
8363 
8364 /*
8365  * Arrange for all the phci client paths (pp_head) for the specified client
8366  * to be bus configured asynchronously by a thread.
8367  */
8368 static void
8369 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8370     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8371 {
8372 	mdi_async_client_config_t *acc, *newacc;
8373 	int create_thread;
8374 
8375 	if (pp_head == NULL)
8376 		return;
8377 
8378 	if (mdi_mtc_off) {
8379 		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8380 		free_phclient_path_list(pp_head);
8381 		return;
8382 	}
8383 
8384 	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8385 	ASSERT(newacc);
8386 
8387 	mutex_enter(&vhc->vhc_lock);
8388 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8389 		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8390 		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8391 			free_async_client_config(newacc);
8392 			mutex_exit(&vhc->vhc_lock);
8393 			return;
8394 		}
8395 	}
8396 
8397 	if (vhc->vhc_acc_list_head == NULL)
8398 		vhc->vhc_acc_list_head = newacc;
8399 	else
8400 		vhc->vhc_acc_list_tail->acc_next = newacc;
8401 	vhc->vhc_acc_list_tail = newacc;
8402 	vhc->vhc_acc_count++;
8403 	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8404 		cv_broadcast(&vhc->vhc_cv);
8405 		create_thread = 0;
8406 	} else {
8407 		vhc->vhc_acc_thrcount++;
8408 		create_thread = 1;
8409 	}
8410 	mutex_exit(&vhc->vhc_lock);
8411 
8412 	if (create_thread)
8413 		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8414 		    0, &p0, TS_RUN, minclsyspri);
8415 }
8416 
8417 /*
8418  * Return number of online paths for the specified client.
8419  */
8420 static int
8421 nonline_paths(mdi_vhcache_client_t *cct)
8422 {
8423 	mdi_vhcache_pathinfo_t *cpi;
8424 	int online_count = 0;
8425 
8426 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8427 		if (cpi->cpi_pip != NULL) {
8428 			MDI_PI_LOCK(cpi->cpi_pip);
8429 			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8430 				online_count++;
8431 			MDI_PI_UNLOCK(cpi->cpi_pip);
8432 		}
8433 	}
8434 
8435 	return (online_count);
8436 }
8437 
8438 /*
8439  * Bus configure all paths for the specified vhci client.
8440  * If at least one path for the client is already online, the remaining paths
8441  * will be configured asynchronously. Otherwise, it synchronously configures
8442  * the paths until at least one path is online and then rest of the paths
8443  * will be configured asynchronously.
8444  */
8445 static void
8446 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
8447 {
8448 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8449 	mdi_phys_path_t *pp_head, *pp;
8450 	mdi_vhcache_client_t *cct;
8451 	mdi_vhcache_lookup_token_t tok;
8452 
8453 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8454 
8455 	init_vhcache_lookup_token(&tok, NULL);
8456 
8457 	if (ct_name == NULL || ct_addr == NULL ||
8458 	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
8459 	    == NULL ||
8460 	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
8461 		rw_exit(&vhcache->vhcache_lock);
8462 		return;
8463 	}
8464 
8465 	/* if at least one path is online, configure the rest asynchronously */
8466 	if (nonline_paths(cct) > 0) {
8467 		rw_exit(&vhcache->vhcache_lock);
8468 		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
8469 		return;
8470 	}
8471 
8472 	rw_exit(&vhcache->vhcache_lock);
8473 
8474 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
8475 		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
8476 			rw_enter(&vhcache->vhcache_lock, RW_READER);
8477 
8478 			if ((cct = lookup_vhcache_client(vhcache, ct_name,
8479 			    ct_addr, &tok)) == NULL) {
8480 				rw_exit(&vhcache->vhcache_lock);
8481 				goto out;
8482 			}
8483 
8484 			if (nonline_paths(cct) > 0 &&
8485 			    pp->phys_path_next != NULL) {
8486 				rw_exit(&vhcache->vhcache_lock);
8487 				config_client_paths_async(vhc, ct_name, ct_addr,
8488 				    pp->phys_path_next, &tok);
8489 				pp->phys_path_next = NULL;
8490 				goto out;
8491 			}
8492 
8493 			rw_exit(&vhcache->vhcache_lock);
8494 		}
8495 	}
8496 
8497 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
8498 out:
8499 	free_phclient_path_list(pp_head);
8500 }
8501 
8502 static void
8503 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8504 {
8505 	mutex_enter(&vhc->vhc_lock);
8506 	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8507 		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8508 	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8509 	mutex_exit(&vhc->vhc_lock);
8510 }
8511 
8512 static void
8513 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8514 {
8515 	mutex_enter(&vhc->vhc_lock);
8516 	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
8517 	cv_broadcast(&vhc->vhc_cv);
8518 	mutex_exit(&vhc->vhc_lock);
8519 }
8520 
/*
 * Description of a phci driver in the built-in phci driver tables.
 */
typedef struct mdi_phci_driver_info {
	char	*phdriver_name;	/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
	{ "fp", 1 },
	{ "iscsi", 0 },
	{ "ibsrp", 1 }
};

/* fully braced, like the scsi table, so that entries can simply be added */
static mdi_phci_driver_info_t ib_phci_driver_list[] = {
	{ "tavor", 1 }
};
8543 
8544 static void *
8545 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
8546 {
8547 	void *new_ptr;
8548 
8549 	new_ptr = kmem_zalloc(new_size, KM_SLEEP);
8550 	if (old_ptr) {
8551 		bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
8552 		kmem_free(old_ptr, old_size);
8553 	}
8554 	return (new_ptr);
8555 }
8556 
8557 static void
8558 add_to_phci_list(char ***driver_list, int **root_support_list,
8559     int *cur_elements, int *max_elements, char *driver_name, int root_support)
8560 {
8561 	ASSERT(*cur_elements <= *max_elements);
8562 	if (*cur_elements == *max_elements) {
8563 		*max_elements += 10;
8564 		*driver_list = mdi_realloc(*driver_list,
8565 		    sizeof (char *) * (*cur_elements),
8566 		    sizeof (char *) * (*max_elements));
8567 		*root_support_list = mdi_realloc(*root_support_list,
8568 		    sizeof (int) * (*cur_elements),
8569 		    sizeof (int) * (*max_elements));
8570 	}
8571 	(*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
8572 	(*root_support_list)[*cur_elements] = root_support;
8573 	(*cur_elements)++;
8574 }
8575 
8576 static void
8577 get_phci_driver_list(char *vhci_class, char ***driver_list,
8578     int **root_support_list, int *cur_elements, int *max_elements)
8579 {
8580 	mdi_phci_driver_info_t	*st_driver_list, *p;
8581 	int		st_ndrivers, root_support, i, j, driver_conf_count;
8582 	major_t		m;
8583 	struct devnames	*dnp;
8584 	ddi_prop_t	*propp;
8585 
8586 	*driver_list = NULL;
8587 	*root_support_list = NULL;
8588 	*cur_elements = 0;
8589 	*max_elements = 0;
8590 
8591 	/* add the phci drivers derived from the phci driver.conf files */
8592 	for (m = 0; m < devcnt; m++) {
8593 		dnp = &devnamesp[m];
8594 
8595 		if (dnp->dn_flags & DN_PHCI_DRIVER) {
8596 			LOCK_DEV_OPS(&dnp->dn_lock);
8597 			if (dnp->dn_global_prop_ptr != NULL &&
8598 			    (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
8599 			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
8600 			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
8601 			    strcmp(propp->prop_val, vhci_class) == 0) {
8602 
8603 				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
8604 				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
8605 				    &dnp->dn_global_prop_ptr->prop_list)
8606 				    == NULL) ? 1 : 0;
8607 
8608 				add_to_phci_list(driver_list, root_support_list,
8609 				    cur_elements, max_elements, dnp->dn_name,
8610 				    root_support);
8611 
8612 				UNLOCK_DEV_OPS(&dnp->dn_lock);
8613 			} else
8614 				UNLOCK_DEV_OPS(&dnp->dn_lock);
8615 		}
8616 	}
8617 
8618 	driver_conf_count = *cur_elements;
8619 
8620 	/* add the phci drivers specified in the built-in tables */
8621 	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
8622 		st_driver_list = scsi_phci_driver_list;
8623 		st_ndrivers = sizeof (scsi_phci_driver_list) /
8624 		    sizeof (mdi_phci_driver_info_t);
8625 	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
8626 		st_driver_list = ib_phci_driver_list;
8627 		st_ndrivers = sizeof (ib_phci_driver_list) /
8628 		    sizeof (mdi_phci_driver_info_t);
8629 	} else {
8630 		st_driver_list = NULL;
8631 		st_ndrivers = 0;
8632 	}
8633 
8634 	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
8635 		/* add this phci driver if not already added before */
8636 		for (j = 0; j < driver_conf_count; j++) {
8637 			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
8638 				break;
8639 		}
8640 		if (j == driver_conf_count) {
8641 			add_to_phci_list(driver_list, root_support_list,
8642 			    cur_elements, max_elements, p->phdriver_name,
8643 			    p->phdriver_root_support);
8644 		}
8645 	}
8646 }
8647 
8648 /*
8649  * Attach the phci driver instances associated with the specified vhci class.
8650  * If root is mounted attach all phci driver instances.
8651  * If root is not mounted, attach the instances of only those phci
8652  * drivers that have the root support.
8653  */
8654 static void
8655 attach_phci_drivers(char *vhci_class)
8656 {
8657 	char	**driver_list, **p;
8658 	int	*root_support_list;
8659 	int	cur_elements, max_elements, i;
8660 	major_t	m;
8661 
8662 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
8663 	    &cur_elements, &max_elements);
8664 
8665 	for (i = 0; i < cur_elements; i++) {
8666 		if (modrootloaded || root_support_list[i]) {
8667 			m = ddi_name_to_major(driver_list[i]);
8668 			if (m != DDI_MAJOR_T_NONE &&
8669 			    ddi_hold_installed_driver(m))
8670 				ddi_rele_driver(m);
8671 		}
8672 	}
8673 
8674 	if (driver_list) {
8675 		for (i = 0, p = driver_list; i < cur_elements; i++, p++)
8676 			kmem_free(*p, strlen(*p) + 1);
8677 		kmem_free(driver_list, sizeof (char *) * max_elements);
8678 		kmem_free(root_support_list, sizeof (int) * max_elements);
8679 	}
8680 }
8681 
8682 /*
8683  * Build vhci cache:
8684  *
8685  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
8686  * the phci driver instances. During this process the cache gets built.
8687  *
8688  * Cache is built fully if the root is mounted.
8689  * If the root is not mounted, phci drivers that do not have root support
8690  * are not attached. As a result the cache is built partially. The entries
8691  * in the cache reflect only those phci drivers that have root support.
8692  */
8693 static int
8694 build_vhci_cache(mdi_vhci_t *vh)
8695 {
8696 	mdi_vhci_config_t *vhc = vh->vh_config;
8697 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8698 
8699 	single_threaded_vhconfig_enter(vhc);
8700 
8701 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8702 	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
8703 		rw_exit(&vhcache->vhcache_lock);
8704 		single_threaded_vhconfig_exit(vhc);
8705 		return (0);
8706 	}
8707 	rw_exit(&vhcache->vhcache_lock);
8708 
8709 	attach_phci_drivers(vh->vh_class);
8710 	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
8711 	    BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
8712 
8713 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8714 	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
8715 	rw_exit(&vhcache->vhcache_lock);
8716 
8717 	single_threaded_vhconfig_exit(vhc);
8718 	vhcache_dirty(vhc);
8719 	return (1);
8720 }
8721 
8722 /*
8723  * Determine if discovery of paths is needed.
8724  */
8725 static int
8726 vhcache_do_discovery(mdi_vhci_config_t *vhc)
8727 {
8728 	int rv = 1;
8729 
8730 	mutex_enter(&vhc->vhc_lock);
8731 	if (i_ddi_io_initialized() == 0) {
8732 		if (vhc->vhc_path_discovery_boot > 0) {
8733 			vhc->vhc_path_discovery_boot--;
8734 			goto out;
8735 		}
8736 	} else {
8737 		if (vhc->vhc_path_discovery_postboot > 0) {
8738 			vhc->vhc_path_discovery_postboot--;
8739 			goto out;
8740 		}
8741 	}
8742 
8743 	/*
8744 	 * Do full path discovery at most once per mdi_path_discovery_interval.
8745 	 * This is to avoid a series of full path discoveries when opening
8746 	 * stale /dev/[r]dsk links.
8747 	 */
8748 	if (mdi_path_discovery_interval != -1 &&
8749 	    lbolt64 >= vhc->vhc_path_discovery_cutoff_time)
8750 		goto out;
8751 
8752 	rv = 0;
8753 out:
8754 	mutex_exit(&vhc->vhc_lock);
8755 	return (rv);
8756 }
8757 
8758 /*
8759  * Discover all paths:
8760  *
8761  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
8762  * driver instances. During this process all paths will be discovered.
8763  */
8764 static int
8765 vhcache_discover_paths(mdi_vhci_t *vh)
8766 {
8767 	mdi_vhci_config_t *vhc = vh->vh_config;
8768 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8769 	int rv = 0;
8770 
8771 	single_threaded_vhconfig_enter(vhc);
8772 
8773 	if (vhcache_do_discovery(vhc)) {
8774 		attach_phci_drivers(vh->vh_class);
8775 		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
8776 		    NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
8777 
8778 		mutex_enter(&vhc->vhc_lock);
8779 		vhc->vhc_path_discovery_cutoff_time = lbolt64 +
8780 		    mdi_path_discovery_interval * TICKS_PER_SECOND;
8781 		mutex_exit(&vhc->vhc_lock);
8782 		rv = 1;
8783 	}
8784 
8785 	single_threaded_vhconfig_exit(vhc);
8786 	return (rv);
8787 }
8788 
8789 /*
8790  * Generic vhci bus config implementation:
8791  *
8792  * Parameters
8793  *	vdip	vhci dip
8794  *	flags	bus config flags
8795  *	op	bus config operation
8796  *	The remaining parameters are bus config operation specific
8797  *
8798  * for BUS_CONFIG_ONE
8799  *	arg	pointer to name@addr
8800  *	child	upon successful return from this function, *child will be
8801  *		set to the configured and held devinfo child node of vdip.
8802  *	ct_addr	pointer to client address (i.e. GUID)
8803  *
8804  * for BUS_CONFIG_DRIVER
8805  *	arg	major number of the driver
8806  *	child and ct_addr parameters are ignored
8807  *
8808  * for BUS_CONFIG_ALL
8809  *	arg, child, and ct_addr parameters are ignored
8810  *
8811  * Note that for the rest of the bus config operations, this function simply
8812  * calls the framework provided default bus config routine.
8813  */
8814 int
8815 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
8816     void *arg, dev_info_t **child, char *ct_addr)
8817 {
8818 	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
8819 	mdi_vhci_config_t *vhc = vh->vh_config;
8820 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8821 	int rv = 0;
8822 	int params_valid = 0;
8823 	char *cp;
8824 
8825 	/*
8826 	 * To bus config vhcis we relay operation, possibly using another
8827 	 * thread, to phcis. The phci driver then interacts with MDI to cause
8828 	 * vhci child nodes to be enumerated under the vhci node.  Adding a
8829 	 * vhci child requires an ndi_devi_enter of the vhci. Since another
8830 	 * thread may be adding the child, to avoid deadlock we can't wait
8831 	 * for the relayed operations to complete if we have already entered
8832 	 * the vhci node.
8833 	 */
8834 	if (DEVI_BUSY_OWNED(vdip)) {
8835 		MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: "
8836 		    "vhci dip is busy owned %p\n", (void *)vdip));
8837 		goto default_bus_config;
8838 	}
8839 
8840 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8841 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8842 		rw_exit(&vhcache->vhcache_lock);
8843 		rv = build_vhci_cache(vh);
8844 		rw_enter(&vhcache->vhcache_lock, RW_READER);
8845 	}
8846 
8847 	switch (op) {
8848 	case BUS_CONFIG_ONE:
8849 		if (arg != NULL && ct_addr != NULL) {
8850 			/* extract node name */
8851 			cp = (char *)arg;
8852 			while (*cp != '\0' && *cp != '@')
8853 				cp++;
8854 			if (*cp == '@') {
8855 				params_valid = 1;
8856 				*cp = '\0';
8857 				config_client_paths(vhc, (char *)arg, ct_addr);
8858 				/* config_client_paths() releases cache_lock */
8859 				*cp = '@';
8860 				break;
8861 			}
8862 		}
8863 
8864 		rw_exit(&vhcache->vhcache_lock);
8865 		break;
8866 
8867 	case BUS_CONFIG_DRIVER:
8868 		rw_exit(&vhcache->vhcache_lock);
8869 		if (rv == 0)
8870 			st_bus_config_all_phcis(vhc, flags, op,
8871 			    (major_t)(uintptr_t)arg);
8872 		break;
8873 
8874 	case BUS_CONFIG_ALL:
8875 		rw_exit(&vhcache->vhcache_lock);
8876 		if (rv == 0)
8877 			st_bus_config_all_phcis(vhc, flags, op, -1);
8878 		break;
8879 
8880 	default:
8881 		rw_exit(&vhcache->vhcache_lock);
8882 		break;
8883 	}
8884 
8885 
8886 default_bus_config:
8887 	/*
8888 	 * All requested child nodes are enumerated under the vhci.
8889 	 * Now configure them.
8890 	 */
8891 	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8892 	    NDI_SUCCESS) {
8893 		return (MDI_SUCCESS);
8894 	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
8895 		/* discover all paths and try configuring again */
8896 		if (vhcache_discover_paths(vh) &&
8897 		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
8898 		    NDI_SUCCESS)
8899 			return (MDI_SUCCESS);
8900 	}
8901 
8902 	return (MDI_FAILURE);
8903 }
8904 
8905 /*
8906  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
8907  */
8908 static nvlist_t *
8909 read_on_disk_vhci_cache(char *vhci_class)
8910 {
8911 	nvlist_t *nvl;
8912 	int err;
8913 	char *filename;
8914 
8915 	filename = vhclass2vhcache_filename(vhci_class);
8916 
8917 	if ((err = fread_nvlist(filename, &nvl)) == 0) {
8918 		kmem_free(filename, strlen(filename) + 1);
8919 		return (nvl);
8920 	} else if (err == EIO)
8921 		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
8922 	else if (err == EINVAL)
8923 		cmn_err(CE_WARN,
8924 		    "%s: data file corrupted, will recreate\n", filename);
8925 
8926 	kmem_free(filename, strlen(filename) + 1);
8927 	return (NULL);
8928 }
8929 
8930 /*
8931  * Read on-disk vhci cache into nvlists for all vhci classes.
8932  * Called during booting by i_ddi_read_devices_files().
8933  */
8934 void
8935 mdi_read_devices_files(void)
8936 {
8937 	int i;
8938 
8939 	for (i = 0; i < N_VHCI_CLASSES; i++)
8940 		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
8941 }
8942 
8943 /*
8944  * Remove all stale entries from vhci cache.
8945  */
8946 static void
8947 clean_vhcache(mdi_vhci_config_t *vhc)
8948 {
8949 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8950 	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
8951 	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
8952 	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;
8953 
8954 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8955 
8956 	cct_head = vhcache->vhcache_client_head;
8957 	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
8958 	for (cct = cct_head; cct != NULL; cct = cct_next) {
8959 		cct_next = cct->cct_next;
8960 
8961 		cpi_head = cct->cct_cpi_head;
8962 		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8963 		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8964 			cpi_next = cpi->cpi_next;
8965 			if (cpi->cpi_pip != NULL) {
8966 				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
8967 				enqueue_tail_vhcache_pathinfo(cct, cpi);
8968 			} else
8969 				free_vhcache_pathinfo(cpi);
8970 		}
8971 
8972 		if (cct->cct_cpi_head != NULL)
8973 			enqueue_vhcache_client(vhcache, cct);
8974 		else {
8975 			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
8976 			    (mod_hash_key_t)cct->cct_name_addr);
8977 			free_vhcache_client(cct);
8978 		}
8979 	}
8980 
8981 	cphci_head = vhcache->vhcache_phci_head;
8982 	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
8983 	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
8984 		cphci_next = cphci->cphci_next;
8985 		if (cphci->cphci_phci != NULL)
8986 			enqueue_vhcache_phci(vhcache, cphci);
8987 		else
8988 			free_vhcache_phci(cphci);
8989 	}
8990 
8991 	vhcache->vhcache_clean_time = lbolt64;
8992 	rw_exit(&vhcache->vhcache_lock);
8993 	vhcache_dirty(vhc);
8994 }
8995 
8996 /*
8997  * Remove all stale entries from vhci cache.
8998  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
8999  */
9000 void
9001 mdi_clean_vhcache(void)
9002 {
9003 	mdi_vhci_t *vh;
9004 
9005 	mutex_enter(&mdi_mutex);
9006 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9007 		vh->vh_refcnt++;
9008 		mutex_exit(&mdi_mutex);
9009 		clean_vhcache(vh->vh_config);
9010 		mutex_enter(&mdi_mutex);
9011 		vh->vh_refcnt--;
9012 	}
9013 	mutex_exit(&mdi_mutex);
9014 }
9015 
9016 /*
9017  * mdi_vhci_walk_clients():
9018  *		Walker routine to traverse client dev_info nodes
9019  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
9020  * below the client, including nexus devices, which we dont want.
9021  * So we just traverse the immediate siblings, starting from 1st client.
9022  */
9023 void
9024 mdi_vhci_walk_clients(dev_info_t *vdip,
9025     int (*f)(dev_info_t *, void *), void *arg)
9026 {
9027 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9028 	dev_info_t	*cdip;
9029 	mdi_client_t	*ct;
9030 
9031 	MDI_VHCI_CLIENT_LOCK(vh);
9032 	cdip = ddi_get_child(vdip);
9033 	while (cdip) {
9034 		ct = i_devi_get_client(cdip);
9035 		MDI_CLIENT_LOCK(ct);
9036 
9037 		if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9038 			cdip = ddi_get_next_sibling(cdip);
9039 		else
9040 			cdip = NULL;
9041 
9042 		MDI_CLIENT_UNLOCK(ct);
9043 	}
9044 	MDI_VHCI_CLIENT_UNLOCK(vh);
9045 }
9046 
9047 /*
9048  * mdi_vhci_walk_phcis():
9049  *		Walker routine to traverse phci dev_info nodes
9050  */
9051 void
9052 mdi_vhci_walk_phcis(dev_info_t *vdip,
9053     int (*f)(dev_info_t *, void *), void *arg)
9054 {
9055 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9056 	mdi_phci_t	*ph, *next;
9057 
9058 	MDI_VHCI_PHCI_LOCK(vh);
9059 	ph = vh->vh_phci_head;
9060 	while (ph) {
9061 		MDI_PHCI_LOCK(ph);
9062 
9063 		if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9064 			next = ph->ph_next;
9065 		else
9066 			next = NULL;
9067 
9068 		MDI_PHCI_UNLOCK(ph);
9069 		ph = next;
9070 	}
9071 	MDI_VHCI_PHCI_UNLOCK(vh);
9072 }
9073 
9074 
9075 /*
9076  * mdi_walk_vhcis():
9077  *		Walker routine to traverse vhci dev_info nodes
9078  */
9079 void
9080 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9081 {
9082 	mdi_vhci_t	*vh = NULL;
9083 
9084 	mutex_enter(&mdi_mutex);
9085 	/*
9086 	 * Scan for already registered vhci
9087 	 */
9088 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9089 		vh->vh_refcnt++;
9090 		mutex_exit(&mdi_mutex);
9091 		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9092 			mutex_enter(&mdi_mutex);
9093 			vh->vh_refcnt--;
9094 			break;
9095 		} else {
9096 			mutex_enter(&mdi_mutex);
9097 			vh->vh_refcnt--;
9098 		}
9099 	}
9100 
9101 	mutex_exit(&mdi_mutex);
9102 }
9103 
9104 /*
9105  * i_mdi_log_sysevent():
9106  *		Logs events for pickup by syseventd
9107  */
9108 static void
9109 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9110 {
9111 	char		*path_name;
9112 	nvlist_t	*attr_list;
9113 
9114 	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9115 	    KM_SLEEP) != DDI_SUCCESS) {
9116 		goto alloc_failed;
9117 	}
9118 
9119 	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9120 	(void) ddi_pathname(dip, path_name);
9121 
9122 	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9123 	    ddi_driver_name(dip)) != DDI_SUCCESS) {
9124 		goto error;
9125 	}
9126 
9127 	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9128 	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9129 		goto error;
9130 	}
9131 
9132 	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9133 	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9134 		goto error;
9135 	}
9136 
9137 	if (nvlist_add_string(attr_list, DDI_PATHNAME,
9138 	    path_name) != DDI_SUCCESS) {
9139 		goto error;
9140 	}
9141 
9142 	if (nvlist_add_string(attr_list, DDI_CLASS,
9143 	    ph_vh_class) != DDI_SUCCESS) {
9144 		goto error;
9145 	}
9146 
9147 	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9148 	    attr_list, NULL, DDI_SLEEP);
9149 
9150 error:
9151 	kmem_free(path_name, MAXPATHLEN);
9152 	nvlist_free(attr_list);
9153 	return;
9154 
9155 alloc_failed:
9156 	MDI_DEBUG(1, (CE_WARN, dip,
9157 	    "!i_mdi_log_sysevent: Unable to send sysevent"));
9158 }
9159 
/*
 * mdi_get_phci_driver_list():
 *		Return the list of phci driver names for the given vhci
 *		class, with the driver count stored in *ndrivers.
 *
 * The root support list obtained alongside the names is freed here, and the
 * name array is resized from its allocated element count down to the number
 * of entries in use.  Returns NULL when no driver list was obtained.  The
 * resulting array has the layout that mdi_free_phci_driver_list() frees.
 */
char **
mdi_get_phci_driver_list(char *vhci_class, int	*ndrivers)
{
	char	**names;
	int	*root_support;
	int	nused, nalloc;

	get_phci_driver_list(vhci_class, &names, &root_support,
	    &nused, &nalloc);

	*ndrivers = nused;

	if (names == NULL)
		return (NULL);

	kmem_free(root_support, sizeof (int) * nalloc);
	return (mdi_realloc(names, sizeof (char *) * nalloc,
	    sizeof (char *) * nused));
}
9181 
/*
 * mdi_free_phci_driver_list():
 *		Free a driver name list of ndrivers entries: every name
 *		string (sized by its length plus terminator) and then the
 *		pointer array itself.  A NULL list is ignored.
 */
void
mdi_free_phci_driver_list(char **driver_list, int ndrivers)
{
	int	idx;

	if (driver_list == NULL)
		return;

	for (idx = 0; idx < ndrivers; idx++)
		kmem_free(driver_list[idx], strlen(driver_list[idx]) + 1);

	kmem_free(driver_list, sizeof (char *) * ndrivers);
}
9194