1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4u specific DDI implementation
31  */
32 #include <sys/bootconf.h>
33 #include <sys/conf.h>
34 #include <sys/ddi_subrdefs.h>
35 #include <sys/ethernet.h>
36 #include <sys/idprom.h>
37 #include <sys/machsystm.h>
38 #include <sys/modhash.h>
39 #include <sys/promif.h>
40 #include <sys/prom_plat.h>
41 #include <sys/sunndi.h>
42 #include <sys/systeminfo.h>
43 #include <sys/fpu/fpusystm.h>
44 #include <sys/vm.h>
45 #include <sys/fs/dv_node.h>
46 #include <sys/fs/snode.h>
47 
/*
 * Modules favored by this implementation architecture.  Every one of
 * them MUST be present for the system to boot at all.  The list ends
 * with a NULL entry.
 */
char *impl_module_list[] = {
	"rootnex",
	"options",
	"sad",		/* init_tbl[] refers to this one */
	"pseudo",
	"clone",
	"scsi_vhci",
	NULL
};
62 
/*
 * These strings are passed to not_serviced in locore.s.
 */
const char busname_ovec[] = "onboard ";
const char busname_svec[] = "SBus ";
const char busname_vec[] = "";
69 
70 
/*
 * Addresses of interrupt mapping registers, indexed by UPA id.
 * Entries are stored by set_intr_mapping_reg() and looked up by
 * get_intr_mapping_reg(); a slot that was never stored is NULL.
 */
static uint64_t *intr_map_reg[32];
72 
73 /*
74  * Forward declarations
75  */
76 static int getlongprop_buf();
77 static int get_boardnum(int nid, dev_info_t *par);
78 
79 /*
80  * Check the status of the device node passed as an argument.
81  *
82  *	if ((status is OKAY) || (status is DISABLED))
83  *		return DDI_SUCCESS
84  *	else
85  *		print a warning and return DDI_FAILURE
86  */
87 /*ARGSUSED*/
88 int
89 check_status(int id, char *buf, dev_info_t *parent)
90 {
91 	char status_buf[64];
92 	char devtype_buf[OBP_MAXPROPNAME];
93 	char board_buf[32];
94 	char path[OBP_MAXPATHLEN];
95 	int boardnum;
96 	int retval = DDI_FAILURE;
97 	extern int status_okay(int, char *, int);
98 
99 	/*
100 	 * is the status okay?
101 	 */
102 	if (status_okay(id, status_buf, sizeof (status_buf)))
103 		return (DDI_SUCCESS);
104 
105 	/*
106 	 * a status property indicating bad memory will be associated
107 	 * with a node which has a "device_type" property with a value of
108 	 * "memory-controller". in this situation, return DDI_SUCCESS
109 	 */
110 	if (getlongprop_buf(id, OBP_DEVICETYPE, devtype_buf,
111 	    sizeof (devtype_buf)) > 0) {
112 		if (strcmp(devtype_buf, "memory-controller") == 0)
113 			retval = DDI_SUCCESS;
114 	}
115 
116 	/*
117 	 * get the full OBP pathname of this node
118 	 */
119 	if (prom_phandle_to_path((phandle_t)id, path, sizeof (path)) < 0)
120 		cmn_err(CE_WARN, "prom_phandle_to_path(%d) failed", id);
121 
122 	/*
123 	 * get the board number, if one exists
124 	 */
125 	if ((boardnum = get_boardnum(id, parent)) >= 0)
126 		(void) sprintf(board_buf, " on board %d", boardnum);
127 	else
128 		board_buf[0] = '\0';
129 
130 	/*
131 	 * print the status property information
132 	 */
133 	cmn_err(CE_WARN, "status '%s' for '%s'%s",
134 		status_buf, path, board_buf);
135 	return (retval);
136 }
137 
138 /*
139  * determine the board number associated with this nodeid
140  */
141 static int
142 get_boardnum(int nid, dev_info_t *par)
143 {
144 	int board_num;
145 
146 	if (prom_getprop((pnode_t)nid, OBP_BOARDNUM,
147 	    (caddr_t)&board_num) != -1)
148 		return (board_num);
149 
150 	/*
151 	 * Look at current node and up the parent chain
152 	 * till we find a node with an OBP_BOARDNUM.
153 	 */
154 	while (par) {
155 		nid = ddi_get_nodeid(par);
156 
157 		if (prom_getprop((pnode_t)nid, OBP_BOARDNUM,
158 		    (caddr_t)&board_num) != -1)
159 			return (board_num);
160 
161 		par = ddi_get_parent(par);
162 	}
163 	return (-1);
164 }
165 
166 /*
167  * Note that this routine does not take into account the endianness
168  * of the host or the device (or PROM) when retrieving properties.
169  */
170 static int
171 getlongprop_buf(int id, char *name, char *buf, int maxlen)
172 {
173 	int size;
174 
175 	size = prom_getproplen((pnode_t)id, name);
176 	if (size <= 0 || (size > maxlen - 1))
177 		return (-1);
178 
179 	if (-1 == prom_getprop((pnode_t)id, name, buf))
180 		return (-1);
181 
182 	/*
183 	 * Workaround for bugid 1085575 - OBP may return a "name" property
184 	 * without null terminating the string with '\0'.  When this occurs,
185 	 * append a '\0' and return (size + 1).
186 	 */
187 	if (strcmp("name", name) == 0) {
188 		if (buf[size - 1] != '\0') {
189 			buf[size] = '\0';
190 			size += 1;
191 		}
192 	}
193 
194 	return (size);
195 }
196 
197 /*
198  * Routines to set/get UPA slave only device interrupt mapping registers.
199  * set_intr_mapping_reg() is called by the UPA master to register the address
200  * of an interrupt mapping register. The upa id is that of the master. If
201  * this routine is called on behalf of a slave device, the framework
202  * determines the upa id of the slave based on that supplied by the master.
203  *
204  * get_intr_mapping_reg() is called by the UPA nexus driver on behalf
205  * of a child device to get and program the interrupt mapping register of
206  * one of it's child nodes.  It uses the upa id of the child device to
207  * index into a table of mapping registers.  If the routine is called on
208  * behalf of a slave device and the mapping register has not been set,
209  * the framework determines the devinfo node of the corresponding master
210  * nexus which owns the mapping register of the slave and installs that
211  * driver.  The device driver which owns the mapping register must call
212  * set_intr_mapping_reg() in its attach routine to register the slaves
213  * mapping register with the system.
214  */
215 void
216 set_intr_mapping_reg(int upaid, uint64_t *addr, int slave)
217 {
218 	int affin_upaid;
219 
220 	/* For UPA master devices, set the mapping reg addr and we're done */
221 	if (slave == 0) {
222 		intr_map_reg[upaid] = addr;
223 		return;
224 	}
225 
226 	/*
227 	 * If we get here, we're adding an entry for a UPA slave only device.
228 	 * The UPA id of the device which has affinity with that requesting,
229 	 * will be the device with the same UPA id minus the slave number.
230 	 * If the affin_upaid is negative, silently return to the caller.
231 	 */
232 	if ((affin_upaid = upaid - slave) < 0)
233 		return;
234 
235 	/*
236 	 * Load the address of the mapping register in the correct slot
237 	 * for the slave device.
238 	 */
239 	intr_map_reg[affin_upaid] = addr;
240 }
241 
242 uint64_t *
243 get_intr_mapping_reg(int upaid, int slave)
244 {
245 	int affin_upaid;
246 	dev_info_t *affin_dip;
247 	uint64_t *addr = intr_map_reg[upaid];
248 
249 	/* If we're a UPA master, or we have a valid mapping register. */
250 	if (!slave || addr != NULL)
251 		return (addr);
252 
253 	/*
254 	 * We only get here if we're a UPA slave only device whose interrupt
255 	 * mapping register has not been set.
256 	 * We need to try and install the nexus whose physical address
257 	 * space is where the slaves mapping register resides.  They
258 	 * should call set_intr_mapping_reg() in their xxattach() to register
259 	 * the mapping register with the system.
260 	 */
261 
262 	/*
263 	 * We don't know if a single- or multi-interrupt proxy is fielding
264 	 * our UPA slave interrupt, we must check both cases.
265 	 * Start out by assuming the multi-interrupt case.
266 	 * We assume that single- and multi- interrupters are not
267 	 * overlapping in UPA portid space.
268 	 */
269 
270 	affin_upaid = upaid | 3;
271 
272 	/*
273 	 * We start looking for the multi-interrupter affinity node.
274 	 * We know it's ONLY a child of the root node since the root
275 	 * node defines UPA space.
276 	 */
277 	for (affin_dip = ddi_get_child(ddi_root_node()); affin_dip;
278 	    affin_dip = ddi_get_next_sibling(affin_dip))
279 		if (ddi_prop_get_int(DDI_DEV_T_ANY, affin_dip,
280 		    DDI_PROP_DONTPASS, "upa-portid", -1) == affin_upaid)
281 			break;
282 
283 	if (affin_dip) {
284 		if (i_ddi_attach_node_hierarchy(affin_dip) == DDI_SUCCESS) {
285 			/* try again to get the mapping register. */
286 			addr = intr_map_reg[upaid];
287 		}
288 	}
289 
290 	/*
291 	 * If we still don't have a mapping register try single -interrupter
292 	 * case.
293 	 */
294 	if (addr == NULL) {
295 
296 		affin_upaid = upaid | 1;
297 
298 		for (affin_dip = ddi_get_child(ddi_root_node()); affin_dip;
299 		    affin_dip = ddi_get_next_sibling(affin_dip))
300 			if (ddi_prop_get_int(DDI_DEV_T_ANY, affin_dip,
301 			    DDI_PROP_DONTPASS, "upa-portid", -1) == affin_upaid)
302 				break;
303 
304 		if (affin_dip) {
305 			if (i_ddi_attach_node_hierarchy(affin_dip)
306 			    == DDI_SUCCESS) {
307 				/* try again to get the mapping register. */
308 				addr = intr_map_reg[upaid];
309 			}
310 		}
311 	}
312 	return (addr);
313 }
314 
315 
/*
 * Table of pfn ranges registered through pf_set_dmacapable().  Both
 * bounds are treated as inclusive by pf_unset_dmacapable() and
 * pf_is_dmacapable().
 */
static struct upa_dma_pfns {
	pfn_t hipfn;	/* highest pfn of the range */
	pfn_t lopfn;	/* lowest pfn of the range */
} upa_dma_pfn_array[MAX_UPA];

/* Number of entries of upa_dma_pfn_array currently in use. */
static int upa_dma_pfn_ndx = 0;
322 
323 /*
324  * Certain UPA busses cannot accept dma transactions from any other source
325  * except for memory due to livelock conditions in their hardware. (e.g. sbus
326  * and PCI). These routines allow devices or busses on the UPA to register
327  * a physical address block within it's own register space where DMA can be
328  * performed.  Currently, the FFB is the only such device which supports
329  * device DMA on the UPA.
330  */
331 void
332 pf_set_dmacapable(pfn_t hipfn, pfn_t lopfn)
333 {
334 	int i = upa_dma_pfn_ndx;
335 
336 	upa_dma_pfn_ndx++;
337 
338 	upa_dma_pfn_array[i].hipfn = hipfn;
339 	upa_dma_pfn_array[i].lopfn = lopfn;
340 }
341 
342 void
343 pf_unset_dmacapable(pfn_t pfn)
344 {
345 	int i;
346 
347 	for (i = 0; i < upa_dma_pfn_ndx; i++) {
348 		if (pfn <= upa_dma_pfn_array[i].hipfn &&
349 		    pfn >= upa_dma_pfn_array[i].lopfn) {
350 			upa_dma_pfn_array[i].hipfn =
351 			    upa_dma_pfn_array[upa_dma_pfn_ndx - 1].hipfn;
352 			upa_dma_pfn_array[i].lopfn =
353 			    upa_dma_pfn_array[upa_dma_pfn_ndx - 1].lopfn;
354 			upa_dma_pfn_ndx--;
355 			break;
356 		}
357 	}
358 }
359 
360 /*
361  * This routine should only be called using a pfn that is known to reside
362  * in IO space.  The function pf_is_memory() can be used to determine this.
363  */
364 int
365 pf_is_dmacapable(pfn_t pfn)
366 {
367 	int i, j;
368 
369 	/* If the caller passed in a memory pfn, return true. */
370 	if (pf_is_memory(pfn))
371 		return (1);
372 
373 	for (i = upa_dma_pfn_ndx, j = 0; j < i; j++)
374 		if (pfn <= upa_dma_pfn_array[j].hipfn &&
375 		    pfn >= upa_dma_pfn_array[j].lopfn)
376 			return (1);
377 
378 	return (0);
379 }
380 
381 
382 /*
383  * Find cpu_id corresponding to the dip of a CPU device node
384  */
385 int
386 dip_to_cpu_id(dev_info_t *dip, processorid_t *cpu_id)
387 {
388 	pnode_t		nodeid;
389 	int		i;
390 
391 	nodeid = (pnode_t)ddi_get_nodeid(dip);
392 	for (i = 0; i < NCPU; i++) {
393 		if (cpunodes[i].nodeid == nodeid) {
394 			*cpu_id = i;
395 			return (DDI_SUCCESS);
396 		}
397 	}
398 	return (DDI_FAILURE);
399 }
400 
401 /*
402  * Platform independent DR routines
403  */
404 
405 static int
406 ndi2errno(int n)
407 {
408 	int err = 0;
409 
410 	switch (n) {
411 		case NDI_NOMEM:
412 			err = ENOMEM;
413 			break;
414 		case NDI_BUSY:
415 			err = EBUSY;
416 			break;
417 		case NDI_FAULT:
418 			err = EFAULT;
419 			break;
420 		case NDI_FAILURE:
421 			err = EIO;
422 			break;
423 		case NDI_SUCCESS:
424 			break;
425 		case NDI_BADHANDLE:
426 		default:
427 			err = EINVAL;
428 			break;
429 	}
430 	return (err);
431 }
432 
/*
 * Prom tree node list
 *
 * Singly linked list of PROM node ids.  visit_node() appends one entry
 * per selected node; create_prom_branch() consumes and frees them.
 */
struct ptnode {
	pnode_t		nodeid;		/* PROM node id of a selected node */
	struct ptnode	*next;		/* next entry, NULL at the tail */
};

/*
 * Prom tree walk arg
 *
 * Filled in by e_ddi_branch_create() and handed to create_prom_branch().
 */
struct pta {
	dev_info_t	*pdip;	/* parent under which branches are created */
	devi_branch_t	*bp;	/* caller's branch description */
	uint_t		flags;	/* DEVI_BRANCH_* flags from the caller */
	dev_info_t	*fdip;	/* dip handed back by e_ddi_branch_configure */
	struct ptnode	*head;	/* head of the list of selected nodes */
};
451 
452 static void
453 visit_node(pnode_t nodeid, struct pta *ap)
454 {
455 	struct ptnode	**nextp;
456 	int		(*select)(pnode_t, void *, uint_t);
457 
458 	ASSERT(nodeid != OBP_NONODE && nodeid != OBP_BADNODE);
459 
460 	select = ap->bp->create.prom_branch_select;
461 
462 	ASSERT(select);
463 
464 	if (select(nodeid, ap->bp->arg, 0) == DDI_SUCCESS) {
465 
466 		for (nextp = &ap->head; *nextp; nextp = &(*nextp)->next)
467 			;
468 
469 		*nextp = kmem_zalloc(sizeof (struct ptnode), KM_SLEEP);
470 
471 		(*nextp)->nodeid = nodeid;
472 	}
473 
474 	if ((ap->flags & DEVI_BRANCH_CHILD) == DEVI_BRANCH_CHILD)
475 		return;
476 
477 	nodeid = prom_childnode(nodeid);
478 	while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
479 		visit_node(nodeid, ap);
480 		nodeid = prom_nextnode(nodeid);
481 	}
482 }
483 
/*
 * Mark a devinfo node offline unless it already is, under the node's
 * devi_lock.  arg is unused.  Always returns DDI_WALK_CONTINUE, so the
 * routine can be handed to ddi_walk_devs() as its per-node callback.
 */
/*ARGSUSED*/
static int
set_dip_offline(dev_info_t *dip, void *arg)
{
	ASSERT(dip);

	mutex_enter(&(DEVI(dip)->devi_lock));
	if (!DEVI_IS_DEVICE_OFFLINE(dip))
		DEVI_SET_DEVICE_OFFLINE(dip);
	mutex_exit(&(DEVI(dip)->devi_lock));

	return (DDI_WALK_CONTINUE);
}
497 
/*
 * Build devinfo branches for the PROM nodes below ap->pdip that the
 * caller's prom_branch_select callback picks.  e_ddi_branch_create()
 * runs this routine through prom_tree_access(); arg is the struct pta
 * it prepared and has_changed is unused.
 *
 * Returns EINVAL when the parent has no valid PROM node id, ENODEV
 * when no node was selected, EIO when a branch could not be created,
 * and otherwise the first non-zero result of e_ddi_branch_configure()
 * (0 when there was none).
 */
/*ARGSUSED*/
static int
create_prom_branch(void *arg, int has_changed)
{
	int		circ, c;
	int		exists, rv;
	pnode_t		nodeid;
	struct ptnode	*tnp;
	dev_info_t	*dip;
	struct pta	*ap = arg;
	devi_branch_t	*bp;

	ASSERT(ap);
	ASSERT(ap->fdip == NULL);
	ASSERT(ap->pdip && ndi_dev_is_prom_node(ap->pdip));

	bp = ap->bp;

	nodeid = ddi_get_nodeid(ap->pdip);
	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE) {
		cmn_err(CE_WARN, "create_prom_branch: invalid "
		    "nodeid: 0x%x", nodeid);
		return (EINVAL);
	}

	ap->head = NULL;

	/* Collect the selected PROM nodes on ap->head. */
	nodeid = prom_childnode(nodeid);
	while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
		visit_node(nodeid, ap);
		nodeid = prom_nextnode(nodeid);
	}

	if (ap->head == NULL)
		return (ENODEV);

	rv = 0;
	/* Consume the list, one branch per entry. */
	while ((tnp = ap->head) != NULL) {
		ap->head = tnp->next;

		ndi_devi_enter(ap->pdip, &circ);

		/*
		 * Check if the branch already exists.
		 */
		exists = 0;
		dip = e_ddi_nodeid_to_dip(tnp->nodeid);
		if (dip != NULL) {
			exists = 1;

			/* Parent is held busy, so release hold */
			ndi_rele_devi(dip);
#ifdef	DEBUG
			cmn_err(CE_WARN, "create_prom_branch: dip(%p) exists"
			    " for nodeid 0x%x", (void *)dip, tnp->nodeid);
#endif
		} else {
			dip = i_ddi_create_branch(ap->pdip, tnp->nodeid);
		}

		/* The list entry is no longer needed past this point. */
		kmem_free(tnp, sizeof (struct ptnode));

		if (dip == NULL) {
			/* Creation failed: note it and try the next entry. */
			ndi_devi_exit(ap->pdip, circ);
			rv = EIO;
			continue;
		}

		ASSERT(ddi_get_parent(dip) == ap->pdip);

		/*
		 * Hold the branch if it is not already held
		 */
		if (!exists)
			e_ddi_branch_hold(dip);

		ASSERT(e_ddi_branch_held(dip));

		/*
		 * Set all dips in the branch offline so that
		 * only a "configure" operation can attach
		 * the branch
		 */
		(void) set_dip_offline(dip, NULL);

		ndi_devi_enter(dip, &c);
		ddi_walk_devs(ddi_get_child(dip), set_dip_offline, NULL);
		ndi_devi_exit(dip, c);

		ndi_devi_exit(ap->pdip, circ);

		/* Configure with the parent no longer held busy. */
		if (ap->flags & DEVI_BRANCH_CONFIGURE) {
			int error = e_ddi_branch_configure(dip, &ap->fdip, 0);
			if (error && rv == 0)
				rv = error;
		}

		/*
		 * Invoke devi_branch_callback() (if it exists) only for
		 * newly created branches
		 */
		if (bp->devi_branch_callback && !exists)
			bp->devi_branch_callback(dip, bp->arg, 0);
	}

	return (rv);
}
605 
606 static int
607 sid_node_create(dev_info_t *pdip, devi_branch_t *bp, dev_info_t **rdipp)
608 {
609 	int			rv, circ, len;
610 	int			i, flags;
611 	dev_info_t		*dip;
612 	char			*nbuf;
613 	static const char	*noname = "<none>";
614 
615 	ASSERT(pdip);
616 	ASSERT(DEVI_BUSY_OWNED(pdip));
617 
618 	flags = 0;
619 
620 	/*
621 	 * Creating the root of a branch ?
622 	 */
623 	if (rdipp) {
624 		*rdipp = NULL;
625 		flags = DEVI_BRANCH_ROOT;
626 	}
627 
628 	ndi_devi_alloc_sleep(pdip, (char *)noname, DEVI_SID_NODEID, &dip);
629 	rv = bp->create.sid_branch_create(dip, bp->arg, flags);
630 
631 	nbuf = kmem_alloc(OBP_MAXDRVNAME, KM_SLEEP);
632 
633 	if (rv == DDI_WALK_ERROR) {
634 		cmn_err(CE_WARN, "e_ddi_branch_create: Error setting"
635 		    " properties on devinfo node %p",  (void *)dip);
636 		goto fail;
637 	}
638 
639 	len = OBP_MAXDRVNAME;
640 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
641 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "name", nbuf, &len)
642 	    != DDI_PROP_SUCCESS) {
643 		cmn_err(CE_WARN, "e_ddi_branch_create: devinfo node %p has"
644 		    "no name property", (void *)dip);
645 		goto fail;
646 	}
647 
648 	ASSERT(i_ddi_node_state(dip) == DS_PROTO);
649 	if (ndi_devi_set_nodename(dip, nbuf, 0) != NDI_SUCCESS) {
650 		cmn_err(CE_WARN, "e_ddi_branch_create: cannot set name (%s)"
651 		    " for devinfo node %p", nbuf, (void *)dip);
652 		goto fail;
653 	}
654 
655 	kmem_free(nbuf, OBP_MAXDRVNAME);
656 
657 	/*
658 	 * Ignore bind failures just like boot does
659 	 */
660 	(void) ndi_devi_bind_driver(dip, 0);
661 
662 	switch (rv) {
663 	case DDI_WALK_CONTINUE:
664 	case DDI_WALK_PRUNESIB:
665 		ndi_devi_enter(dip, &circ);
666 
667 		i = DDI_WALK_CONTINUE;
668 		for (; i == DDI_WALK_CONTINUE; ) {
669 			i = sid_node_create(dip, bp, NULL);
670 		}
671 
672 		ASSERT(i == DDI_WALK_ERROR || i == DDI_WALK_PRUNESIB);
673 		if (i == DDI_WALK_ERROR)
674 			rv = i;
675 		/*
676 		 * If PRUNESIB stop creating siblings
677 		 * of dip's child. Subsequent walk behavior
678 		 * is determined by rv returned by dip.
679 		 */
680 
681 		ndi_devi_exit(dip, circ);
682 		break;
683 	case DDI_WALK_TERMINATE:
684 		/*
685 		 * Don't create children and ask our parent
686 		 * to not create siblings either.
687 		 */
688 		rv = DDI_WALK_PRUNESIB;
689 		break;
690 	case DDI_WALK_PRUNECHILD:
691 		/*
692 		 * Don't create children, but ask parent to continue
693 		 * with siblings.
694 		 */
695 		rv = DDI_WALK_CONTINUE;
696 		break;
697 	default:
698 		ASSERT(0);
699 		break;
700 	}
701 
702 	if (rdipp)
703 		*rdipp = dip;
704 
705 	/*
706 	 * Set device offline - only the "configure" op should cause an attach
707 	 */
708 	(void) set_dip_offline(dip, NULL);
709 
710 	return (rv);
711 fail:
712 	(void) ndi_devi_free(dip);
713 	kmem_free(nbuf, OBP_MAXDRVNAME);
714 	return (DDI_WALK_ERROR);
715 }
716 
717 static int
718 create_sid_branch(
719 	dev_info_t	*pdip,
720 	devi_branch_t	*bp,
721 	dev_info_t	**dipp,
722 	uint_t		flags)
723 {
724 	int		rv = 0, state = DDI_WALK_CONTINUE;
725 	dev_info_t	*rdip;
726 
727 	while (state == DDI_WALK_CONTINUE) {
728 		int	circ;
729 
730 		ndi_devi_enter(pdip, &circ);
731 
732 		state = sid_node_create(pdip, bp, &rdip);
733 		if (rdip == NULL) {
734 			ndi_devi_exit(pdip, circ);
735 			ASSERT(state == DDI_WALK_ERROR);
736 			break;
737 		}
738 
739 		e_ddi_branch_hold(rdip);
740 
741 		ndi_devi_exit(pdip, circ);
742 
743 		if (flags & DEVI_BRANCH_CONFIGURE) {
744 			int error = e_ddi_branch_configure(rdip, dipp, 0);
745 			if (error && rv == 0)
746 				rv = error;
747 		}
748 
749 		/*
750 		 * devi_branch_callback() is optional
751 		 */
752 		if (bp->devi_branch_callback)
753 			bp->devi_branch_callback(rdip, bp->arg, 0);
754 	}
755 
756 	ASSERT(state == DDI_WALK_ERROR || state == DDI_WALK_PRUNESIB);
757 
758 	return (state == DDI_WALK_ERROR ? EIO : rv);
759 }
760 
761 int
762 e_ddi_branch_create(
763 	dev_info_t	*pdip,
764 	devi_branch_t	*bp,
765 	dev_info_t	**dipp,
766 	uint_t		flags)
767 {
768 	int prom_devi, sid_devi, error;
769 
770 	if (pdip == NULL || bp == NULL || bp->type == 0)
771 		return (EINVAL);
772 
773 	prom_devi = (bp->type == DEVI_BRANCH_PROM) ? 1 : 0;
774 	sid_devi = (bp->type == DEVI_BRANCH_SID) ? 1 : 0;
775 
776 	if (prom_devi && bp->create.prom_branch_select == NULL)
777 		return (EINVAL);
778 	else if (sid_devi && bp->create.sid_branch_create == NULL)
779 		return (EINVAL);
780 	else if (!prom_devi && !sid_devi)
781 		return (EINVAL);
782 
783 	if (flags & DEVI_BRANCH_EVENT)
784 		return (EINVAL);
785 
786 	if (prom_devi) {
787 		struct pta pta = {0};
788 
789 		pta.pdip = pdip;
790 		pta.bp = bp;
791 		pta.flags = flags;
792 
793 		error = prom_tree_access(create_prom_branch, &pta, NULL);
794 
795 		if (dipp)
796 			*dipp = pta.fdip;
797 		else if (pta.fdip)
798 			ndi_rele_devi(pta.fdip);
799 	} else {
800 		error = create_sid_branch(pdip, bp, dipp, flags);
801 	}
802 
803 	return (error);
804 }
805 
806 int
807 e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp, uint_t flags)
808 {
809 	int		circ, rv;
810 	char		*devnm;
811 	dev_info_t	*pdip;
812 
813 	if (dipp)
814 		*dipp = NULL;
815 
816 	if (rdip == NULL || flags != 0 || (flags & DEVI_BRANCH_EVENT))
817 		return (EINVAL);
818 
819 	pdip = ddi_get_parent(rdip);
820 
821 	ndi_devi_enter(pdip, &circ);
822 
823 	if (!e_ddi_branch_held(rdip)) {
824 		ndi_devi_exit(pdip, circ);
825 		cmn_err(CE_WARN, "e_ddi_branch_configure: "
826 		    "dip(%p) not held", (void *)rdip);
827 		return (EINVAL);
828 	}
829 
830 	if (i_ddi_node_state(rdip) < DS_INITIALIZED) {
831 		/*
832 		 * First attempt to bind a driver. If we fail, return
833 		 * success (On some platforms, dips for some device
834 		 * types (CPUs) may not have a driver)
835 		 */
836 		if (ndi_devi_bind_driver(rdip, 0) != NDI_SUCCESS) {
837 			ndi_devi_exit(pdip, circ);
838 			return (0);
839 		}
840 
841 		if (ddi_initchild(pdip, rdip) != DDI_SUCCESS) {
842 			rv = NDI_FAILURE;
843 			goto out;
844 		}
845 	}
846 
847 	ASSERT(i_ddi_node_state(rdip) >= DS_INITIALIZED);
848 
849 	devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
850 
851 	(void) ddi_deviname(rdip, devnm);
852 
853 	if ((rv = ndi_devi_config_one(pdip, devnm+1, &rdip,
854 	    NDI_DEVI_ONLINE | NDI_CONFIG)) == NDI_SUCCESS) {
855 		/* release hold from ndi_devi_config_one() */
856 		ndi_rele_devi(rdip);
857 	}
858 
859 	kmem_free(devnm, MAXNAMELEN + 1);
860 out:
861 	if (rv != NDI_SUCCESS && dipp) {
862 		ndi_hold_devi(rdip);
863 		*dipp = rdip;
864 	}
865 	ndi_devi_exit(pdip, circ);
866 	return (ndi2errno(rv));
867 }
868 
869 void
870 e_ddi_branch_hold(dev_info_t *rdip)
871 {
872 	if (e_ddi_branch_held(rdip)) {
873 		cmn_err(CE_WARN, "e_ddi_branch_hold: branch already held");
874 		return;
875 	}
876 
877 	mutex_enter(&DEVI(rdip)->devi_lock);
878 	if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) == 0) {
879 		DEVI(rdip)->devi_flags |= DEVI_BRANCH_HELD;
880 		DEVI(rdip)->devi_ref++;
881 	}
882 	ASSERT(DEVI(rdip)->devi_ref > 0);
883 	mutex_exit(&DEVI(rdip)->devi_lock);
884 }
885 
886 int
887 e_ddi_branch_held(dev_info_t *rdip)
888 {
889 	int rv = 0;
890 
891 	mutex_enter(&DEVI(rdip)->devi_lock);
892 	if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) &&
893 	    DEVI(rdip)->devi_ref > 0) {
894 		rv = 1;
895 	}
896 	mutex_exit(&DEVI(rdip)->devi_lock);
897 
898 	return (rv);
899 }

/*
 * Drop the branch hold on rdip: clear DEVI_BRANCH_HELD and give up one
 * reference, under the node's devi_lock.  Nothing here checks that the
 * branch was held; e_ddi_branch_unconfigure() makes that check itself
 * before calling.
 */
void
e_ddi_branch_rele(dev_info_t *rdip)
{
	mutex_enter(&DEVI(rdip)->devi_lock);
	DEVI(rdip)->devi_flags &= ~DEVI_BRANCH_HELD;
	DEVI(rdip)->devi_ref--;
	mutex_exit(&DEVI(rdip)->devi_lock);
}
908 
909 int
910 e_ddi_branch_unconfigure(
911 	dev_info_t *rdip,
912 	dev_info_t **dipp,
913 	uint_t flags)
914 {
915 	int	circ, rv;
916 	int	destroy;
917 	char	*devnm;
918 	uint_t	nflags;
919 	dev_info_t *pdip;
920 
921 	if (dipp)
922 		*dipp = NULL;
923 
924 	if (rdip == NULL)
925 		return (EINVAL);
926 
927 	pdip = ddi_get_parent(rdip);
928 
929 	ASSERT(pdip);
930 
931 	/*
932 	 * Check if caller holds pdip busy - can cause deadlocks during
933 	 * devfs_clean()
934 	 */
935 	if (DEVI_BUSY_OWNED(pdip)) {
936 		cmn_err(CE_WARN, "e_ddi_branch_unconfigure: failed: parent"
937 		    " devinfo node(%p) is busy held", (void *)pdip);
938 		return (EINVAL);
939 	}
940 
941 	destroy = (flags & DEVI_BRANCH_DESTROY) ? 1 : 0;
942 
943 	devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
944 
945 	ndi_devi_enter(pdip, &circ);
946 	(void) ddi_deviname(rdip, devnm);
947 	ndi_devi_exit(pdip, circ);
948 
949 	/*
950 	 * ddi_deviname() returns a component name with / prepended.
951 	 */
952 	rv = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
953 	if (rv) {
954 		kmem_free(devnm, MAXNAMELEN + 1);
955 		return (rv);
956 	}
957 
958 	ndi_devi_enter(pdip, &circ);
959 
960 	/*
961 	 * Recreate device name as it may have changed state (init/uninit)
962 	 * when parent busy lock was dropped for devfs_clean()
963 	 */
964 	(void) ddi_deviname(rdip, devnm);
965 
966 	if (!e_ddi_branch_held(rdip)) {
967 		kmem_free(devnm, MAXNAMELEN + 1);
968 		ndi_devi_exit(pdip, circ);
969 		cmn_err(CE_WARN, "e_ddi_%s_branch: dip(%p) not held",
970 		    destroy ? "destroy" : "unconfigure", (void *)rdip);
971 		return (EINVAL);
972 	}
973 
974 	/*
975 	 * Release hold on the branch. This is ok since we are holding the
976 	 * parent busy. If rdip is not removed, we must do a hold on the
977 	 * branch before returning.
978 	 */
979 	e_ddi_branch_rele(rdip);
980 
981 	nflags = NDI_DEVI_OFFLINE;
982 	if (destroy || (flags & DEVI_BRANCH_DESTROY)) {
983 		nflags |= NDI_DEVI_REMOVE;
984 		destroy = 1;
985 	} else {
986 		nflags |= NDI_UNCONFIG;		/* uninit but don't remove */
987 	}
988 
989 	if (flags & DEVI_BRANCH_EVENT)
990 		nflags |= NDI_POST_EVENT;
991 
992 	if (i_ddi_node_state(pdip) == DS_READY &&
993 	    i_ddi_node_state(rdip) >= DS_INITIALIZED) {
994 		rv = ndi_devi_unconfig_one(pdip, devnm+1, dipp, nflags);
995 	} else {
996 		rv = e_ddi_devi_unconfig(rdip, dipp, nflags);
997 		if (rv == NDI_SUCCESS) {
998 			ASSERT(!destroy || ddi_get_child(rdip) == NULL);
999 			rv = ndi_devi_offline(rdip, nflags);
1000 		}
1001 	}
1002 
1003 	if (!destroy || rv != NDI_SUCCESS) {
1004 		/* The dip still exists, so do a hold */
1005 		e_ddi_branch_hold(rdip);
1006 	}
1007 out:
1008 	kmem_free(devnm, MAXNAMELEN + 1);
1009 	ndi_devi_exit(pdip, circ);
1010 	return (ndi2errno(rv));
1011 }
1012 
1013 int
1014 e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp, uint_t flag)
1015 {
1016 	return (e_ddi_branch_unconfigure(rdip, dipp,
1017 	    flag|DEVI_BRANCH_DESTROY));
1018 }
1019 
/*
 * Number of chains for hash table
 */
#define	NUMCHAINS	17

/*
 * Devinfo busy arg
 *
 * State shared by the walkers that e_ddi_branch_referenced() starts.
 * Both hashes are keyed by dip and carry a count as their value.
 */
struct devi_busy {
	int dv_total;		/* sum of counts added by visit_dvnode() */
	int s_total;		/* sum of counts added by visit_snode() */
	mod_hash_t *dv_hash;	/* per dip counts from visit_dvnode() */
	mod_hash_t *s_hash;	/* per dip counts from visit_snode() */
	int (*callback)(dev_info_t *, void *, uint_t);	/* caller's callback */
	void *arg;		/* argument handed to callback */
};
1036 
1037 static int
1038 visit_dip(dev_info_t *dip, void *arg)
1039 {
1040 	uintptr_t sbusy, dvbusy, ref;
1041 	struct devi_busy *bsp = arg;
1042 
1043 	ASSERT(bsp->callback);
1044 
1045 	/*
1046 	 * A dip cannot be busy if its reference count is 0
1047 	 */
1048 	if ((ref = e_ddi_devi_holdcnt(dip)) == 0) {
1049 		return (bsp->callback(dip, bsp->arg, 0));
1050 	}
1051 
1052 	if (mod_hash_find(bsp->dv_hash, dip, (mod_hash_val_t *)&dvbusy))
1053 		dvbusy = 0;
1054 
1055 	/*
1056 	 * To catch device opens currently maintained on specfs common snodes.
1057 	 */
1058 	if (mod_hash_find(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
1059 		sbusy = 0;
1060 
1061 #ifdef	DEBUG
1062 	if (ref < sbusy || ref < dvbusy) {
1063 		cmn_err(CE_WARN, "dip(%p): sopen = %lu, dvopen = %lu "
1064 		    "dip ref = %lu\n", (void *)dip, sbusy, dvbusy, ref);
1065 	}
1066 #endif
1067 
1068 	dvbusy = (sbusy > dvbusy) ? sbusy : dvbusy;
1069 
1070 	return (bsp->callback(dip, bsp->arg, dvbusy));
1071 }
1072 
1073 static int
1074 visit_snode(struct snode *sp, void *arg)
1075 {
1076 	uintptr_t sbusy;
1077 	dev_info_t *dip;
1078 	int count;
1079 	struct devi_busy *bsp = arg;
1080 
1081 	ASSERT(sp);
1082 
1083 	/*
1084 	 * The stable lock is held. This prevents
1085 	 * the snode and its associated dip from
1086 	 * going away.
1087 	 */
1088 	dip = NULL;
1089 	count = spec_devi_open_count(sp, &dip);
1090 
1091 	if (count <= 0)
1092 		return (DDI_WALK_CONTINUE);
1093 
1094 	ASSERT(dip);
1095 
1096 	if (mod_hash_remove(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
1097 		sbusy = count;
1098 	else
1099 		sbusy += count;
1100 
1101 	if (mod_hash_insert(bsp->s_hash, dip, (mod_hash_val_t)sbusy)) {
1102 		cmn_err(CE_WARN, "%s: s_hash insert failed: dip=0x%p, "
1103 		    "sbusy = %lu", "e_ddi_branch_referenced",
1104 		    (void *)dip, sbusy);
1105 	}
1106 
1107 	bsp->s_total += count;
1108 
1109 	return (DDI_WALK_CONTINUE);
1110 }
1111 
1112 static void
1113 visit_dvnode(struct dv_node *dv, void *arg)
1114 {
1115 	uintptr_t dvbusy;
1116 	uint_t count;
1117 	struct vnode *vp;
1118 	struct devi_busy *bsp = arg;
1119 
1120 	ASSERT(dv && dv->dv_devi);
1121 
1122 	vp = DVTOV(dv);
1123 
1124 	mutex_enter(&vp->v_lock);
1125 	count = vp->v_count;
1126 	mutex_exit(&vp->v_lock);
1127 
1128 	if (!count)
1129 		return;
1130 
1131 	if (mod_hash_remove(bsp->dv_hash, dv->dv_devi,
1132 	    (mod_hash_val_t *)&dvbusy))
1133 		dvbusy = count;
1134 	else
1135 		dvbusy += count;
1136 
1137 	if (mod_hash_insert(bsp->dv_hash, dv->dv_devi,
1138 	    (mod_hash_val_t)dvbusy)) {
1139 		cmn_err(CE_WARN, "%s: dv_hash insert failed: dip=0x%p, "
1140 		    "dvbusy=%lu", "e_ddi_branch_referenced",
1141 		    (void *)dv->dv_devi, dvbusy);
1142 	}
1143 
1144 	bsp->dv_total += count;
1145 }
1146 
1147 /*
1148  * Returns reference count on success or -1 on failure.
1149  */
1150 int
1151 e_ddi_branch_referenced(
1152 	dev_info_t *rdip,
1153 	int (*callback)(dev_info_t *dip, void *arg, uint_t ref),
1154 	void *arg)
1155 {
1156 	int circ;
1157 	char *path;
1158 	dev_info_t *pdip;
1159 	struct devi_busy bsa = {0};
1160 
1161 	ASSERT(rdip);
1162 
1163 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1164 
1165 	ndi_hold_devi(rdip);
1166 
1167 	pdip = ddi_get_parent(rdip);
1168 
1169 	ASSERT(pdip);
1170 
1171 	/*
1172 	 * Check if caller holds pdip busy - can cause deadlocks during
1173 	 * devfs_walk()
1174 	 */
1175 	if (!e_ddi_branch_held(rdip) || DEVI_BUSY_OWNED(pdip)) {
1176 		cmn_err(CE_WARN, "e_ddi_branch_referenced: failed: "
1177 		    "devinfo branch(%p) not held or parent busy held",
1178 		    (void *)rdip);
1179 		ndi_rele_devi(rdip);
1180 		kmem_free(path, MAXPATHLEN);
1181 		return (-1);
1182 	}
1183 
1184 	ndi_devi_enter(pdip, &circ);
1185 	(void) ddi_pathname(rdip, path);
1186 	ndi_devi_exit(pdip, circ);
1187 
1188 	bsa.dv_hash = mod_hash_create_ptrhash("dv_node busy hash", NUMCHAINS,
1189 	    mod_hash_null_valdtor, sizeof (struct dev_info));
1190 
1191 	bsa.s_hash = mod_hash_create_ptrhash("snode busy hash", NUMCHAINS,
1192 	    mod_hash_null_valdtor, sizeof (struct snode));
1193 
1194 	if (devfs_walk(path, visit_dvnode, &bsa)) {
1195 		cmn_err(CE_WARN, "e_ddi_branch_referenced: "
1196 		    "devfs walk failed for: %s", path);
1197 		kmem_free(path, MAXPATHLEN);
1198 		bsa.s_total = bsa.dv_total = -1;
1199 		goto out;
1200 	}
1201 
1202 	kmem_free(path, MAXPATHLEN);
1203 
1204 	/*
1205 	 * Walk the snode table to detect device opens, which are currently
1206 	 * maintained on specfs common snodes.
1207 	 */
1208 	spec_snode_walk(visit_snode, &bsa);
1209 
1210 	if (callback == NULL)
1211 		goto out;
1212 
1213 	bsa.callback = callback;
1214 	bsa.arg = arg;
1215 
1216 	if (visit_dip(rdip, &bsa) == DDI_WALK_CONTINUE) {
1217 		ndi_devi_enter(rdip, &circ);
1218 		ddi_walk_devs(ddi_get_child(rdip), visit_dip, &bsa);
1219 		ndi_devi_exit(rdip, circ);
1220 	}
1221 
1222 out:
1223 	ndi_rele_devi(rdip);
1224 	mod_hash_destroy_ptrhash(bsa.s_hash);
1225 	mod_hash_destroy_ptrhash(bsa.dv_hash);
1226 	return (bsa.s_total > bsa.dv_total ? bsa.s_total : bsa.dv_total);
1227 }
1228