xref: /dragonfly/sys/vfs/devfs/devfs_core.c (revision dc71b7ab)
1 /*
2  * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Alex Hornung <ahornung@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/mount.h>
38 #include <sys/vnode.h>
39 #include <sys/types.h>
40 #include <sys/lock.h>
41 #include <sys/msgport.h>
42 #include <sys/sysctl.h>
43 #include <sys/ucred.h>
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/devfs.h>
47 #include <sys/devfs_rules.h>
48 #include <sys/udev.h>
49 
50 #include <sys/msgport2.h>
51 #include <sys/spinlock2.h>
52 #include <sys/mplock2.h>
53 #include <sys/sysref2.h>
54 
55 MALLOC_DEFINE(M_DEVFS, "devfs", "Device File System (devfs) allocations");
56 DEVFS_DECLARE_CLONE_BITMAP(ops_id);
57 /*
58  * SYSREF Integration - reference counting, allocation,
59  * sysid and syslink integration.
60  */
61 static void devfs_cdev_terminate(cdev_t dev);
62 static void devfs_cdev_lock(cdev_t dev);
63 static void devfs_cdev_unlock(cdev_t dev);
64 static struct sysref_class     cdev_sysref_class = {
65 	.name =         "cdev",
66 	.mtype =        M_DEVFS,
67 	.proto =        SYSREF_PROTO_DEV,
68 	.offset =       offsetof(struct cdev, si_sysref),
69 	.objsize =      sizeof(struct cdev),
70 	.nom_cache =	32,
71 	.flags =        0,
72 	.ops =  {
73 		.terminate = (sysref_terminate_func_t)devfs_cdev_terminate,
74 		.lock = (sysref_lock_func_t)devfs_cdev_lock,
75 		.unlock = (sysref_unlock_func_t)devfs_cdev_unlock
76 	}
77 };
78 
79 static struct objcache	*devfs_node_cache;
80 static struct objcache 	*devfs_msg_cache;
81 static struct objcache	*devfs_dev_cache;
82 
83 static struct objcache_malloc_args devfs_node_malloc_args = {
84 	sizeof(struct devfs_node), M_DEVFS };
85 struct objcache_malloc_args devfs_msg_malloc_args = {
86 	sizeof(struct devfs_msg), M_DEVFS };
87 struct objcache_malloc_args devfs_dev_malloc_args = {
88 	sizeof(struct cdev), M_DEVFS };
89 
90 static struct devfs_dev_head devfs_dev_list =
91 		TAILQ_HEAD_INITIALIZER(devfs_dev_list);
92 static struct devfs_mnt_head devfs_mnt_list =
93 		TAILQ_HEAD_INITIALIZER(devfs_mnt_list);
94 static struct devfs_chandler_head devfs_chandler_list =
95 		TAILQ_HEAD_INITIALIZER(devfs_chandler_list);
96 static struct devfs_alias_head devfs_alias_list =
97 		TAILQ_HEAD_INITIALIZER(devfs_alias_list);
98 static struct devfs_dev_ops_head devfs_dev_ops_list =
99 		TAILQ_HEAD_INITIALIZER(devfs_dev_ops_list);
100 
101 struct lock 		devfs_lock;
102 static struct lwkt_port devfs_dispose_port;
103 static struct lwkt_port devfs_msg_port;
104 static struct thread 	*td_core;
105 
106 static struct spinlock  ino_lock;
107 static ino_t 	d_ino;
108 static int	devfs_debug_enable;
109 static int	devfs_run;
110 
111 static ino_t devfs_fetch_ino(void);
112 static int devfs_create_all_dev_worker(struct devfs_node *);
113 static int devfs_create_dev_worker(cdev_t, uid_t, gid_t, int);
114 static int devfs_destroy_dev_worker(cdev_t);
115 static int devfs_destroy_related_worker(cdev_t);
116 static int devfs_destroy_dev_by_ops_worker(struct dev_ops *, int);
117 static int devfs_propagate_dev(cdev_t, int);
118 static int devfs_unlink_dev(cdev_t dev);
119 static void devfs_msg_exec(devfs_msg_t msg);
120 
121 static int devfs_chandler_add_worker(const char *, d_clone_t *);
122 static int devfs_chandler_del_worker(const char *);
123 
124 static void devfs_msg_autofree_reply(lwkt_port_t, lwkt_msg_t);
125 static void devfs_msg_core(void *);
126 
127 static int devfs_find_device_by_name_worker(devfs_msg_t);
128 static int devfs_find_device_by_udev_worker(devfs_msg_t);
129 
130 static int devfs_apply_reset_rules_caller(char *, int);
131 
132 static int devfs_scan_callback_worker(devfs_scan_t *, void *);
133 
134 static struct devfs_node *devfs_resolve_or_create_dir(struct devfs_node *,
135 		char *, size_t, int);
136 
137 static int devfs_make_alias_worker(struct devfs_alias *);
138 static int devfs_destroy_alias_worker(struct devfs_alias *);
139 static int devfs_alias_remove(cdev_t);
140 static int devfs_alias_reap(void);
141 static int devfs_alias_propagate(struct devfs_alias *, int);
142 static int devfs_alias_apply(struct devfs_node *, struct devfs_alias *);
143 static int devfs_alias_check_create(struct devfs_node *);
144 
145 static int devfs_clr_related_flag_worker(cdev_t, uint32_t);
146 static int devfs_destroy_related_without_flag_worker(cdev_t, uint32_t);
147 
148 static void *devfs_reaperp_callback(struct devfs_node *, void *);
149 static void *devfs_gc_dirs_callback(struct devfs_node *, void *);
150 static void *devfs_gc_links_callback(struct devfs_node *, struct devfs_node *);
151 static void *
152 devfs_inode_to_vnode_worker_callback(struct devfs_node *, ino_t *);
153 
154 /*
155  * devfs_debug() is a SYSCTL and TUNABLE controlled debug output function
156  * using kvprintf
157  */
158 int
159 devfs_debug(int level, char *fmt, ...)
160 {
161 	__va_list ap;
162 
163 	__va_start(ap, fmt);
164 	if (level <= devfs_debug_enable)
165 		kvprintf(fmt, ap);
166 	__va_end(ap);
167 
168 	return 0;
169 }
170 
171 /*
172  * devfs_allocp() Allocates a new devfs node with the specified
173  * parameters. The node is also automatically linked into the topology
174  * if a parent is specified. It also calls the rule and alias stuff to
175  * be applied on the new node
176  */
177 struct devfs_node *
178 devfs_allocp(devfs_nodetype devfsnodetype, char *name,
179 	     struct devfs_node *parent, struct mount *mp, cdev_t dev)
180 {
181 	struct devfs_node *node = NULL;
182 	size_t namlen = strlen(name);
183 
184 	node = objcache_get(devfs_node_cache, M_WAITOK);
185 	bzero(node, sizeof(*node));
186 
187 	atomic_add_long(&DEVFS_MNTDATA(mp)->leak_count, 1);
188 
189 	node->d_dev = NULL;
190 	node->nchildren = 1;
191 	node->mp = mp;
192 	node->d_dir.d_ino = devfs_fetch_ino();
193 
194 	/*
195 	 * Cookie jar for children. Leave 0 and 1 for '.' and '..' entries
196 	 * respectively.
197 	 */
198 	node->cookie_jar = 2;
199 
200 	/*
201 	 * Access Control members
202 	 */
203 	node->mode = DEVFS_DEFAULT_MODE;
204 	node->uid = DEVFS_DEFAULT_UID;
205 	node->gid = DEVFS_DEFAULT_GID;
206 
207 	switch (devfsnodetype) {
208 	case Nroot:
209 		/*
210 		 * Ensure that we don't recycle the root vnode by marking it as
211 		 * linked into the topology.
212 		 */
213 		node->flags |= DEVFS_NODE_LINKED;
214 	case Ndir:
215 		TAILQ_INIT(DEVFS_DENODE_HEAD(node));
216 		node->d_dir.d_type = DT_DIR;
217 		node->nchildren = 2;
218 		break;
219 
220 	case Nlink:
221 		node->d_dir.d_type = DT_LNK;
222 		break;
223 
224 	case Nreg:
225 		node->d_dir.d_type = DT_REG;
226 		break;
227 
228 	case Ndev:
229 		if (dev != NULL) {
230 			node->d_dir.d_type = DT_CHR;
231 			node->d_dev = dev;
232 
233 			node->mode = dev->si_perms;
234 			node->uid = dev->si_uid;
235 			node->gid = dev->si_gid;
236 
237 			devfs_alias_check_create(node);
238 		}
239 		break;
240 
241 	default:
242 		panic("devfs_allocp: unknown node type");
243 	}
244 
245 	node->v_node = NULL;
246 	node->node_type = devfsnodetype;
247 
248 	/* Initialize the dirent structure of each devfs vnode */
249 	node->d_dir.d_namlen = namlen;
250 	node->d_dir.d_name = kmalloc(namlen+1, M_DEVFS, M_WAITOK);
251 	memcpy(node->d_dir.d_name, name, namlen);
252 	node->d_dir.d_name[namlen] = '\0';
253 
254 	/* Initialize the parent node element */
255 	node->parent = parent;
256 
257 	/* Initialize *time members */
258 	nanotime(&node->atime);
259 	node->mtime = node->ctime = node->atime;
260 
261 	/*
262 	 * Associate with parent as last step, clean out namecache
263 	 * reference.
264 	 */
265 	if ((parent != NULL) &&
266 	    ((parent->node_type == Nroot) || (parent->node_type == Ndir))) {
267 		parent->nchildren++;
268 		node->cookie = parent->cookie_jar++;
269 		node->flags |= DEVFS_NODE_LINKED;
270 		TAILQ_INSERT_TAIL(DEVFS_DENODE_HEAD(parent), node, link);
271 
272 		/* This forces negative namecache lookups to clear */
273 		++mp->mnt_namecache_gen;
274 	}
275 
276 	/* Apply rules */
277 	devfs_rule_check_apply(node, NULL);
278 
279 	atomic_add_long(&DEVFS_MNTDATA(mp)->file_count, 1);
280 
281 	return node;
282 }
283 
284 /*
285  * devfs_allocv() allocates a new vnode based on a devfs node.
286  */
287 int
288 devfs_allocv(struct vnode **vpp, struct devfs_node *node)
289 {
290 	struct vnode *vp;
291 	int error = 0;
292 
293 	KKASSERT(node);
294 
295 	/*
296 	 * devfs master lock must not be held across a vget() call, we have
297 	 * to hold our ad-hoc vp to avoid a free race from destroying the
298 	 * contents of the structure.  The vget() will interlock recycles
299 	 * for us.
300 	 */
301 try_again:
302 	while ((vp = node->v_node) != NULL) {
303 		vhold(vp);
304 		lockmgr(&devfs_lock, LK_RELEASE);
305 		error = vget(vp, LK_EXCLUSIVE);
306 		vdrop(vp);
307 		lockmgr(&devfs_lock, LK_EXCLUSIVE);
308 		if (error == 0) {
309 			*vpp = vp;
310 			goto out;
311 		}
312 		if (error != ENOENT) {
313 			*vpp = NULL;
314 			goto out;
315 		}
316 	}
317 
318 	/*
319 	 * devfs master lock must not be held across a getnewvnode() call.
320 	 */
321 	lockmgr(&devfs_lock, LK_RELEASE);
322 	if ((error = getnewvnode(VT_DEVFS, node->mp, vpp, 0, 0)) != 0) {
323 		lockmgr(&devfs_lock, LK_EXCLUSIVE);
324 		goto out;
325 	}
326 	lockmgr(&devfs_lock, LK_EXCLUSIVE);
327 
328 	vp = *vpp;
329 
330 	if (node->v_node != NULL) {
331 		vp->v_type = VBAD;
332 		vx_put(vp);
333 		goto try_again;
334 	}
335 
336 	vp->v_data = node;
337 	node->v_node = vp;
338 
339 	switch (node->node_type) {
340 	case Nroot:
341 		vsetflags(vp, VROOT);
342 		/* fall through */
343 	case Ndir:
344 		vp->v_type = VDIR;
345 		break;
346 
347 	case Nlink:
348 		vp->v_type = VLNK;
349 		break;
350 
351 	case Nreg:
352 		vp->v_type = VREG;
353 		break;
354 
355 	case Ndev:
356 		vp->v_type = VCHR;
357 		KKASSERT(node->d_dev);
358 
359 		vp->v_uminor = node->d_dev->si_uminor;
360 		vp->v_umajor = node->d_dev->si_umajor;
361 
362 		v_associate_rdev(vp, node->d_dev);
363 		vp->v_ops = &node->mp->mnt_vn_spec_ops;
364 		break;
365 
366 	default:
367 		panic("devfs_allocv: unknown node type");
368 	}
369 
370 out:
371 	return error;
372 }
373 
374 /*
375  * devfs_allocvp allocates both a devfs node (with the given settings) and a vnode
376  * based on the newly created devfs node.
377  */
378 int
379 devfs_allocvp(struct mount *mp, struct vnode **vpp, devfs_nodetype devfsnodetype,
380 		char *name, struct devfs_node *parent, cdev_t dev)
381 {
382 	struct devfs_node *node;
383 
384 	node = devfs_allocp(devfsnodetype, name, parent, mp, dev);
385 
386 	if (node != NULL)
387 		devfs_allocv(vpp, node);
388 	else
389 		*vpp = NULL;
390 
391 	return 0;
392 }
393 
394 /*
395  * Destroy the devfs_node.  The node must be unlinked from the topology.
396  *
397  * This function will also destroy any vnode association with the node
398  * and device.
399  *
400  * The cdev_t itself remains intact.
401  *
402  * The core lock is not necessarily held on call and must be temporarily
403  * released if it is to avoid a deadlock.
404  */
405 int
406 devfs_freep(struct devfs_node *node)
407 {
408 	struct vnode *vp;
409 	int relock;
410 
411 	KKASSERT(node);
412 	KKASSERT(((node->flags & DEVFS_NODE_LINKED) == 0) ||
413 		 (node->node_type == Nroot));
414 
415 	/*
416 	 * Protect against double frees
417 	 */
418 	KKASSERT((node->flags & DEVFS_DESTROYED) == 0);
419 	node->flags |= DEVFS_DESTROYED;
420 
421 	/*
422 	 * Avoid deadlocks between devfs_lock and the vnode lock when
423 	 * disassociating the vnode (stress2 pty vs ls -la /dev/pts).
424 	 *
425 	 * This also prevents the vnode reclaim code from double-freeing
426 	 * the node.  The vget() is required to safely modified the vp
427 	 * and cycle the refs to terminate an inactive vp.
428 	 */
429 	if (lockstatus(&devfs_lock, curthread) == LK_EXCLUSIVE) {
430 		lockmgr(&devfs_lock, LK_RELEASE);
431 		relock = 1;
432 	} else {
433 		relock = 0;
434 	}
435 
436 	while ((vp = node->v_node) != NULL) {
437 		if (vget(vp, LK_EXCLUSIVE | LK_RETRY) != 0)
438 			break;
439 		v_release_rdev(vp);
440 		vp->v_data = NULL;
441 		node->v_node = NULL;
442 		cache_inval_vp(vp, CINV_DESTROY);
443 		vput(vp);
444 	}
445 
446 	/*
447 	 * Remaining cleanup
448 	 */
449 	atomic_subtract_long(&DEVFS_MNTDATA(node->mp)->leak_count, 1);
450 	if (node->symlink_name)	{
451 		kfree(node->symlink_name, M_DEVFS);
452 		node->symlink_name = NULL;
453 	}
454 
455 	/*
456 	 * Remove the node from the orphan list if it is still on it.
457 	 */
458 	if (node->flags & DEVFS_ORPHANED)
459 		devfs_tracer_del_orphan(node);
460 
461 	if (node->d_dir.d_name) {
462 		kfree(node->d_dir.d_name, M_DEVFS);
463 		node->d_dir.d_name = NULL;
464 	}
465 	atomic_subtract_long(&DEVFS_MNTDATA(node->mp)->file_count, 1);
466 	objcache_put(devfs_node_cache, node);
467 
468 	if (relock)
469 		lockmgr(&devfs_lock, LK_EXCLUSIVE);
470 
471 	return 0;
472 }
473 
474 /*
475  * Unlink the devfs node from the topology and add it to the orphan list.
476  * The node will later be destroyed by freep.
477  *
478  * Any vnode association, including the v_rdev and v_data, remains intact
479  * until the freep.
480  */
481 int
482 devfs_unlinkp(struct devfs_node *node)
483 {
484 	struct devfs_node *parent;
485 	KKASSERT(node);
486 
487 	/*
488 	 * Add the node to the orphan list, so it is referenced somewhere, to
489 	 * so we don't leak it.
490 	 */
491 	devfs_tracer_add_orphan(node);
492 
493 	parent = node->parent;
494 
495 	/*
496 	 * If the parent is known we can unlink the node out of the topology
497 	 */
498 	if (parent)	{
499 		TAILQ_REMOVE(DEVFS_DENODE_HEAD(parent), node, link);
500 		parent->nchildren--;
501 		node->flags &= ~DEVFS_NODE_LINKED;
502 	}
503 
504 	node->parent = NULL;
505 	return 0;
506 }
507 
508 void *
509 devfs_iterate_topology(struct devfs_node *node,
510 		devfs_iterate_callback_t *callback, void *arg1)
511 {
512 	struct devfs_node *node1, *node2;
513 	void *ret = NULL;
514 
515 	if ((node->node_type == Nroot) || (node->node_type == Ndir)) {
516 		if (node->nchildren > 2) {
517 			TAILQ_FOREACH_MUTABLE(node1, DEVFS_DENODE_HEAD(node),
518 							link, node2) {
519 				if ((ret = devfs_iterate_topology(node1, callback, arg1)))
520 					return ret;
521 			}
522 		}
523 	}
524 
525 	ret = callback(node, arg1);
526 	return ret;
527 }
528 
529 /*
530  * devfs_reaperp() is a recursive function that iterates through all the
531  * topology, unlinking and freeing all devfs nodes.
532  */
533 static void *
534 devfs_reaperp_callback(struct devfs_node *node, void *unused)
535 {
536 	devfs_unlinkp(node);
537 	devfs_freep(node);
538 
539 	return NULL;
540 }
541 
542 static void *
543 devfs_gc_dirs_callback(struct devfs_node *node, void *unused)
544 {
545 	if (node->node_type == Ndir) {
546 		if ((node->nchildren == 2) &&
547 		    !(node->flags & DEVFS_USER_CREATED)) {
548 			devfs_unlinkp(node);
549 			devfs_freep(node);
550 		}
551 	}
552 
553 	return NULL;
554 }
555 
556 static void *
557 devfs_gc_links_callback(struct devfs_node *node, struct devfs_node *target)
558 {
559 	if ((node->node_type == Nlink) && (node->link_target == target)) {
560 		devfs_unlinkp(node);
561 		devfs_freep(node);
562 	}
563 
564 	return NULL;
565 }
566 
567 /*
568  * devfs_gc() is devfs garbage collector. It takes care of unlinking and
569  * freeing a node, but also removes empty directories and links that link
570  * via devfs auto-link mechanism to the node being deleted.
571  */
572 int
573 devfs_gc(struct devfs_node *node)
574 {
575 	struct devfs_node *root_node = DEVFS_MNTDATA(node->mp)->root_node;
576 
577 	if (node->nlinks > 0)
578 		devfs_iterate_topology(root_node,
579 				(devfs_iterate_callback_t *)devfs_gc_links_callback, node);
580 
581 	devfs_unlinkp(node);
582 	devfs_iterate_topology(root_node,
583 			(devfs_iterate_callback_t *)devfs_gc_dirs_callback, NULL);
584 
585 	devfs_freep(node);
586 
587 	return 0;
588 }
589 
590 /*
591  * devfs_create_dev() is the asynchronous entry point for device creation.
592  * It just sends a message with the relevant details to the devfs core.
593  *
594  * This function will reference the passed device.  The reference is owned
595  * by devfs and represents all of the device's node associations.
596  */
597 int
598 devfs_create_dev(cdev_t dev, uid_t uid, gid_t gid, int perms)
599 {
600 	reference_dev(dev);
601 	devfs_msg_send_dev(DEVFS_DEVICE_CREATE, dev, uid, gid, perms);
602 
603 	return 0;
604 }
605 
606 /*
607  * devfs_destroy_dev() is the asynchronous entry point for device destruction.
608  * It just sends a message with the relevant details to the devfs core.
609  */
610 int
611 devfs_destroy_dev(cdev_t dev)
612 {
613 	devfs_msg_send_dev(DEVFS_DEVICE_DESTROY, dev, 0, 0, 0);
614 	return 0;
615 }
616 
617 /*
618  * devfs_mount_add() is the synchronous entry point for adding a new devfs
619  * mount.  It sends a synchronous message with the relevant details to the
620  * devfs core.
621  */
622 int
623 devfs_mount_add(struct devfs_mnt_data *mnt)
624 {
625 	devfs_msg_t msg;
626 
627 	msg = devfs_msg_get();
628 	msg->mdv_mnt = mnt;
629 	msg = devfs_msg_send_sync(DEVFS_MOUNT_ADD, msg);
630 	devfs_msg_put(msg);
631 
632 	return 0;
633 }
634 
635 /*
636  * devfs_mount_del() is the synchronous entry point for removing a devfs mount.
637  * It sends a synchronous message with the relevant details to the devfs core.
638  */
639 int
640 devfs_mount_del(struct devfs_mnt_data *mnt)
641 {
642 	devfs_msg_t msg;
643 
644 	msg = devfs_msg_get();
645 	msg->mdv_mnt = mnt;
646 	msg = devfs_msg_send_sync(DEVFS_MOUNT_DEL, msg);
647 	devfs_msg_put(msg);
648 
649 	return 0;
650 }
651 
652 /*
653  * devfs_destroy_related() is the synchronous entry point for device
654  * destruction by subname. It just sends a message with the relevant details to
655  * the devfs core.
656  */
657 int
658 devfs_destroy_related(cdev_t dev)
659 {
660 	devfs_msg_t msg;
661 
662 	msg = devfs_msg_get();
663 	msg->mdv_load = dev;
664 	msg = devfs_msg_send_sync(DEVFS_DESTROY_RELATED, msg);
665 	devfs_msg_put(msg);
666 	return 0;
667 }
668 
669 int
670 devfs_clr_related_flag(cdev_t dev, uint32_t flag)
671 {
672 	devfs_msg_t msg;
673 
674 	msg = devfs_msg_get();
675 	msg->mdv_flags.dev = dev;
676 	msg->mdv_flags.flag = flag;
677 	msg = devfs_msg_send_sync(DEVFS_CLR_RELATED_FLAG, msg);
678 	devfs_msg_put(msg);
679 
680 	return 0;
681 }
682 
683 int
684 devfs_destroy_related_without_flag(cdev_t dev, uint32_t flag)
685 {
686 	devfs_msg_t msg;
687 
688 	msg = devfs_msg_get();
689 	msg->mdv_flags.dev = dev;
690 	msg->mdv_flags.flag = flag;
691 	msg = devfs_msg_send_sync(DEVFS_DESTROY_RELATED_WO_FLAG, msg);
692 	devfs_msg_put(msg);
693 
694 	return 0;
695 }
696 
697 /*
698  * devfs_create_all_dev is the asynchronous entry point to trigger device
699  * node creation.  It just sends a message with the relevant details to
700  * the devfs core.
701  */
702 int
703 devfs_create_all_dev(struct devfs_node *root)
704 {
705 	devfs_msg_send_generic(DEVFS_CREATE_ALL_DEV, root);
706 	return 0;
707 }
708 
709 /*
710  * devfs_destroy_dev_by_ops is the asynchronous entry point to destroy all
711  * devices with a specific set of dev_ops and minor.  It just sends a
712  * message with the relevant details to the devfs core.
713  */
714 int
715 devfs_destroy_dev_by_ops(struct dev_ops *ops, int minor)
716 {
717 	devfs_msg_send_ops(DEVFS_DESTROY_DEV_BY_OPS, ops, minor);
718 	return 0;
719 }
720 
721 /*
722  * devfs_clone_handler_add is the synchronous entry point to add a new
723  * clone handler.  It just sends a message with the relevant details to
724  * the devfs core.
725  */
726 int
727 devfs_clone_handler_add(const char *name, d_clone_t *nhandler)
728 {
729 	devfs_msg_t msg;
730 
731 	msg = devfs_msg_get();
732 	msg->mdv_chandler.name = name;
733 	msg->mdv_chandler.nhandler = nhandler;
734 	msg = devfs_msg_send_sync(DEVFS_CHANDLER_ADD, msg);
735 	devfs_msg_put(msg);
736 	return 0;
737 }
738 
739 /*
740  * devfs_clone_handler_del is the synchronous entry point to remove a
741  * clone handler.  It just sends a message with the relevant details to
742  * the devfs core.
743  */
744 int
745 devfs_clone_handler_del(const char *name)
746 {
747 	devfs_msg_t msg;
748 
749 	msg = devfs_msg_get();
750 	msg->mdv_chandler.name = name;
751 	msg->mdv_chandler.nhandler = NULL;
752 	msg = devfs_msg_send_sync(DEVFS_CHANDLER_DEL, msg);
753 	devfs_msg_put(msg);
754 	return 0;
755 }
756 
757 /*
758  * devfs_find_device_by_name is the synchronous entry point to find a
759  * device given its name.  It sends a synchronous message with the
760  * relevant details to the devfs core and returns the answer.
761  */
762 cdev_t
763 devfs_find_device_by_name(const char *fmt, ...)
764 {
765 	cdev_t found = NULL;
766 	devfs_msg_t msg;
767 	char *target;
768 	__va_list ap;
769 
770 	if (fmt == NULL)
771 		return NULL;
772 
773 	__va_start(ap, fmt);
774 	kvasnrprintf(&target, PATH_MAX, 10, fmt, ap);
775 	__va_end(ap);
776 
777 	msg = devfs_msg_get();
778 	msg->mdv_name = target;
779 	msg = devfs_msg_send_sync(DEVFS_FIND_DEVICE_BY_NAME, msg);
780 	found = msg->mdv_cdev;
781 	devfs_msg_put(msg);
782 	kvasfree(&target);
783 
784 	return found;
785 }
786 
787 /*
788  * devfs_find_device_by_udev is the synchronous entry point to find a
789  * device given its udev number.  It sends a synchronous message with
790  * the relevant details to the devfs core and returns the answer.
791  */
792 cdev_t
793 devfs_find_device_by_udev(udev_t udev)
794 {
795 	cdev_t found = NULL;
796 	devfs_msg_t msg;
797 
798 	msg = devfs_msg_get();
799 	msg->mdv_udev = udev;
800 	msg = devfs_msg_send_sync(DEVFS_FIND_DEVICE_BY_UDEV, msg);
801 	found = msg->mdv_cdev;
802 	devfs_msg_put(msg);
803 
804 	devfs_debug(DEVFS_DEBUG_DEBUG,
805 		    "devfs_find_device_by_udev found? %s  -end:3-\n",
806 		    ((found) ? found->si_name:"NO"));
807 	return found;
808 }
809 
810 struct vnode *
811 devfs_inode_to_vnode(struct mount *mp, ino_t target)
812 {
813 	struct vnode *vp = NULL;
814 	devfs_msg_t msg;
815 
816 	if (mp == NULL)
817 		return NULL;
818 
819 	msg = devfs_msg_get();
820 	msg->mdv_ino.mp = mp;
821 	msg->mdv_ino.ino = target;
822 	msg = devfs_msg_send_sync(DEVFS_INODE_TO_VNODE, msg);
823 	vp = msg->mdv_ino.vp;
824 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
825 	devfs_msg_put(msg);
826 
827 	return vp;
828 }
829 
830 /*
831  * devfs_make_alias is the asynchronous entry point to register an alias
832  * for a device.  It just sends a message with the relevant details to the
833  * devfs core.
834  */
835 int
836 devfs_make_alias(const char *name, cdev_t dev_target)
837 {
838 	struct devfs_alias *alias;
839 	size_t len;
840 
841 	len = strlen(name);
842 
843 	alias = kmalloc(sizeof(struct devfs_alias), M_DEVFS, M_WAITOK);
844 	alias->name = kstrdup(name, M_DEVFS);
845 	alias->namlen = len;
846 	alias->dev_target = dev_target;
847 
848 	devfs_msg_send_generic(DEVFS_MAKE_ALIAS, alias);
849 	return 0;
850 }
851 
852 /*
853  * devfs_destroy_alias is the asynchronous entry point to deregister an alias
854  * for a device.  It just sends a message with the relevant details to the
855  * devfs core.
856  */
857 int
858 devfs_destroy_alias(const char *name, cdev_t dev_target)
859 {
860 	struct devfs_alias *alias;
861 	size_t len;
862 
863 	len = strlen(name);
864 
865 	alias = kmalloc(sizeof(struct devfs_alias), M_DEVFS, M_WAITOK);
866 	alias->name = kstrdup(name, M_DEVFS);
867 	alias->namlen = len;
868 	alias->dev_target = dev_target;
869 
870 	devfs_msg_send_generic(DEVFS_DESTROY_ALIAS, alias);
871 	return 0;
872 }
873 
874 /*
875  * devfs_apply_rules is the asynchronous entry point to trigger application
876  * of all rules.  It just sends a message with the relevant details to the
877  * devfs core.
878  */
879 int
880 devfs_apply_rules(char *mntto)
881 {
882 	char *new_name;
883 
884 	new_name = kstrdup(mntto, M_DEVFS);
885 	devfs_msg_send_name(DEVFS_APPLY_RULES, new_name);
886 
887 	return 0;
888 }
889 
890 /*
891  * devfs_reset_rules is the asynchronous entry point to trigger reset of all
892  * rules. It just sends a message with the relevant details to the devfs core.
893  */
894 int
895 devfs_reset_rules(char *mntto)
896 {
897 	char *new_name;
898 
899 	new_name = kstrdup(mntto, M_DEVFS);
900 	devfs_msg_send_name(DEVFS_RESET_RULES, new_name);
901 
902 	return 0;
903 }
904 
905 
906 /*
907  * devfs_scan_callback is the asynchronous entry point to call a callback
908  * on all cdevs.
909  * It just sends a message with the relevant details to the devfs core.
910  */
911 int
912 devfs_scan_callback(devfs_scan_t *callback, void *arg)
913 {
914 	devfs_msg_t msg;
915 
916 	KKASSERT(callback);
917 
918 	msg = devfs_msg_get();
919 	msg->mdv_load = callback;
920 	msg->mdv_load2 = arg;
921 	msg = devfs_msg_send_sync(DEVFS_SCAN_CALLBACK, msg);
922 	devfs_msg_put(msg);
923 
924 	return 0;
925 }
926 
927 
928 /*
929  * Acts as a message drain. Any message that is replied to here gets destroyed
930  * and the memory freed.
931  */
932 static void
933 devfs_msg_autofree_reply(lwkt_port_t port, lwkt_msg_t msg)
934 {
935 	devfs_msg_put((devfs_msg_t)msg);
936 }
937 
938 /*
939  * devfs_msg_get allocates a new devfs msg and returns it.
940  */
941 devfs_msg_t
942 devfs_msg_get(void)
943 {
944 	return objcache_get(devfs_msg_cache, M_WAITOK);
945 }
946 
947 /*
948  * devfs_msg_put deallocates a given devfs msg.
949  */
950 int
951 devfs_msg_put(devfs_msg_t msg)
952 {
953 	objcache_put(devfs_msg_cache, msg);
954 	return 0;
955 }
956 
957 /*
958  * devfs_msg_send is the generic asynchronous message sending facility
959  * for devfs. By default the reply port is the automatic disposal port.
960  *
961  * If the current thread is the devfs_msg_port thread we execute the
962  * operation synchronously.
963  */
964 void
965 devfs_msg_send(uint32_t cmd, devfs_msg_t devfs_msg)
966 {
967 	lwkt_port_t port = &devfs_msg_port;
968 
969 	lwkt_initmsg(&devfs_msg->hdr, &devfs_dispose_port, 0);
970 
971 	devfs_msg->hdr.u.ms_result = cmd;
972 
973 	if (port->mpu_td == curthread) {
974 		devfs_msg_exec(devfs_msg);
975 		lwkt_replymsg(&devfs_msg->hdr, 0);
976 	} else {
977 		lwkt_sendmsg(port, (lwkt_msg_t)devfs_msg);
978 	}
979 }
980 
981 /*
982  * devfs_msg_send_sync is the generic synchronous message sending
983  * facility for devfs. It initializes a local reply port and waits
984  * for the core's answer. This answer is then returned.
985  */
986 devfs_msg_t
987 devfs_msg_send_sync(uint32_t cmd, devfs_msg_t devfs_msg)
988 {
989 	struct lwkt_port rep_port;
990 	devfs_msg_t	msg_incoming;
991 	lwkt_port_t port = &devfs_msg_port;
992 
993 	lwkt_initport_thread(&rep_port, curthread);
994 	lwkt_initmsg(&devfs_msg->hdr, &rep_port, 0);
995 
996 	devfs_msg->hdr.u.ms_result = cmd;
997 
998 	lwkt_sendmsg(port, (lwkt_msg_t)devfs_msg);
999 	msg_incoming = lwkt_waitport(&rep_port, 0);
1000 
1001 	return msg_incoming;
1002 }
1003 
1004 /*
1005  * sends a message with a generic argument.
1006  */
1007 void
1008 devfs_msg_send_generic(uint32_t cmd, void *load)
1009 {
1010 	devfs_msg_t devfs_msg = devfs_msg_get();
1011 
1012 	devfs_msg->mdv_load = load;
1013 	devfs_msg_send(cmd, devfs_msg);
1014 }
1015 
1016 /*
1017  * sends a message with a name argument.
1018  */
1019 void
1020 devfs_msg_send_name(uint32_t cmd, char *name)
1021 {
1022 	devfs_msg_t devfs_msg = devfs_msg_get();
1023 
1024 	devfs_msg->mdv_name = name;
1025 	devfs_msg_send(cmd, devfs_msg);
1026 }
1027 
1028 /*
1029  * sends a message with a mount argument.
1030  */
1031 void
1032 devfs_msg_send_mount(uint32_t cmd, struct devfs_mnt_data *mnt)
1033 {
1034 	devfs_msg_t devfs_msg = devfs_msg_get();
1035 
1036 	devfs_msg->mdv_mnt = mnt;
1037 	devfs_msg_send(cmd, devfs_msg);
1038 }
1039 
1040 /*
1041  * sends a message with an ops argument.
1042  */
1043 void
1044 devfs_msg_send_ops(uint32_t cmd, struct dev_ops *ops, int minor)
1045 {
1046 	devfs_msg_t devfs_msg = devfs_msg_get();
1047 
1048 	devfs_msg->mdv_ops.ops = ops;
1049 	devfs_msg->mdv_ops.minor = minor;
1050 	devfs_msg_send(cmd, devfs_msg);
1051 }
1052 
1053 /*
1054  * sends a message with a clone handler argument.
1055  */
1056 void
1057 devfs_msg_send_chandler(uint32_t cmd, char *name, d_clone_t handler)
1058 {
1059 	devfs_msg_t devfs_msg = devfs_msg_get();
1060 
1061 	devfs_msg->mdv_chandler.name = name;
1062 	devfs_msg->mdv_chandler.nhandler = handler;
1063 	devfs_msg_send(cmd, devfs_msg);
1064 }
1065 
1066 /*
1067  * sends a message with a device argument.
1068  */
1069 void
1070 devfs_msg_send_dev(uint32_t cmd, cdev_t dev, uid_t uid, gid_t gid, int perms)
1071 {
1072 	devfs_msg_t devfs_msg = devfs_msg_get();
1073 
1074 	devfs_msg->mdv_dev.dev = dev;
1075 	devfs_msg->mdv_dev.uid = uid;
1076 	devfs_msg->mdv_dev.gid = gid;
1077 	devfs_msg->mdv_dev.perms = perms;
1078 
1079 	devfs_msg_send(cmd, devfs_msg);
1080 }
1081 
1082 /*
1083  * sends a message with a link argument.
1084  */
1085 void
1086 devfs_msg_send_link(uint32_t cmd, char *name, char *target, struct mount *mp)
1087 {
1088 	devfs_msg_t devfs_msg = devfs_msg_get();
1089 
1090 	devfs_msg->mdv_link.name = name;
1091 	devfs_msg->mdv_link.target = target;
1092 	devfs_msg->mdv_link.mp = mp;
1093 	devfs_msg_send(cmd, devfs_msg);
1094 }
1095 
1096 /*
1097  * devfs_msg_core is the main devfs thread. It handles all incoming messages
1098  * and calls the relevant worker functions. By using messages it's assured
1099  * that events occur in the correct order.
1100  */
1101 static void
1102 devfs_msg_core(void *arg)
1103 {
1104 	devfs_msg_t msg;
1105 
1106 	lwkt_initport_thread(&devfs_msg_port, curthread);
1107 
1108 	lockmgr(&devfs_lock, LK_EXCLUSIVE);
1109 	devfs_run = 1;
1110 	wakeup(td_core);
1111 	lockmgr(&devfs_lock, LK_RELEASE);
1112 
1113 	get_mplock();	/* mpsafe yet? */
1114 
1115 	while (devfs_run) {
1116 		msg = (devfs_msg_t)lwkt_waitport(&devfs_msg_port, 0);
1117 		devfs_debug(DEVFS_DEBUG_DEBUG,
1118 				"devfs_msg_core, new msg: %x\n",
1119 				(unsigned int)msg->hdr.u.ms_result);
1120 		devfs_msg_exec(msg);
1121 		lwkt_replymsg(&msg->hdr, 0);
1122 	}
1123 
1124 	rel_mplock();
1125 	wakeup(td_core);
1126 
1127 	lwkt_exit();
1128 }
1129 
1130 static void
1131 devfs_msg_exec(devfs_msg_t msg)
1132 {
1133 	struct devfs_mnt_data *mnt;
1134 	struct devfs_node *node;
1135 	cdev_t	dev;
1136 
1137 	/*
1138 	 * Acquire the devfs lock to ensure safety of all called functions
1139 	 */
1140 	lockmgr(&devfs_lock, LK_EXCLUSIVE);
1141 
1142 	switch (msg->hdr.u.ms_result) {
1143 	case DEVFS_DEVICE_CREATE:
1144 		dev = msg->mdv_dev.dev;
1145 		devfs_create_dev_worker(dev,
1146 					msg->mdv_dev.uid,
1147 					msg->mdv_dev.gid,
1148 					msg->mdv_dev.perms);
1149 		break;
1150 	case DEVFS_DEVICE_DESTROY:
1151 		dev = msg->mdv_dev.dev;
1152 		devfs_destroy_dev_worker(dev);
1153 		break;
1154 	case DEVFS_DESTROY_RELATED:
1155 		devfs_destroy_related_worker(msg->mdv_load);
1156 		break;
1157 	case DEVFS_DESTROY_DEV_BY_OPS:
1158 		devfs_destroy_dev_by_ops_worker(msg->mdv_ops.ops,
1159 						msg->mdv_ops.minor);
1160 		break;
1161 	case DEVFS_CREATE_ALL_DEV:
1162 		node = (struct devfs_node *)msg->mdv_load;
1163 		devfs_create_all_dev_worker(node);
1164 		break;
1165 	case DEVFS_MOUNT_ADD:
1166 		mnt = msg->mdv_mnt;
1167 		TAILQ_INSERT_TAIL(&devfs_mnt_list, mnt, link);
1168 		devfs_create_all_dev_worker(mnt->root_node);
1169 		break;
1170 	case DEVFS_MOUNT_DEL:
1171 		mnt = msg->mdv_mnt;
1172 		TAILQ_REMOVE(&devfs_mnt_list, mnt, link);
1173 		devfs_iterate_topology(mnt->root_node, devfs_reaperp_callback,
1174 				       NULL);
1175 		if (mnt->leak_count) {
1176 			devfs_debug(DEVFS_DEBUG_SHOW,
1177 				    "Leaked %ld devfs_node elements!\n",
1178 				    mnt->leak_count);
1179 		}
1180 		break;
1181 	case DEVFS_CHANDLER_ADD:
1182 		devfs_chandler_add_worker(msg->mdv_chandler.name,
1183 				msg->mdv_chandler.nhandler);
1184 		break;
1185 	case DEVFS_CHANDLER_DEL:
1186 		devfs_chandler_del_worker(msg->mdv_chandler.name);
1187 		break;
1188 	case DEVFS_FIND_DEVICE_BY_NAME:
1189 		devfs_find_device_by_name_worker(msg);
1190 		break;
1191 	case DEVFS_FIND_DEVICE_BY_UDEV:
1192 		devfs_find_device_by_udev_worker(msg);
1193 		break;
1194 	case DEVFS_MAKE_ALIAS:
1195 		devfs_make_alias_worker((struct devfs_alias *)msg->mdv_load);
1196 		break;
1197 	case DEVFS_DESTROY_ALIAS:
1198 		devfs_destroy_alias_worker((struct devfs_alias *)msg->mdv_load);
1199 		break;
1200 	case DEVFS_APPLY_RULES:
1201 		devfs_apply_reset_rules_caller(msg->mdv_name, 1);
1202 		break;
1203 	case DEVFS_RESET_RULES:
1204 		devfs_apply_reset_rules_caller(msg->mdv_name, 0);
1205 		break;
1206 	case DEVFS_SCAN_CALLBACK:
1207 		devfs_scan_callback_worker((devfs_scan_t *)msg->mdv_load,
1208 			msg->mdv_load2);
1209 		break;
1210 	case DEVFS_CLR_RELATED_FLAG:
1211 		devfs_clr_related_flag_worker(msg->mdv_flags.dev,
1212 				msg->mdv_flags.flag);
1213 		break;
1214 	case DEVFS_DESTROY_RELATED_WO_FLAG:
1215 		devfs_destroy_related_without_flag_worker(msg->mdv_flags.dev,
1216 				msg->mdv_flags.flag);
1217 		break;
1218 	case DEVFS_INODE_TO_VNODE:
1219 		msg->mdv_ino.vp = devfs_iterate_topology(
1220 			DEVFS_MNTDATA(msg->mdv_ino.mp)->root_node,
1221 			(devfs_iterate_callback_t *)devfs_inode_to_vnode_worker_callback,
1222 			&msg->mdv_ino.ino);
1223 		break;
1224 	case DEVFS_TERMINATE_CORE:
1225 		devfs_run = 0;
1226 		break;
1227 	case DEVFS_SYNC:
1228 		break;
1229 	default:
1230 		devfs_debug(DEVFS_DEBUG_WARNING,
1231 			    "devfs_msg_core: unknown message "
1232 			    "received at core\n");
1233 		break;
1234 	}
1235 	lockmgr(&devfs_lock, LK_RELEASE);
1236 }
1237 
1238 /*
1239  * Worker function to insert a new dev into the dev list and initialize its
1240  * permissions. It also calls devfs_propagate_dev which in turn propagates
1241  * the change to all mount points.
1242  *
1243  * The passed dev is already referenced.  This reference is eaten by this
1244  * function and represents the dev's linkage into devfs_dev_list.
1245  */
1246 static int
1247 devfs_create_dev_worker(cdev_t dev, uid_t uid, gid_t gid, int perms)
1248 {
1249 	KKASSERT(dev);
1250 
1251 	dev->si_uid = uid;
1252 	dev->si_gid = gid;
1253 	dev->si_perms = perms;
1254 
1255 	devfs_link_dev(dev);
1256 	devfs_propagate_dev(dev, 1);
1257 
1258 	udev_event_attach(dev, NULL, 0);
1259 
1260 	return 0;
1261 }
1262 
1263 /*
1264  * Worker function to delete a dev from the dev list and free the cdev.
1265  * It also calls devfs_propagate_dev which in turn propagates the change
1266  * to all mount points.
1267  */
1268 static int
1269 devfs_destroy_dev_worker(cdev_t dev)
1270 {
1271 	int error;
1272 
1273 	KKASSERT(dev);
1274 	KKASSERT((lockstatus(&devfs_lock, curthread)) == LK_EXCLUSIVE);
1275 
1276 	error = devfs_unlink_dev(dev);
1277 	devfs_propagate_dev(dev, 0);
1278 
1279 	udev_event_detach(dev, NULL, 0);
1280 
1281 	if (error == 0)
1282 		release_dev(dev);	/* link ref */
1283 	release_dev(dev);
1284 	release_dev(dev);
1285 
1286 	return 0;
1287 }
1288 
1289 /*
1290  * Worker function to destroy all devices with a certain basename.
1291  * Calls devfs_destroy_dev_worker for the actual destruction.
1292  */
1293 static int
1294 devfs_destroy_related_worker(cdev_t needle)
1295 {
1296 	cdev_t dev;
1297 
1298 restart:
1299 	devfs_debug(DEVFS_DEBUG_DEBUG, "related worker: %s\n",
1300 	    needle->si_name);
1301 	TAILQ_FOREACH(dev, &devfs_dev_list, link) {
1302 		if (dev->si_parent == needle) {
1303 			devfs_destroy_related_worker(dev);
1304 			devfs_destroy_dev_worker(dev);
1305 			goto restart;
1306 		}
1307 	}
1308 	return 0;
1309 }
1310 
1311 static int
1312 devfs_clr_related_flag_worker(cdev_t needle, uint32_t flag)
1313 {
1314 	cdev_t dev, dev1;
1315 
1316 	TAILQ_FOREACH_MUTABLE(dev, &devfs_dev_list, link, dev1) {
1317 		if (dev->si_parent == needle) {
1318 			devfs_clr_related_flag_worker(dev, flag);
1319 			dev->si_flags &= ~flag;
1320 		}
1321 	}
1322 
1323 	return 0;
1324 }
1325 
1326 static int
1327 devfs_destroy_related_without_flag_worker(cdev_t needle, uint32_t flag)
1328 {
1329 	cdev_t dev;
1330 
1331 restart:
1332 	devfs_debug(DEVFS_DEBUG_DEBUG, "related_wo_flag: %s\n",
1333 	    needle->si_name);
1334 
1335 	TAILQ_FOREACH(dev, &devfs_dev_list, link) {
1336 		if (dev->si_parent == needle) {
1337 			devfs_destroy_related_without_flag_worker(dev, flag);
1338 			if (!(dev->si_flags & flag)) {
1339 				devfs_destroy_dev_worker(dev);
1340 				devfs_debug(DEVFS_DEBUG_DEBUG,
1341 				    "related_wo_flag: %s restart\n", dev->si_name);
1342 				goto restart;
1343 			}
1344 		}
1345 	}
1346 
1347 	return 0;
1348 }
1349 
1350 /*
1351  * Worker function that creates all device nodes on top of a devfs
1352  * root node.
1353  */
1354 static int
1355 devfs_create_all_dev_worker(struct devfs_node *root)
1356 {
1357 	cdev_t dev;
1358 
1359 	KKASSERT(root);
1360 
1361 	TAILQ_FOREACH(dev, &devfs_dev_list, link) {
1362 		devfs_create_device_node(root, dev, NULL, NULL);
1363 	}
1364 
1365 	return 0;
1366 }
1367 
1368 /*
1369  * Worker function that destroys all devices that match a specific
1370  * dev_ops and/or minor. If minor is less than 0, it is not matched
1371  * against. It also propagates all changes.
1372  */
1373 static int
1374 devfs_destroy_dev_by_ops_worker(struct dev_ops *ops, int minor)
1375 {
1376 	cdev_t dev, dev1;
1377 
1378 	KKASSERT(ops);
1379 
1380 	TAILQ_FOREACH_MUTABLE(dev, &devfs_dev_list, link, dev1) {
1381 		if (dev->si_ops != ops)
1382 			continue;
1383 		if ((minor < 0) || (dev->si_uminor == minor)) {
1384 			devfs_destroy_dev_worker(dev);
1385 		}
1386 	}
1387 
1388 	return 0;
1389 }
1390 
1391 /*
1392  * Worker function that registers a new clone handler in devfs.
1393  */
1394 static int
1395 devfs_chandler_add_worker(const char *name, d_clone_t *nhandler)
1396 {
1397 	struct devfs_clone_handler *chandler = NULL;
1398 	u_char len = strlen(name);
1399 
1400 	if (len == 0)
1401 		return 1;
1402 
1403 	TAILQ_FOREACH(chandler, &devfs_chandler_list, link) {
1404 		if (chandler->namlen != len)
1405 			continue;
1406 
1407 		if (!memcmp(chandler->name, name, len)) {
1408 			/* Clonable basename already exists */
1409 			return 1;
1410 		}
1411 	}
1412 
1413 	chandler = kmalloc(sizeof(*chandler), M_DEVFS, M_WAITOK | M_ZERO);
1414 	chandler->name = kstrdup(name, M_DEVFS);
1415 	chandler->namlen = len;
1416 	chandler->nhandler = nhandler;
1417 
1418 	TAILQ_INSERT_TAIL(&devfs_chandler_list, chandler, link);
1419 	return 0;
1420 }
1421 
1422 /*
1423  * Worker function that removes a given clone handler from the
1424  * clone handler list.
1425  */
1426 static int
1427 devfs_chandler_del_worker(const char *name)
1428 {
1429 	struct devfs_clone_handler *chandler, *chandler2;
1430 	u_char len = strlen(name);
1431 
1432 	if (len == 0)
1433 		return 1;
1434 
1435 	TAILQ_FOREACH_MUTABLE(chandler, &devfs_chandler_list, link, chandler2) {
1436 		if (chandler->namlen != len)
1437 			continue;
1438 		if (memcmp(chandler->name, name, len))
1439 			continue;
1440 
1441 		TAILQ_REMOVE(&devfs_chandler_list, chandler, link);
1442 		kfree(chandler->name, M_DEVFS);
1443 		kfree(chandler, M_DEVFS);
1444 		break;
1445 	}
1446 
1447 	return 0;
1448 }
1449 
1450 /*
1451  * Worker function that finds a given device name and changes
1452  * the message received accordingly so that when replied to,
1453  * the answer is returned to the caller.
1454  */
1455 static int
1456 devfs_find_device_by_name_worker(devfs_msg_t devfs_msg)
1457 {
1458 	struct devfs_alias *alias;
1459 	cdev_t dev;
1460 	cdev_t found = NULL;
1461 
1462 	TAILQ_FOREACH(dev, &devfs_dev_list, link) {
1463 		if (strcmp(devfs_msg->mdv_name, dev->si_name) == 0) {
1464 			found = dev;
1465 			break;
1466 		}
1467 	}
1468 	if (found == NULL) {
1469 		TAILQ_FOREACH(alias, &devfs_alias_list, link) {
1470 			if (strcmp(devfs_msg->mdv_name, alias->name) == 0) {
1471 				found = alias->dev_target;
1472 				break;
1473 			}
1474 		}
1475 	}
1476 	devfs_msg->mdv_cdev = found;
1477 
1478 	return 0;
1479 }
1480 
1481 /*
1482  * Worker function that finds a given device udev and changes
1483  * the message received accordingly so that when replied to,
1484  * the answer is returned to the caller.
1485  */
1486 static int
1487 devfs_find_device_by_udev_worker(devfs_msg_t devfs_msg)
1488 {
1489 	cdev_t dev, dev1;
1490 	cdev_t found = NULL;
1491 
1492 	TAILQ_FOREACH_MUTABLE(dev, &devfs_dev_list, link, dev1) {
1493 		if (((udev_t)dev->si_inode) == devfs_msg->mdv_udev) {
1494 			found = dev;
1495 			break;
1496 		}
1497 	}
1498 	devfs_msg->mdv_cdev = found;
1499 
1500 	return 0;
1501 }
1502 
1503 /*
1504  * Worker function that inserts a given alias into the
1505  * alias list, and propagates the alias to all mount
1506  * points.
1507  */
1508 static int
1509 devfs_make_alias_worker(struct devfs_alias *alias)
1510 {
1511 	struct devfs_alias *alias2;
1512 	size_t len = strlen(alias->name);
1513 	int found = 0;
1514 
1515 	TAILQ_FOREACH(alias2, &devfs_alias_list, link) {
1516 		if (len != alias2->namlen)
1517 			continue;
1518 
1519 		if (!memcmp(alias->name, alias2->name, len)) {
1520 			found = 1;
1521 			break;
1522 		}
1523 	}
1524 
1525 	if (!found) {
1526 		/*
1527 		 * The alias doesn't exist yet, so we add it to the alias list
1528 		 */
1529 		TAILQ_INSERT_TAIL(&devfs_alias_list, alias, link);
1530 		devfs_alias_propagate(alias, 0);
1531 		udev_event_attach(alias->dev_target, alias->name, 1);
1532 	} else {
1533 		devfs_debug(DEVFS_DEBUG_WARNING,
1534 			    "Warning: duplicate devfs_make_alias for %s\n",
1535 			    alias->name);
1536 		kfree(alias->name, M_DEVFS);
1537 		kfree(alias, M_DEVFS);
1538 	}
1539 
1540 	return 0;
1541 }
1542 
1543 /*
1544  * Worker function that delete a given alias from the
1545  * alias list, and propagates the removal to all mount
1546  * points.
1547  */
1548 static int
1549 devfs_destroy_alias_worker(struct devfs_alias *alias)
1550 {
1551 	struct devfs_alias *alias2;
1552 	int found = 0;
1553 
1554 	TAILQ_FOREACH(alias2, &devfs_alias_list, link) {
1555 		if (alias->dev_target != alias2->dev_target)
1556 			continue;
1557 
1558 		if (devfs_WildCmp(alias->name, alias2->name) == 0) {
1559 			found = 1;
1560 			break;
1561 		}
1562 	}
1563 
1564 	if (!found) {
1565 		devfs_debug(DEVFS_DEBUG_WARNING,
1566 		    "Warning: devfs_destroy_alias for inexistant alias: %s\n",
1567 		    alias->name);
1568 		kfree(alias->name, M_DEVFS);
1569 		kfree(alias, M_DEVFS);
1570 	} else {
1571 		/*
1572 		 * The alias exists, so we delete it from the alias list
1573 		 */
1574 		TAILQ_REMOVE(&devfs_alias_list, alias2, link);
1575 		devfs_alias_propagate(alias2, 1);
1576 		udev_event_detach(alias2->dev_target, alias2->name, 1);
1577 		kfree(alias->name, M_DEVFS);
1578 		kfree(alias, M_DEVFS);
1579 		kfree(alias2->name, M_DEVFS);
1580 		kfree(alias2, M_DEVFS);
1581 	}
1582 
1583 	return 0;
1584 }
1585 
1586 /*
1587  * Function that removes and frees all aliases.
1588  */
1589 static int
1590 devfs_alias_reap(void)
1591 {
1592 	struct devfs_alias *alias, *alias2;
1593 
1594 	TAILQ_FOREACH_MUTABLE(alias, &devfs_alias_list, link, alias2) {
1595 		TAILQ_REMOVE(&devfs_alias_list, alias, link);
1596 		kfree(alias->name, M_DEVFS);
1597 		kfree(alias, M_DEVFS);
1598 	}
1599 	return 0;
1600 }
1601 
1602 /*
1603  * Function that removes an alias matching a specific cdev and frees
1604  * it accordingly.
1605  */
1606 static int
1607 devfs_alias_remove(cdev_t dev)
1608 {
1609 	struct devfs_alias *alias, *alias2;
1610 
1611 	TAILQ_FOREACH_MUTABLE(alias, &devfs_alias_list, link, alias2) {
1612 		if (alias->dev_target == dev) {
1613 			TAILQ_REMOVE(&devfs_alias_list, alias, link);
1614 			udev_event_detach(alias->dev_target, alias->name, 1);
1615 			kfree(alias->name, M_DEVFS);
1616 			kfree(alias, M_DEVFS);
1617 		}
1618 	}
1619 	return 0;
1620 }
1621 
1622 /*
1623  * This function propagates an alias addition or removal to
1624  * all mount points.
1625  */
1626 static int
1627 devfs_alias_propagate(struct devfs_alias *alias, int remove)
1628 {
1629 	struct devfs_mnt_data *mnt;
1630 
1631 	TAILQ_FOREACH(mnt, &devfs_mnt_list, link) {
1632 		if (remove) {
1633 			devfs_destroy_node(mnt->root_node, alias->name);
1634 		} else {
1635 			devfs_alias_apply(mnt->root_node, alias);
1636 		}
1637 	}
1638 	return 0;
1639 }
1640 
1641 /*
1642  * This function is a recursive function iterating through
1643  * all device nodes in the topology and, if applicable,
1644  * creating the relevant alias for a device node.
1645  */
1646 static int
1647 devfs_alias_apply(struct devfs_node *node, struct devfs_alias *alias)
1648 {
1649 	struct devfs_node *node1, *node2;
1650 
1651 	KKASSERT(alias != NULL);
1652 
1653 	if ((node->node_type == Nroot) || (node->node_type == Ndir)) {
1654 		if (node->nchildren > 2) {
1655 			TAILQ_FOREACH_MUTABLE(node1, DEVFS_DENODE_HEAD(node), link, node2) {
1656 				devfs_alias_apply(node1, alias);
1657 			}
1658 		}
1659 	} else {
1660 		if (node->d_dev == alias->dev_target)
1661 			devfs_alias_create(alias->name, node, 0);
1662 	}
1663 	return 0;
1664 }
1665 
1666 /*
1667  * This function checks if any alias possibly is applicable
1668  * to the given node. If so, the alias is created.
1669  */
1670 static int
1671 devfs_alias_check_create(struct devfs_node *node)
1672 {
1673 	struct devfs_alias *alias;
1674 
1675 	TAILQ_FOREACH(alias, &devfs_alias_list, link) {
1676 		if (node->d_dev == alias->dev_target)
1677 			devfs_alias_create(alias->name, node, 0);
1678 	}
1679 	return 0;
1680 }
1681 
1682 /*
1683  * This function creates an alias with a given name
1684  * linking to a given devfs node. It also increments
1685  * the link count on the target node.
1686  */
1687 int
1688 devfs_alias_create(char *name_orig, struct devfs_node *target, int rule_based)
1689 {
1690 	struct mount *mp = target->mp;
1691 	struct devfs_node *parent = DEVFS_MNTDATA(mp)->root_node;
1692 	struct devfs_node *linknode;
1693 	char *create_path = NULL;
1694 	char *name;
1695 	char *name_buf;
1696 	int result = 0;
1697 
1698 	KKASSERT((lockstatus(&devfs_lock, curthread)) == LK_EXCLUSIVE);
1699 
1700 	name_buf = kmalloc(PATH_MAX, M_TEMP, M_WAITOK);
1701 	devfs_resolve_name_path(name_orig, name_buf, &create_path, &name);
1702 
1703 	if (create_path)
1704 		parent = devfs_resolve_or_create_path(parent, create_path, 1);
1705 
1706 
1707 	if (devfs_find_device_node_by_name(parent, name)) {
1708 		devfs_debug(DEVFS_DEBUG_WARNING,
1709 			    "Node already exists: %s "
1710 			    "(devfs_make_alias_worker)!\n",
1711 			    name);
1712 		result = 1;
1713 		goto done;
1714 	}
1715 
1716 	linknode = devfs_allocp(Nlink, name, parent, mp, NULL);
1717 	if (linknode == NULL) {
1718 		result = 1;
1719 		goto done;
1720 	}
1721 
1722 	linknode->link_target = target;
1723 	target->nlinks++;
1724 
1725 	if (rule_based)
1726 		linknode->flags |= DEVFS_RULE_CREATED;
1727 
1728 done:
1729 	kfree(name_buf, M_TEMP);
1730 	return (result);
1731 }
1732 
1733 /*
1734  * This function is called by the core and handles mount point
1735  * strings. It either calls the relevant worker (devfs_apply_
1736  * reset_rules_worker) on all mountpoints or only a specific
1737  * one.
1738  */
1739 static int
1740 devfs_apply_reset_rules_caller(char *mountto, int apply)
1741 {
1742 	struct devfs_mnt_data *mnt;
1743 
1744 	if (mountto[0] == '*') {
1745 		TAILQ_FOREACH(mnt, &devfs_mnt_list, link) {
1746 			devfs_iterate_topology(mnt->root_node,
1747 					(apply)?(devfs_rule_check_apply):(devfs_rule_reset_node),
1748 					NULL);
1749 		}
1750 	} else {
1751 		TAILQ_FOREACH(mnt, &devfs_mnt_list, link) {
1752 			if (!strcmp(mnt->mp->mnt_stat.f_mntonname, mountto)) {
1753 				devfs_iterate_topology(mnt->root_node,
1754 					(apply)?(devfs_rule_check_apply):(devfs_rule_reset_node),
1755 					NULL);
1756 				break;
1757 			}
1758 		}
1759 	}
1760 
1761 	kfree(mountto, M_DEVFS);
1762 	return 0;
1763 }
1764 
1765 /*
1766  * This function calls a given callback function for
1767  * every dev node in the devfs dev list.
1768  */
1769 static int
1770 devfs_scan_callback_worker(devfs_scan_t *callback, void *arg)
1771 {
1772 	cdev_t dev, dev1;
1773 	struct devfs_alias *alias, *alias1;
1774 
1775 	TAILQ_FOREACH_MUTABLE(dev, &devfs_dev_list, link, dev1) {
1776 		callback(dev->si_name, dev, false, arg);
1777 	}
1778 	TAILQ_FOREACH_MUTABLE(alias, &devfs_alias_list, link, alias1) {
1779 		callback(alias->name, alias->dev_target, true, arg);
1780 	}
1781 
1782 	return 0;
1783 }
1784 
1785 /*
1786  * This function tries to resolve a given directory, or if not
1787  * found and creation requested, creates the given directory.
1788  */
1789 static struct devfs_node *
1790 devfs_resolve_or_create_dir(struct devfs_node *parent, char *dir_name,
1791 			    size_t name_len, int create)
1792 {
1793 	struct devfs_node *node, *found = NULL;
1794 
1795 	TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(parent), link) {
1796 		if (name_len != node->d_dir.d_namlen)
1797 			continue;
1798 
1799 		if (!memcmp(dir_name, node->d_dir.d_name, name_len)) {
1800 			found = node;
1801 			break;
1802 		}
1803 	}
1804 
1805 	if ((found == NULL) && (create)) {
1806 		found = devfs_allocp(Ndir, dir_name, parent, parent->mp, NULL);
1807 	}
1808 
1809 	return found;
1810 }
1811 
1812 /*
1813  * This function tries to resolve a complete path. If creation is requested,
1814  * if a given part of the path cannot be resolved (because it doesn't exist),
1815  * it is created.
1816  */
1817 struct devfs_node *
1818 devfs_resolve_or_create_path(struct devfs_node *parent, char *path, int create)
1819 {
1820 	struct devfs_node *node = parent;
1821 	char *buf;
1822 	size_t idx = 0;
1823 
1824 	if (path == NULL)
1825 		return parent;
1826 
1827 	buf = kmalloc(PATH_MAX, M_TEMP, M_WAITOK);
1828 
1829 	while (*path && idx < PATH_MAX - 1) {
1830 		if (*path != '/') {
1831 			buf[idx++] = *path;
1832 		} else {
1833 			buf[idx] = '\0';
1834 			node = devfs_resolve_or_create_dir(node, buf, idx, create);
1835 			if (node == NULL) {
1836 				kfree(buf, M_TEMP);
1837 				return NULL;
1838 			}
1839 			idx = 0;
1840 		}
1841 		++path;
1842 	}
1843 	buf[idx] = '\0';
1844 	node = devfs_resolve_or_create_dir(node, buf, idx, create);
1845 	kfree (buf, M_TEMP);
1846 	return (node);
1847 }
1848 
/*
 * Takes a full path and splits it into a directory path and a name.
 * For a/b/c/foo, it returns foo in *namep and a/b/c in *pathp.  If the
 * path contains no '/', *pathp is set to NULL and *namep to the whole
 * string.
 *
 * fullpath is copied into buf (including the terminating NUL) and the
 * last '/' of the copy is overwritten with a NUL; both returned pointers
 * point into buf.  buf must therefore be large enough to hold the whole
 * fullpath, and must outlive the use of *pathp and *namep.
 *
 * Always returns 0.
 */
int
devfs_resolve_name_path(char *fullpath, char *buf, char **pathp, char **namep)
{
	char *name = NULL;
	char *path = NULL;
	size_t len;
	size_t i;

	/* Validate the arguments before anything dereferences them. */
	KKASSERT((fullpath != NULL) && (buf != NULL));
	KKASSERT((pathp != NULL) && (namep != NULL));

	len = strlen(fullpath) + 1;
	memcpy(buf, fullpath, len);

	/* Scan backwards (from the terminating NUL) for the last '/'. */
	for (i = len; i-- > 0; ) {
		if (buf[i] == '/') {
			buf[i] = '\0';
			name = &(buf[i+1]);
			path = buf;
			break;
		}
	}

	*pathp = path;
	*namep = (name != NULL) ? name : buf;

	return 0;
}
1887 
1888 /*
1889  * This function creates a new devfs node for a given device.  It can
1890  * handle a complete path as device name, and accordingly creates
1891  * the path and the final device node.
1892  *
1893  * The reference count on the passed dev remains unchanged.
1894  */
1895 struct devfs_node *
1896 devfs_create_device_node(struct devfs_node *root, cdev_t dev,
1897 			 char *dev_name, char *path_fmt, ...)
1898 {
1899 	struct devfs_node *parent, *node = NULL;
1900 	char *path = NULL;
1901 	char *name;
1902 	char *name_buf;
1903 	__va_list ap;
1904 	int i, found;
1905 	char *create_path = NULL;
1906 	char *names = "pqrsPQRS";
1907 
1908 	name_buf = kmalloc(PATH_MAX, M_TEMP, M_WAITOK);
1909 
1910 	if (path_fmt != NULL) {
1911 		__va_start(ap, path_fmt);
1912 		kvasnrprintf(&path, PATH_MAX, 10, path_fmt, ap);
1913 		__va_end(ap);
1914 	}
1915 
1916 	parent = devfs_resolve_or_create_path(root, path, 1);
1917 	KKASSERT(parent);
1918 
1919 	devfs_resolve_name_path(
1920 			((dev_name == NULL) && (dev))?(dev->si_name):(dev_name),
1921 			name_buf, &create_path, &name);
1922 
1923 	if (create_path)
1924 		parent = devfs_resolve_or_create_path(parent, create_path, 1);
1925 
1926 
1927 	if (devfs_find_device_node_by_name(parent, name)) {
1928 		devfs_debug(DEVFS_DEBUG_WARNING, "devfs_create_device_node: "
1929 			"DEVICE %s ALREADY EXISTS!!! Ignoring creation request.\n", name);
1930 		goto out;
1931 	}
1932 
1933 	node = devfs_allocp(Ndev, name, parent, parent->mp, dev);
1934 	nanotime(&parent->mtime);
1935 
1936 	/*
1937 	 * Ugly unix98 pty magic, to hide pty master (ptm) devices and their
1938 	 * directory
1939 	 */
1940 	if ((dev) && (strlen(dev->si_name) >= 4) &&
1941 			(!memcmp(dev->si_name, "ptm/", 4))) {
1942 		node->parent->flags |= DEVFS_HIDDEN;
1943 		node->flags |= DEVFS_HIDDEN;
1944 	}
1945 
1946 	/*
1947 	 * Ugly pty magic, to tag pty devices as such and hide them if needed.
1948 	 */
1949 	if ((strlen(name) >= 3) && (!memcmp(name, "pty", 3)))
1950 		node->flags |= (DEVFS_PTY | DEVFS_INVISIBLE);
1951 
1952 	if ((strlen(name) >= 3) && (!memcmp(name, "tty", 3))) {
1953 		found = 0;
1954 		for (i = 0; i < strlen(names); i++) {
1955 			if (name[3] == names[i]) {
1956 				found = 1;
1957 				break;
1958 			}
1959 		}
1960 		if (found)
1961 			node->flags |= (DEVFS_PTY | DEVFS_INVISIBLE);
1962 	}
1963 
1964 out:
1965 	kfree(name_buf, M_TEMP);
1966 	kvasfree(&path);
1967 	return node;
1968 }
1969 
1970 /*
1971  * This function finds a given device node in the topology with a given
1972  * cdev.
1973  */
1974 void *
1975 devfs_find_device_node_callback(struct devfs_node *node, cdev_t target)
1976 {
1977 	if ((node->node_type == Ndev) && (node->d_dev == target)) {
1978 		return node;
1979 	}
1980 
1981 	return NULL;
1982 }
1983 
1984 /*
1985  * This function finds a device node in the given parent directory by its
1986  * name and returns it.
1987  */
1988 struct devfs_node *
1989 devfs_find_device_node_by_name(struct devfs_node *parent, char *target)
1990 {
1991 	struct devfs_node *node, *found = NULL;
1992 	size_t len = strlen(target);
1993 
1994 	TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(parent), link) {
1995 		if (len != node->d_dir.d_namlen)
1996 			continue;
1997 
1998 		if (!memcmp(node->d_dir.d_name, target, len)) {
1999 			found = node;
2000 			break;
2001 		}
2002 	}
2003 
2004 	return found;
2005 }
2006 
2007 static void *
2008 devfs_inode_to_vnode_worker_callback(struct devfs_node *node, ino_t *inop)
2009 {
2010 	struct vnode *vp = NULL;
2011 	ino_t target = *inop;
2012 
2013 	if (node->d_dir.d_ino == target) {
2014 		if (node->v_node) {
2015 			vp = node->v_node;
2016 			vget(vp, LK_EXCLUSIVE | LK_RETRY);
2017 			vn_unlock(vp);
2018 		} else {
2019 			devfs_allocv(&vp, node);
2020 			vn_unlock(vp);
2021 		}
2022 	}
2023 
2024 	return vp;
2025 }
2026 
2027 /*
2028  * This function takes a cdev and removes its devfs node in the
2029  * given topology.  The cdev remains intact.
2030  */
2031 int
2032 devfs_destroy_device_node(struct devfs_node *root, cdev_t target)
2033 {
2034 	KKASSERT(target != NULL);
2035 	return devfs_destroy_node(root, target->si_name);
2036 }
2037 
2038 /*
2039  * This function takes a path to a devfs node, resolves it and
2040  * removes the devfs node from the given topology.
2041  */
2042 int
2043 devfs_destroy_node(struct devfs_node *root, char *target)
2044 {
2045 	struct devfs_node *node, *parent;
2046 	char *name;
2047 	char *name_buf;
2048 	char *create_path = NULL;
2049 
2050 	KKASSERT(target);
2051 
2052 	name_buf = kmalloc(PATH_MAX, M_TEMP, M_WAITOK);
2053 	ksnprintf(name_buf, PATH_MAX, "%s", target);
2054 
2055 	devfs_resolve_name_path(target, name_buf, &create_path, &name);
2056 
2057 	if (create_path)
2058 		parent = devfs_resolve_or_create_path(root, create_path, 0);
2059 	else
2060 		parent = root;
2061 
2062 	if (parent == NULL) {
2063 		kfree(name_buf, M_TEMP);
2064 		return 1;
2065 	}
2066 
2067 	node = devfs_find_device_node_by_name(parent, name);
2068 
2069 	if (node) {
2070 		nanotime(&node->parent->mtime);
2071 		devfs_gc(node);
2072 	}
2073 
2074 	kfree(name_buf, M_TEMP);
2075 
2076 	return 0;
2077 }
2078 
2079 /*
2080  * Just set perms and ownership for given node.
2081  */
2082 int
2083 devfs_set_perms(struct devfs_node *node, uid_t uid, gid_t gid,
2084 		u_short mode, u_long flags)
2085 {
2086 	node->mode = mode;
2087 	node->uid = uid;
2088 	node->gid = gid;
2089 
2090 	return 0;
2091 }
2092 
2093 /*
2094  * Propagates a device attach/detach to all mount
2095  * points. Also takes care of automatic alias removal
2096  * for a deleted cdev.
2097  */
2098 static int
2099 devfs_propagate_dev(cdev_t dev, int attach)
2100 {
2101 	struct devfs_mnt_data *mnt;
2102 
2103 	TAILQ_FOREACH(mnt, &devfs_mnt_list, link) {
2104 		if (attach) {
2105 			/* Device is being attached */
2106 			devfs_create_device_node(mnt->root_node, dev,
2107 						 NULL, NULL );
2108 		} else {
2109 			/* Device is being detached */
2110 			devfs_alias_remove(dev);
2111 			devfs_destroy_device_node(mnt->root_node, dev);
2112 		}
2113 	}
2114 	return 0;
2115 }
2116 
2117 /*
2118  * devfs_clone either returns a basename from a complete name by
2119  * returning the length of the name without trailing digits, or,
2120  * if clone != 0, calls the device's clone handler to get a new
2121  * device, which in turn is returned in devp.
2122  */
2123 cdev_t
2124 devfs_clone(cdev_t dev, const char *name, size_t len, int mode,
2125 		struct ucred *cred)
2126 {
2127 	int error;
2128 	struct devfs_clone_handler *chandler;
2129 	struct dev_clone_args ap;
2130 
2131 	TAILQ_FOREACH(chandler, &devfs_chandler_list, link) {
2132 		if (chandler->namlen != len)
2133 			continue;
2134 		if ((!memcmp(chandler->name, name, len)) && (chandler->nhandler)) {
2135 			lockmgr(&devfs_lock, LK_RELEASE);
2136 			devfs_config();
2137 			lockmgr(&devfs_lock, LK_EXCLUSIVE);
2138 
2139 			ap.a_head.a_dev = dev;
2140 			ap.a_dev = NULL;
2141 			ap.a_name = name;
2142 			ap.a_namelen = len;
2143 			ap.a_mode = mode;
2144 			ap.a_cred = cred;
2145 			error = (chandler->nhandler)(&ap);
2146 			if (error)
2147 				continue;
2148 
2149 			return ap.a_dev;
2150 		}
2151 	}
2152 
2153 	return NULL;
2154 }
2155 
2156 
2157 /*
2158  * Registers a new orphan in the orphan list.
2159  */
2160 void
2161 devfs_tracer_add_orphan(struct devfs_node *node)
2162 {
2163 	struct devfs_orphan *orphan;
2164 
2165 	KKASSERT(node);
2166 	orphan = kmalloc(sizeof(struct devfs_orphan), M_DEVFS, M_WAITOK);
2167 	orphan->node = node;
2168 
2169 	KKASSERT((node->flags & DEVFS_ORPHANED) == 0);
2170 	node->flags |= DEVFS_ORPHANED;
2171 	TAILQ_INSERT_TAIL(DEVFS_ORPHANLIST(node->mp), orphan, link);
2172 }
2173 
2174 /*
2175  * Removes an orphan from the orphan list.
2176  */
2177 void
2178 devfs_tracer_del_orphan(struct devfs_node *node)
2179 {
2180 	struct devfs_orphan *orphan;
2181 
2182 	KKASSERT(node);
2183 
2184 	TAILQ_FOREACH(orphan, DEVFS_ORPHANLIST(node->mp), link)	{
2185 		if (orphan->node == node) {
2186 			node->flags &= ~DEVFS_ORPHANED;
2187 			TAILQ_REMOVE(DEVFS_ORPHANLIST(node->mp), orphan, link);
2188 			kfree(orphan, M_DEVFS);
2189 			break;
2190 		}
2191 	}
2192 }
2193 
2194 /*
2195  * Counts the orphans in the orphan list, and if cleanup
2196  * is specified, also frees the orphan and removes it from
2197  * the list.
2198  */
2199 size_t
2200 devfs_tracer_orphan_count(struct mount *mp, int cleanup)
2201 {
2202 	struct devfs_orphan *orphan, *orphan2;
2203 	size_t count = 0;
2204 
2205 	TAILQ_FOREACH_MUTABLE(orphan, DEVFS_ORPHANLIST(mp), link, orphan2)	{
2206 		count++;
2207 		/*
2208 		 * If we are instructed to clean up, we do so.
2209 		 */
2210 		if (cleanup) {
2211 			TAILQ_REMOVE(DEVFS_ORPHANLIST(mp), orphan, link);
2212 			orphan->node->flags &= ~DEVFS_ORPHANED;
2213 			devfs_freep(orphan->node);
2214 			kfree(orphan, M_DEVFS);
2215 		}
2216 	}
2217 
2218 	return count;
2219 }
2220 
2221 /*
2222  * Fetch an ino_t from the global d_ino by increasing it
2223  * while spinlocked.
2224  */
2225 static ino_t
2226 devfs_fetch_ino(void)
2227 {
2228 	ino_t	ret;
2229 
2230 	spin_lock(&ino_lock);
2231 	ret = d_ino++;
2232 	spin_unlock(&ino_lock);
2233 
2234 	return ret;
2235 }
2236 
2237 /*
2238  * Allocates a new cdev and initializes it's most basic
2239  * fields.
2240  */
2241 cdev_t
2242 devfs_new_cdev(struct dev_ops *ops, int minor, struct dev_ops *bops)
2243 {
2244 	cdev_t dev = sysref_alloc(&cdev_sysref_class);
2245 
2246 	sysref_activate(&dev->si_sysref);
2247 	reference_dev(dev);
2248 	bzero(dev, offsetof(struct cdev, si_sysref));
2249 
2250 	dev->si_uid = 0;
2251 	dev->si_gid = 0;
2252 	dev->si_perms = 0;
2253 	dev->si_drv1 = NULL;
2254 	dev->si_drv2 = NULL;
2255 	dev->si_lastread = 0;		/* time_second */
2256 	dev->si_lastwrite = 0;		/* time_second */
2257 
2258 	dev->si_dict = NULL;
2259 	dev->si_parent = NULL;
2260 	dev->si_ops = ops;
2261 	dev->si_flags = 0;
2262 	dev->si_uminor = minor;
2263 	dev->si_bops = bops;
2264 
2265 	/*
2266 	 * Since the disk subsystem is in the way, we need to
2267 	 * propagate the D_CANFREE from bops (and ops) to
2268 	 * si_flags.
2269 	 */
2270 	if (bops && (bops->head.flags & D_CANFREE)) {
2271 		dev->si_flags |= SI_CANFREE;
2272 	} else if (ops->head.flags & D_CANFREE) {
2273 		dev->si_flags |= SI_CANFREE;
2274 	}
2275 
2276 	/* If there is a backing device, we reference its ops */
2277 	dev->si_inode = makeudev(
2278 		    devfs_reference_ops((bops)?(bops):(ops)),
2279 		    minor );
2280 	dev->si_umajor = umajor(dev->si_inode);
2281 
2282 	return dev;
2283 }
2284 
2285 static void
2286 devfs_cdev_terminate(cdev_t dev)
2287 {
2288 	int locked = 0;
2289 
2290 	/* Check if it is locked already. if not, we acquire the devfs lock */
2291 	if ((lockstatus(&devfs_lock, curthread)) != LK_EXCLUSIVE) {
2292 		lockmgr(&devfs_lock, LK_EXCLUSIVE);
2293 		locked = 1;
2294 	}
2295 
2296 	/*
2297 	 * Make sure the node isn't linked anymore. Otherwise we've screwed
2298 	 * up somewhere, since normal devs are unlinked on the call to
2299 	 * destroy_dev and only-cdevs that have not been used for cloning
2300 	 * are not linked in the first place. only-cdevs used for cloning
2301 	 * will be linked in, too, and should only be destroyed via
2302 	 * destroy_dev, not destroy_only_dev, so we catch that problem, too.
2303 	 */
2304 	KKASSERT((dev->si_flags & SI_DEVFS_LINKED) == 0);
2305 
2306 	/* If we acquired the lock, we also get rid of it */
2307 	if (locked)
2308 		lockmgr(&devfs_lock, LK_RELEASE);
2309 
2310 	/* If there is a backing device, we release the backing device's ops */
2311 	devfs_release_ops((dev->si_bops)?(dev->si_bops):(dev->si_ops));
2312 
2313 	/* Finally destroy the device */
2314 	sysref_put(&dev->si_sysref);
2315 }
2316 
2317 /*
2318  * Dummies for now (individual locks for MPSAFE)
2319  */
2320 static void
2321 devfs_cdev_lock(cdev_t dev)
2322 {
2323 }
2324 
2325 static void
2326 devfs_cdev_unlock(cdev_t dev)
2327 {
2328 }
2329 
2330 static int
2331 devfs_detached_filter_eof(struct knote *kn, long hint)
2332 {
2333 	kn->kn_flags |= (EV_EOF | EV_NODATA);
2334 	return (1);
2335 }
2336 
2337 static void
2338 devfs_detached_filter_detach(struct knote *kn)
2339 {
2340 	cdev_t dev = (cdev_t)kn->kn_hook;
2341 
2342 	knote_remove(&dev->si_kqinfo.ki_note, kn);
2343 }
2344 
2345 static struct filterops devfs_detached_filterops =
2346 	{ FILTEROP_ISFD, NULL,
2347 	  devfs_detached_filter_detach,
2348 	  devfs_detached_filter_eof };
2349 
2350 /*
2351  * Delegates knote filter handling responsibility to devfs
2352  *
2353  * Any device that implements kqfilter event handling and could be detached
2354  * or shut down out from under the kevent subsystem must allow devfs to
2355  * assume responsibility for any knotes it may hold.
2356  */
2357 void
2358 devfs_assume_knotes(cdev_t dev, struct kqinfo *kqi)
2359 {
2360 	/*
2361 	 * Let kern/kern_event.c do the heavy lifting.
2362 	 */
2363 	knote_assume_knotes(kqi, &dev->si_kqinfo,
2364 			    &devfs_detached_filterops, (void *)dev);
2365 
2366 	/*
2367 	 * These should probably be activated individually, but doing so
2368 	 * would require refactoring kq's public in-kernel interface.
2369 	 */
2370 	KNOTE(&dev->si_kqinfo.ki_note, 0);
2371 }
2372 
2373 /*
2374  * Links a given cdev into the dev list.
2375  */
2376 int
2377 devfs_link_dev(cdev_t dev)
2378 {
2379 	KKASSERT((dev->si_flags & SI_DEVFS_LINKED) == 0);
2380 	dev->si_flags |= SI_DEVFS_LINKED;
2381 	TAILQ_INSERT_TAIL(&devfs_dev_list, dev, link);
2382 
2383 	return 0;
2384 }
2385 
2386 /*
2387  * Removes a given cdev from the dev list.  The caller is responsible for
2388  * releasing the reference on the device associated with the linkage.
2389  *
2390  * Returns EALREADY if the dev has already been unlinked.
2391  */
2392 static int
2393 devfs_unlink_dev(cdev_t dev)
2394 {
2395 	if ((dev->si_flags & SI_DEVFS_LINKED)) {
2396 		TAILQ_REMOVE(&devfs_dev_list, dev, link);
2397 		dev->si_flags &= ~SI_DEVFS_LINKED;
2398 		return (0);
2399 	}
2400 	return (EALREADY);
2401 }
2402 
2403 int
2404 devfs_node_is_accessible(struct devfs_node *node)
2405 {
2406 	if ((node) && (!(node->flags & DEVFS_HIDDEN)))
2407 		return 1;
2408 	else
2409 		return 0;
2410 }
2411 
2412 int
2413 devfs_reference_ops(struct dev_ops *ops)
2414 {
2415 	int unit;
2416 	struct devfs_dev_ops *found = NULL;
2417 	struct devfs_dev_ops *devops;
2418 
2419 	TAILQ_FOREACH(devops, &devfs_dev_ops_list, link) {
2420 		if (devops->ops == ops) {
2421 			found = devops;
2422 			break;
2423 		}
2424 	}
2425 
2426 	if (!found) {
2427 		found = kmalloc(sizeof(struct devfs_dev_ops), M_DEVFS, M_WAITOK);
2428 		found->ops = ops;
2429 		found->ref_count = 0;
2430 		TAILQ_INSERT_TAIL(&devfs_dev_ops_list, found, link);
2431 	}
2432 
2433 	KKASSERT(found);
2434 
2435 	if (found->ref_count == 0) {
2436 		found->id = devfs_clone_bitmap_get(&DEVFS_CLONE_BITMAP(ops_id), 255);
2437 		if (found->id == -1) {
2438 			/* Ran out of unique ids */
2439 			devfs_debug(DEVFS_DEBUG_WARNING,
2440 					"devfs_reference_ops: WARNING: ran out of unique ids\n");
2441 		}
2442 	}
2443 	unit = found->id;
2444 	++found->ref_count;
2445 
2446 	return unit;
2447 }
2448 
2449 void
2450 devfs_release_ops(struct dev_ops *ops)
2451 {
2452 	struct devfs_dev_ops *found = NULL;
2453 	struct devfs_dev_ops *devops;
2454 
2455 	TAILQ_FOREACH(devops, &devfs_dev_ops_list, link) {
2456 		if (devops->ops == ops) {
2457 			found = devops;
2458 			break;
2459 		}
2460 	}
2461 
2462 	KKASSERT(found);
2463 
2464 	--found->ref_count;
2465 
2466 	if (found->ref_count == 0) {
2467 		TAILQ_REMOVE(&devfs_dev_ops_list, found, link);
2468 		devfs_clone_bitmap_put(&DEVFS_CLONE_BITMAP(ops_id), found->id);
2469 		kfree(found, M_DEVFS);
2470 	}
2471 }
2472 
2473 /*
2474  * Wait for asynchronous messages to complete in the devfs helper
2475  * thread, then return.  Do nothing if the helper thread is dead
2476  * or we are being indirectly called from the helper thread itself.
2477  */
2478 void
2479 devfs_config(void)
2480 {
2481 	devfs_msg_t msg;
2482 
2483 	if (devfs_run && curthread != td_core) {
2484 		msg = devfs_msg_get();
2485 		msg = devfs_msg_send_sync(DEVFS_SYNC, msg);
2486 		devfs_msg_put(msg);
2487 	}
2488 }
2489 
2490 /*
2491  * Called on init of devfs; creates the objcaches and
2492  * spawns off the devfs core thread. Also initializes
2493  * locks.
2494  */
2495 static void
2496 devfs_init(void)
2497 {
2498 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_init() called\n");
2499 	/* Create objcaches for nodes, msgs and devs */
2500 	devfs_node_cache = objcache_create("devfs-node-cache", 0, 0,
2501 					   NULL, NULL, NULL,
2502 					   objcache_malloc_alloc,
2503 					   objcache_malloc_free,
2504 					   &devfs_node_malloc_args );
2505 
2506 	devfs_msg_cache = objcache_create("devfs-msg-cache", 0, 0,
2507 					  NULL, NULL, NULL,
2508 					  objcache_malloc_alloc,
2509 					  objcache_malloc_free,
2510 					  &devfs_msg_malloc_args );
2511 
2512 	devfs_dev_cache = objcache_create("devfs-dev-cache", 0, 0,
2513 					  NULL, NULL, NULL,
2514 					  objcache_malloc_alloc,
2515 					  objcache_malloc_free,
2516 					  &devfs_dev_malloc_args );
2517 
2518 	devfs_clone_bitmap_init(&DEVFS_CLONE_BITMAP(ops_id));
2519 
2520 	/* Initialize the reply-only port which acts as a message drain */
2521 	lwkt_initport_replyonly(&devfs_dispose_port, devfs_msg_autofree_reply);
2522 
2523 	/* Initialize *THE* devfs lock */
2524 	lockinit(&devfs_lock, "devfs_core lock", 0, 0);
2525 
2526 	lockmgr(&devfs_lock, LK_EXCLUSIVE);
2527 	lwkt_create(devfs_msg_core, /*args*/NULL, &td_core, NULL,
2528 		    0, -1, "devfs_msg_core");
2529 	while (devfs_run == 0)
2530 		lksleep(td_core, &devfs_lock, 0, "devfsc", 0);
2531 	lockmgr(&devfs_lock, LK_RELEASE);
2532 
2533 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_init finished\n");
2534 }
2535 
2536 /*
2537  * Called on unload of devfs; takes care of destroying the core
2538  * and the objcaches. Also removes aliases that are no longer needed.
2539  */
2540 static void
2541 devfs_uninit(void)
2542 {
2543 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_uninit() called\n");
2544 
2545 	devfs_msg_send(DEVFS_TERMINATE_CORE, NULL);
2546 	while (devfs_run)
2547 		tsleep(td_core, 0, "devfsc", hz*10);
2548 	tsleep(td_core, 0, "devfsc", hz);
2549 
2550 	devfs_clone_bitmap_uninit(&DEVFS_CLONE_BITMAP(ops_id));
2551 
2552 	/* Destroy the objcaches */
2553 	objcache_destroy(devfs_msg_cache);
2554 	objcache_destroy(devfs_node_cache);
2555 	objcache_destroy(devfs_dev_cache);
2556 
2557 	devfs_alias_reap();
2558 }
2559 
2560 /*
2561  * This is a sysctl handler to assist userland devname(3) to
2562  * find the device name for a given udev.
2563  */
2564 static int
2565 devfs_sysctl_devname_helper(SYSCTL_HANDLER_ARGS)
2566 {
2567 	udev_t 	udev;
2568 	cdev_t	found;
2569 	int		error;
2570 
2571 
2572 	if ((error = SYSCTL_IN(req, &udev, sizeof(udev_t))))
2573 		return (error);
2574 
2575 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs sysctl, received udev: %d\n", udev);
2576 
2577 	if (udev == NOUDEV)
2578 		return(EINVAL);
2579 
2580 	if ((found = devfs_find_device_by_udev(udev)) == NULL)
2581 		return(ENOENT);
2582 
2583 	return(SYSCTL_OUT(req, found->si_name, strlen(found->si_name) + 1));
2584 }
2585 
2586 
2587 SYSCTL_PROC(_kern, OID_AUTO, devname, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY,
2588 			NULL, 0, devfs_sysctl_devname_helper, "", "helper for devname(3)");
2589 
2590 SYSCTL_NODE(_vfs, OID_AUTO, devfs, CTLFLAG_RW, 0, "devfs");
2591 TUNABLE_INT("vfs.devfs.debug", &devfs_debug_enable);
2592 SYSCTL_INT(_vfs_devfs, OID_AUTO, debug, CTLFLAG_RW, &devfs_debug_enable,
2593 		0, "Enable DevFS debugging");
2594 
2595 SYSINIT(vfs_devfs_register, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST,
2596 		devfs_init, NULL);
2597 SYSUNINIT(vfs_devfs_register, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY,
2598 		devfs_uninit, NULL);
2599 
/*
 * wildCmp() - compare wild string to sane string
 *
 *	In the wild string w, '*' matches any run of characters (possibly
 *	empty), '?' matches exactly one character, and every other
 *	character has to match literally.
 *
 *	mary holds one scan limit per '*' in w and d is the number of '*'
 *	already passed, i.e. the index of the next one.  A '*' only tries
 *	start positions below mary[d] and stores s there when its scan
 *	finds nothing.  Callers seed every entry with the address of s's
 *	terminator (see devfs_WildCmp()).
 *
 *	Returns 0 on success, -1 on failure.
 */
static int
wildCmp(const char **mary, int d, const char *w, const char *s)
{
	int i;

	for (;;) {
		switch (*w) {
		case '*':
			/*
			 * A run of '*' matches exactly what a single '*'
			 * matches.  Step to the last one of the run, keeping
			 * d in step with the number of '*' passed.  Without
			 * this, a run could not match the empty remainder at
			 * the end of s, because the scans below never visit
			 * the terminator position.
			 */
			while (w[1] == '*') {
				++w;
				++d;
			}

			/* A trailing '*' swallows whatever is left. */
			if (w[1] == 0)
				return (0);

			if (w[1] != '?') {
				/*
				 * A literal follows: only positions that
				 * hold that character are worth recursing on.
				 */
				for (i = 0; s + i < mary[d]; ++i) {
					if (s[i] == w[1] &&
					    wildCmp(mary, d + 1, w + 1, s + i) == 0)
						return (0);
				}
			} else {
				/* '?' follows: every position has to be tried. */
				for (i = 0; s + i < mary[d]; ++i) {
					if (wildCmp(mary, d + 1, w + 1, s + i) == 0)
						return (0);
				}
			}

			/* Nothing from s onwards worked for this '*'. */
			mary[d] = s;
			return (-1);
		case '?':
			/* needs one character to consume */
			if (*s == 0)
				return (-1);
			++w;
			++s;
			break;
		default:
			if (*w != *s)
				return (-1);
			if (*w == 0)	/* both strings ended together */
				return (0);
			++w;
			++s;
			break;
		}
	}
	/* not reached */
	return (-1);
}
2659 
2660 
/*
 * ASCII-only lower-casing used by wildCaseCmp().  The argument is
 * evaluated more than once, so it must be free of side effects.
 */
#define tolower(x)	(((x) >= 'A' && (x) <= 'Z') ? ((x) + ('a' - 'A')) : (x))

/*
 * wildCaseCmp() - compare wild string to sane string, case insensitive
 *
 *	In the wild string w, '*' matches any run of characters (possibly
 *	empty), '?' matches exactly one character, and every other
 *	character has to match literally, with ASCII letters compared
 *	without regard to case.
 *
 *	mary holds one scan limit per '*' in w and d is the number of '*'
 *	already passed, i.e. the index of the next one.  A '*' only tries
 *	start positions below mary[d] and stores s there when its scan
 *	finds nothing.  Callers seed every entry with the address of s's
 *	terminator (see devfs_WildCaseCmp()).
 *
 *	Returns 0 on success, -1 on failure.
 */
static int
wildCaseCmp(const char **mary, int d, const char *w, const char *s)
{
	int i;

	for (;;) {
		switch (*w) {
		case '*':
			/*
			 * A run of '*' matches exactly what a single '*'
			 * matches.  Step to the last one of the run, keeping
			 * d in step with the number of '*' passed.  Without
			 * this, a run could not match the empty remainder at
			 * the end of s, because the scans below never visit
			 * the terminator position.
			 */
			while (w[1] == '*') {
				++w;
				++d;
			}

			/* A trailing '*' swallows whatever is left. */
			if (w[1] == 0)
				return (0);

			if (w[1] != '?') {
				/*
				 * A literal follows: only positions that hold
				 * that character are worth recursing on.  The
				 * pre-check has to fold case as well, or a
				 * pattern such as "*A" would never be tried
				 * against an 'a'.
				 */
				for (i = 0; s + i < mary[d]; ++i) {
					if (tolower(s[i]) == tolower(w[1]) &&
					    wildCaseCmp(mary, d + 1, w + 1, s + i) == 0)
						return (0);
				}
			} else {
				/* '?' follows: every position has to be tried. */
				for (i = 0; s + i < mary[d]; ++i) {
					if (wildCaseCmp(mary, d + 1, w + 1, s + i) == 0)
						return (0);
				}
			}

			/* Nothing from s onwards worked for this '*'. */
			mary[d] = s;
			return (-1);
		case '?':
			/* needs one character to consume */
			if (*s == 0)
				return (-1);
			++w;
			++s;
			break;
		default:
			if (tolower(*w) != tolower(*s))
				return (-1);
			if (*w == 0)	/* both strings ended together */
				return (0);
			++w;
			++s;
			break;
		}
	}
	/* not reached */
	return (-1);
}
2723 
2724 int
2725 devfs_WildCmp(const char *w, const char *s)
2726 {
2727     int i;
2728     int c;
2729     int slen = strlen(s);
2730     const char **mary;
2731 
2732     for (i = c = 0; w[i]; ++i) {
2733 	if (w[i] == '*')
2734 	    ++c;
2735     }
2736     mary = kmalloc(sizeof(char *) * (c + 1), M_DEVFS, M_WAITOK);
2737     for (i = 0; i < c; ++i)
2738 	mary[i] = s + slen;
2739     i = wildCmp(mary, 0, w, s);
2740     kfree(mary, M_DEVFS);
2741     return(i);
2742 }
2743 
2744 int
2745 devfs_WildCaseCmp(const char *w, const char *s)
2746 {
2747     int i;
2748     int c;
2749     int slen = strlen(s);
2750     const char **mary;
2751 
2752     for (i = c = 0; w[i]; ++i) {
2753 	if (w[i] == '*')
2754 	    ++c;
2755     }
2756     mary = kmalloc(sizeof(char *) * (c + 1), M_DEVFS, M_WAITOK);
2757     for (i = 0; i < c; ++i)
2758 	mary[i] = s + slen;
2759     i = wildCaseCmp(mary, 0, w, s);
2760     kfree(mary, M_DEVFS);
2761     return(i);
2762 }
2763 
2764