1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * This is the /dev (hence, the sdev_ prefix) filesystem.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/sysmacros.h>
33 #include <sys/systm.h>
34 #include <sys/kmem.h>
35 #include <sys/time.h>
36 #include <sys/pathname.h>
37 #include <sys/vfs.h>
38 #include <sys/vfs_opreg.h>
39 #include <sys/vnode.h>
40 #include <sys/file.h>
41 #include <sys/stat.h>
42 #include <sys/uio.h>
43 #include <sys/stat.h>
44 #include <sys/errno.h>
45 #include <sys/cmn_err.h>
46 #include <sys/cred.h>
47 #include <sys/statvfs.h>
48 #include <sys/policy.h>
49 #include <sys/mount.h>
50 #include <sys/debug.h>
51 #include <sys/modctl.h>
52 #include <sys/mkdev.h>
53 #include <fs/fs_subr.h>
54 #include <sys/fs/sdev_impl.h>
55 #include <sys/fs/sdev_node.h>
56 #include <sys/fs/snode.h>
57 #include <sys/fs/dv_node.h>
58 #include <sys/sunndi.h>
59 #include <sys/mntent.h>
60 
61 /*
62  * /dev vfs operations.
63  */
64 
65 /*
66  * globals
67  */
68 struct sdev_data *sdev_origins; /* mount info for origins under /dev */
69 kmutex_t sdev_lock; /* used for mount/unmount/rename synchronization */
70 
71 /*
72  * static
73  */
74 static major_t devmajor;	/* the fictitious major we live on */
75 static major_t devminor;	/* the fictitious minor of this instance */
76 static struct sdev_data *sdev_mntinfo = NULL;	/* linked list of instances */
77 
78 /* LINTED E_STATIC_UNUSED */		/* useful for debugging */
79 static struct vnode *sdev_stale_attrvp; /* stale root attrvp after remount */
80 
/*
 * Forward declarations: the VFS entry points installed by devinit() and
 * the helpers local to this file.
 */
static int sdev_mount(struct vfs *vfsp, struct vnode *mvp,
    struct mounta *uap, struct cred *cr);
static int sdev_unmount(struct vfs *vfsp, int flag, struct cred *cr);
static int sdev_root(struct vfs *vfsp, struct vnode **vpp);
static int sdev_statvfs(struct vfs *vfsp, struct statvfs64 *sbp);
static void sdev_insert_mntinfo(struct sdev_data *data);
static int devinit(int fstype, char *name);
88 
89 static vfsdef_t sdev_vfssw = {
90 	VFSDEF_VERSION,
91 	"dev",		/* type name string */
92 	devinit,	/* init routine */
93 	VSW_CANREMOUNT,	/* flags */
94 	NULL		/* mount options table prototype */
95 };
96 
97 
98 /*
99  * Module linkage information
100  */
101 static struct modlfs modlfs = {
102 	&mod_fsops, "/dev filesystem", &sdev_vfssw
103 };
104 
105 static struct modlinkage modlinkage = {
106 	MODREV_1, (void *)&modlfs, NULL
107 };
108 
109 int
110 _init(void)
111 {
112 	int e;
113 
114 	mutex_init(&sdev_lock, NULL, MUTEX_DEFAULT, NULL);
115 	sdev_node_cache_init();
116 	sdev_devfsadm_lockinit();
117 	if ((e = mod_install(&modlinkage)) != 0) {
118 		sdev_devfsadm_lockdestroy();
119 		sdev_node_cache_fini();
120 		mutex_destroy(&sdev_lock);
121 		return (e);
122 	}
123 	return (0);
124 }
125 
/*
 * Module unload entry point.
 *
 * The dev module remains loaded for the global /dev instance, so every
 * unload request is refused with EBUSY.
 */
int
_fini(void)
{
	return (EBUSY);
}
134 
135 int
136 _info(struct modinfo *modinfop)
137 {
138 	return (mod_info(&modlinkage, modinfop));
139 }
140 
141 /*ARGSUSED*/
142 static int
143 devinit(int fstype, char *name)
144 {
145 	static const fs_operation_def_t dev_vfsops_tbl[] = {
146 		VFSNAME_MOUNT,		{ .vfs_mount = sdev_mount },
147 		VFSNAME_UNMOUNT,	{ .vfs_unmount = sdev_unmount },
148 		VFSNAME_ROOT, 		{ .vfs_root = sdev_root },
149 		VFSNAME_STATVFS,	{ .vfs_statvfs = sdev_statvfs },
150 		NULL,			NULL
151 	};
152 
153 	int	error;
154 	extern major_t getudev(void);
155 
156 	devtype = fstype;
157 
158 	error = vfs_setfsops(fstype, dev_vfsops_tbl, NULL);
159 	if (error != 0) {
160 		cmn_err(CE_WARN, "devinit: bad vfs ops tbl");
161 		return (error);
162 	}
163 
164 	error = vn_make_ops("dev", sdev_vnodeops_tbl, &sdev_vnodeops);
165 	if (error != 0) {
166 		(void) vfs_freevfsops_by_type(fstype);
167 		cmn_err(CE_WARN, "devinit: bad vnode ops tbl");
168 		return (error);
169 	}
170 
171 	if ((devmajor = getudev()) == (major_t)-1) {
172 		cmn_err(CE_WARN, "%s: can't get unique dev", sdev_vfssw.name);
173 		return (1);
174 	}
175 
176 	/* initialize negative cache */
177 	sdev_ncache_init();
178 
179 	return (0);
180 }
181 
182 /*
183  * Both mount point and backing store directory name are
184  * passed in from userland
185  */
186 static int
187 sdev_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
188     struct cred *cr)
189 {
190 	struct sdev_data *sdev_data;
191 	struct vnode *avp;
192 	struct sdev_node *dv;
193 	struct sdev_mountargs *args = NULL;
194 	int	error = 0;
195 	dev_t	devdev;
196 
197 	/*
198 	 * security check
199 	 */
200 	if ((secpolicy_fs_mount(cr, mvp, vfsp) != 0) ||
201 	    (secpolicy_sys_devices(cr) != 0))
202 		return (EPERM);
203 
204 	/*
205 	 * Sanity check the mount point
206 	 */
207 	if (mvp->v_type != VDIR)
208 		return (ENOTDIR);
209 
210 	/*
211 	 * Sanity Check for overlay mount.
212 	 */
213 	mutex_enter(&mvp->v_lock);
214 	if ((uap->flags & MS_OVERLAY) == 0 &&
215 	    (uap->flags & MS_REMOUNT) == 0 &&
216 	    (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
217 		mutex_exit(&mvp->v_lock);
218 		return (EBUSY);
219 	}
220 	mutex_exit(&mvp->v_lock);
221 
222 	args = kmem_zalloc(sizeof (*args), KM_SLEEP);
223 
224 	if ((uap->flags & MS_DATA) &&
225 	    (uap->datalen != 0 && uap->dataptr != NULL)) {
226 		/* copy in the arguments */
227 		if (error = sdev_copyin_mountargs(uap, args))
228 			goto cleanup;
229 	}
230 
231 	/*
232 	 * Sanity check the backing store
233 	 */
234 	if (args->sdev_attrdir) {
235 		/* user supplied an attribute store */
236 		if (error = lookupname((char *)(uintptr_t)args->sdev_attrdir,
237 		    UIO_USERSPACE, FOLLOW, NULLVPP, &avp)) {
238 			cmn_err(CE_NOTE, "/dev fs: lookup on attribute "
239 			    "directory %s failed",
240 			    (char *)(uintptr_t)args->sdev_attrdir);
241 			goto cleanup;
242 		}
243 
244 		if (avp->v_type != VDIR) {
245 			VN_RELE(avp);
246 			error = ENOTDIR;
247 			goto cleanup;
248 		}
249 	} else {
250 		/* use mountp as the attribute store */
251 		avp = mvp;
252 		VN_HOLD(avp);
253 	}
254 
255 	mutex_enter(&sdev_lock);
256 
257 	/*
258 	 * handling installation
259 	 */
260 	if (uap->flags & MS_REMOUNT) {
261 		sdev_data = (struct sdev_data *)vfsp->vfs_data;
262 		ASSERT(sdev_data);
263 
264 		dv = sdev_data->sdev_root;
265 		ASSERT(dv == dv->sdev_dotdot);
266 
267 		/*
268 		 * mark all existing sdev_nodes (except root node) stale
269 		 */
270 		sdev_stale(dv);
271 
272 		/* Reset previous mountargs */
273 		if (sdev_data->sdev_mountargs) {
274 			kmem_free(sdev_data->sdev_mountargs,
275 			    sizeof (struct sdev_mountargs));
276 		}
277 		sdev_data->sdev_mountargs = args;
278 		args = NULL;		/* so it won't be freed below */
279 
280 		sdev_stale_attrvp = dv->sdev_attrvp;
281 		dv->sdev_attrvp = avp;
282 		vfsp->vfs_mtime = ddi_get_time();
283 
284 		mutex_exit(&sdev_lock);
285 		goto cleanup;				/* we're done */
286 	}
287 
288 	/*
289 	 * Create and initialize the vfs-private data.
290 	 */
291 	devdev = makedevice(devmajor, devminor);
292 	while (vfs_devismounted(devdev)) {
293 		devminor = (devminor + 1) & MAXMIN32;
294 
295 		/*
296 		 * All the minor numbers are used up.
297 		 */
298 		if (devminor == 0) {
299 			mutex_exit(&sdev_lock);
300 			VN_RELE(avp);
301 			error = ENODEV;
302 			goto cleanup;
303 		}
304 
305 		devdev = makedevice(devmajor, devminor);
306 	}
307 
308 	dv = sdev_mkroot(vfsp, devdev, mvp, avp, cr);
309 	sdev_data = kmem_zalloc(sizeof (struct sdev_data), KM_SLEEP);
310 	vfsp->vfs_dev = devdev;
311 	vfsp->vfs_data = (caddr_t)sdev_data;
312 	vfsp->vfs_fstype = devtype;
313 	vfsp->vfs_bsize = DEV_BSIZE;
314 	vfsp->vfs_mtime = ddi_get_time();
315 	vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devtype);
316 
317 	ASSERT(dv == dv->sdev_dotdot);
318 
319 	sdev_data->sdev_vfsp = vfsp;
320 	sdev_data->sdev_root = dv;
321 	sdev_data->sdev_mountargs = args;
322 
323 	/* get acl flavor from attribute dir */
324 	if (VOP_PATHCONF(avp, _PC_ACL_ENABLED, &sdev_data->sdev_acl_flavor,
325 	    kcred, NULL) != 0 || sdev_data->sdev_acl_flavor == 0)
326 		sdev_data->sdev_acl_flavor = _ACL_ACLENT_ENABLED;
327 
328 	args = NULL;			/* so it won't be freed below */
329 	sdev_insert_mntinfo(sdev_data);
330 	mutex_exit(&sdev_lock);
331 
332 	if (!SDEV_IS_GLOBAL(dv)) {
333 		ASSERT(sdev_origins);
334 		dv->sdev_flags &= ~SDEV_GLOBAL;
335 		dv->sdev_origin = sdev_origins->sdev_root;
336 	} else {
337 		sdev_ncache_setup();
338 		rw_enter(&dv->sdev_contents, RW_WRITER);
339 		sdev_filldir_dynamic(dv);
340 		rw_exit(&dv->sdev_contents);
341 	}
342 
343 	sdev_update_timestamps(dv->sdev_attrvp,
344 	    cr, AT_CTIME|AT_MTIME|AT_ATIME);
345 
346 cleanup:
347 	if (args)
348 		kmem_free(args, sizeof (*args));
349 	return (error);
350 }
351 
352 /*
353  * unmounting the non-global /dev instances, e.g. when deleting a Kevlar zone.
354  */
355 static int
356 sdev_unmount(struct vfs *vfsp, int flag, struct cred *cr)
357 {
358 	struct sdev_node *dv;
359 	int error;
360 	struct sdev_data *sdev_data, *prev, *next;
361 
362 	/*
363 	 * enforce the security policies
364 	 */
365 	if ((secpolicy_fs_unmount(cr, vfsp) != 0) ||
366 	    (secpolicy_sys_devices(cr) != 0))
367 		return (EPERM);
368 
369 	if (flag & MS_FORCE)
370 		return (ENOTSUP);
371 
372 	mutex_enter(&sdev_lock);
373 	dv = VFSTOSDEVFS(vfsp)->sdev_root;
374 	ASSERT(dv == dv->sdev_dotdot);
375 	if (SDEVTOV(dv)->v_count > 1) {
376 		mutex_exit(&sdev_lock);
377 		return (EBUSY);
378 	}
379 
380 	/*
381 	 * global instance remains mounted
382 	 */
383 	if (SDEV_IS_GLOBAL(dv)) {
384 		mutex_exit(&sdev_lock);
385 		return (EBUSY);
386 	}
387 	mutex_exit(&sdev_lock);
388 
389 	/* verify the v_count */
390 	if ((error = sdev_cleandir(dv, NULL, 0)) != 0) {
391 		return (error);
392 	}
393 	ASSERT(SDEVTOV(dv)->v_count == 1);
394 
395 	/* release hold on root node and destroy it */
396 	SDEV_RELE(dv);
397 	dv->sdev_nlink -= 2;
398 	sdev_nodedestroy(dv, 0);
399 
400 	sdev_data = (struct sdev_data *)vfsp->vfs_data;
401 	vfsp->vfs_data = (caddr_t)0;
402 
403 	/*
404 	 * XXX separate it into sdev_delete_mntinfo() if useful
405 	 */
406 	mutex_enter(&sdev_lock);
407 	prev = sdev_data->sdev_prev;
408 	next = sdev_data->sdev_next;
409 	if (prev)
410 		prev->sdev_next = next;
411 	else
412 		sdev_mntinfo = next;
413 	if (next)
414 		next->sdev_prev = prev;
415 	mutex_exit(&sdev_lock);
416 
417 	if (sdev_data->sdev_mountargs) {
418 		kmem_free(sdev_data->sdev_mountargs,
419 		    sizeof (struct sdev_mountargs));
420 	}
421 	kmem_free(sdev_data, sizeof (struct sdev_data));
422 	return (0);
423 }
424 
425 /*
426  * return root vnode for given vfs
427  */
428 static int
429 sdev_root(struct vfs *vfsp, struct vnode **vpp)
430 {
431 	*vpp = SDEVTOV(VFSTOSDEVFS(vfsp)->sdev_root);
432 	VN_HOLD(*vpp);
433 	return (0);
434 }
435 
436 /*
437  * return 'generic superblock' information to userland.
438  *
439  * not much that we can usefully admit to here
440  */
441 static int
442 sdev_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
443 {
444 	dev32_t d32;
445 
446 	bzero(sbp, sizeof (*sbp));
447 	sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize;
448 	sbp->f_files = kmem_cache_stat(sdev_node_cache, "alloc");
449 
450 	/* no illusions that free/avail files is relevant to dev */
451 	sbp->f_ffree = 0;
452 	sbp->f_favail = 0;
453 
454 	/* no illusions that blocks are relevant to devfs */
455 	sbp->f_bfree = 0;
456 	sbp->f_bavail = 0;
457 	sbp->f_blocks = 0;
458 
459 	(void) cmpldev(&d32, vfsp->vfs_dev);
460 	sbp->f_fsid = d32;
461 	(void) strcpy(sbp->f_basetype, vfssw[devtype].vsw_name);
462 	sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
463 	sbp->f_namemax = MAXNAMELEN - 1;
464 	(void) strcpy(sbp->f_fstr, "dev");
465 
466 	return (0);
467 }
468 
469 int
470 sdev_module_register(char *mod_name, struct devname_ops *dev_ops)
471 {
472 	struct devname_nsmap *map = NULL;
473 
474 	if (strcmp(mod_name, DEVNAME_NSCONFIG) == 0) {
475 		devname_ns_ops = dev_ops;
476 		return (0);
477 	}
478 
479 	map = sdev_get_nsmap_by_module(mod_name);
480 	if (map == NULL)
481 		return (EFAULT);
482 
483 	rw_enter(&map->dir_lock, RW_WRITER);
484 	map->dir_ops = dev_ops;
485 	rw_exit(&map->dir_lock);
486 	return (0);
487 }
488 
489 static void
490 sdev_insert_mntinfo(struct sdev_data *data)
491 {
492 	ASSERT(mutex_owned(&sdev_lock));
493 	data->sdev_next = sdev_mntinfo;
494 	data->sdev_prev = NULL;
495 	if (sdev_mntinfo) {
496 		sdev_mntinfo->sdev_prev = data;
497 	} else {
498 		sdev_origins = data;
499 	}
500 	sdev_mntinfo = data;
501 }
502 
503 struct sdev_data *
504 sdev_find_mntinfo(char *mntpt)
505 {
506 	struct sdev_data *mntinfo;
507 
508 	mutex_enter(&sdev_lock);
509 	mntinfo = sdev_mntinfo;
510 	while (mntinfo) {
511 		if (strcmp(mntpt, mntinfo->sdev_root->sdev_name) == 0) {
512 			SDEVTOV(mntinfo->sdev_root)->v_count++;
513 			break;
514 		}
515 		mntinfo = mntinfo->sdev_next;
516 	}
517 	mutex_exit(&sdev_lock);
518 	return (mntinfo);
519 }
520 
521 void
522 sdev_mntinfo_rele(struct sdev_data *mntinfo)
523 {
524 	mutex_enter(&sdev_lock);
525 	SDEVTOV(mntinfo->sdev_root)->v_count--;
526 	mutex_exit(&sdev_lock);
527 }
528