1 /*
2  * Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/kernel.h>
33 #include <sys/systm.h>
34 #include <sys/malloc.h>
35 #include <sys/mount.h>
36 #include <sys/cred.h>
37 #include <sys/vfs.h>
38 #include <sys/priv.h>
39 #include <sys/libkern.h>
40 
41 #include <sys/mutex.h>
42 #include <sys/vnode.h>
43 #include <sys/taskq.h>
44 
45 #include <sys/ccompat.h>
46 
47 MALLOC_DECLARE(M_MOUNT);
48 
/*
 * Append the mount option "name" (with optional value "arg") to the
 * vfsp->mnt_opt list, allocating the list head on first use.  "flags"
 * exists for Solaris API compatibility and is ignored.  Callable with
 * or without the mount interlock held; the interlock state on return
 * matches the state on entry.  NOTE(review): the interlock is dropped
 * internally around the M_WAITOK allocations even when the caller
 * entered with it held — callers must tolerate that window.
 */
void
vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
    int flags __unused)
{
	struct vfsopt *opt;
	size_t namesize;
	int locked;

	/* Take the mount interlock unless the caller already holds it. */
	if (!(locked = mtx_owned(MNT_MTX(vfsp))))
		MNT_ILOCK(vfsp);

	if (vfsp->mnt_opt == NULL) {
		void *opts;

		/*
		 * Drop the interlock across the sleeping allocation, then
		 * re-check under the lock: another thread may have installed
		 * the list head while we slept.
		 */
		MNT_IUNLOCK(vfsp);
		opts = malloc(sizeof (*vfsp->mnt_opt), M_MOUNT, M_WAITOK);
		MNT_ILOCK(vfsp);
		if (vfsp->mnt_opt == NULL) {
			vfsp->mnt_opt = opts;
			TAILQ_INIT(vfsp->mnt_opt);
		} else {
			free(opts, M_MOUNT);
		}
	}

	/* Build the new option entry unlocked; allocations may sleep. */
	MNT_IUNLOCK(vfsp);

	opt = malloc(sizeof (*opt), M_MOUNT, M_WAITOK);
	namesize = strlen(name) + 1;
	opt->name = malloc(namesize, M_MOUNT, M_WAITOK);
	strlcpy(opt->name, name, namesize);
	opt->pos = -1;
	opt->seen = 1;
	if (arg == NULL) {
		/* Boolean option: present but valueless. */
		opt->value = NULL;
		opt->len = 0;
	} else {
		/* Stored length includes the terminating NUL. */
		opt->len = strlen(arg) + 1;
		opt->value = malloc(opt->len, M_MOUNT, M_WAITOK);
		memcpy(opt->value, arg, opt->len);
	}

	/* Re-take the interlock to insert; restore the entry lock state. */
	MNT_ILOCK(vfsp);
	TAILQ_INSERT_TAIL(vfsp->mnt_opt, opt, link);
	if (!locked)
		MNT_IUNLOCK(vfsp);
}
96 
97 void
98 vfs_clearmntopt(vfs_t *vfsp, const char *name)
99 {
100 	int locked;
101 
102 	if (!(locked = mtx_owned(MNT_MTX(vfsp))))
103 		MNT_ILOCK(vfsp);
104 	vfs_deleteopt(vfsp->mnt_opt, name);
105 	if (!locked)
106 		MNT_IUNLOCK(vfsp);
107 }
108 
109 int
110 vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
111 {
112 	struct vfsoptlist *opts = vfsp->mnt_optnew;
113 	int error;
114 
115 	if (opts == NULL)
116 		return (0);
117 	error = vfs_getopt(opts, opt, (void **)argp, NULL);
118 	return (error != 0 ? 0 : 1);
119 }
120 
/*
 * Mount a snapshot filesystem of type "fstype" (backing device/dataset
 * "fspec") on top of the directory vnode *vpp at path "fspath".
 *
 * On entry *vpp must be exclusively locked and referenced; the caller's
 * reference is consumed.  On success, *vpp is replaced with the
 * exclusively locked root vnode of the new mount and 0 is returned.
 * On failure the covered vnode is released and an errno is returned.
 */
int
mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
    char *fspec, int fsflags)
{
	struct vfsconf *vfsp;
	struct mount *mp;
	vnode_t *vp, *mvp;
	struct ucred *pcr, *tcr;
	int error;

	ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot");

	/* Take over the caller's reference to the covered vnode. */
	vp = *vpp;
	*vpp = NULL;
	error = 0;

	/*
	 * Be ultra-paranoid about making sure the type and fspath
	 * variables will fit in our mp buffers, including the
	 * terminating NUL.
	 */
	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
		error = ENAMETOOLONG;
	/* Resolve the filesystem type, loading its module if needed. */
	if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL)
		error = ENODEV;
	if (error == 0 && vp->v_type != VDIR)
		error = ENOTDIR;
	/*
	 * We need vnode lock to protect v_mountedhere and vnode interlock
	 * to protect v_iflag.
	 */
	if (error == 0) {
		VI_LOCK(vp);
		/* Claim the vnode for mounting unless already claimed/covered. */
		if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
			vp->v_iflag |= VI_MOUNT;
		else
			error = EBUSY;
		VI_UNLOCK(vp);
	}
	if (error != 0) {
		vput(vp);
		return (error);
	}
	/* Open a seqc write section for the duration of the mount attempt. */
	vn_seqc_write_begin(vp);
	VOP_UNLOCK1(vp);

	/*
	 * Allocate and initialize the filesystem.
	 * We don't want regular user that triggered snapshot mount to be able
	 * to unmount it, so pass credentials of the parent mount.
	 */
	mp = vfs_mount_alloc(vp, vfsp, fspath, vp->v_mount->mnt_cred);

	/*
	 * Build the option list in mnt_opt via vfs_setmntopt(), then move
	 * it to mnt_optnew, which is where VFS_MOUNT() reads options from.
	 */
	mp->mnt_optnew = NULL;
	vfs_setmntopt(mp, "from", fspec, 0);
	mp->mnt_optnew = mp->mnt_opt;
	mp->mnt_opt = NULL;

	/*
	 * Set the mount level flags.
	 */
	mp->mnt_flag = fsflags & MNT_UPDATEMASK;
	/*
	 * Snapshots are always read-only.
	 */
	mp->mnt_flag |= MNT_RDONLY;
	/*
	 * We don't want snapshots to allow access to vulnerable setuid
	 * programs, so we turn off setuid when mounting snapshots.
	 */
	mp->mnt_flag |= MNT_NOSUID;
	/*
	 * We don't want snapshots to be visible in regular
	 * mount(8) and df(1) output.
	 */
	mp->mnt_flag |= MNT_IGNORE;

	/*
	 * XXX: This is evil, but we can't mount a snapshot as a regular user.
	 * XXX: Is it safe when snapshot is mounted from within a jail?
	 * Temporarily swap in kernel credentials for the VFS_MOUNT() call,
	 * then restore the originals.
	 */
	tcr = td->td_ucred;
	pcr = td->td_proc->p_ucred;
	td->td_ucred = kcred;
	td->td_proc->p_ucred = kcred;
	error = VFS_MOUNT(mp);
	td->td_ucred = tcr;
	td->td_proc->p_ucred = pcr;

	if (error != 0) {
		/*
		 * Clear VI_MOUNT and decrement the use count "atomically",
		 * under the vnode lock.  This is not strictly required,
		 * but makes it easier to reason about the life-cycle and
		 * ownership of the covered vnode.
		 */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VI_LOCK(vp);
		vp->v_iflag &= ~VI_MOUNT;
		VI_UNLOCK(vp);
		vn_seqc_write_end(vp);
		vput(vp);
		vfs_unbusy(mp);
		vfs_freeopts(mp->mnt_optnew);
		/* Detach the covered vnode before destroying the mount. */
		mp->mnt_vnodecovered = NULL;
		vfs_mount_destroy(mp);
		return (error);
	}

	/* VFS_MOUNT() succeeded: discard stale options, commit the new set. */
	if (mp->mnt_opt != NULL)
		vfs_freeopts(mp->mnt_opt);
	mp->mnt_opt = mp->mnt_optnew;
	/* Best effort: populate mnt_stat; failure here is non-fatal. */
	(void) VFS_STATFS(mp, &mp->mnt_stat);

	/*
	 * Prevent external consumers of mount options from reading
	 * mnt_optnew.
	 */
	mp->mnt_optnew = NULL;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef FREEBSD_NAMECACHE
	/* Invalidate name-cache entries below the now-covered vnode. */
	cache_purge(vp);
#endif
	VI_LOCK(vp);
	/* Publish the mount: clear the claim flag, set v_mountedhere. */
	vp->v_iflag &= ~VI_MOUNT;
#ifdef VIRF_MOUNTPOINT
	vn_irflag_set_locked(vp, VIRF_MOUNTPOINT);
#endif
	vp->v_mountedhere = mp;
	VI_UNLOCK(vp);
	/* Put the new filesystem on the mount list. */
	mtx_lock(&mountlist_mtx);
	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
	mtx_unlock(&mountlist_mtx);
	/* Notify listeners (e.g. mount(8) -w) of the new mount. */
	vfs_event_signal(NULL, VQ_MOUNT, 0);
	if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp))
		panic("mount: lost mount");
	/* Close the seqc write section and release the covered vnode's lock. */
	vn_seqc_write_end(vp);
	VOP_UNLOCK1(vp);
#if __FreeBSD_version >= 1300048
	vfs_op_exit(mp);
#endif
	vfs_unbusy(mp);
	/* Hand the locked root vnode of the new mount back to the caller. */
	*vpp = mvp;
	return (0);
}
268 
269 /*
270  * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
271  * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
272  * the file system as a result of releasing the vnode. Note, file systems
273  * already have to handle the race where the vnode is incremented before the
274  * inactive routine is called and does its locking.
275  *
276  * Warning: Excessive use of this routine can lead to performance problems.
277  * This is because taskqs throttle back allocation if too many are created.
278  */
279 void
280 vn_rele_async(vnode_t *vp, taskq_t *taskq)
281 {
282 	VERIFY3U(vp->v_usecount, >, 0);
283 	if (refcount_release_if_not_last(&vp->v_usecount)) {
284 #if __FreeBSD_version < 1300045
285 		vdrop(vp);
286 #endif
287 		return;
288 	}
289 	VERIFY3U(taskq_dispatch((taskq_t *)taskq,
290 	    (task_func_t *)vrele, vp, TQ_SLEEP), !=, 0);
291 }
292