1 /* 2 * Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/kernel.h> 33 #include <sys/systm.h> 34 #include <sys/malloc.h> 35 #include <sys/mount.h> 36 #include <sys/cred.h> 37 #include <sys/vfs.h> 38 #include <sys/priv.h> 39 #include <sys/libkern.h> 40 41 #include <sys/mutex.h> 42 #include <sys/vnode.h> 43 #include <sys/taskq.h> 44 45 #include <sys/ccompat.h> 46 47 MALLOC_DECLARE(M_MOUNT); 48 49 void 50 vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg, 51 int flags __unused) 52 { 53 struct vfsopt *opt; 54 size_t namesize; 55 int locked; 56 57 if (!(locked = mtx_owned(MNT_MTX(vfsp)))) 58 MNT_ILOCK(vfsp); 59 60 if (vfsp->mnt_opt == NULL) { 61 void *opts; 62 63 MNT_IUNLOCK(vfsp); 64 opts = malloc(sizeof (*vfsp->mnt_opt), M_MOUNT, M_WAITOK); 65 MNT_ILOCK(vfsp); 66 if (vfsp->mnt_opt == NULL) { 67 vfsp->mnt_opt = opts; 68 TAILQ_INIT(vfsp->mnt_opt); 69 } else { 70 free(opts, M_MOUNT); 71 } 72 } 73 74 MNT_IUNLOCK(vfsp); 75 76 opt = malloc(sizeof (*opt), M_MOUNT, M_WAITOK); 77 namesize = strlen(name) + 1; 78 opt->name = malloc(namesize, M_MOUNT, M_WAITOK); 79 strlcpy(opt->name, name, namesize); 80 opt->pos = -1; 81 opt->seen = 1; 82 if (arg == NULL) { 83 opt->value = NULL; 84 opt->len = 0; 85 } else { 86 opt->len = strlen(arg) + 1; 87 opt->value = malloc(opt->len, M_MOUNT, M_WAITOK); 88 bcopy(arg, opt->value, opt->len); 89 } 90 91 MNT_ILOCK(vfsp); 92 TAILQ_INSERT_TAIL(vfsp->mnt_opt, opt, link); 93 if (!locked) 94 MNT_IUNLOCK(vfsp); 95 } 96 97 void 98 vfs_clearmntopt(vfs_t *vfsp, const char *name) 99 { 100 int locked; 101 102 if (!(locked = mtx_owned(MNT_MTX(vfsp)))) 103 MNT_ILOCK(vfsp); 104 vfs_deleteopt(vfsp->mnt_opt, name); 105 if (!locked) 106 MNT_IUNLOCK(vfsp); 107 } 108 109 int 110 vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp) 111 { 112 struct vfsoptlist *opts = vfsp->mnt_optnew; 113 int error; 114 115 if (opts == NULL) 116 return (0); 117 error = vfs_getopt(opts, opt, (void **)argp, NULL); 118 return (error != 0 ? 0 : 1); 119 } 120 121 int 122 mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, 123 char *fspec, int fsflags) 124 { 125 struct vfsconf *vfsp; 126 struct mount *mp; 127 vnode_t *vp, *mvp; 128 struct ucred *cr; 129 int error; 130 131 ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot"); 132 133 vp = *vpp; 134 *vpp = NULL; 135 error = 0; 136 137 /* 138 * Be ultra-paranoid about making sure the type and fspath 139 * variables will fit in our mp buffers, including the 140 * terminating NUL. 141 */ 142 if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) 143 error = ENAMETOOLONG; 144 if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL) 145 error = ENODEV; 146 if (error == 0 && vp->v_type != VDIR) 147 error = ENOTDIR; 148 /* 149 * We need vnode lock to protect v_mountedhere and vnode interlock 150 * to protect v_iflag. 151 */ 152 if (error == 0) { 153 VI_LOCK(vp); 154 if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL) 155 vp->v_iflag |= VI_MOUNT; 156 else 157 error = EBUSY; 158 VI_UNLOCK(vp); 159 } 160 if (error != 0) { 161 vput(vp); 162 return (error); 163 } 164 vn_seqc_write_begin(vp); 165 VOP_UNLOCK1(vp); 166 167 /* 168 * Allocate and initialize the filesystem. 169 * We don't want regular user that triggered snapshot mount to be able 170 * to unmount it, so pass credentials of the parent mount. 171 */ 172 mp = vfs_mount_alloc(vp, vfsp, fspath, vp->v_mount->mnt_cred); 173 174 mp->mnt_optnew = NULL; 175 vfs_setmntopt(mp, "from", fspec, 0); 176 mp->mnt_optnew = mp->mnt_opt; 177 mp->mnt_opt = NULL; 178 179 /* 180 * Set the mount level flags. 181 */ 182 mp->mnt_flag = fsflags & MNT_UPDATEMASK; 183 /* 184 * Snapshots are always read-only. 185 */ 186 mp->mnt_flag |= MNT_RDONLY; 187 /* 188 * We don't want snapshots to allow access to vulnerable setuid 189 * programs, so we turn off setuid when mounting snapshots. 190 */ 191 mp->mnt_flag |= MNT_NOSUID; 192 /* 193 * We don't want snapshots to be visible in regular 194 * mount(8) and df(1) output. 195 */ 196 mp->mnt_flag |= MNT_IGNORE; 197 /* 198 * XXX: This is evil, but we can't mount a snapshot as a regular user. 199 * XXX: Is is safe when snapshot is mounted from within a jail? 200 */ 201 cr = td->td_ucred; 202 td->td_ucred = kcred; 203 error = VFS_MOUNT(mp); 204 td->td_ucred = cr; 205 206 if (error != 0) { 207 /* 208 * Clear VI_MOUNT and decrement the use count "atomically", 209 * under the vnode lock. This is not strictly required, 210 * but makes it easier to reason about the life-cycle and 211 * ownership of the covered vnode. 212 */ 213 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 214 VI_LOCK(vp); 215 vp->v_iflag &= ~VI_MOUNT; 216 VI_UNLOCK(vp); 217 vn_seqc_write_end(vp); 218 vput(vp); 219 vfs_unbusy(mp); 220 vfs_freeopts(mp->mnt_optnew); 221 mp->mnt_vnodecovered = NULL; 222 vfs_mount_destroy(mp); 223 return (error); 224 } 225 226 if (mp->mnt_opt != NULL) 227 vfs_freeopts(mp->mnt_opt); 228 mp->mnt_opt = mp->mnt_optnew; 229 (void) VFS_STATFS(mp, &mp->mnt_stat); 230 231 /* 232 * Prevent external consumers of mount options from reading 233 * mnt_optnew. 234 */ 235 mp->mnt_optnew = NULL; 236 237 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 238 #ifdef FREEBSD_NAMECACHE 239 cache_purge(vp); 240 #endif 241 VI_LOCK(vp); 242 vp->v_iflag &= ~VI_MOUNT; 243 VI_UNLOCK(vp); 244 245 vp->v_mountedhere = mp; 246 /* Put the new filesystem on the mount list. */ 247 mtx_lock(&mountlist_mtx); 248 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 249 mtx_unlock(&mountlist_mtx); 250 vfs_event_signal(NULL, VQ_MOUNT, 0); 251 if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp)) 252 panic("mount: lost mount"); 253 vn_seqc_write_end(vp); 254 VOP_UNLOCK1(vp); 255 #if __FreeBSD_version >= 1300048 256 vfs_op_exit(mp); 257 #endif 258 vfs_unbusy(mp); 259 *vpp = mvp; 260 return (0); 261 } 262 263 /* 264 * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it 265 * asynchronously using a taskq. This can avoid deadlocks caused by re-entering 266 * the file system as a result of releasing the vnode. Note, file systems 267 * already have to handle the race where the vnode is incremented before the 268 * inactive routine is called and does its locking. 269 * 270 * Warning: Excessive use of this routine can lead to performance problems. 271 * This is because taskqs throttle back allocation if too many are created. 272 */ 273 void 274 vn_rele_async(vnode_t *vp, taskq_t *taskq) 275 { 276 VERIFY(vp->v_count > 0); 277 if (refcount_release_if_not_last(&vp->v_usecount)) { 278 #if __FreeBSD_version < 1300045 279 vdrop(vp); 280 #endif 281 return; 282 } 283 VERIFY(taskq_dispatch((taskq_t *)taskq, 284 (task_func_t *)vrele, vp, TQ_SLEEP) != 0); 285 } 286