/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.74 2008/11/13 02:18:43 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/nlookup.h>
#include <sys/fcntl.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <sys/buf2.h>
#include "hammer.h"

/*
 * NOTE!  Global statistics may not be MPSAFE so HAMMER never uses them
 * in conditionals.
 */
int hammer_supported_version = HAMMER_VOL_VERSION_DEFAULT;
int hammer_debug_io;
int hammer_debug_general;
int hammer_debug_debug = 1;		/* medium-error panics */
int hammer_debug_inode;
int hammer_debug_locks;
int hammer_debug_btree;
int hammer_debug_tid;
int hammer_debug_recover;		/* -1 will disable, +1 will force */
int hammer_debug_recover_faults;
int hammer_debug_critical;		/* non-zero enter debugger on error */
int hammer_cluster_enable = 1;		/* enable read clustering by default */
int hammer_count_fsyncs;
int hammer_count_inodes;
int hammer_count_iqueued;
int hammer_count_reclaiming;
int hammer_count_records;
int hammer_count_record_datas;
int hammer_count_volumes;
int hammer_count_buffers;
int hammer_count_nodes;
int64_t hammer_count_extra_space_used;
int64_t hammer_stats_btree_lookups;
int64_t hammer_stats_btree_searches;
int64_t hammer_stats_btree_inserts;
int64_t hammer_stats_btree_deletes;
int64_t hammer_stats_btree_elements;
int64_t hammer_stats_btree_splits;
int64_t hammer_stats_btree_iterations;
int64_t hammer_stats_btree_root_iterations;
int64_t hammer_stats_record_iterations;

int64_t hammer_stats_file_read;
int64_t hammer_stats_file_write;
int64_t hammer_stats_file_iopsr;
int64_t hammer_stats_file_iopsw;
int64_t hammer_stats_disk_read;
int64_t hammer_stats_disk_write;
int64_t hammer_stats_inode_flushes;
int64_t hammer_stats_commits;
int64_t hammer_stats_undo;
int64_t hammer_stats_redo;

int hammer_count_dirtybufspace;		/* global */
int hammer_count_refedbufs;		/* global */
int hammer_count_reservations;
int hammer_count_io_running_read;
int hammer_count_io_running_write;
int hammer_count_io_locked;
int hammer_limit_dirtybufspace;		/* per-mount */
int hammer_limit_recs;			/* as a whole XXX */
int hammer_limit_inode_recs = 1024;	/* per inode */
int hammer_limit_reclaim = HAMMER_RECLAIM_WAIT;
int hammer_limit_redo = 4096 * 1024;	/* per inode */
int hammer_autoflush = 2000;		/* auto flush */
int hammer_bio_count;
int hammer_verify_zone;
int hammer_verify_data = 1;
int hammer_write_mode;
int hammer_yield_check = 16;
int hammer_fsync_mode = 3;
int64_t hammer_contention_count;
int64_t hammer_zone_limit;

SYSCTL_NODE(_vfs, OID_AUTO, hammer, CTLFLAG_RW, 0, "HAMMER filesystem");
SYSCTL_INT(_vfs_hammer, OID_AUTO, supported_version, CTLFLAG_RD,
	   &hammer_supported_version, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_general, CTLFLAG_RW,
	   &hammer_debug_general, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_io, CTLFLAG_RW,
	   &hammer_debug_io, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_debug, CTLFLAG_RW,
	   &hammer_debug_debug, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_inode, CTLFLAG_RW,
	   &hammer_debug_inode, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_locks, CTLFLAG_RW,
	   &hammer_debug_locks, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_btree, CTLFLAG_RW,
	   &hammer_debug_btree, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_tid, CTLFLAG_RW,
	   &hammer_debug_tid, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_recover, CTLFLAG_RW,
	   &hammer_debug_recover, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_recover_faults, CTLFLAG_RW,
	   &hammer_debug_recover_faults, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_critical, CTLFLAG_RW,
	   &hammer_debug_critical, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, cluster_enable, CTLFLAG_RW,
	   &hammer_cluster_enable, 0, "");

SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_dirtybufspace, CTLFLAG_RW,
	   &hammer_limit_dirtybufspace, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_recs, CTLFLAG_RW,
	   &hammer_limit_recs, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_inode_recs, CTLFLAG_RW,
	   &hammer_limit_inode_recs, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_reclaim, CTLFLAG_RW,
	   &hammer_limit_reclaim, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_redo, CTLFLAG_RW,
	   &hammer_limit_redo, 0, "");

SYSCTL_INT(_vfs_hammer, OID_AUTO, count_fsyncs, CTLFLAG_RD,
	   &hammer_count_fsyncs, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, count_inodes, CTLFLAG_RD,
	   &hammer_count_inodes, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, count_iqueued, CTLFLAG_RD,
	   &hammer_count_iqueued, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, count_reclaiming, CTLFLAG_RD,
	   &hammer_count_reclaiming, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, count_records, CTLFLAG_RD,
	   &hammer_count_records, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, count_record_datas, CTLFLAG_RD,
	   &hammer_count_record_datas, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, count_volumes, CTLFLAG_RD,
	   &hammer_count_volumes, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, count_buffers, CTLFLAG_RD,
	   &hammer_count_buffers, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, count_nodes, CTLFLAG_RD,
	   &hammer_count_nodes, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, count_extra_space_used, CTLFLAG_RD,
	   &hammer_count_extra_space_used, 0, "");

SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_btree_searches, CTLFLAG_RD,
	   &hammer_stats_btree_searches, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_btree_lookups, CTLFLAG_RD,
	   &hammer_stats_btree_lookups, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_btree_inserts, CTLFLAG_RD,
	   &hammer_stats_btree_inserts, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_btree_deletes, CTLFLAG_RD,
	   &hammer_stats_btree_deletes, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_btree_elements, CTLFLAG_RD,
	   &hammer_stats_btree_elements, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_btree_splits, CTLFLAG_RD,
	   &hammer_stats_btree_splits, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_btree_iterations, CTLFLAG_RD,
	   &hammer_stats_btree_iterations, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_btree_root_iterations, CTLFLAG_RD,
	   &hammer_stats_btree_root_iterations, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_record_iterations, CTLFLAG_RD,
	   &hammer_stats_record_iterations, 0, "");

SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_file_read, CTLFLAG_RD,
	   &hammer_stats_file_read, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_file_write, CTLFLAG_RD,
	   &hammer_stats_file_write, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_file_iopsr, CTLFLAG_RD,
	   &hammer_stats_file_iopsr, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_file_iopsw, CTLFLAG_RD,
	   &hammer_stats_file_iopsw, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_disk_read, CTLFLAG_RD,
	   &hammer_stats_disk_read, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_disk_write, CTLFLAG_RD,
	   &hammer_stats_disk_write, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_inode_flushes, CTLFLAG_RD,
	   &hammer_stats_inode_flushes, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_commits, CTLFLAG_RD,
	   &hammer_stats_commits, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_undo, CTLFLAG_RD,
	   &hammer_stats_undo, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_redo, CTLFLAG_RD,
	   &hammer_stats_redo, 0, "");

SYSCTL_INT(_vfs_hammer, OID_AUTO, count_dirtybufspace, CTLFLAG_RD,
	   &hammer_count_dirtybufspace, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, count_refedbufs, CTLFLAG_RD,
	   &hammer_count_refedbufs, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, count_reservations, CTLFLAG_RD,
	   &hammer_count_reservations, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, count_io_running_read, CTLFLAG_RD,
	   &hammer_count_io_running_read, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, count_io_locked, CTLFLAG_RD,
	   &hammer_count_io_locked, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, count_io_running_write, CTLFLAG_RD,
	   &hammer_count_io_running_write, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, zone_limit, CTLFLAG_RW,
	   &hammer_zone_limit, 0, "");
SYSCTL_QUAD(_vfs_hammer, OID_AUTO, contention_count, CTLFLAG_RW,
	   &hammer_contention_count, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, autoflush, CTLFLAG_RW,
	   &hammer_autoflush, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, verify_zone, CTLFLAG_RW,
	   &hammer_verify_zone, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, verify_data, CTLFLAG_RW,
	   &hammer_verify_data, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, write_mode, CTLFLAG_RW,
	   &hammer_write_mode, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, yield_check, CTLFLAG_RW,
	   &hammer_yield_check, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, fsync_mode, CTLFLAG_RW,
	   &hammer_fsync_mode, 0, "");
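
/*
 * Illustrative sketch only (not compiled into the kernel build): the
 * statistics above are exported under the vfs.hammer sysctl tree, so
 * userland can sample them with sysctlbyname(3).  The helper name below
 * is made up for the example; the sysctl name mirrors the declaration
 * above.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdint.h>
#include <stdio.h>

static void
print_hammer_commit_count(void)
{
	int64_t commits;
	size_t len = sizeof(commits);

	/* vfs.hammer.stats_commits is the SYSCTL_QUAD declared above */
	if (sysctlbyname("vfs.hammer.stats_commits", &commits, &len,
			 NULL, 0) == 0)
		printf("HAMMER commits: %jd\n", (intmax_t)commits);
}
#endif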

KTR_INFO_MASTER(hammer);

/*
 * VFS ABI
 */
static void	hammer_free_hmp(struct mount *mp);

static int	hammer_vfs_mount(struct mount *mp, char *path, caddr_t data,
				struct ucred *cred);
static int	hammer_vfs_unmount(struct mount *mp, int mntflags);
static int	hammer_vfs_root(struct mount *mp, struct vnode **vpp);
static int	hammer_vfs_statfs(struct mount *mp, struct statfs *sbp,
				struct ucred *cred);
static int	hammer_vfs_statvfs(struct mount *mp, struct statvfs *sbp,
				struct ucred *cred);
static int	hammer_vfs_sync(struct mount *mp, int waitfor);
static int	hammer_vfs_vget(struct mount *mp, struct vnode *dvp,
				ino_t ino, struct vnode **vpp);
static int	hammer_vfs_init(struct vfsconf *conf);
static int	hammer_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
				struct fid *fhp, struct vnode **vpp);
static int	hammer_vfs_vptofh(struct vnode *vp, struct fid *fhp);
static int	hammer_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
				int *exflagsp, struct ucred **credanonp);


static struct vfsops hammer_vfsops = {
	.vfs_mount	= hammer_vfs_mount,
	.vfs_unmount	= hammer_vfs_unmount,
	.vfs_root	= hammer_vfs_root,
	.vfs_statfs	= hammer_vfs_statfs,
	.vfs_statvfs	= hammer_vfs_statvfs,
	.vfs_sync	= hammer_vfs_sync,
	.vfs_vget	= hammer_vfs_vget,
	.vfs_init	= hammer_vfs_init,
	.vfs_vptofh	= hammer_vfs_vptofh,
	.vfs_fhtovp	= hammer_vfs_fhtovp,
	.vfs_checkexp	= hammer_vfs_checkexp
};

MALLOC_DEFINE(M_HAMMER, "HAMMER-mount", "");

VFS_SET(hammer_vfsops, hammer, 0);
MODULE_VERSION(hammer, 1);

static int
hammer_vfs_init(struct vfsconf *conf)
{
	int n;

	if (hammer_limit_recs == 0) {
		hammer_limit_recs = nbuf * 25;
		n = kmalloc_limit(M_HAMMER) / 512;
		if (hammer_limit_recs > n)
			hammer_limit_recs = n;
	}
	if (hammer_limit_dirtybufspace == 0) {
		hammer_limit_dirtybufspace = hidirtybufspace / 2;
		if (hammer_limit_dirtybufspace < 100)
			hammer_limit_dirtybufspace = 100;
	}
	return(0);
}
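
/*
 * Worked example for the root-mount path handled in hammer_vfs_mount()
 * below (the device names are hypothetical): with no hammer_mount_info
 * to copyin(), the volume list is taken from mp->mnt_stat.f_mntfromname,
 * with ':' separating volumes and non-absolute names prefixed by
 * "/dev/".  Thus
 *
 *	"serno/L0A1B2C3.s1d:serno/D4E5F6A7.s1d"
 *
 * parses as info.nvolumes = 2 with the volume paths
 * "/dev/serno/L0A1B2C3.s1d" and "/dev/serno/D4E5F6A7.s1d".
 */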

static int
hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
		 struct ucred *cred)
{
	struct hammer_mount_info info;
	hammer_mount_t hmp;
	hammer_volume_t rootvol;
	struct vnode *rootvp;
	struct vnode *devvp = NULL;
	const char *upath;	/* volume name in userspace */
	char *path;		/* volume name in system space */
	int error;
	int i;
	int master_id;
	char *next_volume_ptr = NULL;

	/*
	 * Accept hammer_mount_info.  mntpt is NULL for root mounts at boot.
	 */
	if (mntpt == NULL) {
		bzero(&info, sizeof(info));
		info.asof = 0;
		info.hflags = 0;
		info.nvolumes = 1;

		next_volume_ptr = mp->mnt_stat.f_mntfromname;

		/* Count number of volumes separated by ':' */
		for (char *p = next_volume_ptr; *p != '\0'; ++p) {
			if (*p == ':') {
				++info.nvolumes;
			}
		}

		mp->mnt_flag &= ~MNT_RDONLY; /* mount R/W */
	} else {
		if ((error = copyin(data, &info, sizeof(info))) != 0)
			return (error);
	}

	/*
	 * updating or new mount
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		hmp = (void *)mp->mnt_data;
		KKASSERT(hmp != NULL);
	} else {
		if (info.nvolumes <= 0 || info.nvolumes >= 32768)
			return (EINVAL);
		hmp = NULL;
	}

	/*
	 * master-id validation.  The master id may not be changed by a
	 * mount update.
	 */
	if (info.hflags & HMNT_MASTERID) {
		if (hmp && hmp->master_id != info.master_id) {
			kprintf("hammer: cannot change master id "
				"with mount update\n");
			return(EINVAL);
		}
		master_id = info.master_id;
		if (master_id < -1 || master_id >= HAMMER_MAX_MASTERS)
			return (EINVAL);
	} else {
		if (hmp)
			master_id = hmp->master_id;
		else
			master_id = 0;
	}

	/*
	 * Internal mount data structure
	 */
	if (hmp == NULL) {
		hmp = kmalloc(sizeof(*hmp), M_HAMMER, M_WAITOK | M_ZERO);
		mp->mnt_data = (qaddr_t)hmp;
		hmp->mp = mp;
		/*TAILQ_INIT(&hmp->recycle_list);*/

		/*
		 * Make sure kmalloc type limits are set appropriately.
		 *
		 * Our inode kmalloc group is sized based on maxvnodes
		 * (controlled by the system, not us).
		 */
		kmalloc_create(&hmp->m_misc, "HAMMER-others");
		kmalloc_create(&hmp->m_inodes, "HAMMER-inodes");

		kmalloc_raise_limit(hmp->m_inodes, 0);	/* unlimited */

		hmp->root_btree_beg.localization = 0x00000000U;
		hmp->root_btree_beg.obj_id = -0x8000000000000000LL;
		hmp->root_btree_beg.key = -0x8000000000000000LL;
		hmp->root_btree_beg.create_tid = 1;
		hmp->root_btree_beg.delete_tid = 1;
		hmp->root_btree_beg.rec_type = 0;
		hmp->root_btree_beg.obj_type = 0;

		hmp->root_btree_end.localization = 0xFFFFFFFFU;
		hmp->root_btree_end.obj_id = 0x7FFFFFFFFFFFFFFFLL;
		hmp->root_btree_end.key = 0x7FFFFFFFFFFFFFFFLL;
		hmp->root_btree_end.create_tid = 0xFFFFFFFFFFFFFFFFULL;
		hmp->root_btree_end.delete_tid = 0;	/* special case */
		hmp->root_btree_end.rec_type = 0xFFFFU;
		hmp->root_btree_end.obj_type = 0;

		hmp->krate.freq = 1;	/* maximum reporting rate (hz) */
		hmp->krate.count = -16;	/* initial burst */

		hmp->sync_lock.refs = 1;
		hmp->free_lock.refs = 1;
		hmp->undo_lock.refs = 1;
		hmp->blkmap_lock.refs = 1;
		hmp->snapshot_lock.refs = 1;
		hmp->volume_lock.refs = 1;

		TAILQ_INIT(&hmp->delay_list);
		TAILQ_INIT(&hmp->flush_group_list);
		TAILQ_INIT(&hmp->objid_cache_list);
		TAILQ_INIT(&hmp->undo_lru_list);
		TAILQ_INIT(&hmp->reclaim_list);
	}
	hmp->hflags &= ~HMNT_USERFLAGS;
	hmp->hflags |= info.hflags & HMNT_USERFLAGS;

	hmp->master_id = master_id;

	if (info.asof) {
		mp->mnt_flag |= MNT_RDONLY;
		hmp->asof = info.asof;
	} else {
		hmp->asof = HAMMER_MAX_TID;
	}

	hmp->volume_to_remove = -1;

	/*
	 * Re-open read-write if originally read-only, or vice versa.
	 *
	 * When going from read-only to read-write execute the stage2
	 * recovery if it has not already been run.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		error = 0;
		if (hmp->ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			kprintf("HAMMER read-only -> read-write\n");
			hmp->ronly = 0;
			RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
				hammer_adjust_volume_mode, NULL);
			rootvol = hammer_get_root_volume(hmp, &error);
			if (rootvol) {
				hammer_recover_flush_buffers(hmp, rootvol, 1);
				error = hammer_recover_stage2(hmp, rootvol);
				bcopy(rootvol->ondisk->vol0_blockmap,
				      hmp->blockmap,
				      sizeof(hmp->blockmap));
				hammer_rel_volume(rootvol, 0);
			}
			RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
				hammer_reload_inode, NULL);
			/* kernel clears MNT_RDONLY */
		} else if (hmp->ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			kprintf("HAMMER read-write -> read-only\n");
			hmp->ronly = 1;	/* messy */
			RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
				hammer_reload_inode, NULL);
			hmp->ronly = 0;
			hammer_flusher_sync(hmp);
			hammer_flusher_sync(hmp);
			hammer_flusher_sync(hmp);
			hmp->ronly = 1;
			RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
				hammer_adjust_volume_mode, NULL);
		}
		return(error);
	}

	RB_INIT(&hmp->rb_vols_root);
	RB_INIT(&hmp->rb_inos_root);
	RB_INIT(&hmp->rb_redo_root);
	RB_INIT(&hmp->rb_nods_root);
	RB_INIT(&hmp->rb_undo_root);
	RB_INIT(&hmp->rb_resv_root);
	RB_INIT(&hmp->rb_bufs_root);
	RB_INIT(&hmp->rb_pfsm_root);

	hmp->ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);

	TAILQ_INIT(&hmp->volu_list);
	TAILQ_INIT(&hmp->undo_list);
	TAILQ_INIT(&hmp->data_list);
	TAILQ_INIT(&hmp->meta_list);
	TAILQ_INIT(&hmp->lose_list);
	TAILQ_INIT(&hmp->iorun_list);

	/*
	 * Load volumes
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	hmp->nvolumes = -1;
	for (i = 0; i < info.nvolumes; ++i) {
		if (mntpt == NULL) {
			/*
			 * Root mount.
			 */
			KKASSERT(next_volume_ptr != NULL);
			strcpy(path, "");
			if (*next_volume_ptr != '/') {
				/* relative path */
				strcpy(path, "/dev/");
			}
			int k;
			for (k = strlen(path); k < MAXPATHLEN-1; ++k) {
				if (*next_volume_ptr == '\0') {
					break;
				} else if (*next_volume_ptr == ':') {
					++next_volume_ptr;
					break;
				} else {
					path[k] = *next_volume_ptr;
					++next_volume_ptr;
				}
			}
			path[k] = '\0';

			error = 0;
			cdev_t dev = kgetdiskbyname(path);
			error = bdevvp(dev, &devvp);
			if (error) {
				kprintf("hammer_mountroot: can't find devvp\n");
			}
		} else {
			error = copyin(&info.volumes[i], &upath,
				       sizeof(char *));
			if (error == 0)
				error = copyinstr(upath, path,
						  MAXPATHLEN, NULL);
		}
		if (error == 0)
			error = hammer_install_volume(hmp, path, devvp);
		if (error)
			break;
	}
	objcache_put(namei_oc, path);

	/*
	 * Make sure we found a root volume
	 */
	if (error == 0 && hmp->rootvol == NULL) {
		kprintf("hammer_mount: No root volume found!\n");
		error = EINVAL;
	}

	/*
	 * Check that all required volumes are available
	 */
	if (error == 0 && hammer_mountcheck_volumes(hmp)) {
		kprintf("hammer_mount: Missing volumes, cannot mount!\n");
		error = EINVAL;
	}

	if (error) {
		hammer_free_hmp(mp);
		return (error);
	}

	/*
	 * No errors, setup enough of the mount point so we can lookup the
	 * root vnode.
	 */
	mp->mnt_iosize_max = MAXPHYS;
	mp->mnt_kern_flag |= MNTK_FSMID;

	/*
	 * MPSAFE code.  Note that VOPs and VFSops which are not MPSAFE
	 * will acquire a per-mount token prior to entry and release it
	 * on return, so even if we do not specify it we no longer get
	 * the BGL regardless of how we are flagged.
	 */
	mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_GA_MPSAFE |
			     MNTK_IN_MPSAFE;

	/*
	 * note: f_iosize is used by vnode_pager_haspage() when constructing
	 * its VOP_BMAP call.
	 */
	mp->mnt_stat.f_iosize = HAMMER_BUFSIZE;
	mp->mnt_stat.f_bsize = HAMMER_BUFSIZE;

	mp->mnt_vstat.f_frsize = HAMMER_BUFSIZE;
	mp->mnt_vstat.f_bsize = HAMMER_BUFSIZE;

	mp->mnt_maxsymlinklen = 255;
	mp->mnt_flag |= MNT_LOCAL;

	vfs_add_vnodeops(mp, &hammer_vnode_vops, &mp->mnt_vn_norm_ops);
	vfs_add_vnodeops(mp, &hammer_spec_vops, &mp->mnt_vn_spec_ops);
	vfs_add_vnodeops(mp, &hammer_fifo_vops, &mp->mnt_vn_fifo_ops);

	/*
	 * The root volume's ondisk pointer is only valid if we hold a
	 * reference to it.
	 */
	rootvol = hammer_get_root_volume(hmp, &error);
	if (error)
		goto failed;

	/*
	 * Perform any necessary UNDO operations.  The recovery code does
	 * call hammer_undo_lookup() so we have to pre-cache the blockmap,
	 * and then re-copy it again after recovery is complete.
	 *
	 * If this is a read-only mount the UNDO information is retained
	 * in memory in the form of dirty buffer cache buffers, and not
	 * written back to the media.
	 */
	bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap,
	      sizeof(hmp->blockmap));

	/*
	 * Check filesystem version
	 */
	hmp->version = rootvol->ondisk->vol_version;
	if (hmp->version < HAMMER_VOL_VERSION_MIN ||
	    hmp->version > HAMMER_VOL_VERSION_MAX) {
		kprintf("HAMMER: mount unsupported fs version %d\n",
			hmp->version);
		error = ERANGE;
		goto done;
	}

	/*
	 * The undo_rec_limit limits the size of flush groups to avoid
	 * blowing out the UNDO FIFO.  This calculation is typically in
	 * the tens of thousands and matters primarily when small
	 * HAMMER filesystems are created.
	 */
	hmp->undo_rec_limit = hammer_undo_max(hmp) / 8192 + 100;
	if (hammer_debug_general & 0x0001)
		kprintf("HAMMER: undo_rec_limit %d\n", hmp->undo_rec_limit);

	/*
	 * NOTE: Recover stage1 not only handles meta-data recovery, it
	 *	 also sets hmp->undo_seqno for HAMMER VERSION 4+ filesystems.
	 */
	error = hammer_recover_stage1(hmp, rootvol);
	if (error) {
		kprintf("Failed to recover HAMMER filesystem on mount\n");
		goto done;
	}

	/*
	 * Finish setup now that we have a good root volume.
	 *
	 * The top 16 bits of fsid.val[1] is a pfs id.
	 */
	ksnprintf(mp->mnt_stat.f_mntfromname,
		  sizeof(mp->mnt_stat.f_mntfromname), "%s",
		  rootvol->ondisk->vol_name);
	mp->mnt_stat.f_fsid.val[0] =
		crc32((char *)&rootvol->ondisk->vol_fsid + 0, 8);
	mp->mnt_stat.f_fsid.val[1] =
		crc32((char *)&rootvol->ondisk->vol_fsid + 8, 8);
	mp->mnt_stat.f_fsid.val[1] &= 0x0000FFFF;

	mp->mnt_vstat.f_fsid_uuid = rootvol->ondisk->vol_fsid;
	mp->mnt_vstat.f_fsid = crc32(&mp->mnt_vstat.f_fsid_uuid,
				     sizeof(mp->mnt_vstat.f_fsid_uuid));

	/*
	 * Certain often-modified fields in the root volume are cached in
	 * the hammer_mount structure so we do not have to generate lots
	 * of little UNDO structures for them.
	 *
	 * Recopy after recovery.  This also has the side effect of
	 * setting our cached undo FIFO's first_offset, which serves to
	 * placemark the FIFO start for the NEXT flush cycle while the
	 * on-disk first_offset represents the LAST flush cycle.
	 */
	hmp->next_tid = rootvol->ondisk->vol0_next_tid;
	hmp->flush_tid1 = hmp->next_tid;
	hmp->flush_tid2 = hmp->next_tid;
	bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap,
	      sizeof(hmp->blockmap));
	hmp->copy_stat_freebigblocks = rootvol->ondisk->vol0_stat_freebigblocks;

	hammer_flusher_create(hmp);

	/*
	 * Locate the root directory using the root cluster's B-Tree as a
	 * starting point.  The root directory uses an obj_id of 1.
	 *
	 * FUTURE: Leave the root directory cached referenced but unlocked
	 * in hmp->rootvp (need to flush it on unmount).
	 */
	error = hammer_vfs_vget(mp, NULL, 1, &rootvp);
	if (error)
		goto done;
	vput(rootvp);
	/*vn_unlock(hmp->rootvp);*/
	if (hmp->ronly == 0)
		error = hammer_recover_stage2(hmp, rootvol);

done:
	hammer_rel_volume(rootvol, 0);
failed:
	/*
	 * Cleanup and return.
	 */
	if (error)
		hammer_free_hmp(mp);
	return (error);
}

static int
hammer_vfs_unmount(struct mount *mp, int mntflags)
{
#if 0
	struct hammer_mount *hmp = (void *)mp->mnt_data;
#endif
	int flags;
	int error;

	/*
	 * Clean out the vnodes
	 */
	flags = 0;
	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
	if ((error = vflush(mp, 0, flags)) != 0)
		return (error);

	/*
	 * Clean up the internal mount structure and related entities.  This
	 * may issue I/O.
	 */
	hammer_free_hmp(mp);
	return(0);
}

/*
 * Clean up the internal mount structure and disassociate it from the mount.
 * This may issue I/O.
 */
static void
hammer_free_hmp(struct mount *mp)
{
	struct hammer_mount *hmp = (void *)mp->mnt_data;
	hammer_flush_group_t flg;
	int count;
	int dummy;

	/*
	 * Flush anything dirty.  This won't even run if the
	 * filesystem errored-out.
	 */
	count = 0;
	while (hammer_flusher_haswork(hmp)) {
		hammer_flusher_sync(hmp);
		++count;
		if (count >= 5) {
			if (count == 5)
				kprintf("HAMMER: umount flushing.");
			else
				kprintf(".");
			tsleep(&dummy, 0, "hmrufl", hz);
		}
		if (count == 30) {
			kprintf("giving up\n");
			break;
		}
	}
	if (count >= 5 && count < 30)
		kprintf("\n");

	/*
	 * If the mount had a critical error we have to destroy any
	 * remaining inodes before we can finish cleaning up the flusher.
	 */
	if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR) {
		RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
			hammer_destroy_inode_callback, NULL);
	}

	/*
	 * There shouldn't be any inodes left now and any left over
	 * flush groups should now be empty.
	 */
	KKASSERT(RB_EMPTY(&hmp->rb_inos_root));
	while ((flg = TAILQ_FIRST(&hmp->flush_group_list)) != NULL) {
		TAILQ_REMOVE(&hmp->flush_group_list, flg, flush_entry);
		KKASSERT(RB_EMPTY(&flg->flush_tree));
		if (flg->refs) {
			kprintf("HAMMER: Warning, flush_group %p was "
				"not empty on umount!\n", flg);
		}
		kfree(flg, hmp->m_misc);
	}

	/*
	 * We can finally destroy the flusher
	 */
	hammer_flusher_destroy(hmp);

	/*
	 * We may have held recovered buffers due to a read-only mount.
	 * These must be discarded.
	 */
	if (hmp->ronly)
		hammer_recover_flush_buffers(hmp, NULL, -1);

	/*
	 * Unload buffers and then volumes
	 */
	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_unload_buffer, NULL);
	RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
		hammer_unload_volume, NULL);

	mp->mnt_data = NULL;
	mp->mnt_flag &= ~MNT_LOCAL;
	hmp->mp = NULL;
	hammer_destroy_objid_cache(hmp);
	kmalloc_destroy(&hmp->m_misc);
	kmalloc_destroy(&hmp->m_inodes);
	kfree(hmp, M_HAMMER);
}

/*
 * Report critical errors.  ip may be NULL.
 */
void
hammer_critical_error(hammer_mount_t hmp, hammer_inode_t ip,
		      int error, const char *msg)
{
	hmp->flags |= HAMMER_MOUNT_CRITICAL_ERROR;

	krateprintf(&hmp->krate,
		    "HAMMER(%s): Critical error inode=%jd error=%d %s\n",
		    hmp->mp->mnt_stat.f_mntfromname,
		    (intmax_t)(ip ? ip->obj_id : -1),
		    error, msg);

	if (hmp->ronly == 0) {
		hmp->ronly = 2;		/* special errored read-only mode */
		hmp->mp->mnt_flag |= MNT_RDONLY;
		kprintf("HAMMER(%s): Forcing read-only mode\n",
			hmp->mp->mnt_stat.f_mntfromname);
	}
	hmp->error = error;
	if (hammer_debug_critical)
		Debugger("Entering debugger");
}


/*
 * Obtain a vnode for the specified inode number.  An exclusively locked
 * vnode is returned.
 */
int
hammer_vfs_vget(struct mount *mp, struct vnode *dvp,
		ino_t ino, struct vnode **vpp)
{
	struct hammer_transaction trans;
	struct hammer_mount *hmp = (void *)mp->mnt_data;
	struct hammer_inode *ip;
	int error;
	u_int32_t localization;

	hammer_simple_transaction(&trans, hmp);

	/*
	 * If a directory vnode is supplied (mainly NFS) then we can acquire
	 * the PFS domain from it.  Otherwise we would only be able to vget
	 * inodes in the root PFS.
	 */
	if (dvp) {
		localization = HAMMER_DEF_LOCALIZATION +
			       VTOI(dvp)->obj_localization;
	} else {
		localization = HAMMER_DEF_LOCALIZATION;
	}

	/*
	 * Lookup the requested HAMMER inode.  The structure must be
	 * left unlocked while we manipulate the related vnode to avoid
	 * a deadlock.
	 */
	ip = hammer_get_inode(&trans, NULL, ino,
			      hmp->asof, localization,
			      0, &error);
	if (ip == NULL) {
		*vpp = NULL;
		hammer_done_transaction(&trans);
		return(error);
	}
	error = hammer_get_vnode(ip, vpp);
	hammer_rel_inode(ip, 0);
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * Return the root vnode for the filesystem.
 *
 * The root directory always has an obj_id of 1, so the root vnode
 * can simply be looked up via hammer_vfs_vget().
 */
static int
hammer_vfs_root(struct mount *mp, struct vnode **vpp)
{
#if 0
	struct hammer_mount *hmp = (void *)mp->mnt_data;
#endif
	int error;

	error = hammer_vfs_vget(mp, NULL, 1, vpp);
	return (error);
}

static int
hammer_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
{
	struct hammer_mount *hmp = (void *)mp->mnt_data;
	hammer_volume_t volume;
	hammer_volume_ondisk_t ondisk;
	int error;
	int64_t bfree;
	int64_t breserved;

	volume = hammer_get_root_volume(hmp, &error);
	if (error)
		return(error);
	ondisk = volume->ondisk;

	/*
	 * Basic stats
	 */
	_hammer_checkspace(hmp, HAMMER_CHKSPC_WRITE, &breserved);
	mp->mnt_stat.f_files = ondisk->vol0_stat_inodes;
	bfree = ondisk->vol0_stat_freebigblocks * HAMMER_LARGEBLOCK_SIZE;
	hammer_rel_volume(volume, 0);

	mp->mnt_stat.f_bfree = (bfree - breserved) / HAMMER_BUFSIZE;
	mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree;
	if (mp->mnt_stat.f_files < 0)
		mp->mnt_stat.f_files = 0;

	*sbp = mp->mnt_stat;
	return(0);
}

static int
hammer_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred)
{
	struct hammer_mount *hmp = (void *)mp->mnt_data;
	hammer_volume_t volume;
	hammer_volume_ondisk_t ondisk;
	int error;
	int64_t bfree;
	int64_t breserved;

	volume = hammer_get_root_volume(hmp, &error);
	if (error)
		return(error);
	ondisk = volume->ondisk;

	/*
	 * Basic stats
	 */
	_hammer_checkspace(hmp, HAMMER_CHKSPC_WRITE, &breserved);
	mp->mnt_vstat.f_files = ondisk->vol0_stat_inodes;
	bfree = ondisk->vol0_stat_freebigblocks * HAMMER_LARGEBLOCK_SIZE;
	hammer_rel_volume(volume, 0);

	mp->mnt_vstat.f_bfree = (bfree - breserved) / HAMMER_BUFSIZE;
	mp->mnt_vstat.f_bavail = mp->mnt_vstat.f_bfree;
	if (mp->mnt_vstat.f_files < 0)
		mp->mnt_vstat.f_files = 0;
	*sbp = mp->mnt_vstat;
	return(0);
}

/*
 * Sync the filesystem.  Currently we have to run it twice; the second
 * pass advances the undo start index to the end index, so if a crash
 * occurs no undos will be run on mount.
 *
 * We do not sync the filesystem if we are called from a panic.  If we did
 * we might end up blowing up a sync that was already in progress.
 */
static int
hammer_vfs_sync(struct mount *mp, int waitfor)
{
	struct hammer_mount *hmp = (void *)mp->mnt_data;
	int error;

	if (panicstr == NULL) {
		error = hammer_sync_hmp(hmp, waitfor);
	} else {
		error = EIO;
	}
	return (error);
}

/*
 * Convert a vnode to a file handle.
 */
static int
hammer_vfs_vptofh(struct vnode *vp, struct fid *fhp)
{
	hammer_inode_t ip;

	KKASSERT(MAXFIDSZ >= 16);
	ip = VTOI(vp);
	fhp->fid_len = offsetof(struct fid, fid_data[16]);
	fhp->fid_ext = ip->obj_localization >> 16;
	bcopy(&ip->obj_id, fhp->fid_data + 0, sizeof(ip->obj_id));
	bcopy(&ip->obj_asof, fhp->fid_data + 8, sizeof(ip->obj_asof));
	return(0);
}
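
/*
 * Sketch of the file handle produced by hammer_vfs_vptofh() above and
 * consumed by hammer_vfs_fhtovp() below (16 data bytes, with the PFS
 * id stashed in fid_ext):
 *
 *	fid_data[0..7]	ip->obj_id			(object id)
 *	fid_data[8..15]	ip->obj_asof			(as-of TID)
 *	fid_ext		ip->obj_localization >> 16	(PFS id)
 */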

/*
 * Convert a file handle back to a vnode.
 *
 * Use rootvp to enforce PFS isolation when a PFS is exported via a
 * null mount.
 */
static int
hammer_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
		  struct fid *fhp, struct vnode **vpp)
{
	struct hammer_transaction trans;
	struct hammer_inode *ip;
	struct hammer_inode_info info;
	int error;
	u_int32_t localization;

	bcopy(fhp->fid_data + 0, &info.obj_id, sizeof(info.obj_id));
	bcopy(fhp->fid_data + 8, &info.obj_asof, sizeof(info.obj_asof));
	if (rootvp)
		localization = VTOI(rootvp)->obj_localization;
	else
		localization = (u_int32_t)fhp->fid_ext << 16;

	hammer_simple_transaction(&trans, (void *)mp->mnt_data);

	/*
	 * Get/allocate the hammer_inode structure.  The structure must be
	 * unlocked while we manipulate the related vnode to avoid a
	 * deadlock.
	 */
	ip = hammer_get_inode(&trans, NULL, info.obj_id,
			      info.obj_asof, localization, 0, &error);
	if (ip) {
		error = hammer_get_vnode(ip, vpp);
		hammer_rel_inode(ip, 0);
	} else {
		*vpp = NULL;
	}
	hammer_done_transaction(&trans);
	return (error);
}

static int
hammer_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
		    int *exflagsp, struct ucred **credanonp)
{
	hammer_mount_t hmp = (void *)mp->mnt_data;
	struct netcred *np;
	int error;

	np = vfs_export_lookup(mp, &hmp->export, nam);
	if (np) {
		*exflagsp = np->netc_exflags;
		*credanonp = &np->netc_anon;
		error = 0;
	} else {
		error = EACCES;
	}
	return (error);
}

int
hammer_vfs_export(struct mount *mp, int op, const struct export_args *export)
{
	hammer_mount_t hmp = (void *)mp->mnt_data;
	int error;

	switch(op) {
	case MOUNTCTL_SET_EXPORT:
		error = vfs_export(mp, &hmp->export, export);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return(error);
}
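
/*
 * Illustrative sketch only (not compiled into the kernel build): a
 * userland mounter in the style of mount_hammer(8) fills in the
 * hammer_mount_info structure that hammer_vfs_mount() above copyin()s.
 * Only fields referenced in this file are shown, the helper name is
 * hypothetical, and error handling is omitted.
 */
#if 0
static int
mount_single_hammer_volume(const char *mntpt, const char *devpath)
{
	struct hammer_mount_info info;
	const char *volumes[1];

	bzero(&info, sizeof(info));
	volumes[0] = devpath;
	info.volumes = volumes;		/* array of device name pointers */
	info.nvolumes = 1;
	info.hflags = 0;		/* no HMNT_* user flags */
	info.asof = 0;			/* 0 = mount the current version */

	return (mount("hammer", mntpt, 0, &info));
}
#endif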