1 /*- 2 * Copyright (c) 2011, 2012 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/kernel.h> 37 #include <sys/nlookup.h> 38 #include <sys/vnode.h> 39 #include <sys/mount.h> 40 #include <sys/fcntl.h> 41 #include <sys/buf.h> 42 #include <sys/uuid.h> 43 #include <sys/vfsops.h> 44 #include <sys/sysctl.h> 45 #include <sys/socket.h> 46 47 #include "hammer2.h" 48 #include "hammer2_disk.h" 49 #include "hammer2_mount.h" 50 51 struct hammer2_sync_info { 52 int error; 53 int waitfor; 54 }; 55 56 TAILQ_HEAD(hammer2_mntlist, hammer2_mount); 57 static struct hammer2_mntlist hammer2_mntlist; 58 static struct lock hammer2_mntlk; 59 60 int hammer2_debug; 61 int hammer2_cluster_enable = 1; 62 int hammer2_hardlink_enable = 1; 63 long hammer2_iod_file_read; 64 long hammer2_iod_meta_read; 65 long hammer2_iod_indr_read; 66 long hammer2_iod_file_write; 67 long hammer2_iod_meta_write; 68 long hammer2_iod_indr_write; 69 long hammer2_iod_fmap_write; 70 long hammer2_iod_volu_write; 71 long hammer2_ioa_file_read; 72 long hammer2_ioa_meta_read; 73 long hammer2_ioa_indr_read; 74 long hammer2_ioa_fmap_write; 75 long hammer2_ioa_file_write; 76 long hammer2_ioa_meta_write; 77 long hammer2_ioa_indr_write; 78 long hammer2_ioa_volu_write; 79 80 SYSCTL_NODE(_vfs, OID_AUTO, hammer2, CTLFLAG_RW, 0, "HAMMER2 filesystem"); 81 82 SYSCTL_INT(_vfs_hammer2, OID_AUTO, debug, CTLFLAG_RW, 83 &hammer2_debug, 0, ""); 84 SYSCTL_INT(_vfs_hammer2, OID_AUTO, cluster_enable, CTLFLAG_RW, 85 &hammer2_cluster_enable, 0, ""); 86 SYSCTL_INT(_vfs_hammer2, OID_AUTO, hardlink_enable, CTLFLAG_RW, 87 &hammer2_hardlink_enable, 0, ""); 88 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_read, CTLFLAG_RW, 89 &hammer2_iod_file_read, 0, ""); 90 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_read, CTLFLAG_RW, 91 &hammer2_iod_meta_read, 0, ""); 92 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_read, CTLFLAG_RW, 93 &hammer2_iod_indr_read, 0, ""); 94 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_write, CTLFLAG_RW, 95 &hammer2_iod_file_write, 0, ""); 96 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_write, CTLFLAG_RW, 97 &hammer2_iod_meta_write, 0, ""); 98 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_write, CTLFLAG_RW, 99 &hammer2_iod_indr_write, 0, ""); 100 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_write, CTLFLAG_RW, 101 &hammer2_iod_volu_write, 0, ""); 102 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_read, CTLFLAG_RW, 103 &hammer2_ioa_file_read, 0, ""); 104 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_read, CTLFLAG_RW, 105 &hammer2_ioa_meta_read, 0, ""); 106 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_read, CTLFLAG_RW, 107 &hammer2_ioa_indr_read, 0, ""); 108 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_write, CTLFLAG_RW, 109 &hammer2_ioa_file_write, 0, ""); 110 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_write, CTLFLAG_RW, 111 &hammer2_ioa_meta_write, 0, ""); 112 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_write, CTLFLAG_RW, 113 &hammer2_ioa_indr_write, 0, ""); 114 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_write, CTLFLAG_RW, 115 &hammer2_ioa_volu_write, 0, ""); 116 117 static int hammer2_vfs_init(struct vfsconf *conf); 118 static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, 119 struct ucred *cred); 120 static int hammer2_remount(struct mount *, char *, struct vnode *, 121 struct ucred *); 122 static int hammer2_vfs_unmount(struct mount *mp, int mntflags); 123 static int hammer2_vfs_root(struct mount *mp, struct vnode **vpp); 124 static int hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, 125 struct ucred *cred); 126 static int hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, 127 struct ucred *cred); 128 static int hammer2_vfs_sync(struct mount *mp, int waitfor); 129 static int hammer2_vfs_vget(struct mount *mp, struct vnode *dvp, 130 ino_t ino, struct vnode **vpp); 131 static int hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp, 132 struct fid *fhp, struct vnode **vpp); 133 static int hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp); 134 static int hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam, 135 int *exflagsp, struct ucred **credanonp); 136 137 static int hammer2_install_volume_header(hammer2_mount_t *hmp); 138 static int hammer2_sync_scan1(struct mount *mp, struct vnode *vp, void *data); 139 static int hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data); 140 141 static int hammer2_rcvdmsg(kdmsg_msg_t *msg); 142 static void hammer2_autodmsg(kdmsg_msg_t *msg); 143 144 /* 145 * HAMMER2 vfs operations. 146 */ 147 static struct vfsops hammer2_vfsops = { 148 .vfs_init = hammer2_vfs_init, 149 .vfs_sync = hammer2_vfs_sync, 150 .vfs_mount = hammer2_vfs_mount, 151 .vfs_unmount = hammer2_vfs_unmount, 152 .vfs_root = hammer2_vfs_root, 153 .vfs_statfs = hammer2_vfs_statfs, 154 .vfs_statvfs = hammer2_vfs_statvfs, 155 .vfs_vget = hammer2_vfs_vget, 156 .vfs_vptofh = hammer2_vfs_vptofh, 157 .vfs_fhtovp = hammer2_vfs_fhtovp, 158 .vfs_checkexp = hammer2_vfs_checkexp 159 }; 160 161 MALLOC_DEFINE(M_HAMMER2, "HAMMER2-mount", ""); 162 163 VFS_SET(hammer2_vfsops, hammer2, 0); 164 MODULE_VERSION(hammer2, 1); 165 166 static 167 int 168 hammer2_vfs_init(struct vfsconf *conf) 169 { 170 int error; 171 172 error = 0; 173 174 if (HAMMER2_BLOCKREF_BYTES != sizeof(struct hammer2_blockref)) 175 error = EINVAL; 176 if (HAMMER2_INODE_BYTES != sizeof(struct hammer2_inode_data)) 177 error = EINVAL; 178 if (HAMMER2_VOLUME_BYTES != sizeof(struct hammer2_volume_data)) 179 error = EINVAL; 180 181 if (error) 182 kprintf("HAMMER2 structure size mismatch; cannot continue.\n"); 183 184 lockinit(&hammer2_mntlk, "mntlk", 0, 0); 185 TAILQ_INIT(&hammer2_mntlist); 186 187 return (error); 188 } 189 190 /* 191 * Mount or remount HAMMER2 fileystem from physical media 192 * 193 * mountroot 194 * mp mount point structure 195 * path NULL 196 * data <unused> 197 * cred <unused> 198 * 199 * mount 200 * mp mount point structure 201 * path path to mount point 202 * data pointer to argument structure in user space 203 * volume volume path (device@LABEL form) 204 * hflags user mount flags 205 * cred user credentials 206 * 207 * RETURNS: 0 Success 208 * !0 error number 209 */ 210 static 211 int 212 hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, 213 struct ucred *cred) 214 { 215 struct hammer2_mount_info info; 216 hammer2_pfsmount_t *pmp; 217 hammer2_mount_t *hmp; 218 hammer2_key_t lhc; 219 struct vnode *devvp; 220 struct nlookupdata nd; 221 hammer2_chain_t *parent; 222 hammer2_chain_t *schain; 223 hammer2_chain_t *rchain; 224 struct file *fp; 225 char devstr[MNAMELEN]; 226 size_t size; 227 size_t done; 228 char *dev; 229 char *label; 230 int ronly = 1; 231 int create_hmp; 232 int error; 233 234 hmp = NULL; 235 pmp = NULL; 236 dev = NULL; 237 label = NULL; 238 devvp = NULL; 239 240 kprintf("hammer2_mount\n"); 241 242 if (path == NULL) { 243 /* 244 * Root mount 245 */ 246 bzero(&info, sizeof(info)); 247 info.cluster_fd = -1; 248 return (EOPNOTSUPP); 249 } else { 250 /* 251 * Non-root mount or updating a mount 252 */ 253 error = copyin(data, &info, sizeof(info)); 254 if (error) 255 return (error); 256 257 error = copyinstr(info.volume, devstr, MNAMELEN - 1, &done); 258 if (error) 259 return (error); 260 261 /* Extract device and label */ 262 dev = devstr; 263 label = strchr(devstr, '@'); 264 if (label == NULL || 265 ((label + 1) - dev) > done) { 266 return (EINVAL); 267 } 268 *label = '\0'; 269 label++; 270 if (*label == '\0') 271 return (EINVAL); 272 273 if (mp->mnt_flag & MNT_UPDATE) { 274 /* Update mount */ 275 /* HAMMER2 implements NFS export via mountctl */ 276 hmp = MPTOHMP(mp); 277 devvp = hmp->devvp; 278 error = hammer2_remount(mp, path, devvp, cred); 279 return error; 280 } 281 } 282 283 /* 284 * PFS mount 285 * 286 * Lookup name and verify it refers to a block device. 287 */ 288 error = nlookup_init(&nd, dev, UIO_SYSSPACE, NLC_FOLLOW); 289 if (error == 0) 290 error = nlookup(&nd); 291 if (error == 0) 292 error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp); 293 nlookup_done(&nd); 294 295 if (error == 0) { 296 if (vn_isdisk(devvp, &error)) 297 error = vfs_mountedon(devvp); 298 } 299 300 /* 301 * Determine if the device has already been mounted. After this 302 * check hmp will be non-NULL if we are doing the second or more 303 * hammer2 mounts from the same device. 304 */ 305 lockmgr(&hammer2_mntlk, LK_EXCLUSIVE); 306 TAILQ_FOREACH(hmp, &hammer2_mntlist, mntentry) { 307 if (hmp->devvp == devvp) 308 break; 309 } 310 311 /* 312 * Open the device if this isn't a secondary mount 313 */ 314 if (hmp) { 315 create_hmp = 0; 316 } else { 317 create_hmp = 1; 318 if (error == 0 && vcount(devvp) > 0) 319 error = EBUSY; 320 321 /* 322 * Now open the device 323 */ 324 if (error == 0) { 325 ronly = ((mp->mnt_flag & MNT_RDONLY) != 0); 326 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 327 error = vinvalbuf(devvp, V_SAVE, 0, 0); 328 if (error == 0) { 329 error = VOP_OPEN(devvp, 330 ronly ? FREAD : FREAD | FWRITE, 331 FSCRED, NULL); 332 } 333 vn_unlock(devvp); 334 } 335 if (error && devvp) { 336 vrele(devvp); 337 devvp = NULL; 338 } 339 if (error) { 340 lockmgr(&hammer2_mntlk, LK_RELEASE); 341 return error; 342 } 343 } 344 345 /* 346 * Block device opened successfully, finish initializing the 347 * mount structure. 348 * 349 * From this point on we have to call hammer2_unmount() on failure. 350 */ 351 pmp = kmalloc(sizeof(*pmp), M_HAMMER2, M_WAITOK | M_ZERO); 352 mp->mnt_data = (qaddr_t)pmp; 353 pmp->mp = mp; 354 355 kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg"); 356 kdmsg_iocom_init(&pmp->iocom, pmp, 357 KDMSG_IOCOMF_AUTOCONN | 358 KDMSG_IOCOMF_AUTOSPAN | 359 KDMSG_IOCOMF_AUTOCIRC, 360 pmp->mmsg, hammer2_rcvdmsg); 361 362 if (create_hmp) { 363 hmp = kmalloc(sizeof(*hmp), M_HAMMER2, M_WAITOK | M_ZERO); 364 hmp->ronly = ronly; 365 hmp->devvp = devvp; 366 kmalloc_create(&hmp->minode, "HAMMER2-inodes"); 367 kmalloc_create(&hmp->mchain, "HAMMER2-chains"); 368 TAILQ_INSERT_TAIL(&hammer2_mntlist, hmp, mntentry); 369 } 370 ccms_domain_init(&pmp->ccms_dom); 371 pmp->hmp = hmp; 372 ++hmp->pmp_count; 373 lockmgr(&hammer2_mntlk, LK_RELEASE); 374 kprintf("hammer2_mount hmp=%p pmpcnt=%d\n", hmp, hmp->pmp_count); 375 376 mp->mnt_flag = MNT_LOCAL; 377 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; /* all entry pts are SMP */ 378 379 if (create_hmp) { 380 /* 381 * vchain setup. vchain.data is special cased to NULL. 382 * vchain.refs is initialized and will never drop to 0. 383 */ 384 hmp->vchain.refs = 1; 385 hmp->vchain.data = (void *)&hmp->voldata; 386 hmp->vchain.bref.type = HAMMER2_BREF_TYPE_VOLUME; 387 hmp->vchain.bref.data_off = 0 | HAMMER2_PBUFRADIX; 388 hmp->vchain.bref_flush = hmp->vchain.bref; 389 ccms_cst_init(&hmp->vchain.cst, NULL); 390 /* hmp->vchain.u.xxx is left NULL */ 391 lockinit(&hmp->alloclk, "h2alloc", 0, 0); 392 lockinit(&hmp->voldatalk, "voldata", 0, LK_CANRECURSE); 393 394 /* 395 * Install the volume header 396 */ 397 error = hammer2_install_volume_header(hmp); 398 if (error) { 399 hammer2_vfs_unmount(mp, MNT_FORCE); 400 return error; 401 } 402 } 403 404 /* 405 * required mount structure initializations 406 */ 407 mp->mnt_stat.f_iosize = HAMMER2_PBUFSIZE; 408 mp->mnt_stat.f_bsize = HAMMER2_PBUFSIZE; 409 410 mp->mnt_vstat.f_frsize = HAMMER2_PBUFSIZE; 411 mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE; 412 413 /* 414 * Optional fields 415 */ 416 mp->mnt_iosize_max = MAXPHYS; 417 418 /* 419 * First locate the super-root inode, which is key 0 relative to the 420 * volume header's blockset. 421 * 422 * Then locate the root inode by scanning the directory keyspace 423 * represented by the label. 424 */ 425 if (create_hmp) { 426 parent = &hmp->vchain; 427 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS); 428 schain = hammer2_chain_lookup(hmp, &parent, 429 HAMMER2_SROOT_KEY, HAMMER2_SROOT_KEY, 0); 430 hammer2_chain_unlock(hmp, parent); 431 if (schain == NULL) { 432 kprintf("hammer2_mount: invalid super-root\n"); 433 hammer2_vfs_unmount(mp, MNT_FORCE); 434 return EINVAL; 435 } 436 hammer2_chain_ref(hmp, schain); /* for hmp->schain */ 437 hmp->schain = schain; /* left locked */ 438 hmp->sroot = hammer2_inode_get(hmp, NULL, NULL, schain); 439 hammer2_inode_ref(hmp->sroot); /* for hmp->sroot */ 440 hammer2_inode_unlock_ex(hmp->sroot, NULL); 441 } else { 442 schain = hmp->schain; 443 hammer2_chain_lock(hmp, schain, HAMMER2_RESOLVE_ALWAYS); 444 } 445 446 /* 447 * schain left locked at this point, use as basis for PFS search. 448 */ 449 parent = schain; 450 lhc = hammer2_dirhash(label, strlen(label)); 451 rchain = hammer2_chain_lookup(hmp, &parent, 452 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 453 0); 454 while (rchain) { 455 if (rchain->bref.type == HAMMER2_BREF_TYPE_INODE && 456 strcmp(label, rchain->data->ipdata.filename) == 0) { 457 break; 458 } 459 rchain = hammer2_chain_next(hmp, &parent, rchain, 460 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 461 0); 462 } 463 hammer2_chain_unlock(hmp, parent); 464 if (rchain == NULL) { 465 kprintf("hammer2_mount: PFS label not found\n"); 466 hammer2_vfs_unmount(mp, MNT_FORCE); 467 return EINVAL; 468 } 469 if (rchain->flags & HAMMER2_CHAIN_MOUNTED) { 470 hammer2_chain_unlock(hmp, rchain); 471 kprintf("hammer2_mount: PFS label already mounted!\n"); 472 hammer2_vfs_unmount(mp, MNT_FORCE); 473 return EBUSY; 474 } 475 atomic_set_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED); 476 477 /* 478 * NOTE: *_get() integrates chain's lock into the inode lock. 479 */ 480 hammer2_chain_ref(hmp, rchain); /* for pmp->rchain */ 481 pmp->rchain = rchain; /* left held & unlocked */ 482 pmp->iroot = hammer2_inode_get(hmp, pmp, NULL, rchain); 483 hammer2_inode_ref(pmp->iroot); /* ref for pmp->iroot */ 484 hammer2_inode_unlock_ex(pmp->iroot, rchain); /* iroot & its chain */ 485 486 kprintf("iroot %p\n", pmp->iroot); 487 488 /* 489 * Ref the cluster management messaging descriptor. The mount 490 * program deals with the other end of the communications pipe. 491 */ 492 fp = holdfp(curproc->p_fd, info.cluster_fd, -1); 493 if (fp == NULL) { 494 kprintf("hammer2_mount: bad cluster_fd!\n"); 495 hammer2_vfs_unmount(mp, MNT_FORCE); 496 return EBADF; 497 } 498 hammer2_cluster_reconnect(pmp, fp); 499 500 /* 501 * Finish setup 502 */ 503 vfs_getnewfsid(mp); 504 vfs_add_vnodeops(mp, &hammer2_vnode_vops, &mp->mnt_vn_norm_ops); 505 vfs_add_vnodeops(mp, &hammer2_spec_vops, &mp->mnt_vn_spec_ops); 506 vfs_add_vnodeops(mp, &hammer2_fifo_vops, &mp->mnt_vn_fifo_ops); 507 508 copyinstr(info.volume, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 509 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 510 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname)); 511 copyinstr(path, mp->mnt_stat.f_mntonname, 512 sizeof(mp->mnt_stat.f_mntonname) - 1, 513 &size); 514 515 /* 516 * Initial statfs to prime mnt_stat. 517 */ 518 hammer2_vfs_statfs(mp, &mp->mnt_stat, cred); 519 520 return 0; 521 } 522 523 static 524 int 525 hammer2_remount(struct mount *mp, char *path, struct vnode *devvp, 526 struct ucred *cred) 527 { 528 return (0); 529 } 530 531 static 532 int 533 hammer2_vfs_unmount(struct mount *mp, int mntflags) 534 { 535 hammer2_pfsmount_t *pmp; 536 hammer2_mount_t *hmp; 537 hammer2_chain_t *chain; 538 int flags; 539 int error = 0; 540 int ronly = ((mp->mnt_flag & MNT_RDONLY) != 0); 541 struct vnode *devvp; 542 543 pmp = MPTOPMP(mp); 544 hmp = pmp->hmp; 545 flags = 0; 546 547 if (mntflags & MNT_FORCE) 548 flags |= FORCECLOSE; 549 550 hammer2_mount_exlock(hmp); 551 552 /* 553 * If mount initialization proceeded far enough we must flush 554 * its vnodes. 555 */ 556 if (pmp->iroot) 557 error = vflush(mp, 0, flags); 558 559 if (error) 560 return error; 561 562 lockmgr(&hammer2_mntlk, LK_EXCLUSIVE); 563 --hmp->pmp_count; 564 kprintf("hammer2_unmount hmp=%p pmpcnt=%d\n", hmp, hmp->pmp_count); 565 566 /* 567 * Flush any left over chains. The voldata lock is only used 568 * to synchronize against HAMMER2_CHAIN_MODIFIED_AUX. 569 */ 570 hammer2_voldata_lock(hmp); 571 if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED | 572 HAMMER2_CHAIN_MODIFIED_AUX | 573 HAMMER2_CHAIN_SUBMODIFIED)) { 574 hammer2_voldata_unlock(hmp); 575 hammer2_vfs_sync(mp, MNT_WAIT); 576 } else { 577 hammer2_voldata_unlock(hmp); 578 } 579 if (hmp->pmp_count == 0) { 580 if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED | 581 HAMMER2_CHAIN_MODIFIED_AUX | 582 HAMMER2_CHAIN_SUBMODIFIED)) { 583 kprintf("hammer2_unmount: chains left over after " 584 "final sync\n"); 585 if (hammer2_debug & 0x0010) 586 Debugger("entered debugger"); 587 } 588 } 589 590 /* 591 * Cleanup the root and super-root chain elements (which should be 592 * clean). 593 */ 594 if (pmp->iroot) { 595 chain = hammer2_inode_lock_ex(pmp->iroot); 596 hammer2_inode_put(pmp->iroot, chain); 597 /* lock destroyed by the put */ 598 KKASSERT(pmp->iroot->refs == 1); 599 hammer2_inode_drop(pmp->iroot); /* ref for pmp->iroot */ 600 pmp->iroot = NULL; 601 } 602 if (pmp->rchain) { 603 atomic_clear_int(&pmp->rchain->flags, HAMMER2_CHAIN_MOUNTED); 604 KKASSERT(pmp->rchain->refs == 1); 605 hammer2_chain_drop(hmp, pmp->rchain); 606 pmp->rchain = NULL; 607 } 608 ccms_domain_uninit(&pmp->ccms_dom); 609 610 /* 611 * Kill cluster controller 612 */ 613 kdmsg_iocom_uninit(&pmp->iocom); 614 615 /* 616 * If no PFS's left drop the master hammer2_mount for the device. 617 */ 618 if (hmp->pmp_count == 0) { 619 if (hmp->sroot) { 620 hammer2_inode_drop(hmp->sroot); 621 hmp->sroot = NULL; 622 } 623 if (hmp->schain) { 624 KKASSERT(hmp->schain->refs == 1); 625 hammer2_chain_drop(hmp, hmp->schain); 626 hmp->schain = NULL; 627 } 628 629 /* 630 * Finish up with the device vnode 631 */ 632 if ((devvp = hmp->devvp) != NULL) { 633 vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0); 634 hmp->devvp = NULL; 635 VOP_CLOSE(devvp, (ronly ? FREAD : FREAD|FWRITE)); 636 vrele(devvp); 637 devvp = NULL; 638 } 639 } 640 hammer2_mount_unlock(hmp); 641 642 pmp->mp = NULL; 643 pmp->hmp = NULL; 644 mp->mnt_data = NULL; 645 646 kmalloc_destroy(&pmp->mmsg); 647 648 kfree(pmp, M_HAMMER2); 649 if (hmp->pmp_count == 0) { 650 TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry); 651 kmalloc_destroy(&hmp->minode); 652 kmalloc_destroy(&hmp->mchain); 653 kfree(hmp, M_HAMMER2); 654 } 655 lockmgr(&hammer2_mntlk, LK_RELEASE); 656 return (error); 657 } 658 659 static 660 int 661 hammer2_vfs_vget(struct mount *mp, struct vnode *dvp, 662 ino_t ino, struct vnode **vpp) 663 { 664 kprintf("hammer2_vget\n"); 665 return (EOPNOTSUPP); 666 } 667 668 static 669 int 670 hammer2_vfs_root(struct mount *mp, struct vnode **vpp) 671 { 672 hammer2_pfsmount_t *pmp; 673 hammer2_chain_t *ichain; 674 hammer2_mount_t *hmp; 675 int error; 676 struct vnode *vp; 677 678 pmp = MPTOPMP(mp); 679 hmp = pmp->hmp; 680 hammer2_mount_exlock(hmp); 681 if (pmp->iroot == NULL) { 682 *vpp = NULL; 683 error = EINVAL; 684 } else { 685 ichain = hammer2_inode_lock_sh(pmp->iroot); 686 vp = hammer2_igetv(pmp->iroot, &error); 687 hammer2_inode_unlock_sh(pmp->iroot, ichain); 688 *vpp = vp; 689 if (vp == NULL) 690 kprintf("vnodefail\n"); 691 } 692 hammer2_mount_unlock(hmp); 693 694 return (error); 695 } 696 697 /* 698 * Filesystem status 699 * 700 * XXX incorporate ipdata->inode_quota and data_quota 701 */ 702 static 703 int 704 hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 705 { 706 hammer2_pfsmount_t *pmp; 707 hammer2_mount_t *hmp; 708 709 pmp = MPTOPMP(mp); 710 hmp = MPTOHMP(mp); 711 712 mp->mnt_stat.f_files = pmp->inode_count; 713 mp->mnt_stat.f_ffree = 0; 714 mp->mnt_stat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE; 715 mp->mnt_stat.f_bfree = (hmp->voldata.allocator_size - 716 hmp->voldata.allocator_beg) / HAMMER2_PBUFSIZE; 717 mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree; 718 719 *sbp = mp->mnt_stat; 720 return (0); 721 } 722 723 static 724 int 725 hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred) 726 { 727 hammer2_pfsmount_t *pmp; 728 hammer2_mount_t *hmp; 729 730 pmp = MPTOPMP(mp); 731 hmp = MPTOHMP(mp); 732 733 mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE; 734 mp->mnt_vstat.f_files = pmp->inode_count; 735 mp->mnt_vstat.f_ffree = 0; 736 mp->mnt_vstat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE; 737 mp->mnt_vstat.f_bfree = (hmp->voldata.allocator_size - 738 hmp->voldata.allocator_beg) / HAMMER2_PBUFSIZE; 739 mp->mnt_vstat.f_bavail = mp->mnt_vstat.f_bfree; 740 741 *sbp = mp->mnt_vstat; 742 return (0); 743 } 744 745 /* 746 * Sync the entire filesystem; this is called from the filesystem syncer 747 * process periodically and whenever a user calls sync(1) on the hammer 748 * mountpoint. 749 * 750 * Currently is actually called from the syncer! \o/ 751 * 752 * This task will have to snapshot the state of the dirty inode chain. 753 * From that, it will have to make sure all of the inodes on the dirty 754 * chain have IO initiated. We make sure that io is initiated for the root 755 * block. 756 * 757 * If waitfor is set, we wait for media to acknowledge the new rootblock. 758 * 759 * THINKS: side A vs side B, to have sync not stall all I/O? 760 */ 761 static 762 int 763 hammer2_vfs_sync(struct mount *mp, int waitfor) 764 { 765 struct hammer2_sync_info info; 766 hammer2_mount_t *hmp; 767 int flags; 768 int error; 769 int haswork; 770 int i; 771 772 hmp = MPTOHMP(mp); 773 774 flags = VMSC_GETVP; 775 if (waitfor & MNT_LAZY) 776 flags |= VMSC_ONEPASS; 777 778 info.error = 0; 779 info.waitfor = MNT_NOWAIT; 780 vmntvnodescan(mp, flags | VMSC_NOWAIT, 781 hammer2_sync_scan1, 782 hammer2_sync_scan2, &info); 783 if (info.error == 0 && (waitfor & MNT_WAIT)) { 784 info.waitfor = waitfor; 785 vmntvnodescan(mp, flags, 786 hammer2_sync_scan1, 787 hammer2_sync_scan2, &info); 788 789 } 790 #if 0 791 if (waitfor == MNT_WAIT) { 792 /* XXX */ 793 } else { 794 /* XXX */ 795 } 796 #endif 797 hammer2_chain_lock(hmp, &hmp->vchain, HAMMER2_RESOLVE_ALWAYS); 798 if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED | 799 HAMMER2_CHAIN_MODIFIED_AUX | 800 HAMMER2_CHAIN_SUBMODIFIED)) { 801 hammer2_chain_flush(hmp, &hmp->vchain, 0); 802 haswork = 1; 803 } else { 804 haswork = 0; 805 } 806 hammer2_chain_unlock(hmp, &hmp->vchain); 807 808 error = 0; 809 810 /* 811 * We can't safely flush the volume header until we have 812 * flushed any device buffers which have built up. 813 */ 814 #if 0 815 if ((waitfor & MNT_LAZY) == 0) { 816 waitfor = MNT_NOWAIT; 817 vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY); 818 error = VOP_FSYNC(hmp->devvp, waitfor, 0); 819 vn_unlock(hmp->devvp); 820 } 821 #endif 822 vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY); 823 error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0); 824 vn_unlock(hmp->devvp); 825 826 if (error == 0 && haswork) { 827 struct buf *bp; 828 829 /* 830 * Synchronize the disk before flushing the volume 831 * header. 832 */ 833 bp = getpbuf(NULL); 834 bp->b_bio1.bio_offset = 0; 835 bp->b_bufsize = 0; 836 bp->b_bcount = 0; 837 bp->b_cmd = BUF_CMD_FLUSH; 838 bp->b_bio1.bio_done = biodone_sync; 839 bp->b_bio1.bio_flags |= BIO_SYNC; 840 vn_strategy(hmp->devvp, &bp->b_bio1); 841 biowait(&bp->b_bio1, "h2vol"); 842 relpbuf(bp, NULL); 843 844 /* 845 * Then we can safely flush the version of the volume header 846 * synchronized by the flush code. 847 */ 848 i = hmp->volhdrno + 1; 849 if (i >= HAMMER2_NUM_VOLHDRS) 850 i = 0; 851 if (i * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE > 852 hmp->volsync.volu_size) { 853 i = 0; 854 } 855 kprintf("sync volhdr %d %jd\n", 856 i, (intmax_t)hmp->volsync.volu_size); 857 bp = getblk(hmp->devvp, i * HAMMER2_ZONE_BYTES64, 858 HAMMER2_PBUFSIZE, 0, 0); 859 bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE); 860 bawrite(bp); 861 hmp->volhdrno = i; 862 } 863 return (error); 864 } 865 866 /* 867 * Sync passes. 868 * 869 * NOTE: We don't test SUBMODIFIED or MOVED here because the fsync code 870 * won't flush on those flags. The syncer code above will do a 871 * general meta-data flush globally that will catch these flags. 872 */ 873 static int 874 hammer2_sync_scan1(struct mount *mp, struct vnode *vp, void *data) 875 { 876 hammer2_inode_t *ip; 877 878 ip = VTOI(vp); 879 if (vp->v_type == VNON || ip == NULL || 880 ((ip->flags & HAMMER2_INODE_MODIFIED) == 0 && 881 RB_EMPTY(&vp->v_rbdirty_tree))) { 882 return(-1); 883 } 884 return(0); 885 } 886 887 static int 888 hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data) 889 { 890 struct hammer2_sync_info *info = data; 891 hammer2_inode_t *ip; 892 int error; 893 894 ip = VTOI(vp); 895 if (vp->v_type == VNON || vp->v_type == VBAD || 896 ((ip->flags & HAMMER2_INODE_MODIFIED) == 0 && 897 RB_EMPTY(&vp->v_rbdirty_tree))) { 898 return(0); 899 } 900 error = VOP_FSYNC(vp, MNT_NOWAIT, 0); 901 if (error) 902 info->error = error; 903 return(0); 904 } 905 906 static 907 int 908 hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp) 909 { 910 return (0); 911 } 912 913 static 914 int 915 hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp, 916 struct fid *fhp, struct vnode **vpp) 917 { 918 return (0); 919 } 920 921 static 922 int 923 hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam, 924 int *exflagsp, struct ucred **credanonp) 925 { 926 return (0); 927 } 928 929 /* 930 * Support code for hammer2_mount(). Read, verify, and install the volume 931 * header into the HMP 932 * 933 * XXX read four volhdrs and use the one with the highest TID whos CRC 934 * matches. 935 * 936 * XXX check iCRCs. 937 * 938 * XXX For filesystems w/ less than 4 volhdrs, make sure to not write to 939 * nonexistant locations. 940 * 941 * XXX Record selected volhdr and ring updates to each of 4 volhdrs 942 */ 943 static 944 int 945 hammer2_install_volume_header(hammer2_mount_t *hmp) 946 { 947 hammer2_volume_data_t *vd; 948 struct buf *bp; 949 hammer2_crc32_t crc0, crc, bcrc0, bcrc; 950 int error_reported; 951 int error; 952 int valid; 953 int i; 954 955 error_reported = 0; 956 error = 0; 957 valid = 0; 958 bp = NULL; 959 960 /* 961 * There are up to 4 copies of the volume header (syncs iterate 962 * between them so there is no single master). We don't trust the 963 * volu_size field so we don't know precisely how large the filesystem 964 * is, so depend on the OS to return an error if we go beyond the 965 * block device's EOF. 966 */ 967 for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++) { 968 error = bread(hmp->devvp, i * HAMMER2_ZONE_BYTES64, 969 HAMMER2_VOLUME_BYTES, &bp); 970 if (error) { 971 brelse(bp); 972 bp = NULL; 973 continue; 974 } 975 976 vd = (struct hammer2_volume_data *) bp->b_data; 977 if ((vd->magic != HAMMER2_VOLUME_ID_HBO) && 978 (vd->magic != HAMMER2_VOLUME_ID_ABO)) { 979 brelse(bp); 980 bp = NULL; 981 continue; 982 } 983 984 if (vd->magic == HAMMER2_VOLUME_ID_ABO) { 985 /* XXX: Reversed-endianness filesystem */ 986 kprintf("hammer2: reverse-endian filesystem detected"); 987 brelse(bp); 988 bp = NULL; 989 continue; 990 } 991 992 crc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT0]; 993 crc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC0_OFF, 994 HAMMER2_VOLUME_ICRC0_SIZE); 995 bcrc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT1]; 996 bcrc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC1_OFF, 997 HAMMER2_VOLUME_ICRC1_SIZE); 998 if ((crc0 != crc) || (bcrc0 != bcrc)) { 999 kprintf("hammer2 volume header crc " 1000 "mismatch copy #%d %08x/%08x\n", 1001 i, crc0, crc); 1002 error_reported = 1; 1003 brelse(bp); 1004 bp = NULL; 1005 continue; 1006 } 1007 if (valid == 0 || hmp->voldata.mirror_tid < vd->mirror_tid) { 1008 valid = 1; 1009 hmp->voldata = *vd; 1010 hmp->volhdrno = i; 1011 } 1012 brelse(bp); 1013 bp = NULL; 1014 } 1015 if (valid) { 1016 hmp->volsync = hmp->voldata; 1017 error = 0; 1018 if (error_reported || bootverbose || 1) { /* 1/DEBUG */ 1019 kprintf("hammer2: using volume header #%d\n", 1020 hmp->volhdrno); 1021 } 1022 } else { 1023 error = EINVAL; 1024 kprintf("hammer2: no valid volume headers found!\n"); 1025 } 1026 return (error); 1027 } 1028 1029 /* 1030 * Reconnect using the passed file pointer. The caller must ref the 1031 * fp for us. 1032 */ 1033 void 1034 hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp) 1035 { 1036 hammer2_chain_t *chain; 1037 hammer2_inode_data_t *ipdata; 1038 size_t name_len; 1039 1040 /* 1041 * Closes old comm descriptor, kills threads, cleans up 1042 * states, then installs the new descriptor and creates 1043 * new threads. 1044 */ 1045 kdmsg_iocom_reconnect(&pmp->iocom, fp, "hammer2"); 1046 1047 /* 1048 * Setup LNK_CONN fields for autoinitiated state machine 1049 */ 1050 chain = hammer2_inode_lock_ex(pmp->iroot); 1051 ipdata = &chain->data->ipdata; 1052 pmp->iocom.auto_lnk_conn.pfs_clid = ipdata->pfs_clid; 1053 pmp->iocom.auto_lnk_conn.pfs_fsid = ipdata->pfs_fsid; 1054 pmp->iocom.auto_lnk_conn.pfs_type = ipdata->pfs_type; 1055 pmp->iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1; 1056 pmp->iocom.auto_lnk_conn.peer_type = pmp->hmp->voldata.peer_type; 1057 hammer2_inode_unlock_ex(pmp->iroot, chain); 1058 1059 /* 1060 * Filter adjustment. Clients do not need visibility into other 1061 * clients (otherwise millions of clients would present a serious 1062 * problem). The fs_label also serves to restrict the namespace. 1063 */ 1064 pmp->iocom.auto_lnk_conn.peer_mask = 1LLU << HAMMER2_PEER_HAMMER2; 1065 pmp->iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1; 1066 switch (ipdata->pfs_type) { 1067 case DMSG_PFSTYPE_CLIENT: 1068 pmp->iocom.auto_lnk_conn.peer_mask &= 1069 ~(1LLU << DMSG_PFSTYPE_CLIENT); 1070 break; 1071 default: 1072 break; 1073 } 1074 1075 name_len = ipdata->name_len; 1076 if (name_len >= sizeof(pmp->iocom.auto_lnk_conn.fs_label)) 1077 name_len = sizeof(pmp->iocom.auto_lnk_conn.fs_label) - 1; 1078 bcopy(ipdata->filename, 1079 pmp->iocom.auto_lnk_conn.fs_label, 1080 name_len); 1081 pmp->iocom.auto_lnk_conn.fs_label[name_len] = 0; 1082 1083 /* 1084 * Setup LNK_SPAN fields for autoinitiated state machine 1085 */ 1086 pmp->iocom.auto_lnk_span.pfs_clid = ipdata->pfs_clid; 1087 pmp->iocom.auto_lnk_span.pfs_fsid = ipdata->pfs_fsid; 1088 pmp->iocom.auto_lnk_span.pfs_type = ipdata->pfs_type; 1089 pmp->iocom.auto_lnk_span.peer_type = pmp->hmp->voldata.peer_type; 1090 pmp->iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1; 1091 name_len = ipdata->name_len; 1092 if (name_len >= sizeof(pmp->iocom.auto_lnk_span.fs_label)) 1093 name_len = sizeof(pmp->iocom.auto_lnk_span.fs_label) - 1; 1094 bcopy(ipdata->filename, 1095 pmp->iocom.auto_lnk_span.fs_label, 1096 name_len); 1097 pmp->iocom.auto_lnk_span.fs_label[name_len] = 0; 1098 1099 kdmsg_iocom_autoinitiate(&pmp->iocom, hammer2_autodmsg); 1100 } 1101 1102 static int 1103 hammer2_rcvdmsg(kdmsg_msg_t *msg) 1104 { 1105 switch(msg->any.head.cmd & DMSGF_TRANSMASK) { 1106 case DMSG_DBG_SHELL: 1107 /* 1108 * (non-transaction) 1109 * Execute shell command (not supported atm) 1110 */ 1111 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 1112 break; 1113 case DMSG_DBG_SHELL | DMSGF_REPLY: 1114 /* 1115 * (non-transaction) 1116 */ 1117 if (msg->aux_data) { 1118 msg->aux_data[msg->aux_size - 1] = 0; 1119 kprintf("HAMMER2 DBG: %s\n", msg->aux_data); 1120 } 1121 break; 1122 default: 1123 /* 1124 * Unsupported message received. We only need to 1125 * reply if it's a transaction in order to close our end. 1126 * Ignore any one-way messages are any further messages 1127 * associated with the transaction. 1128 * 1129 * NOTE: This case also includes DMSG_LNK_ERROR messages 1130 * which might be one-way, replying to those would 1131 * cause an infinite ping-pong. 1132 */ 1133 if (msg->any.head.cmd & DMSGF_CREATE) 1134 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 1135 break; 1136 } 1137 return(0); 1138 } 1139 1140 /* 1141 * This function is called after KDMSG has automatically handled processing 1142 * of a LNK layer message (typically CONN, SPAN, or CIRC). 1143 * 1144 * We tag off the LNK_CONN to trigger our LNK_VOLCONF messages which 1145 * advertises all available hammer2 super-root volumes. 1146 */ 1147 static void 1148 hammer2_autodmsg(kdmsg_msg_t *msg) 1149 { 1150 hammer2_pfsmount_t *pmp = msg->iocom->handle; 1151 hammer2_mount_t *hmp = pmp->hmp; 1152 int copyid; 1153 1154 /* 1155 * We only care about replies to our LNK_CONN auto-request. kdmsg 1156 * has already processed the reply, we use this calback as a shim 1157 * to know when we can advertise available super-root volumes. 1158 */ 1159 if ((msg->any.head.cmd & DMSGF_TRANSMASK) != 1160 (DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_REPLY) || 1161 msg->state == NULL) { 1162 return; 1163 } 1164 1165 kprintf("LNK_CONN REPLY RECEIVED CMD %08x\n", msg->any.head.cmd); 1166 1167 if (msg->any.head.cmd & DMSGF_CREATE) { 1168 kprintf("HAMMER2: VOLDATA DUMP\n"); 1169 1170 /* 1171 * Dump the configuration stored in the volume header 1172 */ 1173 hammer2_voldata_lock(hmp); 1174 for (copyid = 0; copyid < HAMMER2_COPYID_COUNT; ++copyid) { 1175 if (hmp->voldata.copyinfo[copyid].copyid == 0) 1176 continue; 1177 hammer2_volconf_update(pmp, copyid); 1178 } 1179 hammer2_voldata_unlock(hmp); 1180 } 1181 if ((msg->any.head.cmd & DMSGF_DELETE) && 1182 msg->state && (msg->state->txcmd & DMSGF_DELETE) == 0) { 1183 kprintf("HAMMER2: CONN WAS TERMINATED\n"); 1184 } 1185 } 1186 1187 /* 1188 * Volume configuration updates are passed onto the userland service 1189 * daemon via the open LNK_CONN transaction. 1190 */ 1191 void 1192 hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index) 1193 { 1194 hammer2_mount_t *hmp = pmp->hmp; 1195 kdmsg_msg_t *msg; 1196 1197 /* XXX interlock against connection state termination */ 1198 kprintf("volconf update %p\n", pmp->iocom.conn_state); 1199 if (pmp->iocom.conn_state) { 1200 kprintf("TRANSMIT VOLCONF VIA OPEN CONN TRANSACTION\n"); 1201 msg = kdmsg_msg_alloc_state(pmp->iocom.conn_state, 1202 DMSG_LNK_VOLCONF, NULL, NULL); 1203 msg->any.lnk_volconf.copy = hmp->voldata.copyinfo[index]; 1204 msg->any.lnk_volconf.mediaid = hmp->voldata.fsid; 1205 msg->any.lnk_volconf.index = index; 1206 kdmsg_msg_write(msg); 1207 } 1208 } 1209