1 /*- 2 * Copyright (c) 2011, 2012 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/kernel.h> 37 #include <sys/nlookup.h> 38 #include <sys/vnode.h> 39 #include <sys/mount.h> 40 #include <sys/fcntl.h> 41 #include <sys/buf.h> 42 #include <sys/uuid.h> 43 #include <sys/vfsops.h> 44 #include <sys/sysctl.h> 45 #include <sys/socket.h> 46 47 #include "hammer2.h" 48 #include "hammer2_disk.h" 49 #include "hammer2_mount.h" 50 51 struct hammer2_sync_info { 52 int error; 53 int waitfor; 54 }; 55 56 TAILQ_HEAD(hammer2_mntlist, hammer2_mount); 57 static struct hammer2_mntlist hammer2_mntlist; 58 static struct lock hammer2_mntlk; 59 60 int hammer2_debug; 61 int hammer2_cluster_enable = 1; 62 int hammer2_hardlink_enable = 1; 63 long hammer2_iod_file_read; 64 long hammer2_iod_meta_read; 65 long hammer2_iod_indr_read; 66 long hammer2_iod_file_write; 67 long hammer2_iod_meta_write; 68 long hammer2_iod_indr_write; 69 long hammer2_iod_volu_write; 70 long hammer2_ioa_file_read; 71 long hammer2_ioa_meta_read; 72 long hammer2_ioa_indr_read; 73 long hammer2_ioa_file_write; 74 long hammer2_ioa_meta_write; 75 long hammer2_ioa_indr_write; 76 long hammer2_ioa_volu_write; 77 78 SYSCTL_NODE(_vfs, OID_AUTO, hammer2, CTLFLAG_RW, 0, "HAMMER2 filesystem"); 79 80 SYSCTL_INT(_vfs_hammer2, OID_AUTO, debug, CTLFLAG_RW, 81 &hammer2_debug, 0, ""); 82 SYSCTL_INT(_vfs_hammer2, OID_AUTO, cluster_enable, CTLFLAG_RW, 83 &hammer2_cluster_enable, 0, ""); 84 SYSCTL_INT(_vfs_hammer2, OID_AUTO, hardlink_enable, CTLFLAG_RW, 85 &hammer2_hardlink_enable, 0, ""); 86 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_read, CTLFLAG_RW, 87 &hammer2_iod_file_read, 0, ""); 88 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_read, CTLFLAG_RW, 89 &hammer2_iod_meta_read, 0, ""); 90 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_read, CTLFLAG_RW, 91 &hammer2_iod_indr_read, 0, ""); 92 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_write, CTLFLAG_RW, 93 &hammer2_iod_file_write, 0, ""); 94 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_write, CTLFLAG_RW, 95 &hammer2_iod_meta_write, 0, ""); 96 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_write, CTLFLAG_RW, 97 &hammer2_iod_indr_write, 0, ""); 98 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_write, CTLFLAG_RW, 99 &hammer2_iod_volu_write, 0, ""); 100 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_read, CTLFLAG_RW, 101 &hammer2_ioa_file_read, 0, ""); 102 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_read, CTLFLAG_RW, 103 &hammer2_ioa_meta_read, 0, ""); 104 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_read, CTLFLAG_RW, 105 &hammer2_ioa_indr_read, 0, ""); 106 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_write, CTLFLAG_RW, 107 &hammer2_ioa_file_write, 0, ""); 108 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_write, CTLFLAG_RW, 109 &hammer2_ioa_meta_write, 0, ""); 110 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_write, CTLFLAG_RW, 111 &hammer2_ioa_indr_write, 0, ""); 112 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_write, CTLFLAG_RW, 113 &hammer2_ioa_volu_write, 0, ""); 114 115 static int hammer2_vfs_init(struct vfsconf *conf); 116 static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, 117 struct ucred *cred); 118 static int hammer2_remount(struct mount *, char *, struct vnode *, 119 struct ucred *); 120 static int hammer2_vfs_unmount(struct mount *mp, int mntflags); 121 static int hammer2_vfs_root(struct mount *mp, struct vnode **vpp); 122 static int hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, 123 struct ucred *cred); 124 static int hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, 125 struct ucred *cred); 126 static int hammer2_vfs_sync(struct mount *mp, int waitfor); 127 static int hammer2_vfs_vget(struct mount *mp, struct vnode *dvp, 128 ino_t ino, struct vnode **vpp); 129 static int hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp, 130 struct fid *fhp, struct vnode **vpp); 131 static int hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp); 132 static int hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam, 133 int *exflagsp, struct ucred **credanonp); 134 135 static int hammer2_install_volume_header(hammer2_mount_t *hmp); 136 static int hammer2_sync_scan1(struct mount *mp, struct vnode *vp, void *data); 137 static int hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data); 138 139 static int hammer2_msg_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); 140 static int hammer2_msg_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); 141 static int hammer2_msg_lnk_rcvmsg(kdmsg_msg_t *msg); 142 143 /* 144 * HAMMER2 vfs operations. 145 */ 146 static struct vfsops hammer2_vfsops = { 147 .vfs_init = hammer2_vfs_init, 148 .vfs_sync = hammer2_vfs_sync, 149 .vfs_mount = hammer2_vfs_mount, 150 .vfs_unmount = hammer2_vfs_unmount, 151 .vfs_root = hammer2_vfs_root, 152 .vfs_statfs = hammer2_vfs_statfs, 153 .vfs_statvfs = hammer2_vfs_statvfs, 154 .vfs_vget = hammer2_vfs_vget, 155 .vfs_vptofh = hammer2_vfs_vptofh, 156 .vfs_fhtovp = hammer2_vfs_fhtovp, 157 .vfs_checkexp = hammer2_vfs_checkexp 158 }; 159 160 MALLOC_DEFINE(M_HAMMER2, "HAMMER2-mount", ""); 161 162 VFS_SET(hammer2_vfsops, hammer2, 0); 163 MODULE_VERSION(hammer2, 1); 164 165 static 166 int 167 hammer2_vfs_init(struct vfsconf *conf) 168 { 169 int error; 170 171 error = 0; 172 173 if (HAMMER2_BLOCKREF_BYTES != sizeof(struct hammer2_blockref)) 174 error = EINVAL; 175 if (HAMMER2_INODE_BYTES != sizeof(struct hammer2_inode_data)) 176 error = EINVAL; 177 if (HAMMER2_ALLOCREF_BYTES != sizeof(struct hammer2_allocref)) 178 error = EINVAL; 179 if (HAMMER2_VOLUME_BYTES != sizeof(struct hammer2_volume_data)) 180 error = EINVAL; 181 182 if (error) 183 kprintf("HAMMER2 structure size mismatch; cannot continue.\n"); 184 185 lockinit(&hammer2_mntlk, "mntlk", 0, 0); 186 TAILQ_INIT(&hammer2_mntlist); 187 188 return (error); 189 } 190 191 /* 192 * Mount or remount HAMMER2 fileystem from physical media 193 * 194 * mountroot 195 * mp mount point structure 196 * path NULL 197 * data <unused> 198 * cred <unused> 199 * 200 * mount 201 * mp mount point structure 202 * path path to mount point 203 * data pointer to argument structure in user space 204 * volume volume path (device@LABEL form) 205 * hflags user mount flags 206 * cred user credentials 207 * 208 * RETURNS: 0 Success 209 * !0 error number 210 */ 211 static 212 int 213 hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, 214 struct ucred *cred) 215 { 216 struct hammer2_mount_info info; 217 hammer2_pfsmount_t *pmp; 218 hammer2_mount_t *hmp; 219 hammer2_key_t lhc; 220 struct vnode *devvp; 221 struct nlookupdata nd; 222 hammer2_chain_t *parent; 223 hammer2_chain_t *schain; 224 hammer2_chain_t *rchain; 225 struct file *fp; 226 char devstr[MNAMELEN]; 227 size_t size; 228 size_t done; 229 char *dev; 230 char *label; 231 int ronly = 1; 232 int create_hmp; 233 int error; 234 235 hmp = NULL; 236 pmp = NULL; 237 dev = NULL; 238 label = NULL; 239 devvp = NULL; 240 241 kprintf("hammer2_mount\n"); 242 243 if (path == NULL) { 244 /* 245 * Root mount 246 */ 247 bzero(&info, sizeof(info)); 248 info.cluster_fd = -1; 249 return (EOPNOTSUPP); 250 } else { 251 /* 252 * Non-root mount or updating a mount 253 */ 254 error = copyin(data, &info, sizeof(info)); 255 if (error) 256 return (error); 257 258 error = copyinstr(info.volume, devstr, MNAMELEN - 1, &done); 259 if (error) 260 return (error); 261 262 /* Extract device and label */ 263 dev = devstr; 264 label = strchr(devstr, '@'); 265 if (label == NULL || 266 ((label + 1) - dev) > done) { 267 return (EINVAL); 268 } 269 *label = '\0'; 270 label++; 271 if (*label == '\0') 272 return (EINVAL); 273 274 if (mp->mnt_flag & MNT_UPDATE) { 275 /* Update mount */ 276 /* HAMMER2 implements NFS export via mountctl */ 277 hmp = MPTOHMP(mp); 278 devvp = hmp->devvp; 279 error = hammer2_remount(mp, path, devvp, cred); 280 return error; 281 } 282 } 283 284 /* 285 * PFS mount 286 * 287 * Lookup name and verify it refers to a block device. 288 */ 289 error = nlookup_init(&nd, dev, UIO_SYSSPACE, NLC_FOLLOW); 290 if (error == 0) 291 error = nlookup(&nd); 292 if (error == 0) 293 error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp); 294 nlookup_done(&nd); 295 296 if (error == 0) { 297 if (vn_isdisk(devvp, &error)) 298 error = vfs_mountedon(devvp); 299 } 300 301 /* 302 * Determine if the device has already been mounted. After this 303 * check hmp will be non-NULL if we are doing the second or more 304 * hammer2 mounts from the same device. 305 */ 306 lockmgr(&hammer2_mntlk, LK_EXCLUSIVE); 307 TAILQ_FOREACH(hmp, &hammer2_mntlist, mntentry) { 308 if (hmp->devvp == devvp) 309 break; 310 } 311 312 /* 313 * Open the device if this isn't a secondary mount 314 */ 315 if (hmp) { 316 create_hmp = 0; 317 } else { 318 create_hmp = 1; 319 if (error == 0 && vcount(devvp) > 0) 320 error = EBUSY; 321 322 /* 323 * Now open the device 324 */ 325 if (error == 0) { 326 ronly = ((mp->mnt_flag & MNT_RDONLY) != 0); 327 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 328 error = vinvalbuf(devvp, V_SAVE, 0, 0); 329 if (error == 0) { 330 error = VOP_OPEN(devvp, 331 ronly ? FREAD : FREAD | FWRITE, 332 FSCRED, NULL); 333 } 334 vn_unlock(devvp); 335 } 336 if (error && devvp) { 337 vrele(devvp); 338 devvp = NULL; 339 } 340 if (error) { 341 lockmgr(&hammer2_mntlk, LK_RELEASE); 342 return error; 343 } 344 } 345 346 /* 347 * Block device opened successfully, finish initializing the 348 * mount structure. 349 * 350 * From this point on we have to call hammer2_unmount() on failure. 351 */ 352 pmp = kmalloc(sizeof(*pmp), M_HAMMER2, M_WAITOK | M_ZERO); 353 mp->mnt_data = (qaddr_t)pmp; 354 pmp->mp = mp; 355 356 kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg"); 357 kdmsg_iocom_init(&pmp->iocom, pmp, pmp->mmsg, 358 hammer2_msg_lnk_rcvmsg, 359 hammer2_msg_dbg_rcvmsg, 360 hammer2_msg_adhoc_input); 361 362 if (create_hmp) { 363 hmp = kmalloc(sizeof(*hmp), M_HAMMER2, M_WAITOK | M_ZERO); 364 hmp->ronly = ronly; 365 hmp->devvp = devvp; 366 kmalloc_create(&hmp->minode, "HAMMER2-inodes"); 367 kmalloc_create(&hmp->mchain, "HAMMER2-chains"); 368 TAILQ_INSERT_TAIL(&hammer2_mntlist, hmp, mntentry); 369 } 370 ccms_domain_init(&pmp->ccms_dom); 371 pmp->hmp = hmp; 372 ++hmp->pmp_count; 373 lockmgr(&hammer2_mntlk, LK_RELEASE); 374 kprintf("hammer2_mount hmp=%p pmpcnt=%d\n", hmp, hmp->pmp_count); 375 376 mp->mnt_flag = MNT_LOCAL; 377 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; /* all entry pts are SMP */ 378 379 if (create_hmp) { 380 /* 381 * vchain setup. vchain.data is special cased to NULL. 382 * vchain.refs is initialized and will never drop to 0. 383 */ 384 hmp->vchain.refs = 1; 385 hmp->vchain.data = (void *)&hmp->voldata; 386 hmp->vchain.bref.type = HAMMER2_BREF_TYPE_VOLUME; 387 hmp->vchain.bref.data_off = 0 | HAMMER2_PBUFRADIX; 388 hmp->vchain.bref_flush = hmp->vchain.bref; 389 ccms_cst_init(&hmp->vchain.cst, NULL); 390 /* hmp->vchain.u.xxx is left NULL */ 391 lockinit(&hmp->alloclk, "h2alloc", 0, 0); 392 lockinit(&hmp->voldatalk, "voldata", 0, LK_CANRECURSE); 393 394 /* 395 * Install the volume header 396 */ 397 error = hammer2_install_volume_header(hmp); 398 if (error) { 399 hammer2_vfs_unmount(mp, MNT_FORCE); 400 return error; 401 } 402 } 403 404 /* 405 * required mount structure initializations 406 */ 407 mp->mnt_stat.f_iosize = HAMMER2_PBUFSIZE; 408 mp->mnt_stat.f_bsize = HAMMER2_PBUFSIZE; 409 410 mp->mnt_vstat.f_frsize = HAMMER2_PBUFSIZE; 411 mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE; 412 413 /* 414 * Optional fields 415 */ 416 mp->mnt_iosize_max = MAXPHYS; 417 418 /* 419 * First locate the super-root inode, which is key 0 relative to the 420 * volume header's blockset. 421 * 422 * Then locate the root inode by scanning the directory keyspace 423 * represented by the label. 424 */ 425 if (create_hmp) { 426 parent = &hmp->vchain; 427 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS); 428 schain = hammer2_chain_lookup(hmp, &parent, 429 HAMMER2_SROOT_KEY, HAMMER2_SROOT_KEY, 0); 430 hammer2_chain_unlock(hmp, parent); 431 if (schain == NULL) { 432 kprintf("hammer2_mount: invalid super-root\n"); 433 hammer2_vfs_unmount(mp, MNT_FORCE); 434 return EINVAL; 435 } 436 hammer2_chain_ref(hmp, schain); /* for hmp->schain */ 437 hmp->schain = schain; /* left locked */ 438 } else { 439 schain = hmp->schain; 440 hammer2_chain_lock(hmp, schain, HAMMER2_RESOLVE_ALWAYS); 441 } 442 443 parent = schain; 444 lhc = hammer2_dirhash(label, strlen(label)); 445 rchain = hammer2_chain_lookup(hmp, &parent, 446 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 447 0); 448 while (rchain) { 449 if (rchain->bref.type == HAMMER2_BREF_TYPE_INODE && 450 rchain->u.ip && 451 strcmp(label, rchain->data->ipdata.filename) == 0) { 452 break; 453 } 454 rchain = hammer2_chain_next(hmp, &parent, rchain, 455 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 456 0); 457 } 458 hammer2_chain_unlock(hmp, parent); 459 if (rchain == NULL) { 460 kprintf("hammer2_mount: PFS label not found\n"); 461 hammer2_vfs_unmount(mp, MNT_FORCE); 462 return EINVAL; 463 } 464 if (rchain->flags & HAMMER2_CHAIN_MOUNTED) { 465 hammer2_chain_unlock(hmp, rchain); 466 kprintf("hammer2_mount: PFS label already mounted!\n"); 467 hammer2_vfs_unmount(mp, MNT_FORCE); 468 return EBUSY; 469 } 470 atomic_set_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED); 471 472 hammer2_chain_ref(hmp, rchain); /* for pmp->rchain */ 473 hammer2_chain_unlock(hmp, rchain); 474 pmp->rchain = rchain; /* left held & unlocked */ 475 pmp->iroot = rchain->u.ip; /* implied hold from rchain */ 476 pmp->iroot->pmp = pmp; 477 478 kprintf("iroot %p\n", pmp->iroot); 479 480 /* 481 * Ref the cluster management messaging descriptor. The mount 482 * program deals with the other end of the communications pipe. 483 */ 484 fp = holdfp(curproc->p_fd, info.cluster_fd, -1); 485 if (fp == NULL) { 486 kprintf("hammer2_mount: bad cluster_fd!\n"); 487 hammer2_vfs_unmount(mp, MNT_FORCE); 488 return EBADF; 489 } 490 hammer2_cluster_reconnect(pmp, fp); 491 492 /* 493 * Finish setup 494 */ 495 vfs_getnewfsid(mp); 496 vfs_add_vnodeops(mp, &hammer2_vnode_vops, &mp->mnt_vn_norm_ops); 497 vfs_add_vnodeops(mp, &hammer2_spec_vops, &mp->mnt_vn_spec_ops); 498 vfs_add_vnodeops(mp, &hammer2_fifo_vops, &mp->mnt_vn_fifo_ops); 499 500 copyinstr(info.volume, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 501 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 502 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname)); 503 copyinstr(path, mp->mnt_stat.f_mntonname, 504 sizeof(mp->mnt_stat.f_mntonname) - 1, 505 &size); 506 507 /* 508 * Initial statfs to prime mnt_stat. 509 */ 510 hammer2_vfs_statfs(mp, &mp->mnt_stat, cred); 511 512 return 0; 513 } 514 515 static 516 int 517 hammer2_remount(struct mount *mp, char *path, struct vnode *devvp, 518 struct ucred *cred) 519 { 520 return (0); 521 } 522 523 static 524 int 525 hammer2_vfs_unmount(struct mount *mp, int mntflags) 526 { 527 hammer2_pfsmount_t *pmp; 528 hammer2_mount_t *hmp; 529 int flags; 530 int error = 0; 531 int ronly = ((mp->mnt_flag & MNT_RDONLY) != 0); 532 struct vnode *devvp; 533 534 pmp = MPTOPMP(mp); 535 hmp = pmp->hmp; 536 flags = 0; 537 538 if (mntflags & MNT_FORCE) 539 flags |= FORCECLOSE; 540 541 hammer2_mount_exlock(hmp); 542 543 /* 544 * If mount initialization proceeded far enough we must flush 545 * its vnodes. 546 */ 547 if (pmp->iroot) 548 error = vflush(mp, 0, flags); 549 550 if (error) 551 return error; 552 553 lockmgr(&hammer2_mntlk, LK_EXCLUSIVE); 554 --hmp->pmp_count; 555 kprintf("hammer2_unmount hmp=%p pmpcnt=%d\n", hmp, hmp->pmp_count); 556 557 /* 558 * Flush any left over chains. The voldata lock is only used 559 * to synchronize against HAMMER2_CHAIN_MODIFIED_AUX. 560 */ 561 hammer2_voldata_lock(hmp); 562 if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED | 563 HAMMER2_CHAIN_MODIFIED_AUX | 564 HAMMER2_CHAIN_SUBMODIFIED)) { 565 hammer2_voldata_unlock(hmp); 566 hammer2_vfs_sync(mp, MNT_WAIT); 567 } else { 568 hammer2_voldata_unlock(hmp); 569 } 570 if (hmp->pmp_count == 0) { 571 if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED | 572 HAMMER2_CHAIN_MODIFIED_AUX | 573 HAMMER2_CHAIN_SUBMODIFIED)) { 574 kprintf("hammer2_unmount: chains left over after " 575 "final sync\n"); 576 if (hammer2_debug & 0x0010) 577 Debugger("entered debugger"); 578 } 579 } 580 581 /* 582 * Cleanup the root and super-root chain elements (which should be 583 * clean). 584 */ 585 pmp->iroot = NULL; 586 if (pmp->rchain) { 587 atomic_clear_int(&pmp->rchain->flags, HAMMER2_CHAIN_MOUNTED); 588 KKASSERT(pmp->rchain->refs == 1); 589 hammer2_chain_drop(hmp, pmp->rchain); 590 pmp->rchain = NULL; 591 } 592 ccms_domain_uninit(&pmp->ccms_dom); 593 594 /* 595 * Kill cluster controller 596 */ 597 kdmsg_iocom_uninit(&pmp->iocom); 598 599 /* 600 * If no PFS's left drop the master hammer2_mount for the device. 601 */ 602 if (hmp->pmp_count == 0) { 603 if (hmp->schain) { 604 KKASSERT(hmp->schain->refs == 1); 605 hammer2_chain_drop(hmp, hmp->schain); 606 hmp->schain = NULL; 607 } 608 609 /* 610 * Finish up with the device vnode 611 */ 612 if ((devvp = hmp->devvp) != NULL) { 613 vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0); 614 hmp->devvp = NULL; 615 VOP_CLOSE(devvp, (ronly ? FREAD : FREAD|FWRITE)); 616 vrele(devvp); 617 devvp = NULL; 618 } 619 } 620 hammer2_mount_unlock(hmp); 621 622 pmp->mp = NULL; 623 pmp->hmp = NULL; 624 mp->mnt_data = NULL; 625 626 kmalloc_destroy(&pmp->mmsg); 627 628 kfree(pmp, M_HAMMER2); 629 if (hmp->pmp_count == 0) { 630 TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry); 631 kmalloc_destroy(&hmp->minode); 632 kmalloc_destroy(&hmp->mchain); 633 kfree(hmp, M_HAMMER2); 634 } 635 lockmgr(&hammer2_mntlk, LK_RELEASE); 636 return (error); 637 } 638 639 static 640 int 641 hammer2_vfs_vget(struct mount *mp, struct vnode *dvp, 642 ino_t ino, struct vnode **vpp) 643 { 644 kprintf("hammer2_vget\n"); 645 return (EOPNOTSUPP); 646 } 647 648 static 649 int 650 hammer2_vfs_root(struct mount *mp, struct vnode **vpp) 651 { 652 hammer2_pfsmount_t *pmp; 653 hammer2_mount_t *hmp; 654 int error; 655 struct vnode *vp; 656 657 pmp = MPTOPMP(mp); 658 hmp = pmp->hmp; 659 hammer2_mount_exlock(hmp); 660 if (pmp->iroot == NULL) { 661 *vpp = NULL; 662 error = EINVAL; 663 } else { 664 hammer2_chain_lock(hmp, &pmp->iroot->chain, 665 HAMMER2_RESOLVE_ALWAYS | 666 HAMMER2_RESOLVE_SHARED); 667 vp = hammer2_igetv(pmp->iroot, &error); 668 hammer2_chain_unlock(hmp, &pmp->iroot->chain); 669 *vpp = vp; 670 if (vp == NULL) 671 kprintf("vnodefail\n"); 672 } 673 hammer2_mount_unlock(hmp); 674 675 return (error); 676 } 677 678 /* 679 * Filesystem status 680 * 681 * XXX incorporate pmp->iroot->ip_data.inode_quota and data_quota 682 */ 683 static 684 int 685 hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 686 { 687 hammer2_pfsmount_t *pmp; 688 hammer2_mount_t *hmp; 689 690 pmp = MPTOPMP(mp); 691 hmp = MPTOHMP(mp); 692 693 mp->mnt_stat.f_files = pmp->iroot->ip_data.inode_count + 694 pmp->iroot->delta_icount; 695 mp->mnt_stat.f_ffree = 0; 696 mp->mnt_stat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE; 697 mp->mnt_stat.f_bfree = (hmp->voldata.allocator_size - 698 hmp->voldata.allocator_beg) / HAMMER2_PBUFSIZE; 699 mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree; 700 701 *sbp = mp->mnt_stat; 702 return (0); 703 } 704 705 static 706 int 707 hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred) 708 { 709 hammer2_pfsmount_t *pmp; 710 hammer2_mount_t *hmp; 711 712 pmp = MPTOPMP(mp); 713 hmp = MPTOHMP(mp); 714 715 mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE; 716 mp->mnt_vstat.f_files = pmp->iroot->ip_data.inode_count + 717 pmp->iroot->delta_icount; 718 mp->mnt_vstat.f_ffree = 0; 719 mp->mnt_vstat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE; 720 mp->mnt_vstat.f_bfree = (hmp->voldata.allocator_size - 721 hmp->voldata.allocator_beg) / HAMMER2_PBUFSIZE; 722 mp->mnt_vstat.f_bavail = mp->mnt_vstat.f_bfree; 723 724 *sbp = mp->mnt_vstat; 725 return (0); 726 } 727 728 /* 729 * Sync the entire filesystem; this is called from the filesystem syncer 730 * process periodically and whenever a user calls sync(1) on the hammer 731 * mountpoint. 732 * 733 * Currently is actually called from the syncer! \o/ 734 * 735 * This task will have to snapshot the state of the dirty inode chain. 736 * From that, it will have to make sure all of the inodes on the dirty 737 * chain have IO initiated. We make sure that io is initiated for the root 738 * block. 739 * 740 * If waitfor is set, we wait for media to acknowledge the new rootblock. 741 * 742 * THINKS: side A vs side B, to have sync not stall all I/O? 743 */ 744 static 745 int 746 hammer2_vfs_sync(struct mount *mp, int waitfor) 747 { 748 struct hammer2_sync_info info; 749 hammer2_mount_t *hmp; 750 int flags; 751 int error; 752 int haswork; 753 754 hmp = MPTOHMP(mp); 755 756 flags = VMSC_GETVP; 757 if (waitfor & MNT_LAZY) 758 flags |= VMSC_ONEPASS; 759 760 info.error = 0; 761 info.waitfor = MNT_NOWAIT; 762 vmntvnodescan(mp, flags | VMSC_NOWAIT, 763 hammer2_sync_scan1, 764 hammer2_sync_scan2, &info); 765 if (info.error == 0 && (waitfor & MNT_WAIT)) { 766 info.waitfor = waitfor; 767 vmntvnodescan(mp, flags, 768 hammer2_sync_scan1, 769 hammer2_sync_scan2, &info); 770 771 } 772 #if 0 773 if (waitfor == MNT_WAIT) { 774 /* XXX */ 775 } else { 776 /* XXX */ 777 } 778 #endif 779 hammer2_chain_lock(hmp, &hmp->vchain, HAMMER2_RESOLVE_ALWAYS); 780 if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED | 781 HAMMER2_CHAIN_MODIFIED_AUX | 782 HAMMER2_CHAIN_SUBMODIFIED)) { 783 hammer2_chain_flush(hmp, &hmp->vchain, 0); 784 haswork = 1; 785 } else { 786 haswork = 0; 787 } 788 hammer2_chain_unlock(hmp, &hmp->vchain); 789 790 error = 0; 791 792 if ((waitfor & MNT_LAZY) == 0) { 793 waitfor = MNT_NOWAIT; 794 vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY); 795 error = VOP_FSYNC(hmp->devvp, waitfor, 0); 796 vn_unlock(hmp->devvp); 797 } 798 799 if (error == 0 && haswork) { 800 struct buf *bp; 801 802 /* 803 * Synchronize the disk before flushing the volume 804 * header. 805 */ 806 bp = getpbuf(NULL); 807 bp->b_bio1.bio_offset = 0; 808 bp->b_bufsize = 0; 809 bp->b_bcount = 0; 810 bp->b_cmd = BUF_CMD_FLUSH; 811 bp->b_bio1.bio_done = biodone_sync; 812 bp->b_bio1.bio_flags |= BIO_SYNC; 813 vn_strategy(hmp->devvp, &bp->b_bio1); 814 biowait(&bp->b_bio1, "h2vol"); 815 relpbuf(bp, NULL); 816 817 /* 818 * Then we can safely flush the volume header. Volume 819 * data is locked separately to prevent ioctl functions 820 * from deadlocking due to a configuration issue. 821 */ 822 bp = getblk(hmp->devvp, 0, HAMMER2_PBUFSIZE, 0, 0); 823 hammer2_voldata_lock(hmp); 824 bcopy(&hmp->voldata, bp->b_data, HAMMER2_PBUFSIZE); 825 hammer2_voldata_unlock(hmp); 826 bawrite(bp); 827 } 828 return (error); 829 } 830 831 /* 832 * Sync passes. 833 * 834 * NOTE: We don't test SUBMODIFIED or MOVED here because the fsync code 835 * won't flush on those flags. The syncer code above will do a 836 * general meta-data flush globally that will catch these flags. 837 */ 838 static int 839 hammer2_sync_scan1(struct mount *mp, struct vnode *vp, void *data) 840 { 841 hammer2_inode_t *ip; 842 843 ip = VTOI(vp); 844 if (vp->v_type == VNON || ip == NULL || 845 ((ip->chain.flags & (HAMMER2_CHAIN_MODIFIED | 846 HAMMER2_CHAIN_DIRTYEMBED)) == 0 && 847 RB_EMPTY(&vp->v_rbdirty_tree))) { 848 return(-1); 849 } 850 return(0); 851 } 852 853 static int 854 hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data) 855 { 856 struct hammer2_sync_info *info = data; 857 hammer2_inode_t *ip; 858 int error; 859 860 ip = VTOI(vp); 861 if (vp->v_type == VNON || vp->v_type == VBAD || 862 ((ip->chain.flags & (HAMMER2_CHAIN_MODIFIED | 863 HAMMER2_CHAIN_DIRTYEMBED)) == 0 && 864 RB_EMPTY(&vp->v_rbdirty_tree))) { 865 return(0); 866 } 867 error = VOP_FSYNC(vp, MNT_NOWAIT, 0); 868 if (error) 869 info->error = error; 870 return(0); 871 } 872 873 static 874 int 875 hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp) 876 { 877 return (0); 878 } 879 880 static 881 int 882 hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp, 883 struct fid *fhp, struct vnode **vpp) 884 { 885 return (0); 886 } 887 888 static 889 int 890 hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam, 891 int *exflagsp, struct ucred **credanonp) 892 { 893 return (0); 894 } 895 896 /* 897 * Support code for hammer2_mount(). Read, verify, and install the volume 898 * header into the HMP 899 * 900 * XXX read four volhdrs and use the one with the highest TID whos CRC 901 * matches. 902 * 903 * XXX check iCRCs. 904 * 905 * XXX For filesystems w/ less than 4 volhdrs, make sure to not write to 906 * nonexistant locations. 907 * 908 * XXX Record selected volhdr and ring updates to each of 4 volhdrs 909 */ 910 static 911 int 912 hammer2_install_volume_header(hammer2_mount_t *hmp) 913 { 914 hammer2_volume_data_t *vd; 915 struct buf *bp; 916 hammer2_crc32_t crc0, crc, bcrc0, bcrc; 917 int error_reported; 918 int error; 919 int valid; 920 int i; 921 922 error_reported = 0; 923 error = 0; 924 valid = 0; 925 bp = NULL; 926 927 /* 928 * There are up to 4 copies of the volume header (syncs iterate 929 * between them so there is no single master). We don't trust the 930 * volu_size field so we don't know precisely how large the filesystem 931 * is, so depend on the OS to return an error if we go beyond the 932 * block device's EOF. 933 */ 934 for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++) { 935 error = bread(hmp->devvp, i * HAMMER2_ZONE_BYTES64, 936 HAMMER2_VOLUME_BYTES, &bp); 937 if (error) { 938 brelse(bp); 939 bp = NULL; 940 continue; 941 } 942 943 vd = (struct hammer2_volume_data *) bp->b_data; 944 if ((vd->magic != HAMMER2_VOLUME_ID_HBO) && 945 (vd->magic != HAMMER2_VOLUME_ID_ABO)) { 946 brelse(bp); 947 bp = NULL; 948 continue; 949 } 950 951 if (vd->magic == HAMMER2_VOLUME_ID_ABO) { 952 /* XXX: Reversed-endianness filesystem */ 953 kprintf("hammer2: reverse-endian filesystem detected"); 954 brelse(bp); 955 bp = NULL; 956 continue; 957 } 958 959 crc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT0]; 960 crc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC0_OFF, 961 HAMMER2_VOLUME_ICRC0_SIZE); 962 bcrc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT1]; 963 bcrc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC1_OFF, 964 HAMMER2_VOLUME_ICRC1_SIZE); 965 if ((crc0 != crc) || (bcrc0 != bcrc)) { 966 kprintf("hammer2 volume header crc " 967 "mismatch copy #%d\t%08x %08x", 968 i, crc0, crc); 969 error_reported = 1; 970 brelse(bp); 971 bp = NULL; 972 continue; 973 } 974 if (valid == 0 || hmp->voldata.mirror_tid < vd->mirror_tid) { 975 valid = 1; 976 hmp->voldata = *vd; 977 } 978 brelse(bp); 979 bp = NULL; 980 } 981 if (valid) { 982 error = 0; 983 if (error_reported) 984 kprintf("hammer2: a valid volume header was found\n"); 985 } else { 986 error = EINVAL; 987 kprintf("hammer2: no valid volume headers found!\n"); 988 } 989 return (error); 990 } 991 992 /* 993 * Reconnect using the passed file pointer. The caller must ref the 994 * fp for us. 995 */ 996 void 997 hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp) 998 { 999 kdmsg_msg_t *msg; 1000 size_t name_len; 1001 1002 /* 1003 * Closes old comm descriptor, kills threads, cleans up 1004 * states, then installs the new descriptor and creates 1005 * new threads. 1006 */ 1007 kdmsg_iocom_reconnect(&pmp->iocom, fp, "hammer2"); 1008 1009 /* 1010 * Open a LNK_CONN transaction indicating that we want to take part 1011 * in the spanning tree algorithm. Filter explicitly on the PFS 1012 * info in the iroot. 1013 * 1014 * We do not transmit our (only) LNK_SPAN until the other end has 1015 * acknowledged our link connection request. 1016 * 1017 * The transaction remains fully open for the duration of the 1018 * connection. 1019 */ 1020 msg = kdmsg_msg_alloc(&pmp->iocom.router, DMSG_LNK_CONN | DMSGF_CREATE, 1021 hammer2_msg_conn_reply, pmp); 1022 msg->any.lnk_conn.pfs_clid = pmp->iroot->ip_data.pfs_clid; 1023 msg->any.lnk_conn.pfs_fsid = pmp->iroot->ip_data.pfs_fsid; 1024 msg->any.lnk_conn.pfs_type = pmp->iroot->ip_data.pfs_type; 1025 msg->any.lnk_conn.proto_version = DMSG_SPAN_PROTO_1; 1026 msg->any.lnk_conn.peer_type = pmp->hmp->voldata.peer_type; 1027 msg->any.lnk_conn.peer_mask = 1LLU << HAMMER2_PEER_HAMMER2; 1028 name_len = pmp->iroot->ip_data.name_len; 1029 if (name_len >= sizeof(msg->any.lnk_conn.fs_label)) 1030 name_len = sizeof(msg->any.lnk_conn.fs_label) - 1; 1031 bcopy(pmp->iroot->ip_data.filename, 1032 msg->any.lnk_conn.fs_label, 1033 name_len); 1034 pmp->iocom.conn_state = msg->state; 1035 msg->any.lnk_conn.fs_label[name_len] = 0; 1036 kdmsg_msg_write(msg); 1037 } 1038 1039 static int 1040 hammer2_msg_lnk_rcvmsg(kdmsg_msg_t *msg) 1041 { 1042 switch(msg->any.head.cmd & DMSGF_TRANSMASK) { 1043 case DMSG_LNK_CONN | DMSGF_CREATE: 1044 /* 1045 * connection request from peer, send a streaming 1046 * result of 0 (leave the transaction open). Transaction 1047 * is left open for the duration of the connection, we 1048 * let the kern_dmsg module clean it up on disconnect. 1049 */ 1050 kdmsg_msg_result(msg, 0); 1051 break; 1052 case DMSG_LNK_SPAN | DMSGF_CREATE: 1053 /* 1054 * Incoming SPAN - transaction create 1055 * 1056 * We do not have to respond right now. Instead we will 1057 * respond later on when the peer deletes their side. 1058 */ 1059 break; 1060 case DMSG_LNK_SPAN | DMSGF_DELETE: 1061 /* 1062 * Incoming SPAN - transaction delete. 1063 * 1064 * We must terminate our side so both ends can free up 1065 * their recorded state. 1066 */ 1067 /* fall through */ 1068 case DMSG_LNK_SPAN | DMSGF_CREATE | DMSGF_DELETE: 1069 /* 1070 * Incoming SPAN - transaction delete (degenerate span). 1071 * 1072 * We must terminate our side so both ends can free up 1073 * their recorded state. 1074 */ 1075 kdmsg_msg_reply(msg, 0); 1076 break; 1077 default: 1078 /* 1079 * Unsupported LNK message received. We only need to 1080 * reply if it's a transaction in order to close our end. 1081 * Ignore any one-way messages are any further messages 1082 * associated with the transaction. 1083 * 1084 * NOTE: This case also includes DMSG_LNK_ERROR messages 1085 * which might be one-way, replying to those would 1086 * cause an infinite ping-pong. 1087 */ 1088 if (msg->any.head.cmd & DMSGF_CREATE) 1089 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 1090 break; 1091 } 1092 return(0); 1093 } 1094 1095 /* 1096 * This function is called when the other end replies to our LNK_CONN 1097 * request. 1098 * 1099 * We transmit our (single) SPAN on the initial reply, leaving that 1100 * transaction open too. 1101 */ 1102 static int 1103 hammer2_msg_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) 1104 { 1105 hammer2_pfsmount_t *pmp = state->any.pmp; 1106 hammer2_mount_t *hmp = pmp->hmp; 1107 kdmsg_msg_t *rmsg; 1108 size_t name_len; 1109 int copyid; 1110 1111 kprintf("LNK_CONN REPLY RECEIVED CMD %08x\n", msg->any.head.cmd); 1112 1113 if (msg->any.head.cmd & DMSGF_CREATE) { 1114 kprintf("LNK_CONN transaction replied to, initiate SPAN\n"); 1115 rmsg = kdmsg_msg_alloc(&pmp->iocom.router, 1116 DMSG_LNK_SPAN | DMSGF_CREATE, 1117 hammer2_msg_span_reply, pmp); 1118 rmsg->any.lnk_span.pfs_clid = pmp->iroot->ip_data.pfs_clid; 1119 rmsg->any.lnk_span.pfs_fsid = pmp->iroot->ip_data.pfs_fsid; 1120 rmsg->any.lnk_span.pfs_type = pmp->iroot->ip_data.pfs_type; 1121 rmsg->any.lnk_span.peer_type = pmp->hmp->voldata.peer_type; 1122 rmsg->any.lnk_span.proto_version = DMSG_SPAN_PROTO_1; 1123 name_len = pmp->iroot->ip_data.name_len; 1124 if (name_len >= sizeof(rmsg->any.lnk_span.fs_label)) 1125 name_len = sizeof(rmsg->any.lnk_span.fs_label) - 1; 1126 bcopy(pmp->iroot->ip_data.filename, 1127 rmsg->any.lnk_span.fs_label, 1128 name_len); 1129 rmsg->any.lnk_span.fs_label[name_len] = 0; 1130 kdmsg_msg_write(rmsg); 1131 1132 /* 1133 * Dump the configuration stored in the volume header 1134 */ 1135 hammer2_voldata_lock(hmp); 1136 for (copyid = 0; copyid < HAMMER2_COPYID_COUNT; ++copyid) { 1137 if (hmp->voldata.copyinfo[copyid].copyid == 0) 1138 continue; 1139 hammer2_volconf_update(pmp, copyid); 1140 } 1141 hammer2_voldata_unlock(hmp); 1142 } 1143 if ((state->txcmd & DMSGF_DELETE) == 0 && 1144 (msg->any.head.cmd & DMSGF_DELETE)) { 1145 kprintf("LNK_CONN transaction terminated by remote\n"); 1146 pmp->iocom.conn_state = NULL; 1147 kdmsg_msg_reply(msg, 0); 1148 } 1149 return(0); 1150 } 1151 1152 /* 1153 * Remote terminated our span transaction. We have to terminate our side. 1154 */ 1155 static int 1156 hammer2_msg_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) 1157 { 1158 /*hammer2_pfsmount_t *pmp = state->any.pmp;*/ 1159 1160 kprintf("SPAN REPLY - Our sent span was terminated by the " 1161 "remote %08x state %p\n", msg->any.head.cmd, state); 1162 if ((state->txcmd & DMSGF_DELETE) == 0 && 1163 (msg->any.head.cmd & DMSGF_DELETE)) { 1164 kdmsg_msg_reply(msg, 0); 1165 } 1166 return(0); 1167 } 1168 1169 /* 1170 * Volume configuration updates are passed onto the userland service 1171 * daemon via the open LNK_CONN transaction. 1172 */ 1173 void 1174 hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index) 1175 { 1176 hammer2_mount_t *hmp = pmp->hmp; 1177 kdmsg_msg_t *msg; 1178 1179 /* XXX interlock against connection state termination */ 1180 kprintf("volconf update %p\n", pmp->iocom.conn_state); 1181 if (pmp->iocom.conn_state) { 1182 kprintf("TRANSMIT VOLCONF VIA OPEN CONN TRANSACTION\n"); 1183 msg = kdmsg_msg_alloc(&pmp->iocom.router, DMSG_LNK_VOLCONF, 1184 NULL, NULL); 1185 msg->state = pmp->iocom.conn_state; 1186 msg->any.lnk_volconf.copy = hmp->voldata.copyinfo[index]; 1187 msg->any.lnk_volconf.mediaid = hmp->voldata.fsid; 1188 msg->any.lnk_volconf.index = index; 1189 kdmsg_msg_write(msg); 1190 } 1191 } 1192