1 /*- 2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression) 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/nlookup.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/fcntl.h>
#include <sys/buf.h>
#include <sys/uuid.h>
#include <sys/vfsops.h>
#include <sys/sysctl.h>
#include <sys/socket.h>
#include <sys/objcache.h>

#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/mountctl.h>
#include <sys/dirent.h>
#include <sys/uio.h>

#include <sys/mutex.h>
#include <sys/mutex2.h>

#include "hammer2.h"
#include "hammer2_disk.h"
#include "hammer2_mount.h"

/*
 * NOTE(review): "hammer2.h" is included a second time here (also included
 * above) -- harmless assuming the header has include guards, but the
 * duplicate could be dropped.
 */
#include "hammer2.h"
#include "hammer2_lz4.h"

#include "zlib/hammer2_zlib.h"

#define REPORT_REFS_ERRORS	1	/* XXX remove me */

MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");

/*
 * Per-call context for the VFS sync scan (see hammer2_vfs_sync /
 * hammer2_sync_scan2).
 */
struct hammer2_sync_info {
	hammer2_trans_t trans;
	int error;
	int waitfor;
};

/*
 * Global list of active hammer2 device mounts (hammer2_mount_t), guarded
 * by hammer2_mntlk.
 */
TAILQ_HEAD(hammer2_mntlist, hammer2_mount);
static struct hammer2_mntlist hammer2_mntlist;
static struct lock hammer2_mntlk;

/*
 * Tunables and I/O statistics, exported via the vfs.hammer2 sysctl tree
 * below.  "iod" counters track device-level I/O, "ioa" counters track
 * logical (abstracted) I/O.
 */
int hammer2_debug;
int hammer2_cluster_enable = 1;
int hammer2_hardlink_enable = 1;
int hammer2_flush_pipe = 100;
int hammer2_synchronous_flush = 1;
long hammer2_limit_dirty_chains;
long hammer2_iod_file_read;
long hammer2_iod_meta_read;
long hammer2_iod_indr_read;
long hammer2_iod_fmap_read;
long hammer2_iod_volu_read;
long hammer2_iod_file_write;
long hammer2_iod_meta_write;
long hammer2_iod_indr_write;
long hammer2_iod_fmap_write;
long hammer2_iod_volu_write;
long hammer2_ioa_file_read;
long hammer2_ioa_meta_read;
long hammer2_ioa_indr_read;
long hammer2_ioa_fmap_read;
long hammer2_ioa_volu_read;
long hammer2_ioa_fmap_write;
long hammer2_ioa_file_write;
long hammer2_ioa_meta_write;
long hammer2_ioa_indr_write;
long hammer2_ioa_volu_write;

/*
 * Scratch buffers for the compression (C_BUFFER) and decompression
 * (D_BUFFER) object caches set up in hammer2_vfs_init().
 */
MALLOC_DECLARE(C_BUFFER);
MALLOC_DEFINE(C_BUFFER, "compbuffer", "Buffer used for compression.");

MALLOC_DECLARE(D_BUFFER);
MALLOC_DEFINE(D_BUFFER, "decompbuffer", "Buffer used for decompression.");

SYSCTL_NODE(_vfs, OID_AUTO, hammer2, CTLFLAG_RW, 0, "HAMMER2 filesystem");

SYSCTL_INT(_vfs_hammer2, OID_AUTO, debug, CTLFLAG_RW,
	   &hammer2_debug, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, cluster_enable, CTLFLAG_RW,
	   &hammer2_cluster_enable, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, hardlink_enable, CTLFLAG_RW,
	   &hammer2_hardlink_enable, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, flush_pipe, CTLFLAG_RW,
	   &hammer2_flush_pipe, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, synchronous_flush, CTLFLAG_RW,
	   &hammer2_synchronous_flush, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, limit_dirty_chains, CTLFLAG_RW,
	   &hammer2_limit_dirty_chains, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_read, CTLFLAG_RW,
	   &hammer2_iod_file_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_read, CTLFLAG_RW,
	   &hammer2_iod_meta_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_read, CTLFLAG_RW,
	   &hammer2_iod_indr_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_read, CTLFLAG_RW,
	   &hammer2_iod_fmap_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_read, CTLFLAG_RW,
	   &hammer2_iod_volu_read, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_write, CTLFLAG_RW,
	   &hammer2_iod_file_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_write, CTLFLAG_RW,
	   &hammer2_iod_meta_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_write, CTLFLAG_RW,
	   &hammer2_iod_indr_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_write, CTLFLAG_RW,
	   &hammer2_iod_fmap_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_write, CTLFLAG_RW,
	   &hammer2_iod_volu_write, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_read, CTLFLAG_RW,
	   &hammer2_ioa_file_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_read, CTLFLAG_RW,
	   &hammer2_ioa_meta_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_read, CTLFLAG_RW,
	   &hammer2_ioa_indr_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_read, CTLFLAG_RW,
	   &hammer2_ioa_fmap_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_read, CTLFLAG_RW,
	   &hammer2_ioa_volu_read, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_write, CTLFLAG_RW,
	   &hammer2_ioa_file_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_write, CTLFLAG_RW,
	   &hammer2_ioa_meta_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_write, CTLFLAG_RW,
	   &hammer2_ioa_indr_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_write, CTLFLAG_RW,
	   &hammer2_ioa_fmap_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_write, CTLFLAG_RW,
	   &hammer2_ioa_volu_write, 0, "");

/* VFS entry points and internal helpers defined later in this file. */
static int hammer2_vfs_init(struct vfsconf *conf);
static int hammer2_vfs_uninit(struct vfsconf *vfsp);
static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
				struct ucred *cred);
static int hammer2_remount(hammer2_mount_t *, struct mount *, char *,
				struct vnode *, struct ucred *);
static int hammer2_recovery(hammer2_mount_t *hmp);
static int hammer2_vfs_unmount(struct mount *mp, int mntflags);
static int hammer2_vfs_root(struct mount *mp, struct vnode **vpp);
static int hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp,
				struct ucred *cred);
static int hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp,
				struct ucred *cred);
static int hammer2_vfs_vget(struct mount *mp, struct vnode *dvp,
				ino_t ino, struct vnode **vpp);
static int hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
				struct fid *fhp, struct vnode **vpp);
static int hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp);
static int hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
				int *exflagsp, struct ucred **credanonp);

static int hammer2_install_volume_header(hammer2_mount_t *hmp);
static int hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data);

static void hammer2_write_thread(void *arg);

static void hammer2_vfs_unmount_hmp1(struct mount *mp, hammer2_mount_t *hmp);
static void hammer2_vfs_unmount_hmp2(struct mount *mp, hammer2_mount_t *hmp);

/*
 * Functions for compression in threads,
 * from hammer2_vnops.c
 */
static void hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
				hammer2_inode_t *ip,
				hammer2_inode_data_t *ipdata,
				hammer2_cluster_t *cparent,
				hammer2_key_t lbase, int ioflag, int pblksize,
				int *errorp);
static void hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
				hammer2_inode_t *ip,
				hammer2_inode_data_t *ipdata,
				hammer2_cluster_t *cparent,
				hammer2_key_t lbase, int ioflag,
				int pblksize, int *errorp, int comp_algo);
static void hammer2_zero_check_and_write(struct buf *bp,
				hammer2_trans_t *trans, hammer2_inode_t *ip,
				hammer2_inode_data_t *ipdata,
				hammer2_cluster_t *cparent,
				hammer2_key_t lbase,
				int ioflag, int pblksize, int *errorp);
static int test_block_zeros(const char *buf, size_t bytes);
static void zero_write(struct buf *bp, hammer2_trans_t *trans,
				hammer2_inode_t *ip,
				hammer2_inode_data_t *ipdata,
				hammer2_cluster_t *cparent,
				hammer2_key_t lbase,
				int *errorp);
static void hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp,
				int ioflag, int pblksize, int *errorp);

static int hammer2_rcvdmsg(kdmsg_msg_t *msg);
static void hammer2_autodmsg(kdmsg_msg_t *msg);


/*
 * HAMMER2 vfs operations.
 */
static struct vfsops hammer2_vfsops = {
	.vfs_init	= hammer2_vfs_init,
	.vfs_uninit	= hammer2_vfs_uninit,
	.vfs_sync	= hammer2_vfs_sync,
	.vfs_mount	= hammer2_vfs_mount,
	.vfs_unmount	= hammer2_vfs_unmount,
	.vfs_root	= hammer2_vfs_root,
	.vfs_statfs	= hammer2_vfs_statfs,
	.vfs_statvfs	= hammer2_vfs_statvfs,
	.vfs_vget	= hammer2_vfs_vget,
	.vfs_vptofh	= hammer2_vfs_vptofh,
	.vfs_fhtovp	= hammer2_vfs_fhtovp,
	.vfs_checkexp	= hammer2_vfs_checkexp
};

MALLOC_DEFINE(M_HAMMER2, "HAMMER2-mount", "");

VFS_SET(hammer2_vfsops, hammer2, 0);
MODULE_VERSION(hammer2, 1);

/*
 * One-time module initialization.  Sanity-checks the on-media structure
 * sizes against their compile-time constants, creates the compression /
 * decompression buffer object caches, and initializes the global mount
 * list and its lock.
 *
 * Returns 0 on success or EINVAL if the structure sizes do not match
 * (in which case mounting would corrupt media).
 */
static
int
hammer2_vfs_init(struct vfsconf *conf)
{
	static struct objcache_malloc_args margs_read;
	static struct objcache_malloc_args margs_write;

	int error;

	error = 0;

	/*
	 * The media format is defined by these structures; refuse to
	 * continue if the compiled sizes disagree with the format.
	 */
	if (HAMMER2_BLOCKREF_BYTES != sizeof(struct hammer2_blockref))
		error = EINVAL;
	if (HAMMER2_INODE_BYTES != sizeof(struct hammer2_inode_data))
		error = EINVAL;
	if (HAMMER2_VOLUME_BYTES != sizeof(struct hammer2_volume_data))
		error = EINVAL;

	if (error)
		kprintf("HAMMER2 structure size mismatch; cannot continue.\n");

	/* 64KB decompression buffers, 32KB compression buffers */
	margs_read.objsize = 65536;
	margs_read.mtype = D_BUFFER;

	margs_write.objsize = 32768;
	margs_write.mtype = C_BUFFER;

	cache_buffer_read = objcache_create(margs_read.mtype->ks_shortdesc,
				0, 1, NULL, NULL, NULL, objcache_malloc_alloc,
				objcache_malloc_free, &margs_read);
	cache_buffer_write = objcache_create(margs_write.mtype->ks_shortdesc,
				0, 1, NULL, NULL, NULL, objcache_malloc_alloc,
				objcache_malloc_free, &margs_write);

	lockinit(&hammer2_mntlk, "mntlk", 0, 0);
	TAILQ_INIT(&hammer2_mntlist);

	/* scale the dirty-chain limit to the system's vnode budget */
	hammer2_limit_dirty_chains = desiredvnodes / 10;

	return (error);
}

/*
 * Module teardown: destroy the buffer object caches created in
 * hammer2_vfs_init().
 */
static
int
hammer2_vfs_uninit(struct vfsconf *vfsp __unused)
{
	objcache_destroy(cache_buffer_read);
	objcache_destroy(cache_buffer_write);
	return 0;
}

/*
 * Mount or remount HAMMER2 filesystem from physical media
 *
 *	mountroot
 *	mp		mount point structure
 *	path		NULL
 *	data		<unused>
 *	cred		<unused>
 *
 *	mount
 *	mp		mount point structure
 *	path		path to mount point
 *	data		pointer to argument structure in user space
 *	    volume	volume path (device@LABEL form)
 *	    hflags	user mount flags
 *	cred		user credentials
 *
 * RETURNS:	0	Success
 *		!0	error number
 */
static
int
hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
		  struct ucred *cred)
{
	struct hammer2_mount_info info;
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;
	hammer2_key_t key_next;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	struct vnode *devvp;
	struct nlookupdata nd;
	hammer2_chain_t *parent;
	hammer2_chain_t *rchain;
	hammer2_chain_t *schain;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *cparent;
	struct file *fp;
	char devstr[MNAMELEN];
	size_t size;
	size_t done;
	char *dev;
	char *label;
	int ronly = 1;
	int error;
	int cache_index;
	int ddflag;
	int i;

	hmp = NULL;
	pmp = NULL;
	dev = NULL;
	label = NULL;
	devvp = NULL;
	cache_index = -1;

	kprintf("hammer2_mount\n");

	if (path == NULL) {
		/*
		 * Root mount (not supported yet)
		 */
		bzero(&info, sizeof(info));
		info.cluster_fd = -1;
		return (EOPNOTSUPP);
	} else {
		/*
		 * Non-root mount or updating a mount
		 */
		error = copyin(data, &info, sizeof(info));
		if (error)
			return (error);

		error = copyinstr(info.volume, devstr, MNAMELEN - 1, &done);
		if (error)
			return (error);

		/*
		 * Extract device and label from the "device@LABEL"
		 * volume string; both parts must be non-empty.
		 */
		dev = devstr;
		label = strchr(devstr, '@');
		if (label == NULL ||
		    ((label + 1) - dev) > done) {
			return (EINVAL);
		}
		*label = '\0';
		label++;
		if (*label == '\0')
			return (EINVAL);

		if (mp->mnt_flag & MNT_UPDATE) {
			/* Update mount */
			/* HAMMER2 implements NFS export via mountctl */
			pmp = MPTOPMP(mp);
			for (i = 0; i < pmp->cluster.nchains; ++i) {
				hmp = pmp->cluster.array[i]->hmp;
				devvp = hmp->devvp;
				error = hammer2_remount(hmp, mp, path,
							devvp, cred);
				if (error)
					break;
			}
			hammer2_inode_install_hidden(pmp);

			return error;
		}
	}

	/*
	 * PFS mount
	 *
	 * Lookup name and verify it refers to a block device.
	 */
	error = nlookup_init(&nd, dev, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp);
	nlookup_done(&nd);

	if (error == 0) {
		if (vn_isdisk(devvp, &error))
			error = vfs_mountedon(devvp);
	}

	/*
	 * Determine if the device has already been mounted. After this
	 * check hmp will be non-NULL if we are doing the second or more
	 * hammer2 mounts from the same device.
	 */
	lockmgr(&hammer2_mntlk, LK_EXCLUSIVE);
	TAILQ_FOREACH(hmp, &hammer2_mntlist, mntentry) {
		if (hmp->devvp == devvp)
			break;
	}

	/*
	 * Open the device if this isn't a secondary mount and construct
	 * the H2 device mount (hmp).
	 */
	if (hmp == NULL) {
		if (error == 0 && vcount(devvp) > 0)
			error = EBUSY;

		/*
		 * Now open the device
		 */
		if (error == 0) {
			ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			error = vinvalbuf(devvp, V_SAVE, 0, 0);
			if (error == 0) {
				error = VOP_OPEN(devvp,
					 ronly ? FREAD : FREAD | FWRITE,
					 FSCRED, NULL);
			}
			vn_unlock(devvp);
		}
		if (error && devvp) {
			vrele(devvp);
			devvp = NULL;
		}
		if (error) {
			lockmgr(&hammer2_mntlk, LK_RELEASE);
			return error;
		}
		hmp = kmalloc(sizeof(*hmp), M_HAMMER2, M_WAITOK | M_ZERO);
		hmp->ronly = ronly;
		hmp->devvp = devvp;
		kmalloc_create(&hmp->mchain, "HAMMER2-chains");
		TAILQ_INSERT_TAIL(&hammer2_mntlist, hmp, mntentry);
		RB_INIT(&hmp->iotree);

		lockinit(&hmp->alloclk, "h2alloc", 0, 0);
		lockinit(&hmp->voldatalk, "voldata", 0, LK_CANRECURSE);
		TAILQ_INIT(&hmp->transq);

		/*
		 * vchain setup. vchain.data is embedded.
		 * vchain.refs is initialized and will never drop to 0.
		 *
		 * NOTE! voldata is not yet loaded.
		 */
		hmp->vchain.hmp = hmp;
		hmp->vchain.refs = 1;
		hmp->vchain.data = (void *)&hmp->voldata;
		hmp->vchain.bref.type = HAMMER2_BREF_TYPE_VOLUME;
		hmp->vchain.bref.data_off = 0 | HAMMER2_PBUFRADIX;
		hmp->vchain.delete_tid = HAMMER2_MAX_TID;

		hammer2_chain_core_alloc(NULL, &hmp->vchain, NULL);
		/* hmp->vchain.u.xxx is left NULL */

		/*
		 * fchain setup.  fchain.data is embedded.
		 * fchain.refs is initialized and will never drop to 0.
		 *
		 * The data is not used but needs to be initialized to
		 * pass assertion muster.  We use this chain primarily
		 * as a placeholder for the freemap's top-level RBTREE
		 * so it does not interfere with the volume's topology
		 * RBTREE.
		 */
		hmp->fchain.hmp = hmp;
		hmp->fchain.refs = 1;
		hmp->fchain.data = (void *)&hmp->voldata.freemap_blockset;
		hmp->fchain.bref.type = HAMMER2_BREF_TYPE_FREEMAP;
		hmp->fchain.bref.data_off = 0 | HAMMER2_PBUFRADIX;
		hmp->fchain.bref.methods =
			HAMMER2_ENC_CHECK(HAMMER2_CHECK_FREEMAP) |
			HAMMER2_ENC_COMP(HAMMER2_COMP_NONE);
		hmp->fchain.delete_tid = HAMMER2_MAX_TID;

		hammer2_chain_core_alloc(NULL, &hmp->fchain, NULL);
		/* hmp->fchain.u.xxx is left NULL */

		/*
		 * Install the volume header and initialize fields from
		 * voldata.
		 */
		error = hammer2_install_volume_header(hmp);
		if (error) {
			/*
			 * The fake pmp_count bump lets the unmount path
			 * tear down the partially constructed hmp.
			 */
			++hmp->pmp_count;
			hammer2_vfs_unmount_hmp1(mp, hmp);
			hammer2_vfs_unmount_hmp2(mp, hmp);
			hammer2_vfs_unmount(mp, MNT_FORCE);
			return error;
		}

		/*
		 * Really important to get these right or flush will get
		 * confused.
		 */
		hmp->vchain.bref.mirror_tid = hmp->voldata.mirror_tid;
		hmp->vchain.modify_tid = hmp->voldata.mirror_tid;
		hmp->vchain.update_lo = hmp->voldata.mirror_tid;
		hmp->fchain.bref.mirror_tid = hmp->voldata.freemap_tid;
		hmp->fchain.modify_tid = hmp->voldata.freemap_tid;
		hmp->fchain.update_lo = hmp->voldata.freemap_tid;

		/*
		 * First locate the super-root inode, which is key 0
		 * relative to the volume header's blockset.
		 *
		 * Then locate the root inode by scanning the directory keyspace
		 * represented by the label.
		 */
		parent = hammer2_chain_lookup_init(&hmp->vchain, 0);
		schain = hammer2_chain_lookup(&parent, &key_dummy,
				      HAMMER2_SROOT_KEY, HAMMER2_SROOT_KEY,
				      &cache_index, 0, &ddflag);
		hammer2_chain_lookup_done(parent);
		if (schain == NULL) {
			kprintf("hammer2_mount: invalid super-root\n");
			++hmp->pmp_count;
			hammer2_vfs_unmount_hmp1(mp, hmp);
			hammer2_vfs_unmount_hmp2(mp, hmp);
			hammer2_vfs_unmount(mp, MNT_FORCE);
			return EINVAL;
		}

		/*
		 * NOTE: inode_get sucks up schain's lock.
		 */
		atomic_set_int(&schain->flags, HAMMER2_CHAIN_PFSROOT);
		cluster = hammer2_cluster_from_chain(schain);
		hmp->sroot = hammer2_inode_get(NULL, NULL, cluster);
		hammer2_inode_ref(hmp->sroot);
		hammer2_inode_unlock_ex(hmp->sroot, cluster);
		schain = NULL;
		/* leave hmp->sroot with one ref */

		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
			error = hammer2_recovery(hmp);
			/* XXX do something with error */
		}
	}

	/*
	 * Block device opened successfully, finish initializing the
	 * mount structure.
	 *
	 * From this point on we have to call hammer2_unmount() on failure.
	 */
	pmp = kmalloc(sizeof(*pmp), M_HAMMER2, M_WAITOK | M_ZERO);

	kmalloc_create(&pmp->minode, "HAMMER2-inodes");
	kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg");
	lockinit(&pmp->lock, "pfslk", 0, 0);
	spin_init(&pmp->inum_spin);
	RB_INIT(&pmp->inum_tree);
	TAILQ_INIT(&pmp->unlinkq);
	spin_init(&pmp->unlinkq_spin);
	pmp->cluster.flags = HAMMER2_CLUSTER_PFS;

	kdmsg_iocom_init(&pmp->iocom, pmp,
			 KDMSG_IOCOMF_AUTOCONN |
			 KDMSG_IOCOMF_AUTOSPAN |
			 KDMSG_IOCOMF_AUTOCIRC,
			 pmp->mmsg, hammer2_rcvdmsg);

	ccms_domain_init(&pmp->ccms_dom);
	++hmp->pmp_count;
	lockmgr(&hammer2_mntlk, LK_RELEASE);
	kprintf("hammer2_mount hmp=%p pmp=%p pmpcnt=%d\n",
		hmp, pmp, hmp->pmp_count);

	/*
	 * NOTE(review): direct assignment clobbers any pre-existing
	 * mnt_flag bits (e.g. MNT_RDONLY set by the mount syscall);
	 * most filesystems use |= here -- verify this is intentional.
	 */
	mp->mnt_flag = MNT_LOCAL;
	mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;	/* all entry pts are SMP */
	mp->mnt_kern_flag |= MNTK_THR_SYNC;	/* new vsyncscan semantics */

	/*
	 * required mount structure initializations
	 */
	mp->mnt_stat.f_iosize = HAMMER2_PBUFSIZE;
	mp->mnt_stat.f_bsize = HAMMER2_PBUFSIZE;

	mp->mnt_vstat.f_frsize = HAMMER2_PBUFSIZE;
	mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;

	/*
	 * Optional fields
	 */
	mp->mnt_iosize_max = MAXPHYS;
	mp->mnt_data = (qaddr_t)pmp;
	pmp->mp = mp;

	/*
	 * Lookup mount point under the media-localized super-root.
	 */
	cparent = hammer2_inode_lock_ex(hmp->sroot);
	lhc = hammer2_dirhash(label, strlen(label));
	cluster = hammer2_cluster_lookup(cparent, &key_next,
				      lhc, lhc + HAMMER2_DIRHASH_LOMASK,
				      0, &ddflag);
	while (cluster) {
		if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE &&
		    strcmp(label,
		       hammer2_cluster_data(cluster)->ipdata.filename) == 0) {
			break;
		}
		cluster = hammer2_cluster_next(cparent, cluster, &key_next,
					    key_next,
					    lhc + HAMMER2_DIRHASH_LOMASK, 0);
	}
	hammer2_inode_unlock_ex(hmp->sroot, cparent);

	if (cluster == NULL) {
		kprintf("hammer2_mount: PFS label not found\n");
		hammer2_vfs_unmount_hmp1(mp, hmp);
		hammer2_vfs_unmount_hmp2(mp, hmp);
		hammer2_vfs_unmount(mp, MNT_FORCE);
		return EINVAL;
	}

	for (i = 0; i < cluster->nchains; ++i) {
		rchain = cluster->array[i];
		if (rchain->flags & HAMMER2_CHAIN_MOUNTED) {
			kprintf("hammer2_mount: PFS label already mounted!\n");
			hammer2_cluster_unlock(cluster);
			hammer2_vfs_unmount_hmp1(mp, hmp);
			hammer2_vfs_unmount_hmp2(mp, hmp);
			hammer2_vfs_unmount(mp, MNT_FORCE);
			return EBUSY;
		}
#if 0
		if (rchain->flags & HAMMER2_CHAIN_RECYCLE) {
			kprintf("hammer2_mount: PFS label is recycling\n");
			hammer2_cluster_unlock(cluster);
			hammer2_vfs_unmount_hmp1(mp, hmp);
			hammer2_vfs_unmount_hmp2(mp, hmp);
			hammer2_vfs_unmount(mp, MNT_FORCE);
			return EBUSY;
		}
#endif
	}

	/*
	 * After this point hammer2_vfs_unmount() has visibility on hmp
	 * and manual hmp1/hmp2 calls are not needed on fatal errors.
	 */
	pmp->cluster = *cluster;
	KKASSERT(pmp->cluster.refs == 1);
	for (i = 0; i < cluster->nchains; ++i) {
		rchain = cluster->array[i];
		KKASSERT(rchain->pmp == NULL);	/* tracking pmp for rchain */
		rchain->pmp = pmp;
		atomic_set_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED);
		hammer2_chain_ref(rchain);	/* ref for pmp->cluster */
	}
	pmp->iroot = hammer2_inode_get(pmp, NULL, cluster);
	hammer2_inode_ref(pmp->iroot);		/* ref for pmp->iroot */
	hammer2_inode_unlock_ex(pmp->iroot, cluster);

	kprintf("iroot %p\n", pmp->iroot);

	/*
	 * The logical file buffer bio write thread handles things
	 * like physical block assignment and compression.
	 */
	mtx_init(&pmp->wthread_mtx);
	bioq_init(&pmp->wthread_bioq);
	pmp->wthread_destroy = 0;
	lwkt_create(hammer2_write_thread, pmp,
		    &pmp->wthread_td, NULL, 0, -1, "hwrite-%s", label);

	/*
	 * Ref the cluster management messaging descriptor.  The mount
	 * program deals with the other end of the communications pipe.
	 */
	fp = holdfp(curproc->p_fd, info.cluster_fd, -1);
	if (fp == NULL) {
		kprintf("hammer2_mount: bad cluster_fd!\n");
		hammer2_vfs_unmount(mp, MNT_FORCE);
		return EBADF;
	}
	hammer2_cluster_reconnect(pmp, fp);

	/*
	 * With the cluster operational install ihidden.
	 */
	hammer2_inode_install_hidden(pmp);

	/*
	 * Finish setup
	 */
	vfs_getnewfsid(mp);
	vfs_add_vnodeops(mp, &hammer2_vnode_vops, &mp->mnt_vn_norm_ops);
	vfs_add_vnodeops(mp, &hammer2_spec_vops, &mp->mnt_vn_spec_ops);
	vfs_add_vnodeops(mp, &hammer2_fifo_vops, &mp->mnt_vn_fifo_ops);

	/*
	 * NOTE(review): these copyinstr() return values are ignored;
	 * the strings were validated by the earlier copyin/copyinstr,
	 * but a failure here would leave the names empty -- verify
	 * best-effort behavior is intended.
	 */
	copyinstr(info.volume, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
	bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname));
	copyinstr(path, mp->mnt_stat.f_mntonname,
		  sizeof(mp->mnt_stat.f_mntonname) - 1,
		  &size);

	/*
	 * Initial statfs to prime mnt_stat.
	 */
	hammer2_vfs_statfs(mp, &mp->mnt_stat, cred);

	return 0;
}

/*
 * Handle bioq for strategy write
 *
 * Per-PFS worker thread: pulls logical-buffer bios off pmp->wthread_bioq
 * and performs physical block assignment / compression inside a buffer
 * cache transaction.  Runs until pmp->wthread_destroy is set non-zero,
 * then sets it to -1 and wakes up any waiter.
 */
static
void
hammer2_write_thread(void *arg)
{
	hammer2_pfsmount_t *pmp;
	struct bio *bio;
	struct buf *bp;
	hammer2_trans_t trans;
	struct vnode *vp;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cparent;
	hammer2_inode_data_t *ipdata;
	hammer2_key_t lbase;
	int lblksize;
	int pblksize;
	int error;

	pmp = arg;

	mtx_lock(&pmp->wthread_mtx);
	while (pmp->wthread_destroy == 0) {
		if (bioq_first(&pmp->wthread_bioq) == NULL) {
			mtxsleep(&pmp->wthread_bioq, &pmp->wthread_mtx,
				 0, "h2bioqw", 0);
		}
		cparent = NULL;

		hammer2_trans_init(&trans, pmp, NULL, HAMMER2_TRANS_BUFCACHE);

		while ((bio = bioq_takefirst(&pmp->wthread_bioq)) != NULL) {
			/*
			 * dummy bio for synchronization.  The transaction
			 * must be reinitialized.
			 */
			if (bio->bio_buf == NULL) {
				bio->bio_flags |= BIO_DONE;
				wakeup(bio);
				hammer2_trans_done(&trans);
				hammer2_trans_init(&trans, pmp, NULL,
						   HAMMER2_TRANS_BUFCACHE);
				continue;
			}

			/*
			 * else normal bio processing
			 */
			mtx_unlock(&pmp->wthread_mtx);

			hammer2_lwinprog_drop(pmp);

			error = 0;
			bp = bio->bio_buf;
			vp = bp->b_vp;
			ip = VTOI(vp);

			/*
			 * Inode is modified, flush size and mtime changes
			 * to ensure that the file size remains consistent
			 * with the buffers being flushed.
			 *
			 * NOTE: The inode_fsync() call only flushes the
			 *	 inode's meta-data state, it doesn't try
			 *	 to flush underlying buffers or chains.
			 */
			cparent = hammer2_inode_lock_ex(ip);
			if (ip->flags & (HAMMER2_INODE_RESIZED |
					 HAMMER2_INODE_MTIME)) {
				hammer2_inode_fsync(&trans, ip, cparent);
			}
			ipdata = hammer2_cluster_modify_ip(&trans, ip,
							 cparent, 0);
			lblksize = hammer2_calc_logical(ip, bio->bio_offset,
							&lbase, NULL);
			pblksize = hammer2_calc_physical(ip, ipdata, lbase);
			hammer2_write_file_core(bp, &trans, ip, ipdata,
						cparent,
						lbase, IO_ASYNC,
						pblksize, &error);
			hammer2_inode_unlock_ex(ip, cparent);
			if (error) {
				kprintf("hammer2: error in buffer write\n");
				bp->b_flags |= B_ERROR;
				bp->b_error = EIO;
			}
			biodone(bio);
			mtx_lock(&pmp->wthread_mtx);
		}
		hammer2_trans_done(&trans);
	}
	pmp->wthread_destroy = -1;
	wakeup(&pmp->wthread_destroy);

	mtx_unlock(&pmp->wthread_mtx);
}

/*
 * Wait for the write thread to drain its bioq.  Inserts a dummy bio
 * (bio_buf == NULL) at the tail and sleeps until the thread marks it
 * BIO_DONE, guaranteeing everything queued before the call has been
 * processed.
 */
void
hammer2_bioq_sync(hammer2_pfsmount_t *pmp)
{
	struct bio sync_bio;

	bzero(&sync_bio, sizeof(sync_bio));	/* dummy with no bio_buf */
	mtx_lock(&pmp->wthread_mtx);
	if (pmp->wthread_destroy == 0 &&
	    TAILQ_FIRST(&pmp->wthread_bioq.queue)) {
		bioq_insert_tail(&pmp->wthread_bioq, &sync_bio);
		while ((sync_bio.bio_flags & BIO_DONE) == 0)
			mtxsleep(&sync_bio, &pmp->wthread_mtx, 0,
				 "h2bioq", 0);
	}
	mtx_unlock(&pmp->wthread_mtx);
}

/*
 * Return a chain suitable for I/O, creating the chain if necessary
 * and assigning its physical block.
 */
static
hammer2_cluster_t *
hammer2_assign_physical(hammer2_trans_t *trans,
			hammer2_inode_t *ip, hammer2_cluster_t *cparent,
			hammer2_key_t lbase, int pblksize, int *errorp)
{
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *dparent;
	hammer2_key_t key_dummy;
	int pradix = hammer2_getradix(pblksize);
	int ddflag;

	/*
	 * Locate the chain associated with lbase, return a locked chain.
	 * However, do not instantiate any data reference (which utilizes a
	 * device buffer) because we will be using direct IO via the
	 * logical buffer cache buffer.
	 */
	*errorp = 0;
	KKASSERT(pblksize >= HAMMER2_MIN_ALLOC);
retry:
	dparent = hammer2_cluster_lookup_init(cparent, 0);
	cluster = hammer2_cluster_lookup(dparent, &key_dummy,
				     lbase, lbase,
				     HAMMER2_LOOKUP_NODATA, &ddflag);

	if (cluster == NULL) {
		/*
		 * We found a hole, create a new chain entry.
		 *
		 * NOTE: DATA chains are created without device backing
		 *	 store (nor do we want any).
		 */
		*errorp = hammer2_cluster_create(trans, dparent, &cluster,
					       lbase, HAMMER2_PBUFRADIX,
					       HAMMER2_BREF_TYPE_DATA,
					       pblksize);
		if (cluster == NULL) {
			hammer2_cluster_lookup_done(dparent);
			/*
			 * NOTE(review): panic() does not normally return,
			 * making this goto unreachable -- presumably kept
			 * as a safety for continuable panics; verify.
			 */
			panic("hammer2_cluster_create: par=%p error=%d\n",
				dparent->focus, *errorp);
			goto retry;
		}
		/*ip->delta_dcount += pblksize;*/
	} else {
		switch (hammer2_cluster_type(cluster)) {
		case HAMMER2_BREF_TYPE_INODE:
			/*
			 * The data is embedded in the inode.  The
			 * caller is responsible for marking the inode
			 * modified and copying the data to the embedded
			 * area.
			 */
			break;
		case HAMMER2_BREF_TYPE_DATA:
			/* re-block to the requested physical size */
			if (hammer2_cluster_bytes(cluster) != pblksize) {
				hammer2_cluster_resize(trans, ip,
						     dparent, cluster,
						     pradix,
						     HAMMER2_MODIFY_OPTDATA);
			}
			hammer2_cluster_modify(trans, cluster,
					     HAMMER2_MODIFY_OPTDATA);
			break;
		default:
			panic("hammer2_assign_physical: bad type");
			/* NOT REACHED */
			break;
		}
	}

	/*
	 * Cleanup.  If cluster wound up being the inode itself, i.e.
	 * the DIRECTDATA case for offset 0, then we need to update cparent.
	 * The caller expects cparent to not become stale.
	 */
	hammer2_cluster_lookup_done(dparent);
	/* dparent = NULL; safety */
	if (cluster && ddflag)
		hammer2_cluster_replace_locked(cparent, cluster);
	return (cluster);
}

/*
 * From hammer2_vnops.c.
 * The core write function which determines which path to take
 * depending on compression settings.
 */
static
void
hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
			hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
			hammer2_cluster_t *cparent,
			hammer2_key_t lbase, int ioflag, int pblksize,
			int *errorp)
{
	hammer2_cluster_t *cluster;

	switch(HAMMER2_DEC_COMP(ipdata->comp_algo)) {
	case HAMMER2_COMP_NONE:
		/*
		 * We have to assign physical storage to the buffer
		 * we intend to dirty or write now to avoid deadlocks
		 * in the strategy code later.
		 *
		 * This can return NOOFFSET for inode-embedded data.
		 * The strategy code will take care of it in that case.
		 *
		 * NOTE(review): cluster may be NULL here (with *errorp
		 * set) -- presumably hammer2_write_bp() tolerates that;
		 * verify.
		 */
		cluster = hammer2_assign_physical(trans, ip, cparent,
						lbase, pblksize,
						errorp);
		hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp);
		if (cluster)
			hammer2_cluster_unlock(cluster);
		break;
	case HAMMER2_COMP_AUTOZERO:
		/*
		 * Check for zero-fill only
		 */
		hammer2_zero_check_and_write(bp, trans, ip,
					     ipdata, cparent, lbase,
					     ioflag, pblksize, errorp);
		break;
	case HAMMER2_COMP_LZ4:
	case HAMMER2_COMP_ZLIB:
	default:
		/*
		 * Check for zero-fill and attempt compression.
		 */
		hammer2_compress_and_write(bp, trans, ip,
					   ipdata, cparent,
					   lbase, ioflag,
					   pblksize, errorp,
					   ipdata->comp_algo);
		break;
	}
}

/*
 * Generic function that will perform the compression in compression
 * write path. The compression algorithm is determined by the settings
 * obtained from inode.
 */
static
void
hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
	hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
	hammer2_cluster_t *cparent,
	hammer2_key_t lbase, int ioflag, int pblksize,
	int *errorp, int comp_algo)
{
	hammer2_cluster_t *cluster;
	hammer2_chain_t *chain;
	int comp_size;
	int comp_block_size;
	int i;
	char *comp_buffer;

	/* all-zero blocks are written as holes, no compression needed */
	if (test_block_zeros(bp->b_data, pblksize)) {
		zero_write(bp, trans, ip, ipdata, cparent, lbase, errorp);
		return;
	}

	comp_size = 0;
	comp_buffer = NULL;

	/* compression buffers (C_BUFFER objcache) are 32KB */
	KKASSERT(pblksize / 2 <= 32768);

	/*
	 * comp_heuristic backs off compression after repeated failures:
	 * presumably try the first 8 blocks, then only every 8th --
	 * see the increment/reset logic further below.
	 */
	if (ip->comp_heuristic < 8 || (ip->comp_heuristic & 7) == 0) {
		z_stream strm_compress;
		int comp_level;
		int ret;

		switch(HAMMER2_DEC_COMP(comp_algo)) {
		case HAMMER2_COMP_LZ4:
			comp_buffer = objcache_get(cache_buffer_write,
						   M_INTWAIT);
			/*
			 * Compression only "succeeds" if the result fits
			 * in half the physical block (minus the 32-bit
			 * length prefix stored ahead of the data).
			 */
			comp_size = LZ4_compress_limitedOutput(
					bp->b_data,
					&comp_buffer[sizeof(int)],
					pblksize,
					pblksize / 2 - sizeof(int));
/* 1059 * We need to prefix with the size, LZ4 1060 * doesn't do it for us. Add the related 1061 * overhead. 1062 */ 1063 *(int *)comp_buffer = comp_size; 1064 if (comp_size) 1065 comp_size += sizeof(int); 1066 break; 1067 case HAMMER2_COMP_ZLIB: 1068 comp_level = HAMMER2_DEC_LEVEL(comp_algo); 1069 if (comp_level == 0) 1070 comp_level = 6; /* default zlib compression */ 1071 else if (comp_level < 6) 1072 comp_level = 6; 1073 else if (comp_level > 9) 1074 comp_level = 9; 1075 ret = deflateInit(&strm_compress, comp_level); 1076 if (ret != Z_OK) { 1077 kprintf("HAMMER2 ZLIB: fatal error " 1078 "on deflateInit.\n"); 1079 } 1080 1081 comp_buffer = objcache_get(cache_buffer_write, 1082 M_INTWAIT); 1083 strm_compress.next_in = bp->b_data; 1084 strm_compress.avail_in = pblksize; 1085 strm_compress.next_out = comp_buffer; 1086 strm_compress.avail_out = pblksize / 2; 1087 ret = deflate(&strm_compress, Z_FINISH); 1088 if (ret == Z_STREAM_END) { 1089 comp_size = pblksize / 2 - 1090 strm_compress.avail_out; 1091 } else { 1092 comp_size = 0; 1093 } 1094 ret = deflateEnd(&strm_compress); 1095 break; 1096 default: 1097 kprintf("Error: Unknown compression method.\n"); 1098 kprintf("Comp_method = %d.\n", comp_algo); 1099 break; 1100 } 1101 } 1102 1103 if (comp_size == 0) { 1104 /* 1105 * compression failed or turned off 1106 */ 1107 comp_block_size = pblksize; /* safety */ 1108 if (++ip->comp_heuristic > 128) 1109 ip->comp_heuristic = 8; 1110 } else { 1111 /* 1112 * compression succeeded 1113 */ 1114 ip->comp_heuristic = 0; 1115 if (comp_size <= 1024) { 1116 comp_block_size = 1024; 1117 } else if (comp_size <= 2048) { 1118 comp_block_size = 2048; 1119 } else if (comp_size <= 4096) { 1120 comp_block_size = 4096; 1121 } else if (comp_size <= 8192) { 1122 comp_block_size = 8192; 1123 } else if (comp_size <= 16384) { 1124 comp_block_size = 16384; 1125 } else if (comp_size <= 32768) { 1126 comp_block_size = 32768; 1127 } else { 1128 panic("hammer2: WRITE PATH: " 1129 "Weird comp_size 
value."); 1130 /* NOT REACHED */ 1131 comp_block_size = pblksize; 1132 } 1133 } 1134 1135 cluster = hammer2_assign_physical(trans, ip, cparent, 1136 lbase, comp_block_size, 1137 errorp); 1138 ipdata = &hammer2_cluster_data(cparent)->ipdata; 1139 1140 if (*errorp) { 1141 kprintf("WRITE PATH: An error occurred while " 1142 "assigning physical space.\n"); 1143 KKASSERT(cluster == NULL); 1144 goto done; 1145 } 1146 1147 for (i = 0; i < cluster->nchains; ++i) { 1148 hammer2_io_t *dio; 1149 char *bdata; 1150 int temp_check; 1151 1152 chain = cluster->array[i]; 1153 KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED); 1154 1155 switch(chain->bref.type) { 1156 case HAMMER2_BREF_TYPE_INODE: 1157 KKASSERT(chain->data->ipdata.op_flags & 1158 HAMMER2_OPFLAG_DIRECTDATA); 1159 KKASSERT(bp->b_loffset == 0); 1160 bcopy(bp->b_data, chain->data->ipdata.u.data, 1161 HAMMER2_EMBEDDED_BYTES); 1162 break; 1163 case HAMMER2_BREF_TYPE_DATA: 1164 temp_check = HAMMER2_DEC_CHECK(chain->bref.methods); 1165 1166 /* 1167 * Optimize out the read-before-write 1168 * if possible. 1169 */ 1170 *errorp = hammer2_io_newnz(chain->hmp, 1171 chain->bref.data_off, 1172 chain->bytes, 1173 &dio); 1174 if (*errorp) { 1175 hammer2_io_brelse(&dio); 1176 kprintf("hammer2: WRITE PATH: " 1177 "dbp bread error\n"); 1178 break; 1179 } 1180 bdata = hammer2_io_data(dio, chain->bref.data_off); 1181 1182 /* 1183 * When loading the block make sure we don't 1184 * leave garbage after the compressed data. 
1185 */ 1186 if (comp_size) { 1187 chain->bref.methods = 1188 HAMMER2_ENC_COMP(comp_algo) + 1189 HAMMER2_ENC_CHECK(temp_check); 1190 bcopy(comp_buffer, bdata, comp_size); 1191 if (comp_size != comp_block_size) { 1192 bzero(bdata + comp_size, 1193 comp_block_size - comp_size); 1194 } 1195 } else { 1196 chain->bref.methods = 1197 HAMMER2_ENC_COMP( 1198 HAMMER2_COMP_NONE) + 1199 HAMMER2_ENC_CHECK(temp_check); 1200 bcopy(bp->b_data, bdata, pblksize); 1201 } 1202 1203 /* 1204 * Device buffer is now valid, chain is no 1205 * longer in the initial state. 1206 */ 1207 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 1208 1209 /* Now write the related bdp. */ 1210 if (ioflag & IO_SYNC) { 1211 /* 1212 * Synchronous I/O requested. 1213 */ 1214 hammer2_io_bwrite(&dio); 1215 /* 1216 } else if ((ioflag & IO_DIRECT) && 1217 loff + n == pblksize) { 1218 hammer2_io_bdwrite(&dio); 1219 */ 1220 } else if (ioflag & IO_ASYNC) { 1221 hammer2_io_bawrite(&dio); 1222 } else { 1223 hammer2_io_bdwrite(&dio); 1224 } 1225 break; 1226 default: 1227 panic("hammer2_write_bp: bad chain type %d\n", 1228 chain->bref.type); 1229 /* NOT REACHED */ 1230 break; 1231 } 1232 1233 hammer2_chain_unlock(chain); 1234 } 1235 done: 1236 if (comp_buffer) 1237 objcache_put(cache_buffer_write, comp_buffer); 1238 } 1239 1240 /* 1241 * Function that performs zero-checking and writing without compression, 1242 * it corresponds to default zero-checking path. 
1243 */ 1244 static 1245 void 1246 hammer2_zero_check_and_write(struct buf *bp, hammer2_trans_t *trans, 1247 hammer2_inode_t *ip, hammer2_inode_data_t *ipdata, 1248 hammer2_cluster_t *cparent, 1249 hammer2_key_t lbase, int ioflag, int pblksize, int *errorp) 1250 { 1251 hammer2_cluster_t *cluster; 1252 1253 if (test_block_zeros(bp->b_data, pblksize)) { 1254 zero_write(bp, trans, ip, ipdata, cparent, lbase, errorp); 1255 } else { 1256 cluster = hammer2_assign_physical(trans, ip, cparent, 1257 lbase, pblksize, errorp); 1258 hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp); 1259 if (cluster) 1260 hammer2_cluster_unlock(cluster); 1261 } 1262 } 1263 1264 /* 1265 * A function to test whether a block of data contains only zeros, 1266 * returns TRUE (non-zero) if the block is all zeros. 1267 */ 1268 static 1269 int 1270 test_block_zeros(const char *buf, size_t bytes) 1271 { 1272 size_t i; 1273 1274 for (i = 0; i < bytes; i += sizeof(long)) { 1275 if (*(const long *)(buf + i) != 0) 1276 return (0); 1277 } 1278 return (1); 1279 } 1280 1281 /* 1282 * Function to "write" a block that contains only zeros. 
1283 */ 1284 static 1285 void 1286 zero_write(struct buf *bp, hammer2_trans_t *trans, hammer2_inode_t *ip, 1287 hammer2_inode_data_t *ipdata, hammer2_cluster_t *cparent, 1288 hammer2_key_t lbase, int *errorp __unused) 1289 { 1290 hammer2_cluster_t *cluster; 1291 hammer2_media_data_t *data; 1292 hammer2_key_t key_dummy; 1293 int ddflag; 1294 1295 cparent = hammer2_cluster_lookup_init(cparent, 0); 1296 cluster = hammer2_cluster_lookup(cparent, &key_dummy, lbase, lbase, 1297 HAMMER2_LOOKUP_NODATA, &ddflag); 1298 if (cluster) { 1299 data = hammer2_cluster_data(cluster); 1300 1301 if (ddflag) { 1302 bzero(data->ipdata.u.data, HAMMER2_EMBEDDED_BYTES); 1303 } else { 1304 hammer2_cluster_delete(trans, cluster, 0); 1305 } 1306 hammer2_cluster_unlock(cluster); 1307 } 1308 hammer2_cluster_lookup_done(cparent); 1309 } 1310 1311 /* 1312 * Function to write the data as it is, without performing any sort of 1313 * compression. This function is used in path without compression and 1314 * default zero-checking path. 
 */
static
void
hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp, int ioflag,
				int pblksize, int *errorp)
{
	hammer2_chain_t *chain;
	hammer2_io_t *dio;
	char *bdata;
	int error;
	int i;
	int temp_check;

	error = 0;	/* XXX TODO below */

	/*
	 * Replicate the logical buffer into every chain making up the
	 * cluster.
	 */
	for (i = 0; i < cluster->nchains; ++i) {
		chain = cluster->array[i];

		/* preserve the check method already encoded on the chain */
		temp_check = HAMMER2_DEC_CHECK(chain->bref.methods);

		KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);

		switch(chain->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			/*
			 * Small files are embedded directly in the inode;
			 * requires DIRECTDATA and only logical offset 0.
			 */
			KKASSERT(chain->data->ipdata.op_flags &
				 HAMMER2_OPFLAG_DIRECTDATA);
			KKASSERT(bp->b_loffset == 0);
			bcopy(bp->b_data, chain->data->ipdata.u.data,
			      HAMMER2_EMBEDDED_BYTES);
			error = 0;
			break;
		case HAMMER2_BREF_TYPE_DATA:
			/*
			 * Acquire the device buffer without a prior read
			 * (new-zero); we overwrite it in full below.
			 */
			error = hammer2_io_newnz(chain->hmp,
						 chain->bref.data_off,
						 chain->bytes, &dio);
			if (error) {
				hammer2_io_bqrelse(&dio);
				kprintf("hammer2: WRITE PATH: "
					"dbp bread error\n");
				break;
			}
			bdata = hammer2_io_data(dio, chain->bref.data_off);

			chain->bref.methods = HAMMER2_ENC_COMP(
							HAMMER2_COMP_NONE) +
					      HAMMER2_ENC_CHECK(temp_check);
			bcopy(bp->b_data, bdata, chain->bytes);

			/*
			 * Device buffer is now valid, chain is no
			 * longer in the initial state.
			 */
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);

			if (ioflag & IO_SYNC) {
				/*
				 * Synchronous I/O requested.
				 */
				hammer2_io_bwrite(&dio);
			/*
			} else if ((ioflag & IO_DIRECT) &&
				   loff + n == pblksize) {
				hammer2_io_bdwrite(&dio);
			*/
			} else if (ioflag & IO_ASYNC) {
				hammer2_io_bawrite(&dio);
			} else {
				/* default: delayed write */
				hammer2_io_bdwrite(&dio);
			}
			break;
		default:
			panic("hammer2_write_bp: bad chain type %d\n",
			      chain->bref.type);
			/* NOT REACHED */
			error = 0;
			break;
		}
		KKASSERT(error == 0);	/* XXX TODO */
	}
	*errorp = error;
}

/*
 * Handle an MNT_UPDATE remount.  The only transition acted on is
 * read-only -> read-write (MNTK_WANTRDWR), which requires running
 * mount-time recovery.  path/devvp/cred are currently unused.
 */
static
int
hammer2_remount(hammer2_mount_t *hmp, struct mount *mp, char *path,
		struct vnode *devvp, struct ucred *cred)
{
	int error;

	if (hmp->ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
		error = hammer2_recovery(hmp);
	} else {
		error = 0;
	}
	return error;
}

/*
 * Unmount a PFS.  Flushes vnodes, shuts down the write thread, drops
 * the hidden and root inodes, then tears down each cluster member via
 * the hmp1/hmp2 helpers before freeing the pfsmount itself.
 */
static
int
hammer2_vfs_unmount(struct mount *mp, int mntflags)
{
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;
	hammer2_chain_t *rchain;
	int flags;
	int error = 0;
	int i;

	pmp = MPTOPMP(mp);

	if (pmp == NULL)
		return(0);

	lockmgr(&hammer2_mntlk, LK_EXCLUSIVE);

	/*
	 * If mount initialization proceeded far enough we must flush
	 * its vnodes.
	 */
	if (mntflags & MNT_FORCE)
		flags = FORCECLOSE;
	else
		flags = 0;
	if (pmp->iroot) {
		error = vflush(mp, 0, flags);
		if (error)
			goto failed;
	}

	ccms_domain_uninit(&pmp->ccms_dom);
	kdmsg_iocom_uninit(&pmp->iocom);	/* XXX chain dependency */

	/*
	 * Shut down the logical-buffer write thread: signal it via
	 * wthread_destroy = 1 and wait for it to acknowledge by setting
	 * wthread_destroy = -1.
	 */
	if (pmp->wthread_td) {
		mtx_lock(&pmp->wthread_mtx);
		pmp->wthread_destroy = 1;
		wakeup(&pmp->wthread_bioq);
		while (pmp->wthread_destroy != -1) {
			mtxsleep(&pmp->wthread_destroy,
				 &pmp->wthread_mtx, 0,
				 "umount-sleep", 0);
		}
		mtx_unlock(&pmp->wthread_mtx);
		pmp->wthread_td = NULL;
	}

	/*
	 * Cleanup our reference on ihidden.
	 */
	if (pmp->ihidden) {
		hammer2_inode_drop(pmp->ihidden);
		pmp->ihidden = NULL;
	}

	/*
	 * Cleanup our reference on iroot.  iroot is (should) not be needed
	 * by the flush code.
	 */
	if (pmp->iroot) {
#if REPORT_REFS_ERRORS
		if (pmp->iroot->refs != 1)
			kprintf("PMP->IROOT %p REFS WRONG %d\n",
				pmp->iroot, pmp->iroot->refs);
#else
		KKASSERT(pmp->iroot->refs == 1);
#endif
		/* ref for pmp->iroot */
		hammer2_inode_drop(pmp->iroot);
		pmp->iroot = NULL;
	}

	/*
	 * Tear down each cluster member.  hmp1 syncs and leaves the hmp
	 * locked; hmp2 drops the final device references and unlocks.
	 */
	for (i = 0; i < pmp->cluster.nchains; ++i) {
		hmp = pmp->cluster.array[i]->hmp;

		hammer2_vfs_unmount_hmp1(mp, hmp);

		rchain = pmp->cluster.array[i];
		if (rchain) {
			atomic_clear_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED);
#if REPORT_REFS_ERRORS
			if (rchain->refs != 1)
				kprintf("PMP->RCHAIN %p REFS WRONG %d\n",
					rchain, rchain->refs);
#else
			KKASSERT(rchain->refs == 1);
#endif
			hammer2_chain_drop(rchain);
			pmp->cluster.array[i] = NULL;
		}

		hammer2_vfs_unmount_hmp2(mp, hmp);
	}

	pmp->mp = NULL;
	mp->mnt_data = NULL;

	kmalloc_destroy(&pmp->mmsg);
	kmalloc_destroy(&pmp->minode);

	kfree(pmp, M_HAMMER2);
	error = 0;

failed:
	lockmgr(&hammer2_mntlk, LK_RELEASE);

	return (error);
}

/*
 * First phase of per-hmp unmount: drop the PFS count and flush any
 * remaining modified topology.  NOTE: acquires the hmp exclusive lock
 * and intentionally returns with it still held; the matching unlock
 * occurs in hammer2_vfs_unmount_hmp2().
 */
static
void
hammer2_vfs_unmount_hmp1(struct mount *mp, hammer2_mount_t *hmp)
{
	hammer2_mount_exlock(hmp);
	--hmp->pmp_count;

	kprintf("hammer2_unmount hmp=%p pmpcnt=%d\n", hmp, hmp->pmp_count);

	/*
	 * Flush any left over chains.  The voldata lock is only used
	 * to synchronize against HAMMER2_CHAIN_MODIFIED_AUX.
	 *
	 * Flush twice to ensure that the freemap is completely
	 * synchronized.  If we only do it once the next mount's
	 * recovery scan will have to do some fixups (which isn't
	 * bad, but we don't want it to have to do it except when
	 * recovering from a crash).
	 */
	hammer2_voldata_lock(hmp);
	if (((hmp->vchain.flags | hmp->fchain.flags) &
	     HAMMER2_CHAIN_MODIFIED) ||
	    hmp->vchain.update_hi > hmp->voldata.mirror_tid ||
	    hmp->fchain.update_hi > hmp->voldata.freemap_tid) {
		hammer2_voldata_unlock(hmp, 0);
		hammer2_vfs_sync(mp, MNT_WAIT);
		/*hammer2_vfs_sync(mp, MNT_WAIT);*/
	} else {
		hammer2_voldata_unlock(hmp, 0);
	}
	if (hmp->pmp_count == 0) {
		/*
		 * Last PFS on this device: diagnose any chains still
		 * dirty after the final sync.
		 */
		if (((hmp->vchain.flags | hmp->fchain.flags) &
		     HAMMER2_CHAIN_MODIFIED) ||
		    (hmp->vchain.update_hi >
		     hmp->voldata.mirror_tid) ||
		    (hmp->fchain.update_hi >
		     hmp->voldata.freemap_tid)) {
			kprintf("hammer2_unmount: chains left over "
				"after final sync\n");
			kprintf("    vchain %08x update_hi %jx/%jx\n",
				hmp->vchain.flags,
				hmp->voldata.mirror_tid,
				hmp->vchain.update_hi);
			kprintf("    fchain %08x update_hi %jx/%jx\n",
				hmp->fchain.flags,
				hmp->voldata.freemap_tid,
				hmp->fchain.update_hi);

			if (hammer2_debug & 0x0010)
				Debugger("entered debugger");
		}
	}
}

/*
 * Second phase of per-hmp unmount.  If this was the last PFS on the
 * device, release the super-root inode, close the device vnode and
 * free the hammer2_mount.  Releases the hmp lock acquired in
 * hammer2_vfs_unmount_hmp1() on all paths.
 */
static
void
hammer2_vfs_unmount_hmp2(struct mount *mp, hammer2_mount_t *hmp)
{
	struct vnode *devvp;
	int dumpcnt;
	int ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);

	/*
	 * If no PFS's left drop the master hammer2_mount for the
	 * device.
	 */
	if (hmp->pmp_count == 0) {
		if (hmp->sroot) {
			hammer2_inode_drop(hmp->sroot);
			hmp->sroot = NULL;
		}

		/*
		 * Finish up with the device vnode
		 */
		if ((devvp = hmp->devvp) != NULL) {
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0);
			hmp->devvp = NULL;
			VOP_CLOSE(devvp, (ronly ? FREAD : FREAD|FWRITE), NULL);
			vn_unlock(devvp);
			vrele(devvp);
			devvp = NULL;
		}

		/*
		 * Final drop of embedded freemap root chain to
		 * clean up fchain.core (fchain structure is not
		 * flagged ALLOCATED so it is cleaned out and then
		 * left to rot).
		 */
		hammer2_chain_drop(&hmp->fchain);

		/*
		 * Final drop of embedded volume root chain to clean
		 * up vchain.core (vchain structure is not flagged
		 * ALLOCATED so it is cleaned out and then left to
		 * rot).
		 */
		dumpcnt = 50;
		hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt, 'v');
		dumpcnt = 50;
		hammer2_dump_chain(&hmp->fchain, 0, &dumpcnt, 'f');
		hammer2_mount_unlock(hmp);
		hammer2_chain_drop(&hmp->vchain);

		hammer2_io_cleanup(hmp, &hmp->iotree);
		if (hmp->iofree_count) {
			kprintf("io_cleanup: %d I/O's left hanging\n",
				hmp->iofree_count);
		}

		TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry);
		kmalloc_destroy(&hmp->mchain);
		kfree(hmp, M_HAMMER2);
	} else {
		hammer2_mount_unlock(hmp);
	}
}

/*
 * VFS_VGET - not supported; inode-number based lookup is unimplemented.
 */
static
int
hammer2_vfs_vget(struct mount *mp, struct vnode *dvp,
	     ino_t ino, struct vnode **vpp)
{
	kprintf("hammer2_vget\n");
	return (EOPNOTSUPP);
}

/*
 * VFS_ROOT - return a referenced vnode for the PFS root inode, or
 * EINVAL if the mount never got far enough to have one.
 */
static
int
hammer2_vfs_root(struct mount *mp, struct vnode **vpp)
{
	hammer2_pfsmount_t *pmp;
	hammer2_cluster_t *cparent;
	int error;
	struct vnode *vp;

	pmp = MPTOPMP(mp);
	if (pmp->iroot == NULL) {
		*vpp = NULL;
		error = EINVAL;
	} else {
		/* shared lock is sufficient to instantiate the vnode */
		cparent = hammer2_inode_lock_sh(pmp->iroot);
		vp = hammer2_igetv(pmp->iroot, cparent, &error);
		hammer2_inode_unlock_sh(pmp->iroot, cparent);
		*vpp = vp;
		if (vp == NULL)
			kprintf("vnodefail\n");
	}

	return (error);
}

/*
 * Filesystem status
 *
 * XXX incorporate ipdata->inode_quota and data_quota
 */
static
int
hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
{
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;

	pmp = MPTOPMP(mp);
	KKASSERT(pmp->cluster.nchains >= 1);
	hmp = pmp->cluster.focus->hmp;	/* XXX */

	/* block counts derive from the volume header's allocator fields */
	mp->mnt_stat.f_files = pmp->inode_count;
	mp->mnt_stat.f_ffree = 0;
	mp->mnt_stat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE;
	mp->mnt_stat.f_bfree = hmp->voldata.allocator_free / HAMMER2_PBUFSIZE;
	mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree;

	*sbp = mp->mnt_stat;
	return (0);
}

/*
 * VFS_STATVFS - same information as statfs, reported through the
 * statvfs structure.
 */
static
int
hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred)
{
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;

	pmp = MPTOPMP(mp);
	KKASSERT(pmp->cluster.nchains >= 1);
	hmp = pmp->cluster.focus->hmp;	/* XXX */

	mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;
	mp->mnt_vstat.f_files = pmp->inode_count;
	mp->mnt_vstat.f_ffree = 0;
	mp->mnt_vstat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE;
	mp->mnt_vstat.f_bfree = hmp->voldata.allocator_free / HAMMER2_PBUFSIZE;
	mp->mnt_vstat.f_bavail = mp->mnt_vstat.f_bfree;

	*sbp = mp->mnt_vstat;
	return (0);
}

/*
 * Mount-time recovery (RW mounts)
 *
 * Updates to the free block table are allowed to lag flushes by one
 * transaction.  In case of a crash, then on a fresh mount we must do an
 * incremental scan of transaction id voldata.mirror_tid and make sure the
 * related blocks have been marked allocated.
 *
 */
struct hammer2_recovery_elm {
	TAILQ_ENTRY(hammer2_recovery_elm) entry;
	hammer2_chain_t	*chain;		/* deferred chain, ref held */
};

TAILQ_HEAD(hammer2_recovery_list, hammer2_recovery_elm);

static int hammer2_recovery_scan(hammer2_trans_t *trans, hammer2_mount_t *hmp,
			hammer2_chain_t *parent,
			struct hammer2_recovery_list *list, int depth);

/* recursion is bounded; deeper chains are queued and rescanned */
#define HAMMER2_RECOVERY_MAXDEPTH	10

/*
 * Run the mount-time freemap recovery scan from the volume root.
 * Chains deferred by the depth limiter are pulled off the list and
 * rescanned until the list drains.  Returns the last error seen
 * (0 on success).
 */
static
int
hammer2_recovery(hammer2_mount_t *hmp)
{
	hammer2_trans_t trans;
	struct hammer2_recovery_list list;
	struct hammer2_recovery_elm *elm;
	hammer2_chain_t *parent;
	int error;
	int cumulative_error = 0;

	hammer2_trans_init(&trans, NULL, hmp, 0);

	TAILQ_INIT(&list);
	parent = hammer2_chain_lookup_init(&hmp->vchain, 0);
	cumulative_error = hammer2_recovery_scan(&trans, hmp, parent, &list, 0);
	hammer2_chain_lookup_done(parent);

	while ((elm = TAILQ_FIRST(&list)) != NULL) {
		TAILQ_REMOVE(&list, elm, entry);
		parent = elm->chain;
		kfree(elm, M_HAMMER2);

		/* NOREF consumes the ref taken when the elm was queued */
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS |
					   HAMMER2_RESOLVE_NOREF);
		error = hammer2_recovery_scan(&trans, hmp, parent, &list, 0);
		hammer2_chain_unlock(parent);
		if (error)
			cumulative_error = error;
	}
	hammer2_trans_done(&trans);

	return cumulative_error;
}

/*
 * Recursively mark the blocks backing (parent) and its recent children
 * as allocated in the freemap.  Recursion beyond
 * HAMMER2_RECOVERY_MAXDEPTH is deferred onto (list) for the caller.
 */
static
int
hammer2_recovery_scan(hammer2_trans_t *trans, hammer2_mount_t *hmp,
		  hammer2_chain_t *parent,
		  struct hammer2_recovery_list *list, int depth)
{
	hammer2_chain_t *chain;
	int cache_index;
	int cumulative_error = 0;
	int error;

	/*
	 * Defer operation if depth limit reached.
	 */
	if (depth >= HAMMER2_RECOVERY_MAXDEPTH) {
		struct hammer2_recovery_elm *elm;

		elm = kmalloc(sizeof(*elm), M_HAMMER2, M_ZERO | M_WAITOK);
		elm->chain = parent;
		hammer2_chain_ref(parent);	/* dropped via RESOLVE_NOREF */
		TAILQ_INSERT_TAIL(list, elm, entry);
		/* unlocked by caller */

		return(0);
	}

	/*
	 * Adjust freemap to ensure that the block(s) are marked allocated.
	 */
	if (parent->bref.type != HAMMER2_BREF_TYPE_VOLUME) {
		hammer2_freemap_adjust(trans, hmp, &parent->bref,
				       HAMMER2_FREEMAP_DORECOVER);
	}

	/*
	 * Check type for recursive scan
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_VOLUME:
		/* data already instantiated */
		break;
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Must instantiate data for DIRECTDATA test and also
		 * for recursion.
		 */
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
		hammer2_chain_unlock(parent);
		if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
			/* not applicable to recovery scan */
			return 0;
		}
		break;
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Must instantiate data for recursion
		 */
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
		hammer2_chain_unlock(parent);
		break;
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		/* not applicable to recovery scan */
		return 0;
		break;
	default:
		return EDOM;
	}

	/*
	 * Recursive scan of the last flushed transaction only.  We are
	 * doing this without pmp assignments so don't leave the chains
	 * hanging around after we are done with them.
	 */
	cache_index = 0;
	chain = hammer2_chain_scan(parent, NULL, &cache_index,
				   HAMMER2_LOOKUP_NODATA);
	while (chain) {
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_RELEASE);
		if (chain->bref.mirror_tid >= hmp->voldata.alloc_tid - 1) {
			error = hammer2_recovery_scan(trans, hmp, chain,
						      list, depth + 1);
			if (error)
				cumulative_error = error;
		}
		chain = hammer2_chain_scan(parent, chain, &cache_index,
					   HAMMER2_LOOKUP_NODATA);
	}

	return cumulative_error;
}

/*
 * Sync the entire filesystem; this is called from the filesystem syncer
 * process periodically and whenever a user calls sync(1) on the hammer
 * mountpoint.
 *
 * Currently is actually called from the syncer! \o/
 *
 * This task will have to snapshot the state of the dirty inode chain.
 * From that, it will have to make sure all of the inodes on the dirty
 * chain have IO initiated.  We make sure that io is initiated for the root
 * block.
 *
 * If waitfor is set, we wait for media to acknowledge the new rootblock.
 *
 * THINKS: side A vs side B, to have sync not stall all I/O?
 */
int
hammer2_vfs_sync(struct mount *mp, int waitfor)
{
	struct hammer2_sync_info info;
	hammer2_chain_t *chain;
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;
	int flags;
	int error;
	int total_error;
	int force_fchain;	/* only consumed by #if 0'd second pass */
	int i;

	pmp = MPTOPMP(mp);

	/*
	 * We can't acquire locks on existing vnodes while in a transaction
	 * without risking a deadlock.  This assumes that vfsync() can be
	 * called without the vnode locked (which it can in DragonFly).
	 * Otherwise we'd have to implement a multi-pass or flag the lock
	 * failures and retry.
	 *
	 * The reclamation code interlocks with the sync list's token
	 * (by removing the vnode from the scan list) before unlocking
	 * the inode, giving us time to ref the inode.
	 */
	/*flags = VMSC_GETVP;*/
	flags = 0;
	if (waitfor & MNT_LAZY)
		flags |= VMSC_ONEPASS;

	/*
	 * Start our flush transaction.  This does not return until all
	 * concurrent transactions have completed and will prevent any
	 * new transactions from running concurrently, except for the
	 * buffer cache transactions.
	 *
	 * For efficiency do an async pass before making sure with a
	 * synchronous pass on all related buffer cache buffers.  It
	 * should theoretically not be possible for any new file buffers
	 * to be instantiated during this sequence.
	 */
	hammer2_trans_init(&info.trans, pmp, NULL, HAMMER2_TRANS_ISFLUSH |
						   HAMMER2_TRANS_PREFLUSH);
	hammer2_run_unlinkq(&info.trans, pmp);
	info.error = 0;
	info.waitfor = MNT_NOWAIT;
	vsyncscan(mp, flags | VMSC_NOWAIT, hammer2_sync_scan2, &info);
	info.waitfor = MNT_WAIT;
	vsyncscan(mp, flags, hammer2_sync_scan2, &info);

	/*
	 * Clear PREFLUSH.  This prevents (or asserts on) any new logical
	 * buffer cache flushes which occur during the flush.  Device buffers
	 * are not affected.
	 */

#if 0
	if (info.error == 0 && (waitfor & MNT_WAIT)) {
		info.waitfor = waitfor;
		vsyncscan(mp, flags, hammer2_sync_scan2, &info);

	}
#endif
	hammer2_bioq_sync(info.trans.pmp);
	atomic_clear_int(&info.trans.flags, HAMMER2_TRANS_PREFLUSH);

#if 0
	/*
	 * Start the flush transaction and flush all meta-data.
	 */
	hammer2_trans_init(&info.trans, pmp, NULL, HAMMER2_TRANS_ISFLUSH);
#endif

	total_error = 0;
	for (i = 0; i < pmp->cluster.nchains; ++i) {
		hmp = pmp->cluster.array[i]->hmp;

		/*
		 * Media mounts have two 'roots', vchain for the topology
		 * and fchain for the free block table.  Flush both.
		 *
		 * Note that the topology and free block table are handled
		 * independently, so the free block table can wind up being
		 * ahead of the topology.  We depend on the bulk free scan
		 * code to deal with any loose ends.
		 */
#if 1
		hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
		kprintf("sync tid test fmap %016jx %016jx\n",
			hmp->fchain.update_hi, hmp->voldata.freemap_tid);
		if ((hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) ||
		    hmp->fchain.update_hi > hmp->voldata.freemap_tid) {
			/* this will also modify vchain as a side effect */
			chain = &hmp->fchain;
			hammer2_flush(&info.trans, &chain);
			KKASSERT(chain == &hmp->fchain);
		}
		hammer2_chain_unlock(&hmp->fchain);
#endif

		hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
		kprintf("sync tid test vmap %016jx %016jx\n",
			hmp->vchain.update_hi, hmp->voldata.mirror_tid);
		if ((hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) ||
		    hmp->vchain.update_hi > hmp->voldata.mirror_tid) {
			chain = &hmp->vchain;
			hammer2_flush(&info.trans, &chain);
			KKASSERT(chain == &hmp->vchain);
			force_fchain = 1;
		} else {
			force_fchain = 0;
		}
		hammer2_chain_unlock(&hmp->vchain);

#if 0
		hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
		if ((hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) ||
		    hmp->fchain.update_hi > hmp->voldata.freemap_tid ||
		    force_fchain) {
			/* this will also modify vchain as a side effect */
			chain = &hmp->fchain;
			hammer2_flush(&info.trans, &chain);
			KKASSERT(chain == &hmp->fchain);
		}
		hammer2_chain_unlock(&hmp->fchain);
#endif

		error = 0;

		/*
		 * We can't safely flush the volume header until we have
		 * flushed any device buffers which have built up.
		 *
		 * XXX this isn't being incremental
		 */
		vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0);
		vn_unlock(hmp->devvp);

		/*
		 * The flush code sets CHAIN_VOLUMESYNC to indicate that the
		 * volume header needs synchronization via hmp->volsync.
		 *
		 * XXX synchronize the flag & data with only this flush XXX
		 */
		if (error == 0 &&
		    (hmp->vchain.flags & HAMMER2_CHAIN_VOLUMESYNC)) {
			struct buf *bp;

			/*
			 * Synchronize the disk before flushing the volume
			 * header.
			 */
			bp = getpbuf(NULL);
			bp->b_bio1.bio_offset = 0;
			bp->b_bufsize = 0;
			bp->b_bcount = 0;
			bp->b_cmd = BUF_CMD_FLUSH;
			bp->b_bio1.bio_done = biodone_sync;
			bp->b_bio1.bio_flags |= BIO_SYNC;
			vn_strategy(hmp->devvp, &bp->b_bio1);
			biowait(&bp->b_bio1, "h2vol");
			relpbuf(bp, NULL);

			/*
			 * Then we can safely flush the version of the
			 * volume header synchronized by the flush code.
			 * NOTE: 'i' is reused here as the rotating volume
			 * header index; the cluster loop terminates on a
			 * recomputed bound regardless.
			 */
			i = hmp->volhdrno + 1;
			if (i >= HAMMER2_NUM_VOLHDRS)
				i = 0;
			if (i * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE >
			    hmp->volsync.volu_size) {
				i = 0;
			}
			kprintf("sync volhdr %d %jd\n",
				i, (intmax_t)hmp->volsync.volu_size);
			bp = getblk(hmp->devvp, i * HAMMER2_ZONE_BYTES64,
				    HAMMER2_PBUFSIZE, 0, 0);
			atomic_clear_int(&hmp->vchain.flags,
					 HAMMER2_CHAIN_VOLUMESYNC);
			bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE);
			bawrite(bp);
			hmp->volhdrno = i;
		}
		if (error)
			total_error = error;
	}
	hammer2_trans_done(&info.trans);

	return (total_error);
}

/*
 * Sync passes.
 */
static int
hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer2_sync_info *info = data;
	hammer2_inode_t *ip;
	int error;

	/*
	 * Per-vnode callback for vsyncscan(): flush a dirty vnode's
	 * buffers.  Clean or dead vnodes are skipped (and their dirty
	 * flag cleared).
	 */
	ip = VTOI(vp);
	if (ip == NULL)
		return(0);
	if (vp->v_type == VNON || vp->v_type == VBAD) {
		vclrisdirty(vp);
		return(0);
	}
	if ((ip->flags & HAMMER2_INODE_MODIFIED) == 0 &&
	    RB_EMPTY(&vp->v_rbdirty_tree)) {
		vclrisdirty(vp);
		return(0);
	}

	/*
	 * VOP_FSYNC will start a new transaction so replicate some code
	 * here to do it inline (see hammer2_vop_fsync()).
	 *
	 * WARNING: The vfsync interacts with the buffer cache and might
	 *          block, we can't hold the inode lock at that time.
	 *          However, we MUST ref ip before blocking to ensure that
	 *          it isn't ripped out from under us (since we do not
	 *          hold a lock on the vnode).
	 */
	hammer2_inode_ref(ip);
	atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED);
	if (vp)
		vfsync(vp, MNT_NOWAIT, 1, NULL, NULL);

	hammer2_inode_drop(ip);
#if 1
	/* error propagation is currently stubbed out (XXX) */
	error = 0;
	if (error)
		info->error = error;
#endif
	return(0);
}

/*
 * VFS_VPTOFH - NFS file handle support stub (always succeeds, fills
 * nothing).
 */
static
int
hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp)
{
	return (0);
}

/*
 * VFS_FHTOVP - NFS file handle support stub (always succeeds, returns
 * no vnode).
 */
static
int
hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
	       struct fid *fhp, struct vnode **vpp)
{
	return (0);
}

/*
 * VFS_CHECKEXP - NFS export check stub (always permits).
 */
static
int
hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
		 int *exflagsp, struct ucred **credanonp)
{
	return (0);
}

/*
 * Support code for hammer2_mount().  Read, verify, and install the volume
 * header into the HMP
 *
 * XXX read four volhdrs and use the one with the highest TID whos CRC
 *     matches.
 *
 * XXX check iCRCs.
 *
 * XXX For filesystems w/ less than 4 volhdrs, make sure to not write to
 *     nonexistant locations.
 *
 * XXX Record selected volhdr and ring updates to each of 4 volhdrs
 */
static
int
hammer2_install_volume_header(hammer2_mount_t *hmp)
{
	hammer2_volume_data_t *vd;
	struct buf *bp;
	hammer2_crc32_t crc0, crc, bcrc0, bcrc;
	int error_reported;
	int error;
	int valid;
	int i;

	error_reported = 0;
	error = 0;
	valid = 0;
	bp = NULL;

	/*
	 * There are up to 4 copies of the volume header (syncs iterate
	 * between them so there is no single master).  We don't trust the
	 * volu_size field so we don't know precisely how large the filesystem
	 * is, so depend on the OS to return an error if we go beyond the
	 * block device's EOF.
	 */
	for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++) {
		error = bread(hmp->devvp, i * HAMMER2_ZONE_BYTES64,
			      HAMMER2_VOLUME_BYTES, &bp);
		if (error) {
			brelse(bp);
			bp = NULL;
			continue;
		}

		vd = (struct hammer2_volume_data *) bp->b_data;
		if ((vd->magic != HAMMER2_VOLUME_ID_HBO) &&
		    (vd->magic != HAMMER2_VOLUME_ID_ABO)) {
			brelse(bp);
			bp = NULL;
			continue;
		}

		if (vd->magic == HAMMER2_VOLUME_ID_ABO) {
			/* XXX: Reversed-endianness filesystem */
			kprintf("hammer2: reverse-endian filesystem detected");
			brelse(bp);
			bp = NULL;
			continue;
		}

		/* verify both section CRCs before trusting the copy */
		crc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT0];
		crc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC0_OFF,
				      HAMMER2_VOLUME_ICRC0_SIZE);
		bcrc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT1];
		bcrc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC1_OFF,
				       HAMMER2_VOLUME_ICRC1_SIZE);
		if ((crc0 != crc) || (bcrc0 != bcrc)) {
			kprintf("hammer2 volume header crc "
				"mismatch copy #%d %08x/%08x\n",
				i, crc0, crc);
			error_reported = 1;
			brelse(bp);
			bp = NULL;
			continue;
		}
		/* keep the valid copy with the highest mirror_tid */
		if (valid == 0 || hmp->voldata.mirror_tid < vd->mirror_tid) {
			valid = 1;
			hmp->voldata = *vd;
			hmp->volhdrno = i;
		}
		brelse(bp);
		bp = NULL;
	}
	if (valid) {
		hmp->volsync = hmp->voldata;
		error = 0;
		if (error_reported || bootverbose || 1) { /* 1/DEBUG */
			kprintf("hammer2: using volume header #%d\n",
				hmp->volhdrno);
		}
	} else {
		error = EINVAL;
		kprintf("hammer2: no valid volume headers found!\n");
	}
	return (error);
}

/*
 * Reconnect using the passed file pointer.  The caller must ref the
 * fp for us.
 */
void
hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp)
{
	hammer2_inode_data_t *ipdata;
	hammer2_cluster_t *cparent;
	hammer2_mount_t *hmp;
	size_t name_len;

	hmp = pmp->cluster.focus->hmp;	/* XXX */

	/*
	 * Closes old comm descriptor, kills threads, cleans up
	 * states, then installs the new descriptor and creates
	 * new threads.
	 */
	kdmsg_iocom_reconnect(&pmp->iocom, fp, "hammer2");

	/*
	 * Setup LNK_CONN fields for autoinitiated state machine
	 */
	cparent = hammer2_inode_lock_ex(pmp->iroot);
	ipdata = &hammer2_cluster_data(cparent)->ipdata;
	pmp->iocom.auto_lnk_conn.pfs_clid = ipdata->pfs_clid;
	pmp->iocom.auto_lnk_conn.pfs_fsid = ipdata->pfs_fsid;
	pmp->iocom.auto_lnk_conn.pfs_type = ipdata->pfs_type;
	pmp->iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
	pmp->iocom.auto_lnk_conn.peer_type = hmp->voldata.peer_type;

	/*
	 * Filter adjustment.  Clients do not need visibility into other
	 * clients (otherwise millions of clients would present a serious
	 * problem).  The fs_label also serves to restrict the namespace.
	 */
	pmp->iocom.auto_lnk_conn.peer_mask = 1LLU << HAMMER2_PEER_HAMMER2;
	pmp->iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1;
	switch (ipdata->pfs_type) {
	case DMSG_PFSTYPE_CLIENT:
		pmp->iocom.auto_lnk_conn.peer_mask &=
				~(1LLU << DMSG_PFSTYPE_CLIENT);
		break;
	default:
		break;
	}

	/* bounded copy of the PFS label, always NUL terminated */
	name_len = ipdata->name_len;
	if (name_len >= sizeof(pmp->iocom.auto_lnk_conn.fs_label))
		name_len = sizeof(pmp->iocom.auto_lnk_conn.fs_label) - 1;
	bcopy(ipdata->filename,
	      pmp->iocom.auto_lnk_conn.fs_label,
	      name_len);
	pmp->iocom.auto_lnk_conn.fs_label[name_len] = 0;

	/*
	 * Setup LNK_SPAN fields for autoinitiated state machine
	 */
	pmp->iocom.auto_lnk_span.pfs_clid = ipdata->pfs_clid;
	pmp->iocom.auto_lnk_span.pfs_fsid = ipdata->pfs_fsid;
	pmp->iocom.auto_lnk_span.pfs_type = ipdata->pfs_type;
	pmp->iocom.auto_lnk_span.peer_type = hmp->voldata.peer_type;
	pmp->iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1;
	name_len = ipdata->name_len;
	if (name_len >= sizeof(pmp->iocom.auto_lnk_span.fs_label))
		name_len = sizeof(pmp->iocom.auto_lnk_span.fs_label) - 1;
	bcopy(ipdata->filename,
	      pmp->iocom.auto_lnk_span.fs_label,
	      name_len);
	pmp->iocom.auto_lnk_span.fs_label[name_len] = 0;
	hammer2_inode_unlock_ex(pmp->iroot, cparent);

	kdmsg_iocom_autoinitiate(&pmp->iocom, hammer2_autodmsg);
}

static int
hammer2_rcvdmsg(kdmsg_msg_t *msg)
{
	switch(msg->any.head.cmd & DMSGF_TRANSMASK) {
	case DMSG_DBG_SHELL:
		/*
		 * (non-transaction)
		 * Execute shell command (not supported atm)
		 */
		kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
		break;
	case DMSG_DBG_SHELL | DMSGF_REPLY:
		/*
		 * (non-transaction)
		 */
		if (msg->aux_data) {
			msg->aux_data[msg->aux_size - 1] = 0;
			kprintf("HAMMER2 DBG: %s\n", msg->aux_data);
		}
		break;
	default:
		/*
		 * Unsupported
message received. We only need to 2357 * reply if it's a transaction in order to close our end. 2358 * Ignore any one-way messages are any further messages 2359 * associated with the transaction. 2360 * 2361 * NOTE: This case also includes DMSG_LNK_ERROR messages 2362 * which might be one-way, replying to those would 2363 * cause an infinite ping-pong. 2364 */ 2365 if (msg->any.head.cmd & DMSGF_CREATE) 2366 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 2367 break; 2368 } 2369 return(0); 2370 } 2371 2372 /* 2373 * This function is called after KDMSG has automatically handled processing 2374 * of a LNK layer message (typically CONN, SPAN, or CIRC). 2375 * 2376 * We tag off the LNK_CONN to trigger our LNK_VOLCONF messages which 2377 * advertises all available hammer2 super-root volumes. 2378 */ 2379 static void 2380 hammer2_autodmsg(kdmsg_msg_t *msg) 2381 { 2382 hammer2_pfsmount_t *pmp = msg->iocom->handle; 2383 hammer2_mount_t *hmp = pmp->cluster.focus->hmp; /* XXX */ 2384 int copyid; 2385 2386 /* 2387 * We only care about replies to our LNK_CONN auto-request. kdmsg 2388 * has already processed the reply, we use this calback as a shim 2389 * to know when we can advertise available super-root volumes. 
2390 */ 2391 if ((msg->any.head.cmd & DMSGF_TRANSMASK) != 2392 (DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_REPLY) || 2393 msg->state == NULL) { 2394 return; 2395 } 2396 2397 kprintf("LNK_CONN REPLY RECEIVED CMD %08x\n", msg->any.head.cmd); 2398 2399 if (msg->any.head.cmd & DMSGF_CREATE) { 2400 kprintf("HAMMER2: VOLDATA DUMP\n"); 2401 2402 /* 2403 * Dump the configuration stored in the volume header 2404 */ 2405 hammer2_voldata_lock(hmp); 2406 for (copyid = 0; copyid < HAMMER2_COPYID_COUNT; ++copyid) { 2407 if (hmp->voldata.copyinfo[copyid].copyid == 0) 2408 continue; 2409 hammer2_volconf_update(pmp, copyid); 2410 } 2411 hammer2_voldata_unlock(hmp, 0); 2412 } 2413 if ((msg->any.head.cmd & DMSGF_DELETE) && 2414 msg->state && (msg->state->txcmd & DMSGF_DELETE) == 0) { 2415 kprintf("HAMMER2: CONN WAS TERMINATED\n"); 2416 } 2417 } 2418 2419 /* 2420 * Volume configuration updates are passed onto the userland service 2421 * daemon via the open LNK_CONN transaction. 2422 */ 2423 void 2424 hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index) 2425 { 2426 hammer2_mount_t *hmp = pmp->cluster.focus->hmp; /* XXX */ 2427 kdmsg_msg_t *msg; 2428 2429 /* XXX interlock against connection state termination */ 2430 kprintf("volconf update %p\n", pmp->iocom.conn_state); 2431 if (pmp->iocom.conn_state) { 2432 kprintf("TRANSMIT VOLCONF VIA OPEN CONN TRANSACTION\n"); 2433 msg = kdmsg_msg_alloc_state(pmp->iocom.conn_state, 2434 DMSG_LNK_VOLCONF, NULL, NULL); 2435 msg->any.lnk_volconf.copy = hmp->voldata.copyinfo[index]; 2436 msg->any.lnk_volconf.mediaid = hmp->voldata.fsid; 2437 msg->any.lnk_volconf.index = index; 2438 kdmsg_msg_write(msg); 2439 } 2440 } 2441 2442 /* 2443 * This handles hysteresis on regular file flushes. Because the BIOs are 2444 * routed to a thread it is possible for an excessive number to build up 2445 * and cause long front-end stalls long before the runningbuffspace limit 2446 * is hit, so we implement hammer2_flush_pipe to control the 2447 * hysteresis. 
 *
 * This is a particular problem when compression is used.
 */
void
hammer2_lwinprog_ref(hammer2_pfsmount_t *pmp)
{
	/* Account for one more logical write in progress. */
	atomic_add_int(&pmp->count_lwinprog, 1);
}

/*
 * Release one in-progress logical write.  If waiters are flagged and the
 * count has drained to 2/3 of hammer2_flush_pipe, clear the flag and wake
 * anyone blocked in hammer2_lwinprog_wait().
 *
 * NOTE: count_lwinprog encodes both the count (LWINPROG_MASK) and the
 *	 LWINPROG_WAITING flag; atomic_fetchadd_int returns the value
 *	 prior to the decrement.
 */
void
hammer2_lwinprog_drop(hammer2_pfsmount_t *pmp)
{
	int lwinprog;

	lwinprog = atomic_fetchadd_int(&pmp->count_lwinprog, -1);
	if ((lwinprog & HAMMER2_LWINPROG_WAITING) &&
	    (lwinprog & HAMMER2_LWINPROG_MASK) <= hammer2_flush_pipe * 2 / 3) {
		atomic_clear_int(&pmp->count_lwinprog,
				 HAMMER2_LWINPROG_WAITING);
		wakeup(&pmp->count_lwinprog);
	}
}

/*
 * Block while the number of in-progress logical writes is at or above
 * hammer2_flush_pipe.  Uses the tsleep_interlock/tsleep protocol with a
 * re-check after setting LWINPROG_WAITING to avoid a lost wakeup from
 * hammer2_lwinprog_drop().
 */
void
hammer2_lwinprog_wait(hammer2_pfsmount_t *pmp)
{
	int lwinprog;

	for (;;) {
		lwinprog = pmp->count_lwinprog;
		cpu_ccfence();	/* re-read count_lwinprog each pass */
		if ((lwinprog & HAMMER2_LWINPROG_MASK) < hammer2_flush_pipe)
			break;
		/* interlock before flagging, then re-check, then sleep */
		tsleep_interlock(&pmp->count_lwinprog, 0);
		atomic_set_int(&pmp->count_lwinprog, HAMMER2_LWINPROG_WAITING);
		lwinprog = pmp->count_lwinprog;
		if ((lwinprog & HAMMER2_LWINPROG_MASK) < hammer2_flush_pipe)
			break;
		tsleep(&pmp->count_lwinprog, PINTERLOCKED, "h2wpipe", hz);
	}
}

/*
 * Manage excessive memory resource use for chain and related
 * structures.
 */
void
hammer2_pfs_memory_wait(hammer2_pfsmount_t *pmp)
{
	long waiting;
	long count;
	long limit;
#if 0
	static int zzticks;
#endif

	/*
	 * Atomic check condition and wait.  Also do an early speedup of
	 * the syncer to try to avoid hitting the wait.
	 */
	for (;;) {
		/*
		 * inmem_dirty_chains encodes both the dirty-chain count
		 * (DIRTYCHAIN_MASK) and the DIRTYCHAIN_WAITING flag.
		 */
		waiting = pmp->inmem_dirty_chains;
		cpu_ccfence();
		count = waiting & HAMMER2_DIRTYCHAIN_MASK;

		/*
		 * Effective limit is the largest of: 10% of the vnode
		 * list size, the sysctl hammer2_limit_dirty_chains, and
		 * a floor of 1000.
		 */
		limit = pmp->mp->mnt_nvnodelistsize / 10;
		if (limit < hammer2_limit_dirty_chains)
			limit = hammer2_limit_dirty_chains;
		if (limit < 1000)
			limit = 1000;

#if 0
		if ((int)(ticks - zzticks) > hz) {
			zzticks = ticks;
			kprintf("count %ld %ld\n", count, limit);
		}
#endif

		/*
		 * Block if there are too many dirty chains present, wait
		 * for the flush to clean some out.
		 */
		if (count > limit) {
			tsleep_interlock(&pmp->inmem_dirty_chains, 0);
			if (atomic_cmpset_long(&pmp->inmem_dirty_chains,
					       waiting,
					       waiting | HAMMER2_DIRTYCHAIN_WAITING)) {
				speedup_syncer(pmp->mp);
				tsleep(&pmp->inmem_dirty_chains, PINTERLOCKED,
				       "chnmem", hz);
			}
			continue;	/* loop on success or fail */
		}

		/*
		 * Try to start an early flush before we are forced to block.
		 */
		if (count > limit * 7 / 10)
			speedup_syncer(pmp->mp);
		break;
	}
}

/*
 * Account for one more dirty chain on the pfs (NULL pmp is a no-op).
 */
void
hammer2_pfs_memory_inc(hammer2_pfsmount_t *pmp)
{
	if (pmp)
		atomic_add_long(&pmp->inmem_dirty_chains, 1);
}

/*
 * Drop one dirty chain, clearing DIRTYCHAIN_WAITING in the same atomic
 * update, and wake any threads blocked in hammer2_pfs_memory_wait().
 */
void
hammer2_pfs_memory_wakeup(hammer2_pfsmount_t *pmp)
{
	long waiting;

	if (pmp == NULL)
		return;

	/* cmpset loop: retry until the decrement+clear lands atomically */
	for (;;) {
		waiting = pmp->inmem_dirty_chains;
		cpu_ccfence();
		if (atomic_cmpset_long(&pmp->inmem_dirty_chains,
				       waiting,
				       (waiting - 1) &
					~HAMMER2_DIRTYCHAIN_WAITING)) {
			break;
		}
	}

	if (waiting & HAMMER2_DIRTYCHAIN_WAITING)
		wakeup(&pmp->inmem_dirty_chains);
}

/*
 * Debugging
 *
 * Recursively dump a chain and its sub-topology via kprintf, indented by
 * tab.  *countp limits the total number of lines emitted; output stops
 * with "..." once it reaches zero.  pfx tags each line with the relation
 * to the parent ('a'/'r'/'d' for rbtree/dbtree/dbq children below).
 */
void
hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp, char pfx)
{
	hammer2_chain_t *scan;
	hammer2_chain_t *first_parent;

	/* enforce the output budget */
	--*countp;
	if (*countp == 0) {
		kprintf("%*.*s...\n", tab, tab, "");
		return;
	}
	if (*countp < 0)
		return;
	first_parent = chain->core ? TAILQ_FIRST(&chain->core->ownerq) : NULL;
	kprintf("%*.*s%c-chain %p.%d %016jx/%d mir=%016jx\n",
		tab, tab, "", pfx,
		chain, chain->bref.type,
		chain->bref.key, chain->bref.keybits,
		chain->bref.mirror_tid);

	kprintf("%*.*s [%08x] (%s) mod=%016jx del=%016jx "
		"lo=%08jx hi=%08jx refs=%d\n",
		tab, tab, "",
		chain->flags,
		((chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
		chain->data) ? (char *)chain->data->ipdata.filename : "?"),
		chain->modify_tid,
		chain->delete_tid,
		chain->update_lo,
		chain->update_hi,
		chain->refs);

	kprintf("%*.*s core %p [%08x]",
		tab, tab, "",
		chain->core, (chain->core ? chain->core->flags : 0));

	if (first_parent)
		kprintf("\n%*.*s fp=%p np=%p [fpflags %08x fprefs %d",
			tab, tab, "",
			first_parent,
			(first_parent ? TAILQ_NEXT(first_parent, core_entry) :
			NULL),
			first_parent->flags,
			first_parent->refs);
	if (chain->core == NULL || RB_EMPTY(&chain->core->rbtree))
		kprintf("\n");
	else
		kprintf(" {\n");
	/* recurse into all three child collections */
	if (chain->core) {
		RB_FOREACH(scan, hammer2_chain_tree, &chain->core->rbtree)
			hammer2_dump_chain(scan, tab + 4, countp, 'a');
		RB_FOREACH(scan, hammer2_chain_tree, &chain->core->dbtree)
			hammer2_dump_chain(scan, tab + 4, countp, 'r');
		TAILQ_FOREACH(scan, &chain->core->dbq, db_entry)
			hammer2_dump_chain(scan, tab + 4, countp, 'd');
	}
	if (chain->core && !RB_EMPTY(&chain->core->rbtree)) {
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data)
			kprintf("%*.*s}(%s)\n", tab, tab, "",
				chain->data->ipdata.filename);
		else
			kprintf("%*.*s}\n", tab, tab, "");
	}
}