1 /* 2 * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 /* 37 * This header file contains structures used internally by the HAMMER2 38 * implementation. See hammer2_disk.h for on-disk structures. 
39 */ 40 41 #ifndef _VFS_HAMMER2_HAMMER2_H_ 42 #define _VFS_HAMMER2_HAMMER2_H_ 43 44 #include <sys/param.h> 45 #include <sys/types.h> 46 #include <sys/kernel.h> 47 #include <sys/conf.h> 48 #include <sys/systm.h> 49 #include <sys/tree.h> 50 #include <sys/malloc.h> 51 #include <sys/mount.h> 52 #include <sys/vnode.h> 53 #include <sys/proc.h> 54 #include <sys/mountctl.h> 55 #include <sys/priv.h> 56 #include <sys/stat.h> 57 #include <sys/thread.h> 58 #include <sys/globaldata.h> 59 #include <sys/lockf.h> 60 #include <sys/buf.h> 61 #include <sys/queue.h> 62 #include <sys/limits.h> 63 #include <sys/buf2.h> 64 #include <sys/signal2.h> 65 #include <sys/dmsg.h> 66 #include <sys/mutex.h> 67 #include <sys/mutex2.h> 68 69 #include "hammer2_disk.h" 70 #include "hammer2_mount.h" 71 #include "hammer2_ioctl.h" 72 #include "hammer2_ccms.h" 73 74 struct hammer2_chain; 75 struct hammer2_inode; 76 struct hammer2_mount; 77 struct hammer2_pfsmount; 78 struct hammer2_span; 79 struct hammer2_state; 80 struct hammer2_msg; 81 82 /* 83 * The chain structure tracks a portion of the media topology from the 84 * root (volume) down. Chains represent volumes, inodes, indirect blocks, 85 * data blocks, and freemap nodes and leafs. 86 * 87 * The chain structure can be multi-homed and its topological recursion 88 * (chain->core) can be shared amongst several chains. Chain structures 89 * are topologically stable once placed in the in-memory topology (they 90 * don't move around). Modifications which cross flush synchronization 91 * boundaries, renames, resizing, or any move of the chain to elsewhere 92 * in the topology is accomplished via the DELETE-DUPLICATE mechanism. 93 * 94 * DELETE-DUPLICATE allows HAMMER2 to track work across flush synchronization 95 * points without stalling the filesystem or corrupting the flush 96 * synchronization point. When necessary a chain will be marked DELETED 97 * and a new, duplicate chain will be allocated. 
98 * 99 * This mechanism necessarily requires that we be able to overload chains 100 * at any given layer in the topology. Overloading is accomplished via a 101 * RBTREE recursion through chain->rbtree. 102 * 103 * Advantages: 104 * 105 * (1) Fully coherent snapshots can be taken without requiring 106 * a pre-flush, resulting in extremely fast (sub-millisecond) 107 * snapshots. 108 * 109 * (2) Multiple synchronization points can be in-flight at the same 110 * time, representing multiple snapshots or flushes. 111 * 112 * (3) The algorithms needed to keep track of everything are actually 113 * not that complex. 114 * 115 * Special Considerations: 116 * 117 * A chain is ref-counted on a per-chain basis, but the chain's lock 118 * is associated with the shared chain_core and is not per-chain. 119 * 120 * The power-of-2 nature of the media radix tree ensures that there 121 * will be no overlaps which straddle edges. 122 */ 123 RB_HEAD(hammer2_chain_tree, hammer2_chain); 124 TAILQ_HEAD(h2_flush_deferral_list, hammer2_chain); 125 TAILQ_HEAD(h2_core_list, hammer2_chain); 126 TAILQ_HEAD(h2_layer_list, hammer2_chain_layer); 127 128 struct hammer2_chain_layer { 129 int good; 130 TAILQ_ENTRY(hammer2_chain_layer) entry; 131 struct hammer2_chain_tree rbtree; 132 int refs; /* prevent destruction */ 133 }; 134 135 typedef struct hammer2_chain_layer hammer2_chain_layer_t; 136 137 struct hammer2_chain_core { 138 int good; 139 struct ccms_cst cst; 140 struct h2_core_list ownerq; /* all chains sharing this core */ 141 struct h2_layer_list layerq; 142 int live_zero; /* blockref array opt */ 143 hammer2_tid_t update_lo; /* check update against parent */ 144 hammer2_tid_t update_hi; /* check update against parent */ 145 u_int chain_count; /* total chains in layers */ 146 u_int sharecnt; 147 u_int flags; 148 u_int live_count; /* live (not deleted) chains in tree */ 149 int generation; /* generation number (inserts only) */ 150 }; 151 152 typedef struct hammer2_chain_core 
hammer2_chain_core_t; 153 154 #define HAMMER2_CORE_UNUSED0001 0x0001 155 #define HAMMER2_CORE_COUNTEDBREFS 0x0002 156 157 /* 158 * H2 is a copy-on-write filesystem. In order to allow chains to allocate 159 * smaller blocks (down to 64-bytes), but improve performance and make 160 * clustered I/O possible using larger block sizes, the kernel buffer cache 161 * is abstracted via the hammer2_io structure. 162 */ 163 RB_HEAD(hammer2_io_tree, hammer2_io); 164 165 struct hammer2_io { 166 RB_ENTRY(hammer2_io) rbnode; /* indexed by device offset */ 167 struct spinlock spin; 168 struct hammer2_mount *hmp; 169 struct buf *bp; 170 struct bio *bio; 171 off_t pbase; 172 int psize; 173 void (*callback)(struct hammer2_io *dio, 174 struct hammer2_chain *chain, 175 void *arg1, off_t arg2); 176 struct hammer2_chain *arg_c; /* INPROG I/O only */ 177 void *arg_p; /* INPROG I/O only */ 178 off_t arg_o; /* INPROG I/O only */ 179 int refs; 180 int act; /* activity */ 181 }; 182 183 typedef struct hammer2_io hammer2_io_t; 184 185 /* 186 * Primary chain structure keeps track of the topology in-memory. 
187 */ 188 struct hammer2_chain { 189 RB_ENTRY(hammer2_chain) rbnode; /* node */ 190 TAILQ_ENTRY(hammer2_chain) core_entry; /* contemporary chains */ 191 hammer2_chain_layer_t *inlayer; 192 hammer2_blockref_t bref; 193 hammer2_chain_core_t *core; 194 hammer2_chain_core_t *above; 195 struct hammer2_state *state; /* if active cache msg */ 196 struct hammer2_mount *hmp; 197 struct hammer2_pfsmount *pmp; /* can be NULL */ 198 199 hammer2_tid_t modify_tid; /* snapshot/flush filter */ 200 hammer2_tid_t delete_tid; 201 hammer2_key_t data_count; /* delta's to apply */ 202 hammer2_key_t inode_count; /* delta's to apply */ 203 hammer2_io_t *dio; /* physical data buffer */ 204 u_int bytes; /* physical data size */ 205 u_int flags; 206 u_int refs; 207 u_int lockcnt; 208 hammer2_media_data_t *data; /* data pointer shortcut */ 209 TAILQ_ENTRY(hammer2_chain) flush_node; /* flush deferral list */ 210 211 int inode_reason; 212 }; 213 214 typedef struct hammer2_chain hammer2_chain_t; 215 216 int hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2); 217 RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); 218 219 /* 220 * Special notes on flags: 221 * 222 * INITIAL - This flag allows a chain to be created and for storage to 223 * be allocated without having to immediately instantiate the 224 * related buffer. The data is assumed to be all-zeros. It 225 * is primarily used for indirect blocks. 226 * 227 * MOVED - A modified chain becomes MOVED after it flushes. A chain 228 * can also become MOVED if it is moved within the topology 229 * (even if not modified). 230 * 231 * MODIFIED- The chain's media data has been modified. 
232 */ 233 #define HAMMER2_CHAIN_MODIFIED 0x00000001 /* dirty chain data */ 234 #define HAMMER2_CHAIN_ALLOCATED 0x00000002 /* kmalloc'd chain */ 235 #define HAMMER2_CHAIN_UNUSED0004 0x00000004 236 #define HAMMER2_CHAIN_FORCECOW 0x00000008 /* force copy-on-wr */ 237 #define HAMMER2_CHAIN_DELETED 0x00000010 /* deleted chain */ 238 #define HAMMER2_CHAIN_INITIAL 0x00000020 /* initial create */ 239 #define HAMMER2_CHAIN_FLUSHED 0x00000040 /* blktable updated */ 240 #define HAMMER2_CHAIN_MOVED 0x00000080 /* bref changed */ 241 #define HAMMER2_CHAIN_IOFLUSH 0x00000100 /* bawrite on put */ 242 #define HAMMER2_CHAIN_DEFERRED 0x00000200 /* on a deferral list */ 243 #define HAMMER2_CHAIN_UNLINKED 0x00000400 /* delete on reclaim */ 244 #define HAMMER2_CHAIN_VOLUMESYNC 0x00000800 /* needs volume sync */ 245 #define HAMMER2_CHAIN_UNUSED01000 0x00001000 246 #define HAMMER2_CHAIN_MOUNTED 0x00002000 /* PFS is mounted */ 247 #define HAMMER2_CHAIN_ONRBTREE 0x00004000 /* on parent RB tree */ 248 #define HAMMER2_CHAIN_SNAPSHOT 0x00008000 /* snapshot special */ 249 #define HAMMER2_CHAIN_EMBEDDED 0x00010000 /* embedded data */ 250 #define HAMMER2_CHAIN_RELEASE 0x00020000 /* don't keep around */ 251 #define HAMMER2_CHAIN_UNUSED40000 0x00040000 252 #define HAMMER2_CHAIN_UNUSED80000 0x00080000 253 #define HAMMER2_CHAIN_DUPLICATED 0x00100000 /* fwd delete-dup */ 254 #define HAMMER2_CHAIN_PFSROOT 0x00200000 /* in pfs->cluster */ 255 256 /* 257 * Flags passed to hammer2_chain_lookup() and hammer2_chain_next() 258 * 259 * NOTE: MATCHIND allows an indirect block / freemap node to be returned 260 * when the passed key range matches the radix. Remember that key_end 261 * is inclusive (e.g. {0x000,0xFFF}, not {0x000,0x1000}). 
262 */ 263 #define HAMMER2_LOOKUP_NOLOCK 0x00000001 /* ref only */ 264 #define HAMMER2_LOOKUP_NODATA 0x00000002 /* data left NULL */ 265 #define HAMMER2_LOOKUP_SHARED 0x00000100 266 #define HAMMER2_LOOKUP_MATCHIND 0x00000200 /* return all chains */ 267 #define HAMMER2_LOOKUP_UNUSED0400 0x00000400 268 #define HAMMER2_LOOKUP_ALWAYS 0x00000800 /* resolve data */ 269 270 /* 271 * Flags passed to hammer2_chain_modify() and hammer2_chain_resize() 272 * 273 * NOTE: OPTDATA allows us to avoid instantiating buffers for INDIRECT 274 * blocks in the INITIAL-create state. 275 */ 276 #define HAMMER2_MODIFY_OPTDATA 0x00000002 /* data can be NULL */ 277 #define HAMMER2_MODIFY_NO_MODIFY_TID 0x00000004 278 #define HAMMER2_MODIFY_ASSERTNOCOPY 0x00000008 /* assert no del-dup */ 279 #define HAMMER2_MODIFY_NOREALLOC 0x00000010 280 #define HAMMER2_MODIFY_INPLACE 0x00000020 /* don't del-dup */ 281 282 /* 283 * Flags passed to hammer2_chain_lock() 284 */ 285 #define HAMMER2_RESOLVE_NEVER 1 286 #define HAMMER2_RESOLVE_MAYBE 2 287 #define HAMMER2_RESOLVE_ALWAYS 3 288 #define HAMMER2_RESOLVE_MASK 0x0F 289 290 #define HAMMER2_RESOLVE_SHARED 0x10 /* request shared lock */ 291 #define HAMMER2_RESOLVE_NOREF 0x20 /* already ref'd on lock */ 292 293 /* 294 * Flags passed to hammer2_chain_delete() 295 */ 296 #define HAMMER2_DELETE_UNUSED0001 0x0001 297 298 /* 299 * Flags passed to hammer2_chain_delete_duplicate() 300 */ 301 #define HAMMER2_DELDUP_RECORE 0x0001 302 303 /* 304 * Cluster different types of storage together for allocations 305 */ 306 #define HAMMER2_FREECACHE_INODE 0 307 #define HAMMER2_FREECACHE_INDIR 1 308 #define HAMMER2_FREECACHE_DATA 2 309 #define HAMMER2_FREECACHE_UNUSED3 3 310 #define HAMMER2_FREECACHE_TYPES 4 311 312 /* 313 * hammer2_freemap_alloc() block preference 314 */ 315 #define HAMMER2_OFF_NOPREF ((hammer2_off_t)-1) 316 317 /* 318 * BMAP read-ahead maximum parameters 319 */ 320 #define HAMMER2_BMAP_COUNT 16 /* max bmap read-ahead */ 321 #define HAMMER2_BMAP_BYTES 
(HAMMER2_PBUFSIZE * HAMMER2_BMAP_COUNT) 322 323 /* 324 * Misc 325 */ 326 #define HAMMER2_FLUSH_DEPTH_LIMIT 10 /* stack recursion limit */ 327 328 /* 329 * hammer2_freemap_adjust() 330 */ 331 #define HAMMER2_FREEMAP_DORECOVER 1 332 #define HAMMER2_FREEMAP_DOMAYFREE 2 333 #define HAMMER2_FREEMAP_DOREALFREE 3 334 335 /* 336 * HAMMER2 IN-MEMORY CACHE OF MEDIA STRUCTURES 337 * 338 * There is an in-memory representation of all on-media data structure. 339 * Basically everything is represented by a hammer2_chain structure 340 * in-memory and other higher-level structures map to chains. 341 * 342 * A great deal of data is accessed simply via its buffer cache buffer, 343 * which is mapped for the duration of the chain's lock. However, because 344 * chains may represent blocks smaller than the 16KB minimum we impose 345 * on buffer cache buffers, we cannot hold related buffer cache buffers 346 * locked for smaller blocks. In these situations we kmalloc() a copy 347 * of the block. 348 * 349 * When modifications are made to a chain a new filesystem block must be 350 * allocated. Multiple modifications do not necessarily allocate new 351 * blocks. However, when a flush occurs a flush synchronization point 352 * is created and any new modifications made after this point will allocate 353 * a new block even if the chain is already in a modified state. 354 * 355 * The in-memory representation may remain cached (for example in order to 356 * placemark clustering locks) even after the related data has been 357 * detached. 358 * 359 * CORE SHARING 360 * 361 * In order to support concurrent flushes a flush synchronization point 362 * is created represented by a transaction id. Among other things, 363 * operations may move filesystem objects from one part of the topology 364 * to another (for example, if you rename a file or when indirect blocks 365 * are created or destroyed, and a few other things). 
When this occurs 366 * across a flush synchronization point the flusher needs to be able to 367 * recurse down BOTH the 'before' version of the topology and the 'after' 368 * version. 369 * 370 * To facilitate this modifications to chains do what is called a 371 * DELETE-DUPLICATE operation. Chains are not actually moved in-memory. 372 * Instead the chain we wish to move is deleted and a new chain is created 373 * at the target location in the topology. ANY SUBCHAINS PLACED UNDER THE 374 * CHAIN BEING MOVED HAVE TO EXIST IN BOTH PLACES. To make this work 375 * all sub-chains are managed by the hammer2_chain_core structure. This 376 * structure can be multi-homed, meaning that it can have more than one 377 * chain as its parent. When a chain is delete-duplicated the chain's core 378 * becomes shared under both the old and new chain. 379 * 380 * STALE CHAINS 381 * 382 * When a chain is delete-duplicated the old chain typically becomes stale. 383 * This is detected via the HAMMER2_CHAIN_DUPLICATED flag in chain->flags. 384 * To avoid executing live filesystem operations on stale chains, the inode 385 * locking code will follow stale chains via core->ownerq until it finds 386 * the live chain. The lock prevents ripups by other threads. Lookups 387 * must properly order locking operations to prevent other threads from 388 * racing the lookup operation and will also follow stale chains when 389 * required. 390 */ 391 392 RB_HEAD(hammer2_inode_tree, hammer2_inode); 393 394 /* 395 * A hammer2 inode. 396 * 397 * NOTE: The inode's attribute CST which is also used to lock the inode 398 * is embedded in the chain (chain.cst) and aliased w/ attr_cst. 
399 */ 400 struct hammer2_inode { 401 RB_ENTRY(hammer2_inode) rbnode; /* inumber lookup (HL) */ 402 ccms_cst_t topo_cst; /* directory topology cst */ 403 struct hammer2_pfsmount *pmp; /* PFS mount */ 404 struct hammer2_inode *pip; /* parent inode */ 405 struct vnode *vp; 406 hammer2_chain_t *chain; /* NOTE: rehomed on rename */ 407 struct lockf advlock; 408 hammer2_tid_t inum; 409 u_int flags; 410 u_int refs; /* +vpref, +flushref */ 411 uint8_t comp_heuristic; 412 hammer2_off_t size; 413 uint64_t mtime; 414 }; 415 416 typedef struct hammer2_inode hammer2_inode_t; 417 418 #define HAMMER2_INODE_MODIFIED 0x0001 419 #define HAMMER2_INODE_SROOT 0x0002 /* kmalloc special case */ 420 #define HAMMER2_INODE_RENAME_INPROG 0x0004 421 #define HAMMER2_INODE_ONRBTREE 0x0008 422 #define HAMMER2_INODE_RESIZED 0x0010 423 #define HAMMER2_INODE_MTIME 0x0020 424 425 int hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2); 426 RB_PROTOTYPE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp, 427 hammer2_tid_t); 428 429 /* 430 * A hammer2 transaction and flush sequencing structure. 431 * 432 * This global structure is tied into hammer2_mount and is used 433 * to sequence modifying operations and flushes. 434 * 435 * (a) Any modifying operations with sync_tid >= flush_tid will stall until 436 * all modifying operating with sync_tid < flush_tid complete. 437 * 438 * The flush related to flush_tid stalls until all modifying operations 439 * with sync_tid < flush_tid complete. 440 * 441 * (b) Once unstalled, modifying operations with sync_tid > flush_tid are 442 * allowed to run. All modifications cause modify/duplicate operations 443 * to occur on the related chains. Note that most INDIRECT blocks will 444 * be unaffected because the modifications just overload the RBTREE 445 * structurally instead of actually modifying the indirect blocks. 
446 * 447 * (c) The actual flush unstalls and RUNS CONCURRENTLY with (b), but only 448 * utilizes the chain structures with sync_tid <= flush_tid. The 449 * flush will modify related indirect blocks and inodes in-place 450 * (rather than duplicate) since the adjustments are compatible with 451 * (b)'s RBTREE overloading 452 * 453 * SPECIAL NOTE: Inode modifications have to also propagate along any 454 * modify/duplicate chains. File writes detect the flush 455 * and force out the conflicting buffer cache buffer(s) 456 * before reusing them. 457 * 458 * (d) Snapshots can be made instantly but must be flushed and disconnected 459 * from their duplicative source before they can be mounted. This is 460 * because while H2's on-media structure supports forks, its in-memory 461 * structure only supports very simple forking for background flushing 462 * purposes. 463 * 464 * TODO: Flush merging. When fsync() is called on multiple discrete files 465 * concurrently there is no reason to stall the second fsync. 466 * The final flush that reaches to root can cover both fsync()s. 467 * 468 * The chains typically terminate as they fly onto the disk. The flush 469 * ultimately reaches the volume header. 
470 */ 471 struct hammer2_trans { 472 TAILQ_ENTRY(hammer2_trans) entry; 473 struct hammer2_pfsmount *pmp; /* might be NULL */ 474 struct hammer2_mount *hmp_single; /* if single-targetted */ 475 hammer2_tid_t sync_tid; 476 hammer2_tid_t real_tid; 477 hammer2_tid_t inode_tid; 478 thread_t td; /* pointer */ 479 int flags; 480 int blocked; 481 uint8_t inodes_created; 482 uint8_t dummy[7]; 483 }; 484 485 typedef struct hammer2_trans hammer2_trans_t; 486 487 #define HAMMER2_TRANS_ISFLUSH 0x0001 /* formal flush */ 488 #define HAMMER2_TRANS_UNUSED0002 0x0002 489 #define HAMMER2_TRANS_BUFCACHE 0x0004 /* from bioq strategy write */ 490 #define HAMMER2_TRANS_NEWINODE 0x0008 /* caller allocating inode */ 491 #define HAMMER2_TRANS_ISALLOCATING 0x0010 /* in allocator */ 492 493 #define HAMMER2_FREEMAP_HEUR_NRADIX 4 /* pwr 2 PBUFRADIX-MINIORADIX */ 494 #define HAMMER2_FREEMAP_HEUR_TYPES 8 495 #define HAMMER2_FREEMAP_HEUR (HAMMER2_FREEMAP_HEUR_NRADIX * \ 496 HAMMER2_FREEMAP_HEUR_TYPES) 497 498 /* 499 * Global (per device) mount structure for device (aka vp->v_mount->hmp) 500 */ 501 TAILQ_HEAD(hammer2_trans_queue, hammer2_trans); 502 503 struct hammer2_mount { 504 struct vnode *devvp; /* device vnode */ 505 int ronly; /* read-only mount */ 506 int pmp_count; /* PFS mounts backed by us */ 507 TAILQ_ENTRY(hammer2_mount) mntentry; /* hammer2_mntlist */ 508 509 struct malloc_type *mchain; 510 int nipstacks; 511 int maxipstacks; 512 struct spinlock io_spin; /* iotree access */ 513 struct hammer2_io_tree iotree; 514 int iofree_count; 515 hammer2_chain_t vchain; /* anchor chain (topology) */ 516 hammer2_chain_t fchain; /* anchor chain (freemap) */ 517 hammer2_inode_t *sroot; /* super-root localized to media */ 518 struct lock alloclk; /* lockmgr lock */ 519 struct lock voldatalk; /* lockmgr lock */ 520 struct hammer2_trans_queue transq; /* all in-progress transactions */ 521 hammer2_off_t heur_freemap[HAMMER2_FREEMAP_HEUR]; 522 int flushcnt; /* #of flush trans on the list */ 523 524 int 
volhdrno; /* last volhdrno written */ 525 hammer2_volume_data_t voldata; 526 hammer2_volume_data_t volsync; /* synchronized voldata */ 527 }; 528 529 typedef struct hammer2_mount hammer2_mount_t; 530 531 /* 532 * HAMMER2 cluster - a device/root associated with a PFS. 533 * 534 * A PFS may have several hammer2_cluster's associated with it. 535 */ 536 #define HAMMER2_MAXCLUSTER 8 537 538 struct hammer2_cluster { 539 int nchains; 540 int status; 541 hammer2_chain_t *chains[HAMMER2_MAXCLUSTER]; 542 }; 543 544 typedef struct hammer2_cluster hammer2_cluster_t; 545 546 /* 547 * HAMMER2 PFS mount point structure (aka vp->v_mount->mnt_data). 548 * This has a 1:1 correspondence to struct mount (note that the 549 * hammer2_mount structure has a N:1 correspondence). 550 * 551 * This structure represents a cluster mount and not necessarily a 552 * PFS under a specific device mount (HMP). The distinction is important 553 * because the elements backing a cluster mount can change on the fly. 554 * 555 * Usually the first element under the cluster represents the original 556 * user-requested mount that bootstraps the whole mess. In significant 557 * setups the original is usually just a read-only media image (or 558 * representitive file) that simply contains a bootstrap volume header 559 * listing the configuration. 
560 */ 561 struct hammer2_pfsmount { 562 struct mount *mp; 563 hammer2_cluster_t cluster; 564 hammer2_inode_t *iroot; /* PFS root inode */ 565 hammer2_inode_t *ihidden; /* PFS hidden directory */ 566 struct lock lock; /* PFS lock for certain ops */ 567 hammer2_off_t inode_count; /* copy of inode_count */ 568 ccms_domain_t ccms_dom; 569 struct netexport export; /* nfs export */ 570 int ronly; /* read-only mount */ 571 struct malloc_type *minode; 572 struct malloc_type *mmsg; 573 kdmsg_iocom_t iocom; 574 struct spinlock inum_spin; /* inumber lookup */ 575 struct hammer2_inode_tree inum_tree; 576 long inmem_inodes; 577 long inmem_dirty_chains; 578 int count_lwinprog; /* logical write in prog */ 579 thread_t wthread_td; /* write thread td */ 580 struct bio_queue_head wthread_bioq; /* logical buffer bioq */ 581 struct mtx wthread_mtx; /* interlock */ 582 int wthread_destroy;/* termination sequencing */ 583 }; 584 585 typedef struct hammer2_pfsmount hammer2_pfsmount_t; 586 587 #define HAMMER2_DIRTYCHAIN_WAITING 0x80000000 588 #define HAMMER2_DIRTYCHAIN_MASK 0x7FFFFFFF 589 590 #define HAMMER2_LWINPROG_WAITING 0x80000000 591 #define HAMMER2_LWINPROG_MASK 0x7FFFFFFF 592 593 #if defined(_KERNEL) 594 595 MALLOC_DECLARE(M_HAMMER2); 596 597 #define VTOI(vp) ((hammer2_inode_t *)(vp)->v_data) 598 #define ITOV(ip) ((ip)->vp) 599 600 /* 601 * Currently locked chains retain the locked buffer cache buffer for 602 * indirect blocks, and indirect blocks can be one of two sizes. The 603 * device buffer has to match the case to avoid deadlocking recursive 604 * chains that might otherwise try to access different offsets within 605 * the same device buffer. 
606 */ 607 static __inline 608 int 609 hammer2_devblkradix(int radix) 610 { 611 if (radix <= HAMMER2_LBUFRADIX) { 612 return (HAMMER2_LBUFRADIX); 613 } else { 614 return (HAMMER2_PBUFRADIX); 615 } 616 } 617 618 static __inline 619 size_t 620 hammer2_devblksize(size_t bytes) 621 { 622 if (bytes <= HAMMER2_LBUFSIZE) { 623 return(HAMMER2_LBUFSIZE); 624 } else { 625 KKASSERT(bytes <= HAMMER2_PBUFSIZE && 626 (bytes ^ (bytes - 1)) == ((bytes << 1) - 1)); 627 return (HAMMER2_PBUFSIZE); 628 } 629 } 630 631 632 static __inline 633 hammer2_pfsmount_t * 634 MPTOPMP(struct mount *mp) 635 { 636 return ((hammer2_pfsmount_t *)mp->mnt_data); 637 } 638 639 extern struct vop_ops hammer2_vnode_vops; 640 extern struct vop_ops hammer2_spec_vops; 641 extern struct vop_ops hammer2_fifo_vops; 642 643 extern int hammer2_debug; 644 extern int hammer2_cluster_enable; 645 extern int hammer2_hardlink_enable; 646 extern int hammer2_flush_pipe; 647 extern long hammer2_limit_dirty_chains; 648 extern long hammer2_iod_file_read; 649 extern long hammer2_iod_meta_read; 650 extern long hammer2_iod_indr_read; 651 extern long hammer2_iod_fmap_read; 652 extern long hammer2_iod_volu_read; 653 extern long hammer2_iod_file_write; 654 extern long hammer2_iod_meta_write; 655 extern long hammer2_iod_indr_write; 656 extern long hammer2_iod_fmap_write; 657 extern long hammer2_iod_volu_write; 658 extern long hammer2_ioa_file_read; 659 extern long hammer2_ioa_meta_read; 660 extern long hammer2_ioa_indr_read; 661 extern long hammer2_ioa_fmap_read; 662 extern long hammer2_ioa_volu_read; 663 extern long hammer2_ioa_file_write; 664 extern long hammer2_ioa_meta_write; 665 extern long hammer2_ioa_indr_write; 666 extern long hammer2_ioa_fmap_write; 667 extern long hammer2_ioa_volu_write; 668 669 extern struct objcache *cache_buffer_read; 670 extern struct objcache *cache_buffer_write; 671 672 extern int destroy; 673 extern int write_thread_wakeup; 674 675 extern mtx_t thread_protect; 676 677 /* 678 * hammer2_subr.c 679 
*/ 680 #define hammer2_icrc32(buf, size) iscsi_crc32((buf), (size)) 681 #define hammer2_icrc32c(buf, size, crc) iscsi_crc32_ext((buf), (size), (crc)) 682 683 hammer2_chain_t *hammer2_inode_lock_ex(hammer2_inode_t *ip); 684 hammer2_chain_t *hammer2_inode_lock_sh(hammer2_inode_t *ip); 685 void hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_chain_t *chain); 686 void hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_chain_t *chain); 687 void hammer2_chain_refactor(hammer2_chain_t **chainp); 688 void hammer2_voldata_lock(hammer2_mount_t *hmp); 689 void hammer2_voldata_unlock(hammer2_mount_t *hmp, int modify); 690 ccms_state_t hammer2_inode_lock_temp_release(hammer2_inode_t *ip); 691 void hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, ccms_state_t ostate); 692 ccms_state_t hammer2_inode_lock_upgrade(hammer2_inode_t *ip); 693 void hammer2_inode_lock_downgrade(hammer2_inode_t *ip, ccms_state_t ostate); 694 695 void hammer2_mount_exlock(hammer2_mount_t *hmp); 696 void hammer2_mount_shlock(hammer2_mount_t *hmp); 697 void hammer2_mount_unlock(hammer2_mount_t *hmp); 698 699 int hammer2_get_dtype(hammer2_chain_t *chain); 700 int hammer2_get_vtype(hammer2_chain_t *chain); 701 u_int8_t hammer2_get_obj_type(enum vtype vtype); 702 void hammer2_time_to_timespec(u_int64_t xtime, struct timespec *ts); 703 u_int64_t hammer2_timespec_to_time(struct timespec *ts); 704 u_int32_t hammer2_to_unix_xid(uuid_t *uuid); 705 void hammer2_guid_to_uuid(uuid_t *uuid, u_int32_t guid); 706 707 hammer2_key_t hammer2_dirhash(const unsigned char *name, size_t len); 708 int hammer2_getradix(size_t bytes); 709 710 int hammer2_calc_logical(hammer2_inode_t *ip, hammer2_off_t uoff, 711 hammer2_key_t *lbasep, hammer2_key_t *leofp); 712 int hammer2_calc_physical(hammer2_inode_t *ip, hammer2_key_t lbase); 713 void hammer2_update_time(uint64_t *timep); 714 715 /* 716 * hammer2_inode.c 717 */ 718 struct vnode *hammer2_igetv(hammer2_inode_t *ip, int *errorp); 719 720 void 
hammer2_inode_lock_nlinks(hammer2_inode_t *ip); 721 void hammer2_inode_unlock_nlinks(hammer2_inode_t *ip); 722 hammer2_inode_t *hammer2_inode_lookup(hammer2_pfsmount_t *pmp, 723 hammer2_tid_t inum); 724 hammer2_inode_t *hammer2_inode_get(hammer2_pfsmount_t *pmp, 725 hammer2_inode_t *dip, hammer2_chain_t *chain); 726 void hammer2_inode_free(hammer2_inode_t *ip); 727 void hammer2_inode_ref(hammer2_inode_t *ip); 728 void hammer2_inode_drop(hammer2_inode_t *ip); 729 void hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip, 730 hammer2_chain_t *chain); 731 732 hammer2_inode_t *hammer2_inode_create(hammer2_trans_t *trans, 733 hammer2_inode_t *dip, 734 struct vattr *vap, struct ucred *cred, 735 const uint8_t *name, size_t name_len, 736 hammer2_chain_t **chainp, int *errorp); 737 int hammer2_inode_connect(hammer2_trans_t *trans, 738 hammer2_chain_t **chainp, int hlink, 739 hammer2_inode_t *dip, hammer2_chain_t **dchainp, 740 const uint8_t *name, size_t name_len, 741 hammer2_key_t key); 742 hammer2_inode_t *hammer2_inode_common_parent(hammer2_inode_t *fdip, 743 hammer2_inode_t *tdip); 744 void hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, 745 hammer2_chain_t **parentp); 746 int hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip, 747 const uint8_t *name, size_t name_len, int isdir, 748 int *hlinkp, struct nchandle *nch); 749 int hammer2_hardlink_consolidate(hammer2_trans_t *trans, 750 hammer2_inode_t *ip, hammer2_chain_t **chainp, 751 hammer2_inode_t *cdip, hammer2_chain_t **cdchainp, 752 int nlinks); 753 int hammer2_hardlink_deconsolidate(hammer2_trans_t *trans, hammer2_inode_t *dip, 754 hammer2_chain_t **chainp, hammer2_chain_t **ochainp); 755 int hammer2_hardlink_find(hammer2_inode_t *dip, 756 hammer2_chain_t **chainp, hammer2_chain_t **ochainp); 757 void hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp); 758 759 /* 760 * hammer2_chain.c 761 */ 762 void hammer2_modify_volume(hammer2_mount_t *hmp); 763 hammer2_chain_t 
*hammer2_chain_alloc(hammer2_mount_t *hmp, 764 hammer2_pfsmount_t *pmp, 765 hammer2_trans_t *trans, 766 hammer2_blockref_t *bref); 767 void hammer2_chain_core_alloc(hammer2_trans_t *trans, hammer2_chain_t *nchain, 768 hammer2_chain_t *ochain); 769 void hammer2_chain_ref(hammer2_chain_t *chain); 770 void hammer2_chain_drop(hammer2_chain_t *chain); 771 int hammer2_chain_lock(hammer2_chain_t *chain, int how); 772 void hammer2_chain_load_async(hammer2_chain_t *chain, 773 void (*func)(hammer2_io_t *dio, 774 hammer2_chain_t *chain, 775 void *arg_p, off_t arg_o), 776 void *arg_p, off_t arg_o); 777 void hammer2_chain_moved(hammer2_chain_t *chain); 778 void hammer2_chain_modify(hammer2_trans_t *trans, 779 hammer2_chain_t **chainp, int flags); 780 hammer2_inode_data_t *hammer2_chain_modify_ip(hammer2_trans_t *trans, 781 hammer2_inode_t *ip, hammer2_chain_t **chainp, 782 int flags); 783 void hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip, 784 hammer2_chain_t *parent, 785 hammer2_chain_t **chainp, 786 int nradix, int flags); 787 void hammer2_chain_unlock(hammer2_chain_t *chain); 788 void hammer2_chain_wait(hammer2_chain_t *chain); 789 hammer2_chain_t *hammer2_chain_get(hammer2_chain_t *parent, 790 hammer2_blockref_t *bref, int generation); 791 hammer2_chain_t *hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags); 792 void hammer2_chain_lookup_done(hammer2_chain_t *parent); 793 hammer2_chain_t *hammer2_chain_lookup(hammer2_chain_t **parentp, 794 hammer2_key_t *key_nextp, 795 hammer2_key_t key_beg, hammer2_key_t key_end, 796 int *cache_indexp, int flags); 797 hammer2_chain_t *hammer2_chain_next(hammer2_chain_t **parentp, 798 hammer2_chain_t *chain, 799 hammer2_key_t *key_nextp, 800 hammer2_key_t key_beg, hammer2_key_t key_end, 801 int *cache_indexp, int flags); 802 hammer2_chain_t *hammer2_chain_scan(hammer2_chain_t *parent, 803 hammer2_chain_t *chain, 804 int *cache_indexp, int flags); 805 806 int hammer2_chain_create(hammer2_trans_t *trans, 807 
hammer2_chain_t **parentp, 808 hammer2_chain_t **chainp, 809 hammer2_key_t key, int keybits, 810 int type, size_t bytes); 811 void hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t **parentp, 812 hammer2_chain_t **chainp, 813 hammer2_blockref_t *bref, int snapshot, 814 int duplicate_reason); 815 int hammer2_chain_snapshot(hammer2_trans_t *trans, hammer2_chain_t **chainp, 816 hammer2_ioc_pfs_t *pfs); 817 void hammer2_chain_delete(hammer2_trans_t *trans, hammer2_chain_t *chain, 818 int flags); 819 void hammer2_chain_delete_duplicate(hammer2_trans_t *trans, 820 hammer2_chain_t **chainp, int flags); 821 void hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp); 822 void hammer2_chain_commit(hammer2_trans_t *trans, hammer2_chain_t *chain); 823 void hammer2_chain_setsubmod(hammer2_trans_t *trans, hammer2_chain_t *chain); 824 825 void hammer2_chain_memory_wait(hammer2_pfsmount_t *pmp); 826 void hammer2_chain_memory_inc(hammer2_pfsmount_t *pmp); 827 void hammer2_chain_memory_wakeup(hammer2_pfsmount_t *pmp); 828 void hammer2_chain_countbrefs(hammer2_chain_t *chain, 829 hammer2_blockref_t *base, int count); 830 void hammer2_chain_layer_check_locked(hammer2_mount_t *hmp, 831 hammer2_chain_core_t *core); 832 833 int hammer2_base_find(hammer2_chain_t *chain, 834 hammer2_blockref_t *base, int count, 835 int *cache_indexp, hammer2_key_t *key_nextp, 836 hammer2_key_t key_beg, hammer2_key_t key_end); 837 void hammer2_base_delete(hammer2_trans_t *trans, hammer2_chain_t *chain, 838 hammer2_blockref_t *base, int count, 839 int *cache_indexp, hammer2_chain_t *child); 840 void hammer2_base_insert(hammer2_trans_t *trans, hammer2_chain_t *chain, 841 hammer2_blockref_t *base, int count, 842 int *cache_indexp, hammer2_chain_t *child); 843 844 /* 845 * hammer2_trans.c 846 */ 847 void hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, 848 hammer2_mount_t *hmp, int flags); 849 void hammer2_trans_clear_invfsync(hammer2_trans_t *trans); 850 void 
hammer2_trans_done(hammer2_trans_t *trans); 851 852 /* 853 * hammer2_ioctl.c 854 */ 855 int hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data, 856 int fflag, struct ucred *cred); 857 858 /* 859 * hammer2_io.c 860 */ 861 hammer2_io_t *hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase, 862 int lsize, int *ownerp); 863 void hammer2_io_putblk(hammer2_io_t **diop); 864 void hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree); 865 char *hammer2_io_data(hammer2_io_t *dio, off_t lbase); 866 int hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize, 867 hammer2_io_t **diop); 868 int hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize, 869 hammer2_io_t **diop); 870 int hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize, 871 hammer2_io_t **diop); 872 int hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize, 873 hammer2_io_t **diop); 874 void hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize, 875 void (*callback)(hammer2_io_t *dio, 876 hammer2_chain_t *arg_c, 877 void *arg_p, off_t arg_o), 878 hammer2_chain_t *arg_c, 879 void *arg_p, off_t arg_o); 880 void hammer2_io_bawrite(hammer2_io_t **diop); 881 void hammer2_io_bdwrite(hammer2_io_t **diop); 882 int hammer2_io_bwrite(hammer2_io_t **diop); 883 void hammer2_io_setdirty(hammer2_io_t *dio); 884 void hammer2_io_setinval(hammer2_io_t *dio, u_int bytes); 885 void hammer2_io_brelse(hammer2_io_t **diop); 886 void hammer2_io_bqrelse(hammer2_io_t **diop); 887 int hammer2_io_isdirty(hammer2_io_t *dio); 888 889 /* 890 * hammer2_msgops.c 891 */ 892 int hammer2_msg_dbg_rcvmsg(kdmsg_msg_t *msg); 893 int hammer2_msg_adhoc_input(kdmsg_msg_t *msg); 894 895 /* 896 * hammer2_vfsops.c 897 */ 898 void hammer2_clusterctl_wakeup(kdmsg_iocom_t *iocom); 899 void hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index); 900 void hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp); 901 void hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp); 
902 void hammer2_bioq_sync(hammer2_pfsmount_t *pmp); 903 int hammer2_vfs_sync(struct mount *mp, int waitflags); 904 void hammer2_lwinprog_ref(hammer2_pfsmount_t *pmp); 905 void hammer2_lwinprog_drop(hammer2_pfsmount_t *pmp); 906 void hammer2_lwinprog_wait(hammer2_pfsmount_t *pmp); 907 908 /* 909 * hammer2_freemap.c 910 */ 911 int hammer2_freemap_alloc(hammer2_trans_t *trans, hammer2_chain_t *chain, 912 size_t bytes); 913 void hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_mount_t *hmp, 914 hammer2_blockref_t *bref, int how); 915 916 917 #endif /* !_KERNEL */ 918 #endif /* !_VFS_HAMMER2_HAMMER2_H_ */ 919