xref: /dragonfly/sys/vfs/hammer2/hammer2.h (revision 6c2b3e4e)
1 /*
2  * Copyright (c) 2011-2013 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 /*
37  * This header file contains structures used internally by the HAMMER2
38  * implementation.  See hammer2_disk.h for on-disk structures.
39  */
40 
41 #ifndef _VFS_HAMMER2_HAMMER2_H_
42 #define _VFS_HAMMER2_HAMMER2_H_
43 
44 #include <sys/param.h>
45 #include <sys/types.h>
46 #include <sys/kernel.h>
47 #include <sys/conf.h>
48 #include <sys/systm.h>
49 #include <sys/tree.h>
50 #include <sys/malloc.h>
51 #include <sys/mount.h>
52 #include <sys/vnode.h>
53 #include <sys/proc.h>
54 #include <sys/mountctl.h>
55 #include <sys/priv.h>
56 #include <sys/stat.h>
57 #include <sys/thread.h>
58 #include <sys/globaldata.h>
59 #include <sys/lockf.h>
60 #include <sys/buf.h>
61 #include <sys/queue.h>
62 #include <sys/limits.h>
63 #include <sys/buf2.h>
64 #include <sys/signal2.h>
65 #include <sys/dmsg.h>
66 #include <sys/mutex.h>
67 #include <sys/mutex2.h>
68 
69 #include "hammer2_disk.h"
70 #include "hammer2_mount.h"
71 #include "hammer2_ioctl.h"
72 #include "hammer2_ccms.h"
73 
74 struct hammer2_chain;
75 struct hammer2_inode;
76 struct hammer2_mount;
77 struct hammer2_pfsmount;
78 struct hammer2_span;
79 struct hammer2_state;
80 struct hammer2_msg;
81 
82 /*
83  * The chain structure tracks a portion of the media topology from the
84  * root (volume) down.  Chains represent volumes, inodes, indirect blocks,
85  * data blocks, and freemap nodes and leafs.
86  *
87  * The chain structure can be multi-homed and its topological recursion
88  * (chain->core) can be shared amongst several chains.  Chain structures
89  * are topologically stable once placed in the in-memory topology (they
90  * don't move around).  Modifications which cross flush synchronization
91  * boundaries, renames, resizing, or any move of the chain to elsewhere
92  * in the topology is accomplished via the DELETE-DUPLICATE mechanism.
93  *
94  * DELETE-DUPLICATE allows HAMMER2 to track work across flush synchronization
95  * points without stalling the filesystem or corrupting the flush
96  * synchronization point.  When necessary a chain will be marked DELETED
97  * and a new, duplicate chain will be allocated.
98  *
99  * This mechanism necessarily requires that we be able to overload chains
100  * at any given layer in the topology.  Overloading is accomplished via a
101  * RBTREE recursion through chain->rbtree.
102  *
103  * Advantages:
104  *
105  *	(1) Fully coherent snapshots can be taken without requiring
106  *	    a pre-flush, resulting in extremely fast (sub-millisecond)
107  *	    snapshots.
108  *
109  *	(2) Multiple synchronization points can be in-flight at the same
110  *	    time, representing multiple snapshots or flushes.
111  *
112  *	(3) The algorithms needed to keep track of everything are actually
113  *	    not that complex.
114  *
115  * Special Considerations:
116  *
117  *	A chain is ref-counted on a per-chain basis, but the chain's lock
118  *	is associated with the shared chain_core and is not per-chain.
119  *
120  *	The power-of-2 nature of the media radix tree ensures that there
121  *	will be no overlaps which straddle edges.
122  */
/*
 * Container head types used by the structures below:
 *
 *	hammer2_chain_tree	- RB tree of chains; the type of
 *				  hammer2_chain_layer.rbtree.
 *	h2_flush_deferral_list	- list of chains.  NOTE(review): presumably
 *				  linked through chain->flush_node - confirm.
 *	h2_core_list		- list of chains; the type of
 *				  hammer2_chain_core.ownerq.
 *	h2_layer_list		- list of chain layers; the type of
 *				  hammer2_chain_core.layerq.
 */
123 RB_HEAD(hammer2_chain_tree, hammer2_chain);
124 TAILQ_HEAD(h2_flush_deferral_list, hammer2_chain);
125 TAILQ_HEAD(h2_core_list, hammer2_chain);
126 TAILQ_HEAD(h2_layer_list, hammer2_chain_layer);
127 
/*
 * One layer of chains.  A layer can be linked onto a h2_layer_list (the
 * type of hammer2_chain_core.layerq) and carries an RB tree of chains.
 */
128 struct hammer2_chain_layer {
129 	int		good;		/* NOTE(review): looks like a validity marker - confirm */
130 	TAILQ_ENTRY(hammer2_chain_layer) entry;	/* linkage for a h2_layer_list */
131 	struct hammer2_chain_tree rbtree;	/* RB tree of hammer2_chain */
132 	int		refs;		/* prevent destruction */
133 };
134 
135 typedef struct hammer2_chain_layer hammer2_chain_layer_t;
136 
/*
 * Core which can be shared amongst several chains (see ownerq).  It carries
 * the list of layers (layerq) plus bookkeeping counters for the chains
 * placed under it.
 */
137 struct hammer2_chain_core {
138 	int		good;		/* NOTE(review): looks like a validity marker - confirm */
139 	struct ccms_cst	cst;		/* NOTE(review): presumably the shared chain lock - confirm */
140 	struct h2_core_list ownerq;	/* all chains sharing this core */
141 	struct h2_layer_list layerq;	/* list of hammer2_chain_layer */
142 	int		live_zero;	/* blockref array opt */
143 	hammer2_tid_t	update_lo;	/* check update against parent */
144 	hammer2_tid_t	update_hi;	/* check update against parent */
145 	u_int		chain_count;	/* total chains in layers */
146 	u_int		sharecnt;	/* NOTE(review): presumably #of chains sharing us - confirm */
147 	u_int		flags;		/* NOTE(review): presumably HAMMER2_CORE_* - confirm */
148 	u_int		live_count;	/* live (not deleted) chains in tree */
149 	int		generation;	/* generation number (inserts only) */
150 };
151 
152 typedef struct hammer2_chain_core hammer2_chain_core_t;
153 
/*
 * NOTE(review): presumably bit values for hammer2_chain_core.flags;
 * COUNTEDBREFS looks related to hammer2_chain_countbrefs() - confirm.
 */
154 #define HAMMER2_CORE_UNUSED0001		0x0001
155 #define HAMMER2_CORE_COUNTEDBREFS	0x0002
156 
157 /*
158  * H2 is a copy-on-write filesystem.  In order to allow chains to allocate
159  * smaller blocks (down to 64-bytes), but improve performance and make
160  * clustered I/O possible using larger block sizes, the kernel buffer cache
161  * is abstracted via the hammer2_io structure.
162  */
163 RB_HEAD(hammer2_io_tree, hammer2_io);
164 
165 struct hammer2_io {
166 	RB_ENTRY(hammer2_io) rbnode;	/* indexed by device offset */
167 	struct spinlock spin;		/* NOTE(review): presumably guards this dio - confirm */
168 	struct hammer2_mount *hmp;	/* NOTE(review): presumably the mount whose iotree holds us - confirm */
169 	struct buf	*bp;		/* buffer cache buffer */
170 	struct bio	*bio;		/* NOTE(review): presumably the bio for bp - confirm */
171 	off_t		pbase;		/* NOTE(review): presumably device offset of the buffer - confirm */
172 	int		psize;		/* NOTE(review): presumably size of the buffer in bytes - confirm */
	/*
	 * Completion callback.  Same signature as the func argument of
	 * hammer2_chain_load_async(); arg_c/arg_p/arg_o below have the
	 * matching types for its chain/arg1/arg2 parameters.
	 */
173 	void		(*callback)(struct hammer2_io *dio,
174 				    struct hammer2_chain *chain,
175 				    void *arg1, off_t arg2);
176 	struct hammer2_chain *arg_c;		/* INPROG I/O only */
177 	void		*arg_p;			/* INPROG I/O only */
178 	off_t		arg_o;			/* INPROG I/O only */
179 	int		refs;			/* reference count */
180 	int		act;			/* activity */
181 };
182 
183 typedef struct hammer2_io hammer2_io_t;
184 
185 /*
186  * Primary chain structure keeps track of the topology in-memory.
187  */
188 struct hammer2_chain {
189 	RB_ENTRY(hammer2_chain) rbnode;		/* node */
190 	TAILQ_ENTRY(hammer2_chain) core_entry;	/* contemporary chains */
191 	hammer2_chain_layer_t	*inlayer;	/* NOTE(review): presumably the layer holding us - confirm */
192 	hammer2_blockref_t	bref;		/* blockref for this chain */
193 	hammer2_chain_core_t	*core;		/* topological recursion (can be shared) */
194 	hammer2_chain_core_t	*above;		/* NOTE(review): presumably the core we are placed under - confirm */
195 	struct hammer2_state	*state;		/* if active cache msg */
196 	struct hammer2_mount	*hmp;		/* device mount */
197 	struct hammer2_pfsmount	*pmp;		/* can be NULL */
198 
199 	hammer2_tid_t	modify_tid;		/* snapshot/flush filter */
200 	hammer2_tid_t	delete_tid;		/* NOTE(review): presumably tid of deletion - confirm */
201 	hammer2_key_t   data_count;		/* deltas to apply */
202 	hammer2_key_t   inode_count;		/* deltas to apply */
203 	hammer2_io_t	*dio;			/* physical data buffer */
204 	u_int		bytes;			/* physical data size */
205 	u_int		flags;			/* NOTE(review): presumably HAMMER2_CHAIN_* - confirm */
206 	u_int		refs;			/* per-chain reference count */
207 	u_int		lockcnt;		/* NOTE(review): presumably lock hold count - confirm */
208 	hammer2_media_data_t *data;		/* data pointer shortcut */
209 	TAILQ_ENTRY(hammer2_chain) flush_node;	/* flush deferral list */
210 
211 	int		inode_reason;		/* NOTE(review): purpose not evident from this header - confirm */
212 };
213 
214 typedef struct hammer2_chain hammer2_chain_t;
215 
216 int hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2);
217 RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
218 
219 /*
220  * Special notes on flags:
221  *
222  * INITIAL - This flag allows a chain to be created and for storage to
223  *	     be allocated without having to immediately instantiate the
224  *	     related buffer.  The data is assumed to be all-zeros.  It
225  *	     is primarily used for indirect blocks.
226  *
227  * MOVED   - A modified chain becomes MOVED after it flushes.  A chain
228  *	     can also become MOVED if it is moved within the topology
229  *	     (even if not modified).
230  *
231  * MODIFIED- The chain's media data has been modified.
232  */
233 #define HAMMER2_CHAIN_MODIFIED		0x00000001	/* dirty chain data */
234 #define HAMMER2_CHAIN_ALLOCATED		0x00000002	/* kmalloc'd chain */
235 #define HAMMER2_CHAIN_UNUSED0004	0x00000004
236 #define HAMMER2_CHAIN_FORCECOW		0x00000008	/* force copy-on-wr */
237 #define HAMMER2_CHAIN_DELETED		0x00000010	/* deleted chain */
238 #define HAMMER2_CHAIN_INITIAL		0x00000020	/* initial create */
239 #define HAMMER2_CHAIN_FLUSHED		0x00000040	/* blktable updated */
240 #define HAMMER2_CHAIN_MOVED		0x00000080	/* bref changed */
241 #define HAMMER2_CHAIN_IOFLUSH		0x00000100	/* bawrite on put */
242 #define HAMMER2_CHAIN_DEFERRED		0x00000200	/* on a deferral list */
243 #define HAMMER2_CHAIN_DESTROYED		0x00000400	/* destroying inode */
244 #define HAMMER2_CHAIN_VOLUMESYNC	0x00000800	/* needs volume sync */
245 #define HAMMER2_CHAIN_UNUSED01000	0x00001000
246 #define HAMMER2_CHAIN_MOUNTED		0x00002000	/* PFS is mounted */
247 #define HAMMER2_CHAIN_ONRBTREE		0x00004000	/* on parent RB tree */
248 #define HAMMER2_CHAIN_SNAPSHOT		0x00008000	/* snapshot special */
249 #define HAMMER2_CHAIN_EMBEDDED		0x00010000	/* embedded data */
250 #define HAMMER2_CHAIN_RELEASE		0x00020000	/* don't keep around */
251 #define HAMMER2_CHAIN_UNUSED40000	0x00040000
252 #define HAMMER2_CHAIN_UNUSED80000	0x00080000
253 #define HAMMER2_CHAIN_DUPLICATED	0x00100000	/* fwd delete-dup */
254 #define HAMMER2_CHAIN_PFSROOT		0x00200000	/* in pfs->cluster */
255 
256 /*
257  * Flags passed to hammer2_chain_lookup() and hammer2_chain_next()
258  *
259  * NOTE: MATCHIND allows an indirect block / freemap node to be returned
260  *	 when the passed key range matches the radix.  Remember that key_end
261  *	 is inclusive (e.g. {0x000,0xFFF}, not {0x000,0x1000}).
262  */
263 #define HAMMER2_LOOKUP_NOLOCK		0x00000001	/* ref only */
264 #define HAMMER2_LOOKUP_NODATA		0x00000002	/* data left NULL */
265 #define HAMMER2_LOOKUP_SHARED		0x00000100
266 #define HAMMER2_LOOKUP_MATCHIND		0x00000200	/* return all chains */
267 #define HAMMER2_LOOKUP_UNUSED0400	0x00000400
268 #define HAMMER2_LOOKUP_ALWAYS		0x00000800	/* resolve data */
269 
270 /*
271  * Flags passed to hammer2_chain_modify() and hammer2_chain_resize()
272  *
273  * NOTE: OPTDATA allows us to avoid instantiating buffers for INDIRECT
274  *	 blocks in the INITIAL-create state.
275  */
276 #define HAMMER2_MODIFY_OPTDATA		0x00000002	/* data can be NULL */
277 #define HAMMER2_MODIFY_NO_MODIFY_TID	0x00000004
278 #define HAMMER2_MODIFY_ASSERTNOCOPY	0x00000008	/* assert no del-dup */
279 #define HAMMER2_MODIFY_NOREALLOC	0x00000010
280 #define HAMMER2_MODIFY_INPLACE		0x00000020	/* don't del-dup */
281 
282 /*
283  * Flags passed to hammer2_chain_lock()
284  */
/*
 * NEVER/MAYBE/ALWAYS are small enumerated values (not individual bits)
 * which all fit under HAMMER2_RESOLVE_MASK.  SHARED and NOREF are single
 * bits above the mask.
 */
285 #define HAMMER2_RESOLVE_NEVER		1
286 #define HAMMER2_RESOLVE_MAYBE		2
287 #define HAMMER2_RESOLVE_ALWAYS		3
288 #define HAMMER2_RESOLVE_MASK		0x0F
289 
290 #define HAMMER2_RESOLVE_SHARED		0x10	/* request shared lock */
291 #define HAMMER2_RESOLVE_NOREF		0x20	/* already ref'd on lock */
292 
293 /*
294  * Flags passed to hammer2_chain_delete()
295  */
296 #define HAMMER2_DELETE_UNUSED0001	0x0001
297 
298 /*
299  * Flags passed to hammer2_chain_delete_duplicate()
300  */
301 #define HAMMER2_DELDUP_RECORE		0x0001
302 
303 /*
304  * Cluster different types of storage together for allocations
305  */
306 #define HAMMER2_FREECACHE_INODE		0
307 #define HAMMER2_FREECACHE_INDIR		1
308 #define HAMMER2_FREECACHE_DATA		2
309 #define HAMMER2_FREECACHE_UNUSED3	3
310 #define HAMMER2_FREECACHE_TYPES		4
311 
312 /*
313  * hammer2_freemap_alloc() block preference
314  */
315 #define HAMMER2_OFF_NOPREF		((hammer2_off_t)-1)
316 
317 /*
318  * BMAP read-ahead maximum parameters
319  */
320 #define HAMMER2_BMAP_COUNT		16	/* max bmap read-ahead */
321 #define HAMMER2_BMAP_BYTES		(HAMMER2_PBUFSIZE * HAMMER2_BMAP_COUNT)
322 
323 /*
324  * Misc
325  */
326 #define HAMMER2_FLUSH_DEPTH_LIMIT	10	/* stack recursion limit */
327 
328 /*
329  * hammer2_freemap_adjust()
330  */
331 #define HAMMER2_FREEMAP_DORECOVER	1
332 #define HAMMER2_FREEMAP_DOMAYFREE	2
333 #define HAMMER2_FREEMAP_DOREALFREE	3
334 
335 /*
336  * HAMMER2 IN-MEMORY CACHE OF MEDIA STRUCTURES
337  *
338  * There is an in-memory representation of all on-media data structures.
339  * Basically everything is represented by a hammer2_chain structure
340  * in-memory and other higher-level structures map to chains.
341  *
342  * A great deal of data is accessed simply via its buffer cache buffer,
343  * which is mapped for the duration of the chain's lock.  However, because
344  * chains may represent blocks smaller than the 16KB minimum we impose
345  * on buffer cache buffers, we cannot hold related buffer cache buffers
346  * locked for smaller blocks.  In these situations we kmalloc() a copy
347  * of the block.
348  *
349  * When modifications are made to a chain a new filesystem block must be
350  * allocated.  Multiple modifications do not necessarily allocate new
351  * blocks.  However, when a flush occurs a flush synchronization point
352  * is created and any new modifications made after this point will allocate
353  * a new block even if the chain is already in a modified state.
354  *
355  * The in-memory representation may remain cached (for example in order to
356  * placemark clustering locks) even after the related data has been
357  * detached.
358  *
359  *				CORE SHARING
360  *
361  * In order to support concurrent flushes a flush synchronization point
362  * is created represented by a transaction id.  Among other things,
363  * operations may move filesystem objects from one part of the topology
364  * to another (for example, if you rename a file or when indirect blocks
365  * are created or destroyed, and a few other things).  When this occurs
366  * across a flush synchronization point the flusher needs to be able to
367  * recurse down BOTH the 'before' version of the topology and the 'after'
368  * version.
369  *
370  * To facilitate this, modifications to chains do what is called a
371  * DELETE-DUPLICATE operation.  Chains are not actually moved in-memory.
372  * Instead the chain we wish to move is deleted and a new chain is created
373  * at the target location in the topology.  ANY SUBCHAINS PLACED UNDER THE
374  * CHAIN BEING MOVED HAVE TO EXIST IN BOTH PLACES.  To make this work
375  * all sub-chains are managed by the hammer2_chain_core structure.  This
376  * structure can be multi-homed, meaning that it can have more than one
377  * chain as its parent.  When a chain is delete-duplicated the chain's core
378  * becomes shared under both the old and new chain.
379  *
380  *				STALE CHAINS
381  *
382  * When a chain is delete-duplicated the old chain typically becomes stale.
383  * This is detected via the HAMMER2_CHAIN_DUPLICATED flag in chain->flags.
384  * To avoid executing live filesystem operations on stale chains, the inode
385  * locking code will follow stale chains via core->ownerq until it finds
386  * the live chain.  The lock prevents ripups by other threads.  Lookups
387  * must properly order locking operations to prevent other threads from
388  * racing the lookup operation and will also follow stale chains when
389  * required.
390  */
391 
392 RB_HEAD(hammer2_inode_tree, hammer2_inode);
393 
394 /*
395  * A hammer2 inode.
396  *
397  * NOTE: The inode's attribute CST which is also used to lock the inode
398  *	 is embedded in the chain (chain.cst) and aliased w/ attr_cst.
 *
 * NOTE(review): neither a cst member of struct hammer2_chain nor an
 *	 attr_cst field is declared in this header (the ccms_cst is a
 *	 member of struct hammer2_chain_core), so the note above may be
 *	 stale - confirm against the locking code.
399  */
400 struct hammer2_inode {
401 	RB_ENTRY(hammer2_inode) rbnode;		/* inumber lookup (HL) */
402 	ccms_cst_t		topo_cst;	/* directory topology cst */
403 	struct hammer2_pfsmount	*pmp;		/* PFS mount */
404 	struct hammer2_inode	*pip;		/* parent inode */
405 	struct vnode		*vp;		/* associated vnode (see ITOV()) */
406 	hammer2_chain_t		*chain;		/* NOTE: rehomed on rename */
407 	struct lockf		advlock;	/* NOTE(review): presumably advisory lock state - confirm */
408 	hammer2_tid_t		inum;		/* inode number (inum_tree key type) */
409 	u_int			flags;		/* NOTE(review): presumably HAMMER2_INODE_* - confirm */
410 	u_int			refs;		/* +vpref, +flushref */
411 	uint8_t			comp_heuristic;	/* NOTE(review): presumably compression heuristic state - confirm */
412 	hammer2_off_t		size;		/* NOTE(review): cf. HAMMER2_INODE_RESIZED - confirm */
413 	uint64_t		mtime;		/* NOTE(review): cf. HAMMER2_INODE_MTIME - confirm */
414 };
415 
416 typedef struct hammer2_inode hammer2_inode_t;
417 
418 #define HAMMER2_INODE_MODIFIED		0x0001
419 #define HAMMER2_INODE_SROOT		0x0002	/* kmalloc special case */
420 #define HAMMER2_INODE_RENAME_INPROG	0x0004
421 #define HAMMER2_INODE_ONRBTREE		0x0008
422 #define HAMMER2_INODE_RESIZED		0x0010
423 #define HAMMER2_INODE_MTIME		0x0020
424 
425 int hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2);
426 RB_PROTOTYPE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
427 		hammer2_tid_t);
428 
429 /*
430  * A hammer2 transaction and flush sequencing structure.
431  *
432  * This global structure is tied into hammer2_mount and is used
433  * to sequence modifying operations and flushes.
434  *
435  * (a) Any modifying operations with sync_tid >= flush_tid will stall until
436  *     all modifying operations with sync_tid < flush_tid complete.
437  *
438  *     The flush related to flush_tid stalls until all modifying operations
439  *     with sync_tid < flush_tid complete.
440  *
441  * (b) Once unstalled, modifying operations with sync_tid > flush_tid are
442  *     allowed to run.  All modifications cause modify/duplicate operations
443  *     to occur on the related chains.  Note that most INDIRECT blocks will
444  *     be unaffected because the modifications just overload the RBTREE
445  *     structurally instead of actually modifying the indirect blocks.
446  *
447  * (c) The actual flush unstalls and RUNS CONCURRENTLY with (b), but only
448  *     utilizes the chain structures with sync_tid <= flush_tid.  The
449  *     flush will modify related indirect blocks and inodes in-place
450  *     (rather than duplicate) since the adjustments are compatible with
451  *     (b)'s RBTREE overloading.
452  *
453  *     SPECIAL NOTE:  Inode modifications have to also propagate along any
454  *		      modify/duplicate chains.  File writes detect the flush
455  *		      and force out the conflicting buffer cache buffer(s)
456  *		      before reusing them.
457  *
458  * (d) Snapshots can be made instantly but must be flushed and disconnected
459  *     from their duplicative source before they can be mounted.  This is
460  *     because while H2's on-media structure supports forks, its in-memory
461  *     structure only supports very simple forking for background flushing
462  *     purposes.
463  *
464  * TODO: Flush merging.  When fsync() is called on multiple discrete files
465  *	 concurrently there is no reason to stall the second fsync.
466  *	 The final flush that reaches to root can cover both fsync()s.
467  *
468  *     The chains typically terminate as they fly onto the disk.  The flush
469  *     ultimately reaches the volume header.
470  */
471 struct hammer2_trans {
472 	TAILQ_ENTRY(hammer2_trans) entry;	/* linkage for a hammer2_trans_queue */
473 	struct hammer2_pfsmount *pmp;		/* might be NULL */
474 	struct hammer2_mount	*hmp_single;	/* if single-targeted */
475 	hammer2_tid_t		sync_tid;	/* transaction id used for sequencing */
476 	hammer2_tid_t		real_tid;	/* NOTE(review): relation to sync_tid not evident here - confirm */
477 	hammer2_tid_t		inode_tid;	/* NOTE(review): presumably tid for inode allocation - confirm */
478 	thread_t		td;		/* pointer */
479 	int			flags;		/* NOTE(review): presumably HAMMER2_TRANS_* - confirm */
480 	int			blocked;	/* NOTE(review): presumably set while stalled - confirm */
481 	uint8_t			inodes_created;	/* NOTE(review): presumably count of inodes created - confirm */
482 	uint8_t			dummy[7];	/* NOTE(review): presumably padding - confirm */
483 };
484 
485 typedef struct hammer2_trans hammer2_trans_t;
486 
487 #define HAMMER2_TRANS_ISFLUSH		0x0001	/* formal flush */
488 #define HAMMER2_TRANS_UNUSED0002	0x0002
489 #define HAMMER2_TRANS_BUFCACHE		0x0004	/* from bioq strategy write */
490 #define HAMMER2_TRANS_NEWINODE		0x0008	/* caller allocating inode */
491 #define HAMMER2_TRANS_ISALLOCATING	0x0010	/* in allocator */
492 
/*
 * HAMMER2_FREEMAP_HEUR (NRADIX * TYPES = 32) is the number of elements in
 * hammer2_mount.heur_freemap[].
 */
493 #define HAMMER2_FREEMAP_HEUR_NRADIX	4	/* pwr 2 PBUFRADIX-MINIORADIX */
494 #define HAMMER2_FREEMAP_HEUR_TYPES	8
495 #define HAMMER2_FREEMAP_HEUR		(HAMMER2_FREEMAP_HEUR_NRADIX * \
496 					 HAMMER2_FREEMAP_HEUR_TYPES)
497 
498 /*
499  * Global (per device) mount structure for device (aka vp->v_mount->hmp)
500  */
501 TAILQ_HEAD(hammer2_trans_queue, hammer2_trans);
502 
503 struct hammer2_mount {
504 	struct vnode	*devvp;		/* device vnode */
505 	int		ronly;		/* read-only mount */
506 	int		pmp_count;	/* PFS mounts backed by us */
507 	TAILQ_ENTRY(hammer2_mount) mntentry; /* hammer2_mntlist */
508 
509 	struct malloc_type *mchain;	/* NOTE(review): presumably malloc type for chains - confirm */
510 	int		nipstacks;	/* NOTE(review): purpose not evident from this header - confirm */
511 	int		maxipstacks;	/* NOTE(review): purpose not evident from this header - confirm */
512 	struct spinlock	io_spin;	/* iotree access */
513 	struct hammer2_io_tree iotree;	/* RB tree of hammer2_io */
514 	int		iofree_count;	/* NOTE(review): presumably #of idle hammer2_io's - confirm */
515 	hammer2_chain_t vchain;		/* anchor chain (topology) */
516 	hammer2_chain_t fchain;		/* anchor chain (freemap) */
517 	hammer2_inode_t	*sroot;		/* super-root localized to media */
518 	struct lock	alloclk;	/* lockmgr lock */
519 	struct lock	voldatalk;	/* lockmgr lock */
520 	struct hammer2_trans_queue transq; /* all in-progress transactions */
521 	hammer2_off_t	heur_freemap[HAMMER2_FREEMAP_HEUR]; /* NOTE(review): presumably allocation hints - confirm */
522 	int		flushcnt;	/* #of flush trans on the list */
523 
524 	int		volhdrno;	/* last volhdrno written */
525 	hammer2_volume_data_t voldata;	/* NOTE(review): presumably the working volume data - confirm */
526 	hammer2_volume_data_t volsync;	/* synchronized voldata */
527 };
528 
529 typedef struct hammer2_mount hammer2_mount_t;
530 
531 /*
532  * HAMMER2 cluster - a device/root associated with a PFS.
533  *
534  * A PFS may have several hammer2_cluster's associated with it.
535  */
536 #define HAMMER2_MAXCLUSTER	8
537 
538 struct hammer2_cluster {
539 	int			nchains;	/* NOTE(review): presumably #of entries used in chains[] - confirm */
540 	int			status;		/* NOTE(review): meaning not evident from this header - confirm */
541 	hammer2_chain_t		*chains[HAMMER2_MAXCLUSTER];	/* at most HAMMER2_MAXCLUSTER chains */
542 };
543 
544 typedef struct hammer2_cluster hammer2_cluster_t;
545 
546 /*
547  * HAMMER2 PFS mount point structure (aka vp->v_mount->mnt_data).
548  * This has a 1:1 correspondence to struct mount (note that the
549  * hammer2_mount structure has a N:1 correspondence).
550  *
551  * This structure represents a cluster mount and not necessarily a
552  * PFS under a specific device mount (HMP).  The distinction is important
553  * because the elements backing a cluster mount can change on the fly.
554  *
555  * Usually the first element under the cluster represents the original
556  * user-requested mount that bootstraps the whole mess.  In significant
557  * setups the original is usually just a read-only media image (or
558  * representative file) that simply contains a bootstrap volume header
559  * listing the configuration.
560  */
561 struct hammer2_pfsmount {
562 	struct mount		*mp;		/* associated struct mount (1:1) */
563 	hammer2_cluster_t	cluster;	/* chains backing this PFS */
564 	hammer2_inode_t		*iroot;		/* PFS root inode */
565 	hammer2_off_t		inode_count;	/* copy of inode_count */
566 	ccms_domain_t		ccms_dom;	/* NOTE(review): presumably the CCMS domain for this PFS - confirm */
567 	struct netexport	export;		/* nfs export */
568 	int			ronly;		/* read-only mount */
569 	struct malloc_type	*minode;	/* NOTE(review): presumably malloc type for inodes - confirm */
570 	struct malloc_type	*mmsg;		/* NOTE(review): presumably malloc type for messages - confirm */
571 	kdmsg_iocom_t		iocom;		/* NOTE(review): presumably the dmsg communications state - confirm */
572 	struct spinlock		inum_spin;	/* inumber lookup */
573 	struct hammer2_inode_tree inum_tree;	/* RB tree of hammer2_inode */
574 	long			inmem_inodes;	/* NOTE(review): presumably #of in-memory inodes - confirm */
575 	long			inmem_dirty_chains; /* NOTE(review): cf. HAMMER2_DIRTYCHAIN_* - confirm */
576 	int			count_lwinprog;	/* logical write in prog */
577 	thread_t		wthread_td;	/* write thread td */
578 	struct bio_queue_head	wthread_bioq;	/* logical buffer bioq */
579 	struct mtx		wthread_mtx;	/* interlock */
580 	int			wthread_destroy;/* termination sequencing */
581 };
582 
583 typedef struct hammer2_pfsmount hammer2_pfsmount_t;
584 
/*
 * In each pair the WAITING value is bit 31 and the MASK value covers the
 * low 31 bits.  NOTE(review): presumably applied to
 * hammer2_pfsmount.inmem_dirty_chains and .count_lwinprog respectively,
 * with the high bit flagging a waiter - confirm against the users.
 */
585 #define HAMMER2_DIRTYCHAIN_WAITING	0x80000000
586 #define HAMMER2_DIRTYCHAIN_MASK		0x7FFFFFFF
587 
588 #define HAMMER2_LWINPROG_WAITING	0x80000000
589 #define HAMMER2_LWINPROG_MASK		0x7FFFFFFF
590 
591 #if defined(_KERNEL)
592 
593 MALLOC_DECLARE(M_HAMMER2);
594 
/*
 * VTOI() casts a vnode's v_data to a hammer2_inode_t pointer; ITOV()
 * yields the inode's vp field.  Neither checks its argument.
 */
595 #define VTOI(vp)	((hammer2_inode_t *)(vp)->v_data)
596 #define ITOV(ip)	((ip)->vp)
597 
598 /*
599  * Currently locked chains retain the locked buffer cache buffer for
600  * indirect blocks, and indirect blocks can be one of two sizes.  The
601  * device buffer has to match the case to avoid deadlocking recursive
602  * chains that might otherwise try to access different offsets within
603  * the same device buffer.
604  */
605 static __inline
606 int
607 hammer2_devblkradix(int radix)
608 {
609 	if (radix <= HAMMER2_LBUFRADIX) {
610 		return (HAMMER2_LBUFRADIX);
611 	} else {
612 		return (HAMMER2_PBUFRADIX);
613 	}
614 }
615 
616 static __inline
617 size_t
618 hammer2_devblksize(size_t bytes)
619 {
620 	if (bytes <= HAMMER2_LBUFSIZE) {
621 		return(HAMMER2_LBUFSIZE);
622 	} else {
623 		KKASSERT(bytes <= HAMMER2_PBUFSIZE &&
624 			 (bytes ^ (bytes - 1)) == ((bytes << 1) - 1));
625 		return (HAMMER2_PBUFSIZE);
626 	}
627 }
628 
629 
630 static __inline
631 hammer2_pfsmount_t *
632 MPTOPMP(struct mount *mp)
633 {
634 	return ((hammer2_pfsmount_t *)mp->mnt_data);
635 }
636 
637 extern struct vop_ops hammer2_vnode_vops;
638 extern struct vop_ops hammer2_spec_vops;
639 extern struct vop_ops hammer2_fifo_vops;
640 
641 extern int hammer2_debug;
642 extern int hammer2_cluster_enable;
643 extern int hammer2_hardlink_enable;
644 extern int hammer2_flush_pipe;
645 extern long hammer2_limit_dirty_chains;
646 extern long hammer2_iod_file_read;
647 extern long hammer2_iod_meta_read;
648 extern long hammer2_iod_indr_read;
649 extern long hammer2_iod_fmap_read;
650 extern long hammer2_iod_volu_read;
651 extern long hammer2_iod_file_write;
652 extern long hammer2_iod_meta_write;
653 extern long hammer2_iod_indr_write;
654 extern long hammer2_iod_fmap_write;
655 extern long hammer2_iod_volu_write;
656 extern long hammer2_ioa_file_read;
657 extern long hammer2_ioa_meta_read;
658 extern long hammer2_ioa_indr_read;
659 extern long hammer2_ioa_fmap_read;
660 extern long hammer2_ioa_volu_read;
661 extern long hammer2_ioa_file_write;
662 extern long hammer2_ioa_meta_write;
663 extern long hammer2_ioa_indr_write;
664 extern long hammer2_ioa_fmap_write;
665 extern long hammer2_ioa_volu_write;
666 
667 extern struct objcache *cache_buffer_read;
668 extern struct objcache *cache_buffer_write;
669 
670 extern int destroy;
671 extern int write_thread_wakeup;
672 
673 extern mtx_t thread_protect;
674 
675 /*
676  * hammer2_subr.c
677  */
/*
 * Thin aliases: hammer2_icrc32() forwards to iscsi_crc32() and
 * hammer2_icrc32c() forwards to iscsi_crc32_ext(), which takes an extra
 * crc argument.
 */
678 #define hammer2_icrc32(buf, size)	iscsi_crc32((buf), (size))
679 #define hammer2_icrc32c(buf, size, crc)	iscsi_crc32_ext((buf), (size), (crc))
680 
681 hammer2_chain_t *hammer2_inode_lock_ex(hammer2_inode_t *ip);
682 hammer2_chain_t *hammer2_inode_lock_sh(hammer2_inode_t *ip);
683 void hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_chain_t *chain);
684 void hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_chain_t *chain);
685 void hammer2_chain_refactor(hammer2_chain_t **chainp);
686 void hammer2_voldata_lock(hammer2_mount_t *hmp);
687 void hammer2_voldata_unlock(hammer2_mount_t *hmp, int modify);
688 ccms_state_t hammer2_inode_lock_temp_release(hammer2_inode_t *ip);
689 void hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, ccms_state_t ostate);
690 ccms_state_t hammer2_inode_lock_upgrade(hammer2_inode_t *ip);
691 void hammer2_inode_lock_downgrade(hammer2_inode_t *ip, ccms_state_t ostate);
692 
693 void hammer2_mount_exlock(hammer2_mount_t *hmp);
694 void hammer2_mount_shlock(hammer2_mount_t *hmp);
695 void hammer2_mount_unlock(hammer2_mount_t *hmp);
696 
697 int hammer2_get_dtype(hammer2_chain_t *chain);
698 int hammer2_get_vtype(hammer2_chain_t *chain);
699 u_int8_t hammer2_get_obj_type(enum vtype vtype);
700 void hammer2_time_to_timespec(u_int64_t xtime, struct timespec *ts);
701 u_int64_t hammer2_timespec_to_time(struct timespec *ts);
702 u_int32_t hammer2_to_unix_xid(uuid_t *uuid);
703 void hammer2_guid_to_uuid(uuid_t *uuid, u_int32_t guid);
704 
705 hammer2_key_t hammer2_dirhash(const unsigned char *name, size_t len);
706 int hammer2_getradix(size_t bytes);
707 
708 int hammer2_calc_logical(hammer2_inode_t *ip, hammer2_off_t uoff,
709 			hammer2_key_t *lbasep, hammer2_key_t *leofp);
710 int hammer2_calc_physical(hammer2_inode_t *ip, hammer2_key_t lbase);
711 void hammer2_update_time(uint64_t *timep);
712 
713 /*
714  * hammer2_inode.c
715  */
716 struct vnode *hammer2_igetv(hammer2_inode_t *ip, int *errorp);
717 
718 void hammer2_inode_lock_nlinks(hammer2_inode_t *ip);
719 void hammer2_inode_unlock_nlinks(hammer2_inode_t *ip);
720 hammer2_inode_t *hammer2_inode_lookup(hammer2_pfsmount_t *pmp,
721 			hammer2_tid_t inum);
722 hammer2_inode_t *hammer2_inode_get(hammer2_pfsmount_t *pmp,
723 			hammer2_inode_t *dip, hammer2_chain_t *chain);
724 void hammer2_inode_free(hammer2_inode_t *ip);
725 void hammer2_inode_ref(hammer2_inode_t *ip);
726 void hammer2_inode_drop(hammer2_inode_t *ip);
727 void hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
728 			hammer2_chain_t *chain);
729 
730 hammer2_inode_t *hammer2_inode_create(hammer2_trans_t *trans,
731 			hammer2_inode_t *dip,
732 			struct vattr *vap, struct ucred *cred,
733 			const uint8_t *name, size_t name_len,
734 			hammer2_chain_t **chainp, int *errorp);
735 int hammer2_inode_connect(hammer2_trans_t *trans, int hlink,
736 			hammer2_inode_t *dip, hammer2_chain_t **chainp,
737 			const uint8_t *name, size_t name_len);
738 hammer2_inode_t *hammer2_inode_common_parent(hammer2_inode_t *fdip,
739 			hammer2_inode_t *tdip);
740 void hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip,
741 			hammer2_chain_t **parentp);
742 int hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
743 			const uint8_t *name, size_t name_len, int isdir,
744 			int *hlinkp);
745 int hammer2_hardlink_consolidate(hammer2_trans_t *trans, hammer2_inode_t *ip,
746 			hammer2_chain_t **chainp,
747 			hammer2_inode_t *tdip, int linkcnt);
748 int hammer2_hardlink_deconsolidate(hammer2_trans_t *trans, hammer2_inode_t *dip,
749 			hammer2_chain_t **chainp, hammer2_chain_t **ochainp);
750 int hammer2_hardlink_find(hammer2_inode_t *dip,
751 			hammer2_chain_t **chainp, hammer2_chain_t **ochainp);
752 
753 /*
754  * hammer2_chain.c
755  */
756 void hammer2_modify_volume(hammer2_mount_t *hmp);
757 hammer2_chain_t *hammer2_chain_alloc(hammer2_mount_t *hmp,
758 				hammer2_pfsmount_t *pmp,
759 				hammer2_trans_t *trans,
760 				hammer2_blockref_t *bref);
761 void hammer2_chain_core_alloc(hammer2_trans_t *trans, hammer2_chain_t *nchain,
762 				hammer2_chain_t *ochain);
763 void hammer2_chain_ref(hammer2_chain_t *chain);
764 void hammer2_chain_drop(hammer2_chain_t *chain);
765 int hammer2_chain_lock(hammer2_chain_t *chain, int how);
766 void hammer2_chain_load_async(hammer2_chain_t *chain,
767 				void (*func)(hammer2_io_t *dio,
768 					     hammer2_chain_t *chain,
769 					     void *arg_p, off_t arg_o),
770 				void *arg_p, off_t arg_o);
771 void hammer2_chain_moved(hammer2_chain_t *chain);
772 void hammer2_chain_modify(hammer2_trans_t *trans,
773 				hammer2_chain_t **chainp, int flags);
774 hammer2_inode_data_t *hammer2_chain_modify_ip(hammer2_trans_t *trans,
775 				hammer2_inode_t *ip, hammer2_chain_t **chainp,
776 				int flags);
777 void hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip,
778 				hammer2_chain_t *parent,
779 				hammer2_chain_t **chainp,
780 				int nradix, int flags);
781 void hammer2_chain_unlock(hammer2_chain_t *chain);
782 void hammer2_chain_wait(hammer2_chain_t *chain);
/*
 * hammer2_chain_get() returns a chain pointer given a parent chain, a
 * blockref and an int generation.
 *
 * NOTE(review): how 'generation' is validated is not visible here --
 * confirm against the implementation.
 */
783 hammer2_chain_t *hammer2_chain_get(hammer2_chain_t *parent,
784 				hammer2_blockref_t *bref, int generation);
/*
 * hammer2_chain_lookup_init() takes a parent chain plus flags and
 * returns a chain pointer; hammer2_chain_lookup_done() takes a parent
 * chain and returns nothing.
 *
 * hammer2_chain_lookup() and hammer2_chain_next() take the parent by
 * address (parentp), a pointer to a key (key_nextp), two keys by value
 * (key_beg, key_end), an int pointer (cache_indexp) and flags; both
 * return a chain pointer.  hammer2_chain_next() additionally takes a
 * chain.  hammer2_chain_scan() takes the parent by value and has no key
 * arguments.
 *
 * NOTE(review): presumably lookup_init/lookup_done bracket a sequence
 * of lookup/next calls and key_beg..key_end bound the search range --
 * confirm against the implementation.
 */
785 hammer2_chain_t *hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags);
786 void hammer2_chain_lookup_done(hammer2_chain_t *parent);
787 hammer2_chain_t *hammer2_chain_lookup(hammer2_chain_t **parentp,
788 				hammer2_key_t *key_nextp,
789 				hammer2_key_t key_beg, hammer2_key_t key_end,
790 				int *cache_indexp, int flags);
791 hammer2_chain_t *hammer2_chain_next(hammer2_chain_t **parentp,
792 				hammer2_chain_t *chain,
793 				hammer2_key_t *key_nextp,
794 				hammer2_key_t key_beg, hammer2_key_t key_end,
795 				int *cache_indexp, int flags);
796 hammer2_chain_t *hammer2_chain_scan(hammer2_chain_t *parent,
797 				hammer2_chain_t *chain,
798 				int *cache_indexp, int flags);
799 
/*
 * Prototypes for hammer2_chain_create() through
 * hammer2_chain_setsubmod().  Every function in this group takes the
 * transaction as its first argument.
 *
 * Return types: hammer2_chain_create() and hammer2_chain_snapshot()
 * return int; the others return nothing.
 *
 * Chain passing: create and duplicate take both parentp and chainp by
 * address; snapshot, delete_duplicate and flush take chainp by address;
 * delete, commit and setsubmod take the chain pointer directly.
 *
 * NOTE(review): the int results are presumably errno-style codes, and
 * the values accepted for 'type', 'snapshot', 'duplicate_reason' and
 * 'flags' are defined elsewhere -- confirm against the implementation.
 */
800 int hammer2_chain_create(hammer2_trans_t *trans,
801 				hammer2_chain_t **parentp,
802 				hammer2_chain_t **chainp,
803 				hammer2_key_t key, int keybits,
804 				int type, size_t bytes);
805 void hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t **parentp,
806 				hammer2_chain_t **chainp,
807 				hammer2_blockref_t *bref, int snapshot,
808 				int duplicate_reason);
809 int hammer2_chain_snapshot(hammer2_trans_t *trans, hammer2_chain_t **chainp,
810 				hammer2_ioc_pfs_t *pfs);
811 void hammer2_chain_delete(hammer2_trans_t *trans, hammer2_chain_t *chain,
812 				int flags);
813 void hammer2_chain_delete_duplicate(hammer2_trans_t *trans,
814 				hammer2_chain_t **chainp, int flags);
815 void hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp);
816 void hammer2_chain_commit(hammer2_trans_t *trans, hammer2_chain_t *chain);
817 void hammer2_chain_setsubmod(hammer2_trans_t *trans, hammer2_chain_t *chain);
818 
/*
 * hammer2_chain_memory_wait(), hammer2_chain_memory_inc() and
 * hammer2_chain_memory_wakeup() each take only a hammer2_pfsmount_t
 * pointer and return nothing.
 *
 * NOTE(review): presumably these cooperate on a per-PFS chain memory
 * accounting/throttle -- the counter and wait condition are not visible
 * in this header; confirm against the implementation.
 */
819 void hammer2_chain_memory_wait(hammer2_pfsmount_t *pmp);
820 void hammer2_chain_memory_inc(hammer2_pfsmount_t *pmp);
821 void hammer2_chain_memory_wakeup(hammer2_pfsmount_t *pmp);
/*
 * hammer2_chain_countbrefs() takes a chain plus a blockref pointer
 * (base) and an int count; hammer2_chain_layer_check_locked() takes the
 * mount and a hammer2_chain_core_t pointer.  Both return nothing.
 *
 * NOTE(review): base/count presumably describe a blockref array and its
 * element count -- confirm against the implementation.
 */
822 void hammer2_chain_countbrefs(hammer2_chain_t *chain,
823 				hammer2_blockref_t *base, int count);
824 void hammer2_chain_layer_check_locked(hammer2_mount_t *hmp,
825 				hammer2_chain_core_t *core);
826 
/*
 * hammer2_base_find(), hammer2_base_delete() and hammer2_base_insert()
 * share the arguments (chain, base, count, cache_indexp).
 *
 * hammer2_base_find() returns int and additionally takes a pointer to a
 * key (key_nextp) and two keys by value (key_beg, key_end); it takes no
 * transaction.  hammer2_base_delete() and hammer2_base_insert() have
 * identical parameter lists -- a leading transaction and a trailing
 * child chain -- and return nothing.
 *
 * NOTE(review): base/count presumably describe a blockref array and the
 * int returned by hammer2_base_find() is presumably an index into it;
 * what it returns when nothing matches is not visible here -- confirm
 * against the implementation.
 */
827 int hammer2_base_find(hammer2_chain_t *chain,
828 				hammer2_blockref_t *base, int count,
829 				int *cache_indexp, hammer2_key_t *key_nextp,
830 				hammer2_key_t key_beg, hammer2_key_t key_end);
831 void hammer2_base_delete(hammer2_trans_t *trans, hammer2_chain_t *chain,
832 				hammer2_blockref_t *base, int count,
833 				int *cache_indexp, hammer2_chain_t *child);
834 void hammer2_base_insert(hammer2_trans_t *trans, hammer2_chain_t *chain,
835 				hammer2_blockref_t *base, int count,
836 				int *cache_indexp, hammer2_chain_t *child);
837 
838 /*
839  * hammer2_trans.c
840  */
/*
 * All three functions operate on a caller-supplied hammer2_trans_t
 * pointer and return nothing.  hammer2_trans_init() also takes a
 * hammer2_pfsmount_t, a hammer2_mount_t and int flags; the other two
 * take the transaction alone.
 *
 * NOTE(review): presumably hammer2_trans_init() and
 * hammer2_trans_done() must be paired around a transaction, and the
 * flag values are defined elsewhere -- confirm against the
 * implementation.
 */
841 void hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp,
842 				hammer2_mount_t *hmp, int flags);
843 void hammer2_trans_clear_invfsync(hammer2_trans_t *trans);
844 void hammer2_trans_done(hammer2_trans_t *trans);
845 
846 /*
847  * hammer2_ioctl.c
848  */
/*
 * hammer2_ioctl() takes an inode (ip), an unsigned long (com), an
 * untyped data pointer, an int (fflag) and a struct ucred pointer, and
 * returns int.
 *
 * NOTE(review): com is presumably the ioctl command number, with the
 * layout of *data depending on it, and the result presumably an
 * errno-style code -- confirm against the implementation.
 */
849 int hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data,
850 				int fflag, struct ucred *cred);
851 
852 /*
853  * hammer2_io.c
854  */
/*
 * hammer2_io_getblk() returns a hammer2_io_t pointer for a mount, an
 * off_t (lbase) and an int (lsize), and also takes an int pointer
 * (ownerp).  hammer2_io_putblk() takes the hammer2_io_t by address
 * (diop) and returns nothing.
 *
 * NOTE(review): what is written through ownerp, and whether putblk
 * clears *diop, is not visible in this header -- confirm against the
 * implementation.
 */
855 hammer2_io_t *hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase,
856 				int lsize, int *ownerp);
857 void hammer2_io_putblk(hammer2_io_t **diop);
/*
 * hammer2_io_cleanup() takes the mount and a struct hammer2_io_tree
 * pointer; hammer2_io_data() returns a char pointer for a
 * (dio, lbase) pair.
 */
858 void hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree);
859 char *hammer2_io_data(hammer2_io_t *dio, off_t lbase);
/*
 * hammer2_io_new(), hammer2_io_newnz(), hammer2_io_newq() and
 * hammer2_io_bread() have identical signatures: (hmp, lbase, lsize,
 * diop) returning int, with the hammer2_io_t passed by address.
 *
 * NOTE(review): presumably each stores the resulting hammer2_io_t in
 * *diop and returns an errno-style code; how the four variants differ
 * is not visible in this header -- confirm against the implementation.
 */
860 int hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
861 				hammer2_io_t **diop);
862 int hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize,
863 				hammer2_io_t **diop);
864 int hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize,
865 				hammer2_io_t **diop);
866 int hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
867 				hammer2_io_t **diop);
/*
 * hammer2_io_breadcb() takes the same leading (hmp, lbase, lsize)
 * arguments as hammer2_io_bread() but returns nothing and has no diop
 * argument.  Instead it takes a function pointer (callback) followed by
 * three arguments (arg_c, arg_p, arg_o) whose types and names match the
 * last three parameters of callback; callback's first parameter is a
 * hammer2_io_t pointer.
 *
 * NOTE(review): presumably arg_c/arg_p/arg_o are handed through to
 * callback unchanged along with the hammer2_io_t -- confirm against the
 * implementation.
 */
868 void hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize,
869 				void (*callback)(hammer2_io_t *dio,
870 						 hammer2_chain_t *arg_c,
871 						 void *arg_p, off_t arg_o),
872 				hammer2_chain_t *arg_c,
873 				void *arg_p, off_t arg_o);
/*
 * hammer2_io_bawrite(), hammer2_io_bdwrite() and hammer2_io_bwrite()
 * all take the hammer2_io_t by address (diop).  Only
 * hammer2_io_bwrite() has a return value (int).
 *
 * NOTE(review): the names mirror the kernel buffer-cache
 * bawrite/bdwrite/bwrite calls, so presumably asynchronous, delayed and
 * synchronous writes respectively -- confirm against the
 * implementation.
 */
874 void hammer2_io_bawrite(hammer2_io_t **diop);
875 void hammer2_io_bdwrite(hammer2_io_t **diop);
876 int hammer2_io_bwrite(hammer2_io_t **diop);
/*
 * hammer2_io_setdirty(), hammer2_io_setinval() and
 * hammer2_io_isdirty() take the hammer2_io_t pointer directly;
 * hammer2_io_setinval() also takes an unsigned byte count and
 * hammer2_io_isdirty() returns int.  hammer2_io_brelse() and
 * hammer2_io_bqrelse() take the hammer2_io_t by address and return
 * nothing.
 *
 * NOTE(review): whether the diop functions clear *diop on return is not
 * visible in this header -- confirm against the implementation.
 */
877 void hammer2_io_setdirty(hammer2_io_t *dio);
878 void hammer2_io_setinval(hammer2_io_t *dio, u_int bytes);
879 void hammer2_io_brelse(hammer2_io_t **diop);
880 void hammer2_io_bqrelse(hammer2_io_t **diop);
881 int hammer2_io_isdirty(hammer2_io_t *dio);
882 
883 /*
884  * hammer2_msgops.c
885  */
/*
 * Both functions take a single kdmsg_msg_t pointer and return int.
 *
 * NOTE(review): presumably these are message-receive handlers and the
 * result is an errno-style code; ownership of msg after the call is not
 * visible in this header -- confirm against the implementation.
 */
886 int hammer2_msg_dbg_rcvmsg(kdmsg_msg_t *msg);
887 int hammer2_msg_adhoc_input(kdmsg_msg_t *msg);
888 
889 /*
890  * hammer2_vfsops.c
891  */
/*
 * hammer2_clusterctl_wakeup() takes a kdmsg_iocom_t pointer.
 * hammer2_volconf_update() and hammer2_cluster_reconnect() take a
 * hammer2_pfsmount_t pointer plus, respectively, an int index and a
 * struct file pointer.  All three return nothing.
 *
 * NOTE(review): the valid range of 'index' and who owns fp after the
 * call are not visible in this header -- confirm against the
 * implementation.
 */
892 void hammer2_clusterctl_wakeup(kdmsg_iocom_t *iocom);
893 void hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index);
894 void hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp);
/*
 * hammer2_dump_chain() takes a chain, an int (tab) and an int pointer
 * (countp) and returns nothing.
 *
 * NOTE(review): tab is presumably an indentation level and countp a
 * running limit/counter for the dump -- confirm against the
 * implementation.
 */
895 void hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp);
/*
 * hammer2_bioq_sync() takes a hammer2_pfsmount_t pointer and returns
 * nothing.  hammer2_vfs_sync() takes a struct mount pointer and an int
 * (waitflags) and returns int.
 */
896 void hammer2_bioq_sync(hammer2_pfsmount_t *pmp);
897 int hammer2_vfs_sync(struct mount *mp, int waitflags);
/*
 * hammer2_lwinprog_ref(), hammer2_lwinprog_drop() and
 * hammer2_lwinprog_wait() each take only a hammer2_pfsmount_t pointer
 * and return nothing.
 *
 * NOTE(review): presumably ref/drop maintain an in-progress count that
 * wait blocks on -- confirm against the implementation.
 */
898 void hammer2_lwinprog_ref(hammer2_pfsmount_t *pmp);
899 void hammer2_lwinprog_drop(hammer2_pfsmount_t *pmp);
900 void hammer2_lwinprog_wait(hammer2_pfsmount_t *pmp);
901 
902 /*
903  * hammer2_freemap.c
904  */
/*
 * hammer2_freemap_alloc() takes a transaction, a chain and a byte count
 * (size_t) and returns int.  hammer2_freemap_adjust() takes a
 * transaction, the mount, a blockref and an int (how) and returns
 * nothing.
 *
 * NOTE(review): the int result is presumably an errno-style code and
 * the accepted values of 'how' are defined elsewhere -- confirm against
 * the implementation.
 */
905 int hammer2_freemap_alloc(hammer2_trans_t *trans, hammer2_chain_t *chain,
906 				size_t bytes);
907 void hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_mount_t *hmp,
908 				hammer2_blockref_t *bref, int how);
909 
910 
911 #endif /* !_KERNEL */
912 #endif /* !_VFS_HAMMER2_HAMMER2_H_ */
913