1 /* $NetBSD: wapbl.h,v 1.17 2016/01/23 16:02:09 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2003,2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Wasabi Systems, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #ifndef _SYS_WAPBL_H
33 #define _SYS_WAPBL_H
34
35 #include <sys/mutex.h>
36
37 #if defined(_KERNEL) || defined(_KMEMUSER)
38 #include <miscfs/specfs/specdev.h>
39 #endif
40
/*
 * This header file describes the API and data structures for
 * write-ahead physical block logging (WAPBL) support.
 */
44
45 #if defined(_KERNEL_OPT)
46 #include "opt_wapbl.h"
47 #endif
48
/*
 * Debug defaults.  When WAPBL_DEBUG is defined and the builder has not
 * chosen WAPBL_DEBUG_PRINT, enable the replay and open message
 * categories.  The WAPBL_PRINT_* names are only resolved where the macro
 * is expanded.
 */
#ifdef WAPBL_DEBUG
#ifndef WAPBL_DEBUG_PRINT
#define WAPBL_DEBUG_PRINT (WAPBL_PRINT_REPLAY | WAPBL_PRINT_OPEN)
#endif

/*
 * Change to "#if 1" to define WAPBL_DEBUG_BUFBYTES, which adds the
 * we_unsynced_bufbytes field to struct wapbl_entry.
 */
#if 0
#define WAPBL_DEBUG_BUFBYTES
#endif

#endif
59
#ifdef WAPBL_DEBUG_PRINT

/*
 * Debug message categories.  Every enumerator is a distinct bit, so
 * categories can be OR-ed together into a mask and tested against
 * wapbl_debug_print by WAPBL_PRINTF().
 */
enum {
	WAPBL_PRINT_OPEN = 0x1,
	WAPBL_PRINT_FLUSH = 0x2,
	WAPBL_PRINT_TRUNCATE = 0x4,
	WAPBL_PRINT_TRANSACTION = 0x8,
	WAPBL_PRINT_BUFFER = 0x10,
	WAPBL_PRINT_BUFFER2 = 0x20,
	WAPBL_PRINT_ALLOC = 0x40,
	WAPBL_PRINT_INODE = 0x80,
	WAPBL_PRINT_WRITE = 0x100,
	WAPBL_PRINT_IO = 0x200,
	WAPBL_PRINT_REPLAY = 0x400,
	WAPBL_PRINT_ERROR = 0x800,
	WAPBL_PRINT_DISCARD = 0x1000,
	WAPBL_PRINT_BIODONE = 0x2000,
};

/*
 * WAPBL_PRINTF(mask, a): call printf with the parenthesised argument
 * list `a' when any bit of `mask' is set in wapbl_debug_print, e.g.
 *
 *	WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("opened %d\n", n));
 *
 * The expansion is wrapped in do { } while (0) so that it forms exactly
 * one statement.  A bare `if' expansion would capture the `else' of an
 * enclosing if/else that is written without braces.
 */
#define WAPBL_PRINTF(mask, a)						\
	do {								\
		if (wapbl_debug_print & (mask))				\
			printf a;					\
	} while (/*CONSTCOND*/0)
extern int wapbl_debug_print;
#else
/* Debug printing compiled out: expands to a statement that does nothing. */
#define WAPBL_PRINTF(mask, a)	do { } while (/*CONSTCOND*/0)
#endif
84
85 /****************************************************************/
86
87 #include <sys/queue.h>
88 #include <sys/vnode.h>
89 #include <sys/buf.h>
90
91 #ifdef _KERNEL
92
/* Forward declarations of the structures used by the kernel API below. */
struct wapbl_entry;
struct wapbl_replay;
struct wapbl;

/*
 * Callback type; wapbl_start() takes two callbacks of this type.
 * NOTE(review): the (daddr_t *, int *, int) arguments presumably describe
 * an array of block addresses, a parallel array of lengths and the number
 * of entries -- confirm against the implementation.
 */
typedef void (*wapbl_flush_fn_t)(struct mount *, daddr_t *, int *, int);
98
/*
 * This structure holds per-transaction log information.
 */
struct wapbl_entry {
	struct wapbl *we_wapbl;		/* presumably the log this entry
					   belongs to -- TODO confirm */
	SIMPLEQ_ENTRY(wapbl_entry) we_entries;	/* queue linkage */
	size_t we_bufcount;		/* Count of unsynced buffers */
	size_t we_reclaimable_bytes;	/* Number of on-disk bytes for this
					   transaction */
	int we_error;
#ifdef WAPBL_DEBUG_BUFBYTES
	/* Only present when WAPBL_DEBUG_BUFBYTES is defined. */
	size_t we_unsynced_bufbytes;	/* Byte count of unsynced buffers */
#endif
};
113
/*
 * Start using a log.  The new log handle is returned through the
 * struct wapbl ** argument.
 * NOTE(review): the daddr_t/size_t/size_t arguments presumably give the
 * log's offset, block count and block size -- confirm against the
 * implementation.
 */
int	wapbl_start(struct wapbl **, struct mount *, struct vnode *, daddr_t,
		    size_t, size_t, struct wapbl_replay *,
		    wapbl_flush_fn_t, wapbl_flush_fn_t);

/* Discard the current transaction, potentially dangerous */
void	wapbl_discard(struct wapbl *);

/* Stop using a log */
int	wapbl_stop(struct wapbl *, int);
124
/*
 * Begin a new transaction or increment transaction recursion
 * level if called while a transaction is already in progress
 * by the current process.
 * NOTE(review): the (const char *, int) arguments are presumably the
 * caller's file name and line number -- confirm.
 */
int	wapbl_begin(struct wapbl *, const char *, int);


/* End a transaction or decrement the transaction recursion level */
void	wapbl_end(struct wapbl *);
135
/*
 * Add a new buffer to the current transaction.  The buffer's
 * data will be copied to the current transaction log and the
 * buffer will be marked B_LOCKED so that it will not be
 * flushed to disk by the syncer or reallocated.
 */
void	wapbl_add_buf(struct wapbl *, struct buf *);

/* Remove a buffer from the current transaction. */
void	wapbl_remove_buf(struct wapbl *, struct buf *);

/*
 * NOTE(review): undocumented in this header; the two long arguments are
 * presumably the buffer's previous size and count -- confirm.
 */
void	wapbl_resize_buf(struct wapbl *, struct buf *, long, long);
148
/*
 * This will flush all completed transactions to disk and
 * start asynchronous writes on the associated buffers.
 */
int	wapbl_flush(struct wapbl *, int);
154
/*
 * Inodes that are allocated but have zero link count
 * must be registered with the current transaction
 * so they may be recorded in the log and cleaned up later.
 * Registration/unregistration of inode numbers already registered is OK.
 */
void	wapbl_register_inode(struct wapbl *, ino_t, mode_t);
void	wapbl_unregister_inode(struct wapbl *, ino_t, mode_t);
163
/*
 * Metadata block deallocations must be registered so
 * that revocation records can be written and to prevent
 * the corresponding blocks from being reused as data
 * blocks until the log is on disk.
 */
void	wapbl_register_deallocation(struct wapbl *, daddr_t, int);
171
/*
 * NOTE(review): judging by the names, these assert that the journal lock
 * is, respectively is not, held -- confirm against the implementation.
 */
void	wapbl_jlock_assert(struct wapbl *wl);
void	wapbl_junlock_assert(struct wapbl *wl);

/*
 * Print the state of a log through the caller-supplied printf-like
 * function `pr'.
 */
void	wapbl_print(struct wapbl *wl, int full, void (*pr)(const char *, ...)
		    __printflike(1, 2));

/* Only declared when WAPBL_DEBUG or DDB is defined. */
#if defined(WAPBL_DEBUG) || defined(DDB)
void	wapbl_dump(struct wapbl *);
#endif

void	wapbl_biodone(struct buf *);

/* struct wapbl_ops is not declared in this header. */
extern struct wapbl_ops wapbl_ops;
185
186 static __inline struct mount *
wapbl_vptomp(struct vnode * vp)187 wapbl_vptomp(struct vnode *vp)
188 {
189 struct mount *mp;
190
191 mp = NULL;
192 if (vp != NULL) {
193 if (vp->v_type == VBLK)
194 mp = spec_node_getmountedfs(vp);
195 else
196 mp = vp->v_mount;
197 }
198
199 return mp;
200 }
201
202 static __inline bool
wapbl_vphaswapbl(struct vnode * vp)203 wapbl_vphaswapbl(struct vnode *vp)
204 {
205 struct mount *mp;
206
207 if (vp == NULL)
208 return false;
209
210 mp = wapbl_vptomp(vp);
211 return mp && mp->mnt_wapbl;
212 }
213
214 #endif /* _KERNEL */
215
216 /****************************************************************/
217 /* Replay support */
218
219 #ifdef WAPBL_INTERNAL
/* List head type for struct wapbl_blk, used by the wr_blkhash table. */
LIST_HEAD(wapbl_blk_head, wapbl_blk);

/*
 * Replay state.  The full definition is only visible when WAPBL_INTERNAL
 * is defined; everyone else sees an incomplete type.
 */
struct wapbl_replay {
	struct vnode *wr_logvp;
	struct vnode *wr_devvp;
	daddr_t wr_logpbn;

	int wr_log_dev_bshift;
	int wr_fs_dev_bshift;
	int64_t wr_circ_off;
	int64_t wr_circ_size;
	uint32_t wr_generation;

	void *wr_scratch;	/* non-null means the replay is open, see
				   wapbl_replay_isopen() */

	struct wapbl_blk_head *wr_blkhash;
	u_long wr_blkhashmask;
	int wr_blkhashcnt;

	off_t wr_inodeshead;
	off_t wr_inodestail;
	int wr_inodescnt;
	/* presumably wr_inodescnt entries -- TODO confirm */
	struct {
		uint32_t wr_inumber;
		uint32_t wr_imode;
	} *wr_inodes;
};
246
/* True when the replay structure's wr_scratch pointer is set. */
#define wapbl_replay_isopen(wr)	((wr)->wr_scratch != NULL)
248
/*
 * Supply these to provide I/O support.
 * NOTE(review): the arguments are presumably data buffer, length in
 * bytes, target vnode and block address -- confirm.
 */
int	wapbl_write(void *, size_t, struct vnode *, daddr_t);
int	wapbl_read(void *, size_t, struct vnode *, daddr_t);
252
253 /****************************************************************/
254 #else
255 struct wapbl_replay;
256 #endif /* WAPBL_INTERNAL */
257
258 /****************************************************************/
259
/*
 * Replay API.  Declared whether or not WAPBL_INTERNAL is defined, so
 * callers can use struct wapbl_replay as an incomplete type.
 * wapbl_replay_start() returns the replay handle through its first
 * argument.
 */
int	wapbl_replay_start(struct wapbl_replay **, struct vnode *,
			   daddr_t, size_t, size_t);
void	wapbl_replay_stop(struct wapbl_replay *);
void	wapbl_replay_free(struct wapbl_replay *);
int	wapbl_replay_write(struct wapbl_replay *, struct vnode *);
int	wapbl_replay_can_read(struct wapbl_replay *, daddr_t, long);
int	wapbl_replay_read(struct wapbl_replay *, void *, daddr_t, long);
267
268 /****************************************************************/
269
270 #endif /* !_SYS_WAPBL_H */
271