/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.14 2008/05/06 00:21:07 dillon Exp $
 */
/*
 * HAMMER dependency flusher thread
 *
 * Meta-data updates create buffer dependencies which are arranged as a
 * hierarchy of lists.
 */

#include "hammer.h"

static void hammer_flusher_thread(void *arg);
static void hammer_flusher_clean_loose_ios(hammer_mount_t hmp);
static void hammer_flusher_flush(hammer_mount_t hmp);
static int hammer_must_finalize_undo(hammer_mount_t hmp);
static void hammer_flusher_finalize(hammer_transaction_t trans);

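/*
 * Signal counts at or above this value cause the flusher loop to skip
 * its short rate-limiting sleep and begin another flush cycle
 * immediately.
 */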
#define HAMMER_FLUSHER_IMMEDIATE	16

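/*
 * Initiate a flush and wait synchronously until the flusher has
 * completed a full pass begun at or after this call.  The signed
 * sequence delta keeps the wait correct if the counters wrap.
 */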
void
hammer_flusher_sync(hammer_mount_t hmp)
{
	int seq;

	if (hmp->flusher_td) {
		seq = hmp->flusher_next;
		if (hmp->flusher_signal == 0) {
			hmp->flusher_signal = HAMMER_FLUSHER_IMMEDIATE;
			wakeup(&hmp->flusher_signal);
		}
		while ((int)(seq - hmp->flusher_done) > 0)
			tsleep(&hmp->flusher_done, 0, "hmrfls", 0);
	}
}

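/*
 * Request an asynchronous flush.  Wakes the flusher if it is idle and
 * returns immediately without waiting for completion.
 */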
void
hammer_flusher_async(hammer_mount_t hmp)
{
	if (hmp->flusher_td) {
		if (hmp->flusher_signal++ == 0)
			wakeup(&hmp->flusher_signal);
	}
}

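/*
 * Reset the flusher sequence numbers and start the flusher thread for
 * this mount.
 */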
void
hammer_flusher_create(hammer_mount_t hmp)
{
	hmp->flusher_signal = 0;
	hmp->flusher_act = 0;
	hmp->flusher_done = 0;
	hmp->flusher_next = 1;
	lwkt_create(hammer_flusher_thread, hmp, &hmp->flusher_td, NULL,
		    0, -1, "hammer");
}

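/*
 * Ask the flusher thread to exit and wait until it has torn itself
 * down.
 */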
void
hammer_flusher_destroy(hammer_mount_t hmp)
{
	if (hmp->flusher_td) {
		hmp->flusher_exiting = 1;
		while (hmp->flusher_td) {
			hmp->flusher_signal = HAMMER_FLUSHER_IMMEDIATE;
			wakeup(&hmp->flusher_signal);
			tsleep(&hmp->flusher_exiting, 0, "hmrwex", 0);
		}
	}
}

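/*
 * Flusher thread main loop.  Each iteration claims the next flush group
 * sequence number (flusher_act), runs the flush, then publishes the
 * completed sequence in flusher_done and wakes any waiters before
 * blocking for more work.
 */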
static void
hammer_flusher_thread(void *arg)
{
	hammer_mount_t hmp = arg;

	for (;;) {
		while (hmp->flusher_lock)
			tsleep(&hmp->flusher_lock, 0, "hmrhld", 0);
		hmp->flusher_act = hmp->flusher_next;
		++hmp->flusher_next;
		hkprintf("F");
		hammer_flusher_clean_loose_ios(hmp);
		hammer_flusher_flush(hmp);
		hammer_flusher_clean_loose_ios(hmp);
		hmp->flusher_done = hmp->flusher_act;

		wakeup(&hmp->flusher_done);

		/*
		 * Wait for activity.
		 */
		if (hmp->flusher_exiting && TAILQ_EMPTY(&hmp->flush_list))
			break;
		hkprintf("E");

		/*
		 * This is a hack until we can dispose of frontend buffer
		 * cache buffers on the frontend.
		 */
		if (hmp->flusher_signal &&
		    hmp->flusher_signal < HAMMER_FLUSHER_IMMEDIATE) {
			--hmp->flusher_signal;
			tsleep(&hmp->flusher_signal, 0, "hmrqwk", hz / 10);
		} else {
			while (hmp->flusher_signal == 0 &&
			       TAILQ_EMPTY(&hmp->flush_list)) {
				tsleep(&hmp->flusher_signal, 0, "hmrwwa", 0);
			}
			hmp->flusher_signal = 0;
		}
	}
	hmp->flusher_td = NULL;
	wakeup(&hmp->flusher_exiting);
	lwkt_exit();
}

static void
hammer_flusher_clean_loose_ios(hammer_mount_t hmp)
{
	hammer_buffer_t buffer;
	hammer_io_t io;

	/*
	 * loose ends - buffers without bp's aren't tracked by the kernel
	 * and can build up, so clean them out.  This can occur when an
	 * IO completes on a buffer with no references left.
	 */
	while ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
		KKASSERT(io->mod_list == &hmp->lose_list);
		TAILQ_REMOVE(io->mod_list, io, mod_entry);
		io->mod_list = NULL;
		hammer_ref(&io->lock);
		buffer = (void *)io;
		hammer_rel_buffer(buffer, 0);
	}
}

/*
 * Flush all inodes in the current flush group, finalizing the UNDO FIFO
 * early if it threatens to fill up.
 */
static void
hammer_flusher_flush(hammer_mount_t hmp)
{
	struct hammer_transaction trans;
	hammer_blockmap_t rootmap;
	hammer_inode_t ip;

	hammer_start_transaction_fls(&trans, hmp);
	rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

	while ((ip = TAILQ_FIRST(&hmp->flush_list)) != NULL) {
		/*
		 * Stop when we hit a different flush group
		 */
		if (ip->flush_group != hmp->flusher_act)
			break;

		/*
		 * Remove the inode from the flush list, inheriting its
		 * reference, then sync it and clean up.
		 */
		TAILQ_REMOVE(&hmp->flush_list, ip, flush_entry);
		ip->error = hammer_sync_inode(ip);
		hammer_flush_inode_done(ip);

		/*
		 * XXX this breaks atomicity
		 */
		if (hammer_must_finalize_undo(hmp)) {
			Debugger("Too many undos!!");
			hammer_flusher_finalize(&trans);
		}
	}
	hammer_flusher_finalize(&trans);
	hammer_done_transaction(&trans);
}

/*
 * If the UNDO area gets over half full we have to flush it.  We can't
 * afford to let the UNDO area become completely full as that would break
 * crash recovery atomicity.
 */
static
int
hammer_must_finalize_undo(hammer_mount_t hmp)
{
	if (hammer_undo_space(hmp) < hammer_undo_max(hmp) / 2) {
		hkprintf("*");
		return(1);
	} else {
		return(0);
	}
}

/*
 * To finalize the flush we finish flushing all undo and data buffers
 * still present, then we update the volume header and flush it,
 * then we flush out the meta-data (which can now be undone).
 *
 * Note that as long as the undo fifo's start and end points do not
 * match, we always must at least update the volume header.
 *
 * The sync_lock is used by other threads to issue modifying operations
 * to HAMMER media without crossing a synchronization boundary or messing
 * up the media synchronization operation.  Specifically, it covers the
 * pruning and reblocking ioctls, and allows the frontend strategy code
 * to allocate media data space.
 */
static
void
hammer_flusher_finalize(hammer_transaction_t trans)
{
	hammer_mount_t hmp = trans->hmp;
	hammer_volume_t root_volume = trans->rootvol;
	hammer_blockmap_t rootmap;
	const int bmsize = sizeof(root_volume->ondisk->vol0_blockmap);
	hammer_io_t io;
	int count;
	int i;

	hammer_lock_ex(&hmp->sync_lock);
	rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

	/*
	 * Sync the blockmap to the root volume ondisk buffer and generate
	 * the appropriate undo record.  We have to generate the UNDO even
	 * though we flush the volume header along with the UNDO fifo update
	 * because the meta-data (including the volume header) is flushed
	 * after the fifo update, not before, and may have to be undone.
	 *
	 * No UNDOs can be created after this point until we finish the
	 * flush.
	 */
	if (root_volume->io.modified &&
	    bcmp(hmp->blockmap, root_volume->ondisk->vol0_blockmap, bmsize)) {
		hammer_modify_volume(trans, root_volume,
			    &root_volume->ondisk->vol0_blockmap,
			    bmsize);
		for (i = 0; i < HAMMER_MAX_ZONES; ++i)
			hammer_crc_set_blockmap(&hmp->blockmap[i]);
		bcopy(hmp->blockmap, root_volume->ondisk->vol0_blockmap,
		      bmsize);
		hammer_modify_volume_done(root_volume);
	}

	/*
	 * Flush the undo bufs, clear the undo cache.
	 */
	hammer_clear_undo_history(hmp);

	count = 0;
	while ((io = TAILQ_FIRST(&hmp->undo_list)) != NULL) {
		KKASSERT(io->modify_refs == 0);
		hammer_ref(&io->lock);
		KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
		hammer_io_flush(io);
		hammer_rel_buffer((hammer_buffer_t)io, 1);
		++count;
	}
	if (count)
		hkprintf("X%d", count);

	/*
	 * Flush data bufs
	 */
	count = 0;
	while ((io = TAILQ_FIRST(&hmp->data_list)) != NULL) {
		KKASSERT(io->modify_refs == 0);
		hammer_ref(&io->lock);
		KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
		hammer_io_flush(io);
		hammer_rel_buffer((hammer_buffer_t)io, 1);
		++count;
	}
	if (count)
		hkprintf("Y%d", count);

	/*
	 * Wait for I/O to complete
	 */
	crit_enter();
	while (hmp->io_running_count)
		tsleep(&hmp->io_running_count, 0, "hmrfl1", 0);
	crit_exit();

	/*
	 * Update the root volume's next_tid field.  This field is updated
	 * without any related undo.
	 */
	if (root_volume->ondisk->vol0_next_tid != hmp->next_tid) {
		hammer_modify_volume(NULL, root_volume, NULL, 0);
		root_volume->ondisk->vol0_next_tid = hmp->next_tid;
		hammer_modify_volume_done(root_volume);
	}

	/*
	 * Update the UNDO FIFO's first_offset.  Same deal.
	 */
	if (rootmap->first_offset != hmp->flusher_undo_start) {
		hammer_modify_volume(NULL, root_volume, NULL, 0);
		rootmap->first_offset = hmp->flusher_undo_start;
		root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX].first_offset = rootmap->first_offset;
		hammer_crc_set_blockmap(&root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]);
		hammer_modify_volume_done(root_volume);
	}
	trans->hmp->flusher_undo_start = rootmap->next_offset;

	/*
	 * Flush the root volume header.
	 *
	 * If a crash occurs while the root volume header is being written
	 * we just have to hope that the undo range has been updated.  It
	 * should be done in one I/O but XXX this won't be perfect.
	 */
	if (root_volume->io.modified) {
		hammer_crc_set_volume(root_volume->ondisk);
		hammer_io_flush(&root_volume->io);
	}

	/*
	 * Wait for I/O to complete
	 */
	crit_enter();
	while (hmp->io_running_count)
		tsleep(&hmp->io_running_count, 0, "hmrfl2", 0);
	crit_exit();

	/*
	 * Flush meta-data.  The meta-data will be undone if we crash
	 * so we can safely flush it asynchronously.
	 */
	count = 0;
	while ((io = TAILQ_FIRST(&hmp->meta_list)) != NULL) {
		KKASSERT(io->modify_refs == 0);
		hammer_ref(&io->lock);
		KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
		hammer_io_flush(io);
		hammer_rel_buffer((hammer_buffer_t)io, 1);
		++count;
	}
	hammer_unlock(&hmp->sync_lock);
	if (count)
		hkprintf("Z%d", count);
}