/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.6 2007/12/14 08:05:39 dillon Exp $
 */
/*
 * IO Primitives and buffer cache management
 *
 * All major data-tracking structures in HAMMER contain a struct hammer_io
 * which is used to manage their backing store.  We use filesystem buffers
 * for backing store and we leave them passively associated with their
 * HAMMER structures.
 *
 * If the kernel tries to release a passively associated buf that we cannot
 * let go of yet, we set B_LOCKED in the buffer and actively release it
 * later, when we can.
 */

#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>

/*
 * Helper routine to disassociate a buffer cache buffer from an I/O
 * structure.
 */
static void
hammer_io_disassociate(union hammer_io_structure *io)
{
	struct buf *bp = io->io.bp;

	LIST_INIT(&bp->b_dep);	/* clear the association */
	bp->b_ops = NULL;
	io->io.bp = NULL;

	switch(io->io.type) {
	case HAMMER_STRUCTURE_VOLUME:
		io->volume.ondisk = NULL;
		io->volume.alist.meta = NULL;
		break;
	case HAMMER_STRUCTURE_SUPERCL:
		io->supercl.ondisk = NULL;
		io->supercl.alist.meta = NULL;
		break;
	case HAMMER_STRUCTURE_CLUSTER:
		io->cluster.ondisk = NULL;
		io->cluster.alist_master.meta = NULL;
		io->cluster.alist_btree.meta = NULL;
		io->cluster.alist_record.meta = NULL;
		io->cluster.alist_mdata.meta = NULL;
		break;
	case HAMMER_STRUCTURE_BUFFER:
		io->buffer.ondisk = NULL;
		io->buffer.alist.meta = NULL;
		break;
	}
	io->io.modified = 0;
	io->io.released = 1;
}

/*
 * Mark a cluster as being closed.  This is done as late as possible,
 * only when we are asked to flush the cluster.
 */
static void
hammer_close_cluster(hammer_cluster_t cluster)
{
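	/*
	 * Wait for any in-progress asynchronous write of the cluster
	 * header to complete before clearing the OPEN flag.
	 */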
	while (cluster->state == HAMMER_CLUSTER_ASYNC)
		tsleep(cluster, 0, "hmrdep", 0);
	if (cluster->state == HAMMER_CLUSTER_OPEN) {
		cluster->state = HAMMER_CLUSTER_IDLE;
		cluster->ondisk->clu_flags &= ~HAMMER_CLUF_OPEN;
		kprintf("CLOSE CLUSTER\n");
		hammer_modify_cluster(cluster);
	}
}

/*
 * Load bp for a HAMMER structure.
 */
int
hammer_io_read(struct vnode *devvp, struct hammer_io *io)
{
	struct buf *bp;
	int error;

	if ((bp = io->bp) == NULL) {
		error = bread(devvp, io->offset, HAMMER_BUFSIZE, &io->bp);
		if (error == 0) {
			bp = io->bp;
			bp->b_ops = &hammer_bioops;
			LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
			BUF_KERNPROC(bp);
		}
		io->modified = 0;	/* no new modifications yet */
		io->released = 0;	/* we hold an active lock on bp */
	} else {
		error = 0;
	}
	return(error);
}

/*
 * Similar to hammer_io_read() but returns a zero'd out buffer instead.
 * vfs_bio_clrbuf() is kinda nasty; enforce serialization against background
 * I/O so we can call it.
 */
int
hammer_io_new(struct vnode *devvp, struct hammer_io *io)
{
	struct buf *bp;

	if ((bp = io->bp) == NULL) {
		io->bp = getblk(devvp, io->offset, HAMMER_BUFSIZE, 0, 0);
		bp = io->bp;
		bp->b_ops = &hammer_bioops;
		LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
		io->released = 0;	/* we hold an active lock on bp */
		BUF_KERNPROC(bp);
	} else {
		if (io->released) {
			regetblk(bp);
			io->released = 0;
			BUF_KERNPROC(bp);
		}
	}
	io->modified = 1;
	vfs_bio_clrbuf(bp);
	return(0);
}

/*
 * This routine is called when a buffer within a cluster is modified.  We
 * mark the cluster open and immediately initiate asynchronous I/O.  Any
 * related hammer_buffer write I/O blocks until our async write completes.
 * This guarantees (inasmuch as the OS can) that the cluster recovery code
 * will see a cluster marked open if a crash occurred while the filesystem
 * still had dirty buffers associated with that cluster.
 */
void
hammer_io_notify_cluster(hammer_cluster_t cluster)
{
	struct hammer_io *io = &cluster->io;

	if (cluster->state == HAMMER_CLUSTER_IDLE) {
		hammer_lock_ex(&cluster->io.lock);
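		/*
		 * Re-check the state now that we hold the lock; another
		 * thread may have opened the cluster while we blocked.
		 */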
		if (cluster->state == HAMMER_CLUSTER_IDLE) {
			if (io->released)
				regetblk(io->bp);
			kprintf("MARK CLUSTER OPEN\n");
			cluster->ondisk->clu_flags |= HAMMER_CLUF_OPEN;
			cluster->state = HAMMER_CLUSTER_ASYNC;
			hammer_modify_cluster(cluster);
			bawrite(io->bp);
			io->released = 1;
			/* leave cluster marked as modified */
		}
		hammer_unlock(&cluster->io.lock);
	}
}

/*
 * This routine is called on the last reference to a hammer structure.  If
 * flush is non-zero we have to completely disassociate the bp from the
 * structure (which may involve blocking).  Otherwise we can leave the bp
 * passively associated with the structure.
 *
 * The caller is holding io->lock exclusively.
 */
void
hammer_io_release(struct hammer_io *io, int flush)
{
	union hammer_io_structure *iou = (void *)io;
	hammer_cluster_t cluster;
	struct buf *bp;

	if ((bp = io->bp) != NULL) {
		/*
		 * If neither we nor the kernel want to flush the bp, we can
		 * stop here.  Make sure the bp is passively released
		 * before returning.  Even though we are still holding it,
		 * we want to be notified when the kernel wishes to flush
		 * it out, so make sure B_DELWRI is properly set if we had
		 * made modifications.
		 */
		if (flush == 0 && (bp->b_flags & B_LOCKED) == 0) {
			if ((bp->b_flags & B_DELWRI) == 0 && io->modified) {
				if (io->released)
					regetblk(bp);
				bdwrite(bp);
				io->released = 1;
			} else if (io->released == 0) {
				bqrelse(bp);
				io->released = 1;
			}
			return;
		}

		/*
		 * We've been asked to flush the buffer.
		 *
		 * If this is a hammer_buffer we may have to wait for the
		 * cluster header write to complete.
		 */
		if (iou->io.type == HAMMER_STRUCTURE_BUFFER &&
		    (io->modified || (bp->b_flags & B_DELWRI))) {
			cluster = iou->buffer.cluster;
			while (cluster->state == HAMMER_CLUSTER_ASYNC)
				tsleep(iou->buffer.cluster, 0, "hmrdep", 0);
		}

		/*
		 * If we have an open cluster header, close it.
		 */
		if (iou->io.type == HAMMER_STRUCTURE_CLUSTER) {
			hammer_close_cluster(&iou->cluster);
		}

		/*
		 * Ok, the dependencies are all gone.  Check for the simple
		 * disassociation case.
		 */
		if (io->released && (bp->b_flags & B_LOCKED) == 0 &&
		    (io->modified == 0 || (bp->b_flags & B_DELWRI))) {
			hammer_io_disassociate(iou);
			return;
		}

		/*
		 * Handle the more complex disassociation case.  Acquire the
		 * buffer, clean up B_LOCKED, and deal with the modified
		 * flag.
		 */
		if (io->released)
			regetblk(bp);
		bp->b_flags &= ~B_LOCKED;
		if (io->modified || (bp->b_flags & B_DELWRI))
			bawrite(bp);
		else
			bqrelse(bp);
		io->released = 1;
		hammer_io_disassociate(iou);
	}
}

/*
 * Flush dirty data, if any.
 */
void
hammer_io_flush(struct hammer_io *io, struct hammer_sync_info *info)
{
	struct buf *bp;
	int error;

	if ((bp = io->bp) == NULL)
		return;
	if (bp->b_flags & B_DELWRI)
		io->modified = 1;
	if (io->modified == 0)
		return;
	kprintf("IO FLUSH BP %p TYPE %d REFS %d\n", bp, io->type, io->lock.refs);
	hammer_lock_ex(&io->lock);

	if ((bp = io->bp) != NULL && io->modified) {
		if (io->released)
			regetblk(bp);
		io->released = 1;

		/*
		 * We own the bp now.
		 */
		if (info->waitfor & MNT_WAIT) {
			io->modified = 0;
			error = bwrite(bp);
			if (error)
				info->error = error;
		} else if (io->lock.refs == 1) {
			io->modified = 0;
			bawrite(bp);
		} else {
			kprintf("can't flush, %d refs\n", io->lock.refs);
			/* structure is in-use, don't race the write */
			bqrelse(bp);
		}
	}
	hammer_unlock(&io->lock);
}

/*
 * HAMMER_BIOOPS
 */

/*
 * Pre and post I/O callbacks.
 */
static void hammer_io_deallocate(struct buf *bp);

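/*
 * Pre-I/O callback.  Currently a no-op; the cluster interlock below is
 * disabled.
 */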
static void
hammer_io_start(struct buf *bp)
{
#if 0
	union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep);

	if (io->io.type == HAMMER_STRUCTURE_BUFFER) {
		while (io->buffer.cluster->io_in_progress) {
			kprintf("hammer_io_start: wait for cluster\n");
			tsleep(io->buffer.cluster, 0, "hmrdep", 0);
			kprintf("hammer_io_start: wait for cluster done\n");
		}
	}
#endif
}

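/*
 * Post-I/O callback.  When an asynchronous write of a cluster header
 * completes, transition the cluster from ASYNC to OPEN and wake up
 * anyone blocked on the state change.
 */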
static void
hammer_io_complete(struct buf *bp)
{
	union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep);

	if (io->io.type == HAMMER_STRUCTURE_CLUSTER) {
		if (io->cluster.state == HAMMER_CLUSTER_ASYNC) {
			kprintf("cluster write complete flags %08x\n",
				io->cluster.ondisk->clu_flags);
			io->cluster.state = HAMMER_CLUSTER_OPEN;
			wakeup(&io->cluster);
		}
	}
}

/*
 * Callback from kernel when it wishes to deallocate a passively
 * associated structure.  This can only occur if the buffer is
 * passively associated with the structure.  The kernel has locked
 * the buffer.
 *
 * If we cannot disassociate we set B_LOCKED to prevent the buffer
 * from getting reused.
 */
static void
hammer_io_deallocate(struct buf *bp)
{
	union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep);

	/* XXX memory interlock, spinlock to sync cpus */

	/*
	 * Since the kernel is passing us a locked buffer, the HAMMER
	 * structure had better not believe it has a lock on the buffer.
	 */
	KKASSERT(io->io.released);
	crit_enter();

	/*
	 * First, ref the structure to prevent either the buffer or the
	 * structure from going away or being unexpectedly flushed.
	 */
	hammer_ref(&io->io.lock);

	/*
	 * Buffers can have active references from cached hammer_node's,
	 * even if those nodes are themselves passively cached.  Attempt
	 * to clean them out.  This may not succeed.
	 */
	if (io->io.type == HAMMER_STRUCTURE_BUFFER &&
	    hammer_lock_ex_try(&io->io.lock) == 0) {
		hammer_flush_buffer_nodes(&io->buffer);
		hammer_unlock(&io->io.lock);
	}

	if (hammer_islastref(&io->io.lock)) {
		/*
		 * If we are the only ref left we can disassociate the I/O.
		 * It had better still be in a released state because the
		 * kernel is holding a lock on the buffer.  Any passive
		 * modifications should have already been synchronized with
		 * the buffer.
		 */
		KKASSERT(io->io.released);
		hammer_io_disassociate(io);
		bp->b_flags &= ~B_LOCKED;
		KKASSERT(io->io.modified == 0 || (bp->b_flags & B_DELWRI));

		/*
		 * Perform final rites on the structure.  This can cause
		 * a chain reaction - e.g. last buffer -> last cluster ->
		 * last supercluster -> last volume.
		 */
		switch(io->io.type) {
		case HAMMER_STRUCTURE_VOLUME:
			hammer_rel_volume(&io->volume, 1);
			break;
		case HAMMER_STRUCTURE_SUPERCL:
			hammer_rel_supercl(&io->supercl, 1);
			break;
		case HAMMER_STRUCTURE_CLUSTER:
			hammer_rel_cluster(&io->cluster, 1);
			break;
		case HAMMER_STRUCTURE_BUFFER:
			hammer_rel_buffer(&io->buffer, 1);
			break;
		}
	} else {
		/*
		 * Otherwise tell the kernel not to destroy the buffer.
		 *
		 * We have to unref the structure without performing any
		 * final rites on it to avoid a deadlock.
		 */
		bp->b_flags |= B_LOCKED;
		hammer_unref(&io->io.lock);
	}

	crit_exit();
}

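/*
 * bioops fsync callback.  There is nothing for HAMMER to do here;
 * dirty structures are flushed via hammer_io_flush() instead.
 */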
static int
hammer_io_fsync(struct vnode *vp)
{
	return(0);
}

/*
 * NOTE: will not be called unless we tell the kernel about the
 * bioops.  Unused... we use the mount's VFS_SYNC instead.
 */
static int
hammer_io_sync(struct mount *mp)
{
	return(0);
}

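/*
 * bioops callback to move dependencies from one buffer to another.
 * HAMMER leaves this as a no-op.
 */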
static void
hammer_io_movedeps(struct buf *bp1, struct buf *bp2)
{
}

/*
 * I/O pre-check for reading and writing.  HAMMER only uses this for
 * B_CACHE buffers so checkread just shouldn't happen, but if it does,
 * allow it.
 *
 * Writing is a different case.  We don't want the kernel to try to write
 * out a buffer that HAMMER may be modifying passively or which has a
 * dependency.
 *
 * This code enforces the following write ordering: buffers, then cluster
 * headers, then volume headers.
 */
static int
hammer_io_checkread(struct buf *bp)
{
	return(0);
}

static int
hammer_io_checkwrite(struct buf *bp)
{
	union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);

	if (iou->io.type == HAMMER_STRUCTURE_BUFFER &&
	    iou->buffer.cluster->state == HAMMER_CLUSTER_ASYNC) {
		/*
		 * Cannot write out a cluster buffer if the cluster header
		 * I/O opening the cluster has not completed.
		 */
		kprintf("hammer_io_checkwrite: w/ depend - delayed\n");
		bp->b_flags |= B_LOCKED;
		return(-1);
	} else if (iou->io.lock.refs) {
		/*
		 * Cannot write out a bp if its associated buffer has active
		 * references.
		 */
		kprintf("hammer_io_checkwrite: w/ refs - delayed\n");
		bp->b_flags |= B_LOCKED;
		return(-1);
	} else {
		/*
		 * We're good, but before we can let the kernel proceed we
		 * may have to make some adjustments.
		 */
		if (iou->io.type == HAMMER_STRUCTURE_CLUSTER)
			hammer_close_cluster(&iou->cluster);
		kprintf("hammer_io_checkwrite: ok\n");
		KKASSERT(iou->io.released);
		hammer_io_disassociate(iou);
		return(0);
	}
}

/*
 * Return non-zero if the caller should flush the structure associated
 * with this io sub-structure.
 */
int
hammer_io_checkflush(struct hammer_io *io)
{
	if (io->bp == NULL || (io->bp->b_flags & B_LOCKED))
		return(1);
	return(0);
}

/*
 * Return non-zero if we wish to delay the kernel's attempt to flush
 * this buffer to disk.
 */
static int
hammer_io_countdeps(struct buf *bp, int n)
{
	return(0);
}

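/*
 * The bioops dispatch table, attached to each associated buffer via
 * bp->b_ops in hammer_io_read() and hammer_io_new().
 */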
struct bio_ops hammer_bioops = {
	.io_start	= hammer_io_start,
	.io_complete	= hammer_io_complete,
	.io_deallocate	= hammer_io_deallocate,
	.io_fsync	= hammer_io_fsync,
	.io_sync	= hammer_io_sync,
	.io_movedeps	= hammer_io_movedeps,
	.io_countdeps	= hammer_io_countdeps,
	.io_checkread	= hammer_io_checkread,
	.io_checkwrite	= hammer_io_checkwrite,
};