/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.23 2008/03/24 23:50:23 dillon Exp $
 */
/*
 * IO Primitives and buffer cache management
 *
 * All major data-tracking structures in HAMMER contain a struct hammer_io
 * which is used to manage their backing store.  We use filesystem buffers
 * for backing store and we leave them passively associated with their
 * HAMMER structures.
 *
 * If the kernel tries to release a passively associated buf which we cannot
 * yet let go of, we set B_LOCKED in the buffer and then actively release it
 * later when we can.
 */

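/*
 * Illustrative life cycle of a passively associated buffer.  This is a
 * sketch of the conventions implemented below, not a verbatim call site;
 * the actual callers live elsewhere in HAMMER:
 *
 *	hammer_io_init(&buffer->io, HAMMER_STRUCTURE_BUFFER);
 *	error = hammer_io_read(devvp, &buffer->io);
 *	...				(caller uses/modifies the ondisk data)
 *	hammer_io_release(&buffer->io);	(bp left passively associated)
 *
 * If the kernel later calls our io_deallocate bioop while the structure is
 * still referenced or modified, hammer_io_deallocate() sets B_LOCKED and
 * the buffer is disposed of later, e.g. from hammer_io_complete().
 */
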
#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>

static void hammer_io_deallocate(struct buf *bp);
static int hammer_io_checkwrite(struct buf *bp);

/*
 * Initialize an already-zero'd hammer_io structure
 */
void
hammer_io_init(hammer_io_t io, enum hammer_io_type type)
{
	io->type = type;
}
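
/*
 * Assumed usage: the embedding structure is zero'd at allocation time, so
 * only the type field needs to be set here, e.g. (hypothetical call site):
 *
 *	hammer_io_init(&volume->io, HAMMER_STRUCTURE_VOLUME);
 */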

/*
 * Helper routine to disassociate a buffer cache buffer from an I/O
 * structure.  Called with the io structure exclusively locked.
 *
 * The io may have 0 or 1 references depending on who called us.  The
 * caller is responsible for dealing with the refs.
 *
 * This call can only be made when no action is required on the buffer.
 * HAMMER must own the buffer (released == 0) since we mess around with it.
 */
static void
hammer_io_disassociate(hammer_io_structure_t iou, int elseit)
{
	struct buf *bp = iou->io.bp;

	KKASSERT(iou->io.modified == 0);
	buf_dep_init(bp);
	iou->io.bp = NULL;
	bp->b_flags &= ~B_LOCKED;
	if (elseit) {
		KKASSERT(iou->io.released == 0);
		iou->io.released = 1;
		bqrelse(bp);
	} else {
		KKASSERT(iou->io.released);
	}

	switch(iou->io.type) {
	case HAMMER_STRUCTURE_VOLUME:
		iou->volume.ondisk = NULL;
		break;
	case HAMMER_STRUCTURE_BUFFER:
		iou->buffer.ondisk = NULL;
		break;
	}
}

/*
 * Wait for any physical IO to complete
 */
static void
hammer_io_wait(hammer_io_t io)
{
	if (io->running) {
		crit_enter();
		tsleep_interlock(io);	/* interlock before flagging waiting */
		io->waiting = 1;
		for (;;) {
			tsleep(io, 0, "hmrflw", 0);
			if (io->running == 0)
				break;
			tsleep_interlock(io);	/* re-interlock, then re-test */
			io->waiting = 1;
			if (io->running == 0)
				break;
		}
		crit_exit();
	}
}
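
/*
 * The wakeup side of the above loop lives in hammer_io_complete(), which
 * clears io->running when the physical I/O finishes and issues a wakeup()
 * if io->waiting is set.
 */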

/*
 * Load bp for a HAMMER structure.  The io is exclusively locked by the
 * caller.
 */
int
hammer_io_read(struct vnode *devvp, struct hammer_io *io)
{
	struct buf *bp;
	int error;

	if ((bp = io->bp) == NULL) {
		error = bread(devvp, io->offset, HAMMER_BUFSIZE, &io->bp);
		if (error == 0) {
			bp = io->bp;
			bp->b_ops = &hammer_bioops;
			LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
			BUF_KERNPROC(bp);
		}
		io->modified = 0;	/* no new modifications yet */
		io->released = 0;	/* we hold an active lock on bp */
		io->running = 0;
		io->waiting = 0;
	} else {
		error = 0;
	}
	return(error);
}

/*
 * Similar to hammer_io_read() but returns a zero'd out buffer instead.
 * vfs_bio_clrbuf() is kinda nasty, so we enforce serialization against
 * background I/O before calling it.
 *
 * The caller is responsible for calling hammer_modify_*() on the
 * appropriate HAMMER structure.
 */
int
hammer_io_new(struct vnode *devvp, struct hammer_io *io)
{
	struct buf *bp;

	if ((bp = io->bp) == NULL) {
		io->bp = getblk(devvp, io->offset, HAMMER_BUFSIZE, 0, 0);
		bp = io->bp;
		bp->b_ops = &hammer_bioops;
		LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
		io->modified = 0;
		io->released = 0;
		io->running = 0;
		io->waiting = 0;
		BUF_KERNPROC(bp);
	} else {
		if (io->released) {
			regetblk(bp);
			BUF_KERNPROC(bp);
			io->released = 0;
		}
	}
	vfs_bio_clrbuf(bp);
	return(0);
}
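
/*
 * Presumed usage note: hammer_io_new() is for blocks the caller intends to
 * initialize from scratch.  getblk() avoids the device read that bread()
 * performs in hammer_io_read(), and vfs_bio_clrbuf() hands back zero'd
 * contents regardless of what was cached.
 */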

/*
 * This routine is called on the last reference to a hammer structure.
 * The io is usually locked exclusively (but may not be during unmount).
 *
 * If io->flush is set, or B_LOCKED was set indicating that the kernel
 * wanted to recycle the buffer, and there are no dependencies, this
 * function will issue an asynchronous write.
 *
 * If io->waitdep is set this function waits until all I/O has completed
 * before returning.  A clean, idle io with io->flush set is also
 * disassociated from its bp, unless there are still other references.
 */
void
hammer_io_release(struct hammer_io *io)
{
	struct buf *bp;

	if ((bp = io->bp) == NULL)
		return;

#if 0
	/*
	 * If waitdep is set, wait for dependencies
	 */
	while (io->waitdep && TAILQ_FIRST(&io->deplist)) {
		hammer_io_wait(TAILQ_FIRST(&io->deplist));
	}
#endif

	/*
	 * Try to flush a dirty IO to disk if asked to by the caller
	 * or if the kernel tried to flush the buffer in the past.
	 *
	 * The flush will fail if any dependencies are present.
	 */
	if (io->modified && (io->flush || (bp->b_flags & B_LOCKED)))
		hammer_io_flush(io);

	/*
	 * If waitdep is set we wait for the IO to complete.
	 */
	if (io->waitdep && io->running) {
		hammer_io_wait(io);
	}

	/*
	 * Actively or passively release the buffer.  Modified IOs with
	 * dependencies cannot be released.
	 */
	if (io->flush && io->modified == 0 && io->running == 0) {
		if (io->released) {
			regetblk(bp);
			BUF_KERNPROC(bp);
			io->released = 0;
		}
		hammer_io_disassociate((hammer_io_structure_t)io, 1);
	} else if (io->modified) {
		if (io->released == 0) {
			io->released = 1;
			bdwrite(bp);
		}
	} else if (io->released == 0) {
		io->released = 1;
		bqrelse(bp);
	}
}
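
/*
 * Summary of the release paths above, derived from the code:
 *
 *	flush set, clean, idle	-> reacquire if needed, disassociate the bp
 *	modified, still held	-> bdwrite() (delayed write, passive release)
 *	clean, still held	-> bqrelse() (passive release)
 *	already released	-> nothing further to do
 */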

/*
 * This routine is called with a locked IO when a flush is desired and
 * no other references to the structure exist other than ours.  This
 * routine is ONLY called when HAMMER believes it is safe to flush a
 * potentially modified buffer out.
 */
void
hammer_io_flush(struct hammer_io *io)
{
	struct buf *bp;

	/*
	 * Can't flush if the IO isn't modified or if it has dependencies.
	 */
	if (io->modified == 0) {
		io->flush = 0;
		return;
	}

	KKASSERT(io->bp);

	/*
	 * XXX - umount syncs buffers without referencing them, check for 0
	 * also.
	 */
	KKASSERT(io->lock.refs == 0 || io->lock.refs == 1);

	/*
	 * Reset modified to 0 here and re-check it after the IO completes.
	 * This is only legal when lock.refs == 1 (otherwise we might clear
	 * the modified bit while there are still users of the cluster
	 * modifying the data).
	 *
	 * NOTE: We have no dependencies so we don't have to worry about
	 * cluster-opens here.
	 *
	 * Do this before potentially blocking so any attempt to modify the
	 * ondisk while we are blocked blocks waiting for us.
	 */
	io->modified = 0;	/* force interlock */
	io->flush = 0;
	bp = io->bp;

	if (io->released) {
		regetblk(bp);
		/* BUF_KERNPROC(io->bp); */
		io->released = 0;
	}
	io->released = 1;
	io->running = 1;
	bawrite(bp);
}

/************************************************************************
 *				BUFFER DIRTYING				*
 ************************************************************************
 *
 * These routines deal with dependencies created when IO buffers get
 * modified.  The caller must call hammer_modify_*() on a referenced
 * HAMMER structure prior to modifying its on-disk data.
 *
 * Any intent to modify an IO buffer acquires the related bp and imposes
 * various write ordering dependencies.
 */

/*
 * Mark a HAMMER structure as undergoing modification.  If the buffer was
 * passively released, reacquire it so the kernel cannot write it out
 * while the caller is modifying the ondisk data.
 */
static __inline
void
hammer_io_modify(hammer_io_t io)
{
	/*
	 * Shortcut if nothing to do.
	 */
	KKASSERT(io->lock.refs != 0 && io->bp != NULL);
	if (io->modified && io->released == 0)
		return;

	hammer_lock_ex(&io->lock);
	io->modified = 1;
	if (io->released) {
		regetblk(io->bp);
		BUF_KERNPROC(io->bp);
		io->released = 0;
		KKASSERT(io->modified != 0);
	}
	hammer_unlock(&io->lock);
}

void
hammer_modify_volume(hammer_transaction_t trans, hammer_volume_t volume,
		     void *base, int len)
{
	hammer_io_modify(&volume->io);

	if (len) {
		intptr_t rel_offset = (intptr_t)base - (intptr_t)volume->ondisk;
		KKASSERT((rel_offset & ~(intptr_t)HAMMER_BUFMASK) == 0);
		hammer_generate_undo(trans,
			 HAMMER_ENCODE_RAW_VOLUME(volume->vol_no, rel_offset),
			 base, len);
	}
}

/*
 * Caller intends to modify a buffer's ondisk structure.  Mark the buffer
 * modified and generate an undo record covering the affected region.
 */
void
hammer_modify_buffer(hammer_transaction_t trans, hammer_buffer_t buffer,
		     void *base, int len)
{
	hammer_io_modify(&buffer->io);
	if (len) {
		intptr_t rel_offset = (intptr_t)base - (intptr_t)buffer->ondisk;
		KKASSERT((rel_offset & ~(intptr_t)HAMMER_BUFMASK) == 0);
		hammer_generate_undo(trans,
				     buffer->zone2_offset + rel_offset,
				     base, len);
	}
}
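
/*
 * Typical modification pattern (an illustrative sketch; the field name is
 * hypothetical and real call sites live elsewhere in HAMMER).  The
 * hammer_modify_*() call must precede the store so the undo record
 * captures the pre-modification data:
 *
 *	hammer_modify_buffer(trans, buffer, &ondisk->field,
 *			     sizeof(ondisk->field));
 *	ondisk->field = new_value;
 */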

/*
 * Mark an entity as not being dirty any more -- this usually occurs when
 * the governing a-list has freed the entire entity.
 *
 * XXX
 */
void
hammer_io_clear_modify(struct hammer_io *io)
{
#if 0
	struct buf *bp;

	io->modified = 0;
	if ((bp = io->bp) != NULL) {
		if (io->released) {
			regetblk(bp);
			/* BUF_KERNPROC(io->bp); */
		} else {
			io->released = 1;
		}
		if (io->modified == 0) {
			kprintf("hammer_io_clear_modify: cleared %p\n", io);
			bundirty(bp);
			bqrelse(bp);
		} else {
			bdwrite(bp);
		}
	}
#endif
}

/************************************************************************
 *				HAMMER_BIOOPS				*
 ************************************************************************
 *
 */

/*
 * Pre-IO initiation kernel callback - cluster build only
 */
static void
hammer_io_start(struct buf *bp)
{
}

/*
 * Post-IO completion kernel callback
 *
 * NOTE: HAMMER may modify a buffer after initiating I/O.  The modified bit
 * may also be set if we were marking a cluster header open.  Only remove
 * our dependency if the modified bit is clear.
 */
static void
hammer_io_complete(struct buf *bp)
{
	union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);

	KKASSERT(iou->io.released == 1);

	/* XXX DEP REMOVE */

	/*
	 * If no lock references remain and we can acquire the IO lock and
	 * someone at some point wanted us to flush (B_LOCKED test), then
	 * try to dispose of the IO.
	 */
	iou->io.running = 0;
	if (iou->io.waiting) {
		iou->io.waiting = 0;
		wakeup(iou);
	}

	/*
	 * Someone wanted us to flush, try to clean out the buffer.
	 */
	if ((bp->b_flags & B_LOCKED) && iou->io.lock.refs == 0) {
		KKASSERT(iou->io.modified == 0);
		bp->b_flags &= ~B_LOCKED;
		hammer_io_deallocate(bp);
		/* structure may be dead now */
	}
}

/*
 * Callback from kernel when it wishes to deallocate a passively
 * associated structure.  This case can only occur with read-only
 * bp's.
 *
 * If we cannot disassociate we set B_LOCKED to prevent the buffer
 * from getting reused.
 *
 * WARNING: Because this can be called directly by getnewbuf we cannot
 * recurse into the tree.  If a bp cannot be immediately disassociated
 * our only recourse is to set B_LOCKED.
 *
 * WARNING: If the HAMMER structure is passively cached we have to
 * scrap it here.
 */
static void
hammer_io_deallocate(struct buf *bp)
{
	hammer_io_structure_t iou = (void *)LIST_FIRST(&bp->b_dep);

	KKASSERT((bp->b_flags & B_LOCKED) == 0 && iou->io.running == 0);
	if (iou->io.lock.refs > 0 || iou->io.modified) {
		bp->b_flags |= B_LOCKED;
	} else {
		/* XXX interlock against ref or another disassociate */
		/* XXX this can leave HAMMER structures lying around */
		hammer_io_disassociate(iou, 0);
#if 0
		switch(iou->io.type) {
		case HAMMER_STRUCTURE_VOLUME:
			hammer_rel_volume(&iou->volume, 1);
			break;
		case HAMMER_STRUCTURE_BUFFER:
			hammer_rel_buffer(&iou->buffer, 1);
			break;
		}
#endif
	}
}

static int
hammer_io_fsync(struct vnode *vp)
{
	return(0);
}

/*
 * NOTE: will not be called unless we tell the kernel about the
 * bioops.  Unused... we use the mount's VFS_SYNC instead.
 */
static int
hammer_io_sync(struct mount *mp)
{
	return(0);
}

static void
hammer_io_movedeps(struct buf *bp1, struct buf *bp2)
{
}

/*
 * I/O pre-check for reading and writing.  HAMMER only uses this for
 * B_CACHE buffers, so checkread should not normally be called; if it
 * is, allow it.
 *
 * Writing is a different case.  We don't want the kernel to try to write
 * out a buffer that HAMMER may be modifying passively or which has a
 * dependency.
 *
 * This code enforces the following write ordering: buffers, then cluster
 * headers, then volume headers.
 */
static int
hammer_io_checkread(struct buf *bp)
{
	return(0);
}

static int
hammer_io_checkwrite(struct buf *bp)
{
	union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);

	/*
	 * We are called from the kernel on delayed-write buffers, and
	 * called from hammer_io_flush() on flush requests.  There should
	 * be no dependencies in either case.
	 *
	 * In the case of delayed-writes, the introduction of a dependency
	 * will block until the bp can be reacquired, and the bp is then
	 * simply not released until the dependency can be satisfied.
	 *
	 * We can only clear the modified bit when entered from the kernel
	 * if io.lock.refs == 0.
	 */
	if (iou->io.lock.refs == 0) {
		iou->io.modified = 0;
	}
	return(0);
}

/*
 * Return non-zero if the caller should flush the structure associated
 * with this io sub-structure.
 */
int
hammer_io_checkflush(struct hammer_io *io)
{
	if (io->bp == NULL || (io->bp->b_flags & B_LOCKED)) {
		return(1);
	}
	return(0);
}

/*
 * Return non-zero if we wish to delay the kernel's attempt to flush
 * this buffer to disk.
 */
static int
hammer_io_countdeps(struct buf *bp, int n)
{
	return(0);
}

struct bio_ops hammer_bioops = {
	.io_start	= hammer_io_start,
	.io_complete	= hammer_io_complete,
	.io_deallocate	= hammer_io_deallocate,
	.io_fsync	= hammer_io_fsync,
	.io_sync	= hammer_io_sync,
	.io_movedeps	= hammer_io_movedeps,
	.io_countdeps	= hammer_io_countdeps,
	.io_checkread	= hammer_io_checkread,
	.io_checkwrite	= hammer_io_checkwrite,
};
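
/*
 * The kernel reaches these callbacks through bp->b_ops, which
 * hammer_io_read() and hammer_io_new() point at this table, and each
 * callback recovers the owning HAMMER structure from the buffer's
 * dependency list, as in:
 *
 *	hammer_io_structure_t iou = (void *)LIST_FIRST(&bp->b_dep);
 */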