xref: /dragonfly/sys/vfs/hammer2/hammer2_io.c (revision b0d289c2)
/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  It can be used for OS abstraction, but its main
 * purpose is to allow larger buffers to back hammer2_chain's that use
 * smaller allocations, without causing deadlocks.
 */
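
/*
 * Note on dio->refs (the exact bit assignments live in hammer2.h):
 * the low bits (HAMMER2_DIO_MASK) hold the reference count, while the
 * remaining bits carry atomically-updated state flags used throughout
 * this file: DIO_GOOD (buffer contents valid), DIO_INPROG (a thread
 * owns the I/O), DIO_WAITING (iocbs are queued on dio->iocbq), and
 * DIO_DIRTY (write-back is required on release).
 */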
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
	if (io1->pbase < io2->pbase)
		return(-1);
	if (io1->pbase > io2->pbase)
		return(1);
	return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);

struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;
};

#define HAMMER2_GETBLK_GOOD	0
#define HAMMER2_GETBLK_QUEUED	1
#define HAMMER2_GETBLK_OWNED	2

/*
 * Allocate/Locate the requested dio, reference it, issue or queue iocb.
 */
void
hammer2_io_getblk(hammer2_dev_t *hmp, off_t lbase, int lsize,
		  hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	off_t pbase;
	off_t pmask;
	/*
	 * XXX after free, buffer reuse case w/ different size can clash
	 * with dio cache.  Let's avoid it for now.  Ultimately we need to
	 * invalidate the dio cache when freeing blocks to allow a mix
	 * of 16KB and 64KB block sizes.
	 */
	/*int psize = hammer2_devblksize(lsize);*/
	int psize = HAMMER2_PBUFSIZE;
	int refs;

	pmask = ~(hammer2_off_t)(psize - 1);

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
	lbase &= ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
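	/*
	 * Worked example, assuming HAMMER2_PBUFSIZE is 64KB: a 16KB
	 * logical block encodes radix 14 in the low bits of lbase,
	 * e.g. lbase 0x1234800E.  Masking off the radix gives
	 * 0x12348000, pbase becomes 0x12340000, and the KKASSERT
	 * above verifies the block does not straddle the 64KB
	 * device buffer.
	 */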

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		if ((atomic_fetchadd_int(&dio->refs, 1) &
		     HAMMER2_DIO_MASK) == 0) {
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		hammer2_spin_unsh(&hmp->io_spin);
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->refs = 1;
		hammer2_spin_init(&dio->spin, "h2dio");
		TAILQ_INIT(&dio->iocbq);
		hammer2_spin_ex(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			if ((atomic_fetchadd_int(&xio->refs, 1) &
			     HAMMER2_DIO_MASK) == 0) {
				atomic_add_int(&xio->hmp->iofree_count, -1);
			}
			hammer2_spin_unex(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	}

	/*
	 * Obtain/Validate the buffer.
	 */
	iocb->dio = dio;

	if (dio->act < 5)	/* SMP race ok */
		++dio->act;

	for (;;) {
		refs = dio->refs;
		cpu_ccfence();

		/*
		 * Issue the iocb immediately if the buffer is already good.
		 * Once set GOOD cannot be cleared until refs drops to 0.
		 */
		if (refs & HAMMER2_DIO_GOOD) {
			iocb->callback(iocb);
			break;
		}

		/*
		 * Try to own the DIO by setting INPROG so we can issue
		 * I/O on it.
		 */
		if (refs & HAMMER2_DIO_INPROG) {
			/*
			 * If DIO_INPROG is already set then set WAITING and
			 * queue the iocb.
			 */
			hammer2_spin_ex(&dio->spin);
			if (atomic_cmpset_int(&dio->refs, refs,
					      refs | HAMMER2_DIO_WAITING)) {
				iocb->flags |= HAMMER2_IOCB_ONQ |
					       HAMMER2_IOCB_INPROG;
				TAILQ_INSERT_TAIL(&dio->iocbq, iocb, entry);
				hammer2_spin_unex(&dio->spin);
				break;
			}
			hammer2_spin_unex(&dio->spin);
			/* retry */
		} else {
			/*
			 * If DIO_INPROG is not set then set it and issue the
			 * callback immediately to start I/O.
			 */
			if (atomic_cmpset_int(&dio->refs, refs,
					      refs | HAMMER2_DIO_INPROG)) {
				iocb->flags |= HAMMER2_IOCB_INPROG;
				iocb->callback(iocb);
				break;
			}
			/* retry */
		}
		/* retry */
	}
}
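
/*
 * Typical synchronous caller pattern, mirroring _hammer2_io_new() and
 * hammer2_io_bread() below (my_callback is a placeholder name):
 *
 *	iocb.callback = my_callback;
 *	iocb.cluster = NULL;
 *	iocb.chain = NULL;
 *	iocb.ptr = NULL;
 *	iocb.lbase = lbase;
 *	iocb.lsize = lsize;
 *	iocb.flags = 0;
 *	iocb.error = 0;
 *	hammer2_io_getblk(hmp, lbase, lsize, &iocb);
 *	if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
 *		hammer2_iocb_wait(&iocb);
 */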

/*
 * The originator of the iocb is finished with it.
 */
void
hammer2_io_complete(hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio = iocb->dio;
	hammer2_iocb_t *cbtmp;
	uint32_t orefs;
	uint32_t nrefs;
	uint32_t oflags;
	uint32_t nflags;

	/*
	 * If IOCB_INPROG was not set, completion is synchronous due to the
	 * buffer already being good.  We can simply set IOCB_DONE and return.
	 * In this situation DIO_INPROG is not set and we have no visibility
	 * on dio->bp.
	 */
	if ((iocb->flags & HAMMER2_IOCB_INPROG) == 0) {
		atomic_set_int(&iocb->flags, HAMMER2_IOCB_DONE);
		return;
	}

	/*
	 * The iocb was queued, obtained DIO_INPROG, and its callback was
	 * made.  The callback is now complete.  We still own DIO_INPROG.
	 *
	 * We can set DIO_GOOD if no error occurred, which gives certain
	 * stability guarantees to dio->bp and allows other accessors to
	 * short-cut access.  DIO_GOOD cannot be cleared until the last
	 * ref is dropped.
	 */
	KKASSERT(dio->refs & HAMMER2_DIO_INPROG);
	if (dio->bp) {
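		/*
		 * BUF_KERNPROC() hands the buffer lock over to the kernel
		 * so a thread other than the one that initiated the I/O
		 * (e.g. a chained iocb or the final putblk) may release it.
		 */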
		BUF_KERNPROC(dio->bp);
		if ((dio->bp->b_flags & B_ERROR) == 0) {
			KKASSERT(dio->bp->b_flags & B_CACHE);
			atomic_set_int(&dio->refs, HAMMER2_DIO_GOOD);
		}
	}

	/*
	 * Clean up the dio before marking the iocb as being done.  If another
	 * iocb is pending we chain to it while leaving DIO_INPROG set (it
	 * will call io completion and presumably clear DIO_INPROG).
	 *
	 * Otherwise if no other iocbs are pending we clear DIO_INPROG before
	 * finishing up the iocb.  This means that DIO_INPROG is cleared at
	 * the end of the chain before ANY of the iocbs are marked done.
	 *
	 * NOTE: The TAILQ is not stable until the spin-lock is held.
	 */
	for (;;) {
		orefs = dio->refs;
		nrefs = orefs & ~(HAMMER2_DIO_WAITING | HAMMER2_DIO_INPROG);

		if (orefs & HAMMER2_DIO_WAITING) {
			hammer2_spin_ex(&dio->spin);
			cbtmp = TAILQ_FIRST(&dio->iocbq);
			if (cbtmp) {
				/*
				 * NOTE: flags not adjusted in this case.
				 *	 Flags will be adjusted by the last
				 *	 iocb.
				 */
				TAILQ_REMOVE(&dio->iocbq, cbtmp, entry);
				hammer2_spin_unex(&dio->spin);
				cbtmp->callback(cbtmp);	/* chained */
				break;
			} else if (atomic_cmpset_int(&dio->refs,
						     orefs, nrefs)) {
				hammer2_spin_unex(&dio->spin);
				break;
			}
			hammer2_spin_unex(&dio->spin);
			/* retry */
		} else if (atomic_cmpset_int(&dio->refs, orefs, nrefs)) {
			break;
		} /* else retry */
		/* retry */
	}

	/*
	 * Mark the iocb as done and wakeup any waiters.  This is done after
	 * all iocb chains have been called back and after DIO_INPROG has been
	 * cleared.  This avoids races against ref count drops by the waiting
	 * threads (a hard but not impossible SMP race) which might result in
	 * a 1->0 transition of the refs while DIO_INPROG is still set.
	 */
	for (;;) {
		oflags = iocb->flags;
		cpu_ccfence();
		nflags = oflags;
		nflags &= ~(HAMMER2_IOCB_WAKEUP | HAMMER2_IOCB_INPROG);
		nflags |= HAMMER2_IOCB_DONE;

		if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
			if (oflags & HAMMER2_IOCB_WAKEUP)
				wakeup(iocb);
			/* SMP: iocb is now stale */
			break;
		}
		/* retry */
	}
	iocb = NULL;
}

/*
 * Wait for an iocb's I/O to finish.
 */
void
hammer2_iocb_wait(hammer2_iocb_t *iocb)
{
	uint32_t oflags;
	uint32_t nflags;

	for (;;) {
		oflags = iocb->flags;
		cpu_ccfence();
		nflags = oflags | HAMMER2_IOCB_WAKEUP;
		if (oflags & HAMMER2_IOCB_DONE)
			break;
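		/*
		 * tsleep_interlock() is armed before the WAKEUP flag is
		 * set, so a wakeup() from hammer2_io_complete() cannot be
		 * lost between the cmpset and the tsleep().
		 */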
		tsleep_interlock(iocb, 0);
		if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
			tsleep(iocb, PINTERLOCKED, "h2iocb", hz);
		}
	}
}

/*
 * Release our ref on *diop.
 *
 * On the last ref we must atomically clear DIO_GOOD and set DIO_INPROG,
 * then dispose of the underlying buffer.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	hammer2_iocb_t iocb;
	struct buf *bp;
	off_t peof;
	off_t pbase;
	int psize;
	int refs;

	dio = *diop;
	*diop = NULL;

	/*
	 * Drop refs, on 1->0 transition clear flags, set INPROG.
	 */
	for (;;) {
		refs = dio->refs;

		if ((refs & HAMMER2_DIO_MASK) == 1) {
			if (refs & HAMMER2_DIO_INPROG) {
				hammer2_iocb_t *xcb;

				xcb = TAILQ_FIRST(&dio->iocbq);
				kprintf("BAD REFS dio %p %08x/%08x, cbio %p\n",
					dio, refs, dio->refs, xcb);
				if (xcb)
					kprintf("   IOCB: func=%p dio=%p cl=%p ch=%p ptr=%p\n",
						xcb->callback,
						xcb->dio,
						xcb->cluster,
						xcb->chain,
						xcb->ptr);
			}
			KKASSERT((refs & HAMMER2_DIO_INPROG) == 0);
			if (atomic_cmpset_int(&dio->refs, refs,
					      ((refs - 1) &
					       ~(HAMMER2_DIO_GOOD |
						 HAMMER2_DIO_DIRTY)) |
					      HAMMER2_DIO_INPROG)) {
				break;
			}
			/* retry */
		} else {
			if (atomic_cmpset_int(&dio->refs, refs, refs - 1))
				return;
			/* retry */
		}
		/* retry */
	}

	/*
	 * We have set DIO_INPROG to gain control of the buffer and we have
	 * cleared DIO_GOOD to prevent other accessors from thinking it is
	 * still good.
	 *
	 * We can now dispose of the buffer, and should do it before calling
	 * io_complete() in case there's a race against a new reference
	 * which causes io_complete() to chain and instantiate the bp again.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if (refs & HAMMER2_DIO_GOOD) {
		KKASSERT(bp != NULL);
		if (refs & HAMMER2_DIO_DIRTY) {
			if (hammer2_cluster_enable) {
				peof = (pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				cluster_write(bp, peof, psize, 4);
			} else {
				bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		if (refs & HAMMER2_DIO_DIRTY) {
			bdwrite(bp);
		} else {
			brelse(bp);
		}
	}

	/*
	 * The instant we call io_complete() the dio is a free agent again
	 * and can be ripped out from under us.
	 *
	 * We can clean up our final DIO_INPROG by simulating an iocb
	 * completion.
	 */
	hmp = dio->hmp;				/* extract fields */
	atomic_add_int(&hmp->iofree_count, 1);
	cpu_ccfence();

	iocb.dio = dio;
	iocb.flags = HAMMER2_IOCB_INPROG;
	hammer2_io_complete(&iocb);
	dio = NULL;				/* dio stale */

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
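	/*
	 * iofree_count is re-tested under io_spin below, so threads
	 * racing past the unlocked test do not all run the trim scan.
	 */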
	if (hmp->iofree_count > 1000) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		if (hmp->iofree_count > 1000) {
			info.count = hmp->iofree_count / 2;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}

/*
 * Clean up any dio's with (INPROG | refs) == 0.
 *
 * Called via RB_SCAN from the trim path in hammer2_io_putblk(); the
 * matched dio's are moved to a temporary tree and freed by
 * hammer2_io_cleanup(), which is also used to clean out cached DIOs
 * on umount after all activity has been flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
	struct hammer2_cleanupcb_info *info = arg;
	hammer2_io_t *xio;

	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
		if (dio->act > 0) {
			--dio->act;
			return 0;
		}
		KKASSERT(dio->bp == NULL);
		RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
		xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
		KKASSERT(xio == NULL);
		if (--info->count <= 0)	/* limit scan */
			return(-1);
	}
	return 0;
}

void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
	hammer2_io_t *dio;

	while ((dio = RB_ROOT(tree)) != NULL) {
		RB_REMOVE(hammer2_io_tree, tree, dio);
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
		kfree(dio, M_HAMMER2);
		atomic_add_int(&hammer2_dio_count, -1);
		atomic_add_int(&hmp->iofree_count, -1);
	}
}

/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
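	/*
	 * The low bits of lbase encode the block-size radix (see the
	 * KKASSERT in hammer2_io_getblk()); mask them off before taking
	 * the offset relative to the underlying buffer.
	 */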
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return(bp->b_data + off);
}

/*
 * Helpers for hammer2_io_new*() functions
 */
static
void
hammer2_iocb_new_callback(hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio = iocb->dio;
	int gbctl = (iocb->flags & HAMMER2_IOCB_QUICK) ? GETBLK_NOWAIT : 0;

	/*
	 * If IOCB_INPROG is not set the dio already has a good buffer and we
	 * can't mess with it other than to zero the requested range.
	 *
	 * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
	 * do what needs to be done with dio->bp.
	 */
	if (iocb->flags & HAMMER2_IOCB_INPROG) {
		if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
			if (iocb->lsize == dio->psize) {
				/*
				 * Fully covered buffer, try to optimize to
				 * avoid any I/O.  We might already have the
				 * buffer due to iocb chaining.
				 */
				if (dio->bp == NULL) {
					dio->bp = getblk(dio->hmp->devvp,
							 dio->pbase, dio->psize,
							 gbctl, 0);
				}
				if (dio->bp) {
					vfs_bio_clrbuf(dio->bp);
					dio->bp->b_flags |= B_CACHE;
				}
			} else if (iocb->flags & HAMMER2_IOCB_QUICK) {
				/*
				 * Partial buffer, quick mode.  Do nothing.
				 * Do not instantiate the buffer or try to
				 * mark it B_CACHE because other portions of
				 * the buffer might have to be read by other
				 * accessors.
				 */
			} else if (dio->bp == NULL ||
				   (dio->bp->b_flags & B_CACHE) == 0) {
				/*
				 * Partial buffer, normal mode, requires
				 * read-before-write.  Chain the read.
				 *
				 * We might already have the buffer due to
				 * iocb chaining.  XXX unclear if we really
				 * need to write/release it and reacquire
				 * in that case.
				 *
				 * QUEUE ASYNC I/O, IOCB IS NOT YET COMPLETE.
				 */
				if (dio->bp) {
					if (dio->refs & HAMMER2_DIO_DIRTY)
						bdwrite(dio->bp);
					else
						bqrelse(dio->bp);
					dio->bp = NULL;
				}
				atomic_set_int(&iocb->flags, HAMMER2_IOCB_READ);
				breadcb(dio->hmp->devvp,
					dio->pbase, dio->psize,
					hammer2_io_callback, iocb);
				return;
			} /* else buffer is good */
		} /* else callback from breadcb is complete */
	}
	if (dio->bp) {
		if (iocb->flags & HAMMER2_IOCB_ZERO)
			bzero(hammer2_io_data(dio, iocb->lbase), iocb->lsize);
		atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
	}
	hammer2_io_complete(iocb);
}

static
int
_hammer2_io_new(hammer2_dev_t *hmp, off_t lbase, int lsize,
	        hammer2_io_t **diop, int flags)
{
	hammer2_iocb_t iocb;
	hammer2_io_t *dio;

	iocb.callback = hammer2_iocb_new_callback;
	iocb.cluster = NULL;
	iocb.chain = NULL;
	iocb.ptr = NULL;
	iocb.lbase = lbase;
	iocb.lsize = lsize;
	iocb.flags = flags;
	iocb.error = 0;
	hammer2_io_getblk(hmp, lbase, lsize, &iocb);
	if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
		hammer2_iocb_wait(&iocb);
	dio = *diop = iocb.dio;

	return (iocb.error);
}

int
hammer2_io_new(hammer2_dev_t *hmp, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, lbase, lsize, diop, HAMMER2_IOCB_ZERO));
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, lbase, lsize, diop, 0));
}

int
hammer2_io_newq(hammer2_dev_t *hmp, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, lbase, lsize, diop, HAMMER2_IOCB_QUICK));
}
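
/*
 * Behavior of the wrappers above: hammer2_io_new() zeroes the requested
 * range via HAMMER2_IOCB_ZERO, hammer2_io_newnz() returns the buffer
 * without zeroing, and hammer2_io_newq() passes HAMMER2_IOCB_QUICK so
 * partially covered buffers are neither instantiated nor read.
 */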

static
void
hammer2_iocb_bread_callback(hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio = iocb->dio;
	off_t peof;
	int error;

	/*
	 * If IOCB_INPROG is not set the dio already has a good buffer and we
	 * can't mess with it other than to zero the requested range.
	 *
	 * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
	 * do what needs to be done with dio->bp.
	 */
	if (iocb->flags & HAMMER2_IOCB_INPROG) {
		if (dio->bp && (dio->bp->b_flags & B_CACHE)) {
			/*
			 * Already good, likely due to being chained from
			 * another iocb.
			 */
			error = 0;
		} else if (hammer2_cluster_enable) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			if (dio->bp) {
				bqrelse(dio->bp);
				dio->bp = NULL;
			}
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			error = cluster_read(dio->hmp->devvp, peof, dio->pbase,
					     dio->psize,
					     dio->psize, HAMMER2_PBUFSIZE*4,
					     &dio->bp);
		} else {
			/*
			 * Synchronous I/O for now.
			 */
			if (dio->bp) {
				bqrelse(dio->bp);
				dio->bp = NULL;
			}
			error = bread(dio->hmp->devvp, dio->pbase,
				      dio->psize, &dio->bp);
		}
		if (error) {
			brelse(dio->bp);
			dio->bp = NULL;
		}
	}
	hammer2_io_complete(iocb);
}

int
hammer2_io_bread(hammer2_dev_t *hmp, off_t lbase, int lsize,
		hammer2_io_t **diop)
{
	hammer2_iocb_t iocb;
	hammer2_io_t *dio;

	iocb.callback = hammer2_iocb_bread_callback;
	iocb.cluster = NULL;
	iocb.chain = NULL;
	iocb.ptr = NULL;
	iocb.lbase = lbase;
	iocb.lsize = lsize;
	iocb.flags = 0;
	iocb.error = 0;
	hammer2_io_getblk(hmp, lbase, lsize, &iocb);
	if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
		hammer2_iocb_wait(&iocb);
	dio = *diop = iocb.dio;

	return (iocb.error);
}

/*
 * System buf/bio async callback extracts the iocb and chains
 * to the iocb callback.
 */
void
hammer2_io_callback(struct bio *bio)
{
	struct buf *dbp = bio->bio_buf;
	hammer2_iocb_t *iocb = bio->bio_caller_info1.ptr;
	hammer2_io_t *dio;

	dio = iocb->dio;
	if ((bio->bio_flags & BIO_DONE) == 0)
		bpdone(dbp, 0);
	bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
	dio->bp = bio->bio_buf;
	iocb->callback(iocb);
}

void
hammer2_io_bawrite(hammer2_io_t **diop)
{
	atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
	atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
	atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
	return (0);	/* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
}

void
hammer2_io_setinval(hammer2_io_t *dio, u_int bytes)
{
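	/*
	 * Only a full-buffer invalidation is honored here; partial
	 * invalidations are ignored (see the dio-cache XXX comment in
	 * hammer2_io_getblk()).
	 */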
	if ((u_int)dio->psize == bytes)
		dio->bp->b_flags |= B_INVAL | B_RELBUF;
}

void
hammer2_io_brelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

int
hammer2_io_isdirty(hammer2_io_t *dio)
{
	return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
}