xref: /dragonfly/sys/vfs/hammer2/hammer2_io.c (revision 8bf5b238)
1 /*
2  * Copyright (c) 2013-2018 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include "hammer2.h"
36 
/*
 * Operation codes accepted by hammer2_io_getblk().
 */
#define HAMMER2_DOP_READ	1	/* read the buffer from media */
#define HAMMER2_DOP_NEW		2	/* zero the logical area, mark dirty */
#define HAMMER2_DOP_NEWNZ	3	/* mark dirty without zeroing */
#define HAMMER2_DOP_READQ	4	/* READ, only if the DIO already exists */
41 
42 /*
43  * Implements an abstraction layer for synchronous and asynchronous
44  * buffered device I/O.  Can be used as an OS-abstraction but the main
45  * purpose is to allow larger buffers to be used against hammer2_chain's
46  * using smaller allocations, without causing deadlocks.
47  *
48  * The DIOs also record temporary state with limited persistence.  This
49  * feature is used to keep track of dedupable blocks.
50  */
51 static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
52 static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);
53 
54 static int
55 hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
56 {
57 	if (io1->pbase < io2->pbase)
58 		return(-1);
59 	if (io1->pbase > io2->pbase)
60 		return(1);
61 	return(0);
62 }
63 
/*
 * Instantiate the hammer2_io_tree red-black tree.  The tree is keyed on
 * the pbase field so lookups can be made directly with an offset.
 */
RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode,
	      hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode,
	     hammer2_io_cmp, off_t, pbase);
67 
68 struct hammer2_cleanupcb_info {
69 	struct hammer2_io_tree tmptree;
70 	int	count;
71 };
72 
#if 0
/*
 * (Disabled code.)  Build a 64-bit mask with one bit per 1KB unit of the
 * physical buffer, covering the (off, bytes) range relative to dio->pbase.
 * Returns 0 for ranges smaller than 1KB.
 */
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int shift;

	/* smaller chunks not supported */
	if (bytes < 1024)
		return 0;

	/*
	 * Index of the first 1KB unit within the physical buffer.
	 */
	shift = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
		 HAMMER2_PBUFMASK) >> 10;

	/*
	 * A request covering the whole physical buffer selects every bit.
	 * Handled separately so the shift below never reaches 64.
	 */
	if (shift == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);

	mask = (((uint64_t)1U << (bytes >> 10)) - 1) << shift;

	return mask;
}
#endif
97 
98 /*
99  * Returns the DIO corresponding to the data|radix, creating it if necessary.
100  *
101  * If createit is 0, NULL can be returned indicating that the DIO does not
102  * exist.  (btype) is ignored when createit is 0.
103  */
104 static __inline
105 hammer2_io_t *
106 hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
107 		 int createit, int *isgoodp)
108 {
109 	hammer2_io_t *dio;
110 	hammer2_io_t *xio;
111 	hammer2_key_t lbase;
112 	hammer2_key_t pbase;
113 	hammer2_key_t pmask;
114 	uint64_t refs;
115 	int lsize;
116 	int psize;
117 
118 	psize = HAMMER2_PBUFSIZE;
119 	pmask = ~(hammer2_off_t)(psize - 1);
120 	lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
121 	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
122 	pbase = lbase & pmask;
123 
124 	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
125 		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
126 			pbase, lbase, lsize, pmask);
127 	}
128 	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
129 	*isgoodp = 0;
130 
131 	/*
132 	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
133 	 */
134 	hammer2_spin_sh(&hmp->io_spin);
135 	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
136 	if (dio) {
137 		refs = atomic_fetchadd_64(&dio->refs, 1);
138 		if ((refs & HAMMER2_DIO_MASK) == 0) {
139 			atomic_add_int(&dio->hmp->iofree_count, -1);
140 		}
141 		if (refs & HAMMER2_DIO_GOOD)
142 			*isgoodp = 1;
143 		hammer2_spin_unsh(&hmp->io_spin);
144 	} else if (createit) {
145 		refs = 0;
146 		hammer2_spin_unsh(&hmp->io_spin);
147 		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
148 		dio->hmp = hmp;
149 		dio->pbase = pbase;
150 		dio->psize = psize;
151 		dio->btype = btype;
152 		dio->refs = refs + 1;
153 		dio->act = 5;
154 		hammer2_spin_ex(&hmp->io_spin);
155 		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
156 		if (xio == NULL) {
157 			atomic_add_int(&hammer2_dio_count, 1);
158 			hammer2_spin_unex(&hmp->io_spin);
159 		} else {
160 			refs = atomic_fetchadd_64(&xio->refs, 1);
161 			if ((refs & HAMMER2_DIO_MASK) == 0)
162 				atomic_add_int(&xio->hmp->iofree_count, -1);
163 			if (refs & HAMMER2_DIO_GOOD)
164 				*isgoodp = 1;
165 			hammer2_spin_unex(&hmp->io_spin);
166 			kfree(dio, M_HAMMER2);
167 			dio = xio;
168 		}
169 	} else {
170 		hammer2_spin_unsh(&hmp->io_spin);
171 		return NULL;
172 	}
173 	dio->ticks = ticks;
174 	if (dio->act < 10)
175 		++dio->act;
176 
177 	return dio;
178 }
179 
180 /*
181  * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
182  * a buffer.  If set the buffer already exists and is good to go.
183  */
184 hammer2_io_t *
185 hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize, int op)
186 {
187 	hammer2_io_t *dio;
188 	off_t peof;
189 	uint64_t orefs;
190 	uint64_t nrefs;
191 	int isgood;
192 	int error;
193 	int hce;
194 	int bflags;
195 
196 	bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
197 	bflags |= B_KVABIO;
198 
199 	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
200 
201 	if (op == HAMMER2_DOP_READQ) {
202 		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
203 		if (dio == NULL)
204 			return NULL;
205 		op = HAMMER2_DOP_READ;
206 	} else {
207 		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
208 	}
209 
210 	for (;;) {
211 		orefs = dio->refs;
212 		cpu_ccfence();
213 
214 		/*
215 		 * Buffer is already good, handle the op and return.
216 		 */
217 		if (orefs & HAMMER2_DIO_GOOD) {
218 			if (isgood == 0)
219 				cpu_mfence();
220 			bkvasync(dio->bp);
221 
222 			switch(op) {
223 			case HAMMER2_DOP_NEW:
224 				bzero(hammer2_io_data(dio, lbase), lsize);
225 				/* fall through */
226 			case HAMMER2_DOP_NEWNZ:
227 				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
228 				break;
229 			case HAMMER2_DOP_READ:
230 			default:
231 				/* nothing to do */
232 				break;
233 			}
234 			return (dio);
235 		}
236 
237 		/*
238 		 * Try to own the DIO
239 		 */
240 		if (orefs & HAMMER2_DIO_INPROG) {
241 			nrefs = orefs | HAMMER2_DIO_WAITING;
242 			tsleep_interlock(dio, 0);
243 			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
244 				tsleep(dio, PINTERLOCKED, "h2dio", hz);
245 			}
246 			/* retry */
247 		} else {
248 			nrefs = orefs | HAMMER2_DIO_INPROG;
249 			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
250 				break;
251 			}
252 		}
253 	}
254 
255 	/*
256 	 * We break to here if GOOD is not set and we acquired INPROG for
257 	 * the I/O.
258 	 */
259 	KKASSERT(dio->bp == NULL);
260 	if (btype == HAMMER2_BREF_TYPE_DATA)
261 		hce = hammer2_cluster_data_read;
262 	else
263 		hce = hammer2_cluster_meta_read;
264 
265 	error = 0;
266 	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
267 	    dio->psize == lsize) {
268 		switch(op) {
269 		case HAMMER2_DOP_NEW:
270 		case HAMMER2_DOP_NEWNZ:
271 			dio->bp = getblk(dio->hmp->devvp,
272 					 dio->pbase, dio->psize,
273 					 GETBLK_KVABIO, 0);
274 			if (op == HAMMER2_DOP_NEW) {
275 				bkvasync(dio->bp);
276 				bzero(dio->bp->b_data, dio->psize);
277 			}
278 			atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
279 			break;
280 		case HAMMER2_DOP_READ:
281 		default:
282 			if (hce > 0) {
283 				/*
284 				 * Synchronous cluster I/O for now.
285 				 */
286 				peof = (dio->pbase + HAMMER2_SEGMASK64) &
287 				       ~HAMMER2_SEGMASK64;
288 				dio->bp = NULL;
289 				error = cluster_readx(dio->hmp->devvp,
290 						     peof, dio->pbase,
291 						     dio->psize, bflags,
292 						     dio->psize,
293 						     HAMMER2_PBUFSIZE*hce,
294 						     &dio->bp);
295 			} else {
296 				dio->bp = NULL;
297 				error = breadnx(dio->hmp->devvp, dio->pbase,
298 						dio->psize, bflags,
299 					        NULL, NULL, 0, &dio->bp);
300 			}
301 		}
302 	} else {
303 		if (hce > 0) {
304 			/*
305 			 * Synchronous cluster I/O for now.
306 			 */
307 			peof = (dio->pbase + HAMMER2_SEGMASK64) &
308 			       ~HAMMER2_SEGMASK64;
309 			error = cluster_readx(dio->hmp->devvp,
310 					      peof, dio->pbase, dio->psize,
311 					      bflags,
312 					      dio->psize, HAMMER2_PBUFSIZE*hce,
313 					      &dio->bp);
314 		} else {
315 			error = breadnx(dio->hmp->devvp, dio->pbase,
316 				        dio->psize, bflags,
317 					NULL, NULL, 0, &dio->bp);
318 		}
319 		if (dio->bp) {
320 			/*
321 			 * Handle NEW flags
322 			 */
323 			switch(op) {
324 			case HAMMER2_DOP_NEW:
325 				bkvasync(dio->bp);
326 				bzero(hammer2_io_data(dio, lbase), lsize);
327 				/* fall through */
328 			case HAMMER2_DOP_NEWNZ:
329 				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
330 				break;
331 			case HAMMER2_DOP_READ:
332 			default:
333 				break;
334 			}
335 
336 			/*
337 			 * Tell the kernel that the buffer cache is not
338 			 * meta-data based on the btype.  This allows
339 			 * swapcache to distinguish between data and
340 			 * meta-data.
341 			 */
342 			switch(btype) {
343 			case HAMMER2_BREF_TYPE_DATA:
344 				dio->bp->b_flags |= B_NOTMETA;
345 				break;
346 			default:
347 				break;
348 			}
349 		}
350 	}
351 
352 	if (dio->bp) {
353 		bkvasync(dio->bp);
354 		BUF_KERNPROC(dio->bp);
355 		dio->bp->b_flags &= ~B_AGE;
356 	}
357 	dio->error = error;
358 
359 	/*
360 	 * Clear INPROG and WAITING, set GOOD wake up anyone waiting.
361 	 */
362 	for (;;) {
363 		orefs = dio->refs;
364 		cpu_ccfence();
365 		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
366 		if (error == 0)
367 			nrefs |= HAMMER2_DIO_GOOD;
368 		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
369 			if (orefs & HAMMER2_DIO_WAITING)
370 				wakeup(dio);
371 			break;
372 		}
373 		cpu_pause();
374 	}
375 
376 	/* XXX error handling */
377 
378 	return dio;
379 }
380 
381 /*
382  * Release our ref on *diop.
383  *
384  * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
385  * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
386  */
387 void
388 hammer2_io_putblk(hammer2_io_t **diop)
389 {
390 	hammer2_dev_t *hmp;
391 	hammer2_io_t *dio;
392 	struct buf *bp;
393 	off_t pbase;
394 	int psize;
395 	int dio_limit;
396 	uint64_t orefs;
397 	uint64_t nrefs;
398 
399 	dio = *diop;
400 	*diop = NULL;
401 	hmp = dio->hmp;
402 
403 	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);
404 
405 	/*
406 	 * Drop refs.
407 	 *
408 	 * On the 1->0 transition clear GOOD and set INPROG, and break.
409 	 * On any other transition we can return early.
410 	 */
411 	for (;;) {
412 		orefs = dio->refs;
413 		cpu_ccfence();
414 
415 		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
416 		    (orefs & HAMMER2_DIO_INPROG) == 0) {
417 			/*
418 			 * Lastdrop case, INPROG can be set.
419 			 */
420 			nrefs = orefs - 1;
421 			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
422 			nrefs |= HAMMER2_DIO_INPROG;
423 			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
424 				break;
425 		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
426 			/*
427 			 * Lastdrop case, INPROG already set.  We must
428 			 * wait for INPROG to clear.
429 			 */
430 			nrefs = orefs | HAMMER2_DIO_WAITING;
431 			tsleep_interlock(dio, 0);
432 			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
433 				tsleep(dio, PINTERLOCKED, "h2dio", hz);
434 			}
435 			/* retry */
436 		} else {
437 			/*
438 			 * Normal drop case.
439 			 */
440 			nrefs = orefs - 1;
441 			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
442 				return;
443 			/* retry */
444 		}
445 		cpu_pause();
446 		/* retry */
447 	}
448 
449 	/*
450 	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
451 	 * have been cleared.  iofree_count has not yet been incremented,
452 	 * note that another accessor race will decrement iofree_count so
453 	 * we have to increment it regardless.
454 	 *
455 	 * We can now dispose of the buffer, and should do it before calling
456 	 * io_complete() in case there's a race against a new reference
457 	 * which causes io_complete() to chain and instantiate the bp again.
458 	 */
459 	pbase = dio->pbase;
460 	psize = dio->psize;
461 	bp = dio->bp;
462 	dio->bp = NULL;
463 
464 	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
465 		/*
466 		 * Non-errored disposal of bp
467 		 */
468 		if (orefs & HAMMER2_DIO_DIRTY) {
469 			dio_write_stats_update(dio, bp);
470 
471 			/*
472 			 * Allows dirty buffers to accumulate and
473 			 * possibly be canceled (e.g. by a 'rm'),
474 			 * will burst-write later.  Allow the kernel
475 			 * to cluster the dirty buffers.
476 			 *
477 			 * NOTE: Do not use cluster_write() here.  The
478 			 *	 problem is that due to the way chains
479 			 *	 are locked, buffers are cycled in and out
480 			 *	 quite often so the disposal here is not
481 			 *	 necessarily the final disposal.  Avoid
482 			 *	 excessive rewriting of the same blocks
483 			 *	 by using bdwrite().
484 			 */
485 #if 0
486 			off_t peof;
487 			int hce;
488 
489 			if ((hce = hammer2_cluster_write) > 0) {
490 				/*
491 				 * Allows write-behind to keep the buffer
492 				 * cache sane.
493 				 */
494 				peof = (pbase + HAMMER2_SEGMASK64) &
495 				       ~HAMMER2_SEGMASK64;
496 				bp->b_flags |= B_CLUSTEROK;
497 				cluster_write(bp, peof, psize, hce);
498 			} else
499 #endif
500 			{
501 				bp->b_flags |= B_CLUSTEROK;
502 				bdwrite(bp);
503 			}
504 		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
505 			brelse(bp);
506 		} else {
507 			bqrelse(bp);
508 		}
509 	} else if (bp) {
510 		/*
511 		 * Errored disposal of bp
512 		 */
513 		brelse(bp);
514 	}
515 
516 	/*
517 	 * Update iofree_count before disposing of the dio
518 	 */
519 	hmp = dio->hmp;
520 	atomic_add_int(&hmp->iofree_count, 1);
521 
522 	/*
523 	 * Clear INPROG, GOOD, and WAITING
524 	 */
525 	for (;;) {
526 		orefs = dio->refs;
527 		cpu_ccfence();
528 		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
529 				  HAMMER2_DIO_WAITING);
530 		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
531 			if (orefs & HAMMER2_DIO_WAITING)
532 				wakeup(dio);
533 			break;
534 		}
535 		cpu_pause();
536 	}
537 
538 	/*
539 	 * We cache free buffers so re-use cases can use a shared lock, but
540 	 * if too many build up we have to clean them out.
541 	 */
542 	dio_limit = hammer2_dio_limit;
543 	if (dio_limit < 256)
544 		dio_limit = 256;
545 	if (dio_limit > 1024*1024)
546 		dio_limit = 1024*1024;
547 	if (hmp->iofree_count > dio_limit) {
548 		struct hammer2_cleanupcb_info info;
549 
550 		RB_INIT(&info.tmptree);
551 		hammer2_spin_ex(&hmp->io_spin);
552 		if (hmp->iofree_count > dio_limit) {
553 			info.count = hmp->iofree_count / 5;
554 			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
555 				hammer2_io_cleanup_callback, &info);
556 		}
557 		hammer2_spin_unex(&hmp->io_spin);
558 		hammer2_io_cleanup(hmp, &info.tmptree);
559 	}
560 }
561 
562 /*
563  * Cleanup any dio's with (INPROG | refs) == 0.
564  *
565  * Called to clean up cached DIOs on umount after all activity has been
566  * flushed.
567  */
568 static
569 int
570 hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
571 {
572 	struct hammer2_cleanupcb_info *info = arg;
573 	hammer2_io_t *xio;
574 
575 	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
576 		if (dio->act > 0) {
577 			int act;
578 
579 			act = dio->act - (ticks - dio->ticks) / hz - 1;
580 			if (act > 0) {
581 				dio->act = act;
582 				return 0;
583 			}
584 			dio->act = 0;
585 		}
586 		KKASSERT(dio->bp == NULL);
587 		if (info->count > 0) {
588 			RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
589 			xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
590 			KKASSERT(xio == NULL);
591 			--info->count;
592 		}
593 	}
594 	return 0;
595 }
596 
597 void
598 hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
599 {
600 	hammer2_io_t *dio;
601 
602 	while ((dio = RB_ROOT(tree)) != NULL) {
603 		RB_REMOVE(hammer2_io_tree, tree, dio);
604 		KKASSERT(dio->bp == NULL &&
605 		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
606 		if (dio->refs & HAMMER2_DIO_DIRTY) {
607 			kprintf("hammer2_io_cleanup: Dirty buffer "
608 				"%016jx/%d (bp=%p)\n",
609 				dio->pbase, dio->psize, dio->bp);
610 		}
611 		kfree(dio, M_HAMMER2);
612 		atomic_add_int(&hammer2_dio_count, -1);
613 		atomic_add_int(&hmp->iofree_count, -1);
614 	}
615 }
616 
617 /*
618  * Returns a pointer to the requested data.
619  */
620 char *
621 hammer2_io_data(hammer2_io_t *dio, off_t lbase)
622 {
623 	struct buf *bp;
624 	int off;
625 
626 	bp = dio->bp;
627 	KKASSERT(bp != NULL);
628 	bkvasync(bp);
629 	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
630 	KKASSERT(off >= 0 && off < bp->b_bufsize);
631 	return(bp->b_data + off);
632 }
633 
634 int
635 hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
636 	       hammer2_io_t **diop)
637 {
638 	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
639 	return ((*diop)->error);
640 }
641 
642 int
643 hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
644 		 hammer2_io_t **diop)
645 {
646 	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
647 	return ((*diop)->error);
648 }
649 
650 int
651 hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
652 		hammer2_io_t **diop)
653 {
654 	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_READ);
655 	return ((*diop)->error);
656 }
657 
658 hammer2_io_t *
659 hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize)
660 {
661 	hammer2_io_t *dio;
662 
663 	dio = hammer2_io_getblk(hmp, 0, lbase, lsize, HAMMER2_DOP_READQ);
664 	return dio;
665 }
666 
667 void
668 hammer2_io_bawrite(hammer2_io_t **diop)
669 {
670 	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
671 	hammer2_io_putblk(diop);
672 }
673 
674 void
675 hammer2_io_bdwrite(hammer2_io_t **diop)
676 {
677 	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
678 	hammer2_io_putblk(diop);
679 }
680 
681 int
682 hammer2_io_bwrite(hammer2_io_t **diop)
683 {
684 	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
685 	hammer2_io_putblk(diop);
686 	return (0);	/* XXX */
687 }
688 
689 void
690 hammer2_io_setdirty(hammer2_io_t *dio)
691 {
692 	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
693 }
694 
695 /*
696  * This routine is called when a MODIFIED chain is being DESTROYED,
697  * in an attempt to allow the related buffer cache buffer to be
698  * invalidated and discarded instead of flushing it to disk.
699  *
700  * At the moment this case is only really useful for file meta-data.
701  * File data is already handled via the logical buffer cache associated
702  * with the vnode, and will be discarded if it was never flushed to disk.
703  * File meta-data may include inodes, directory entries, and indirect blocks.
704  *
705  * XXX
706  * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
707  * invalidated might be smaller.  Most of the meta-data structures above
708  * are in the 'smaller' category.  For now, don't try to invalidate the
709  * data areas.
710  */
711 void
712 hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
713 {
714 	/* NOP */
715 }
716 
717 void
718 hammer2_io_brelse(hammer2_io_t **diop)
719 {
720 	hammer2_io_putblk(diop);
721 }
722 
723 void
724 hammer2_io_bqrelse(hammer2_io_t **diop)
725 {
726 	hammer2_io_putblk(diop);
727 }
728 
729 /*
730  * Set dedup validation bits in a DIO.  We do not need the buffer cache
731  * buffer for this.  This must be done concurrent with setting bits in
732  * the freemap so as to interlock with bulkfree's clearing of those bits.
733  */
734 void
735 hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
736 {
737 	hammer2_io_t *dio;
738 	uint64_t mask;
739 	int lsize;
740 	int isgood;
741 
742 	dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
743 	lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
744 	mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
745 	atomic_clear_64(&dio->dedup_valid, mask);
746 	atomic_set_64(&dio->dedup_alloc, mask);
747 	hammer2_io_putblk(&dio);
748 }
749 
750 /*
751  * Clear dedup validation bits in a DIO.  This is typically done when
752  * a modified chain is destroyed or by the bulkfree code.  No buffer
753  * is needed for this operation.  If the DIO no longer exists it is
754  * equivalent to the bits not being set.
755  */
756 void
757 hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
758 			hammer2_off_t data_off, u_int bytes)
759 {
760 	hammer2_io_t *dio;
761 	uint64_t mask;
762 	int isgood;
763 
764 	if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
765 		return;
766 	if (btype != HAMMER2_BREF_TYPE_DATA)
767 		return;
768 	dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
769 	if (dio) {
770 		if (data_off < dio->pbase ||
771 		    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
772 		    dio->pbase + dio->psize) {
773 			panic("hammer2_dedup_delete: DATAOFF BAD "
774 			      "%016jx/%d %016jx\n",
775 			      data_off, bytes, dio->pbase);
776 		}
777 		mask = hammer2_dedup_mask(dio, data_off, bytes);
778 		atomic_clear_64(&dio->dedup_alloc, mask);
779 		atomic_clear_64(&dio->dedup_valid, mask);
780 		hammer2_io_putblk(&dio);
781 	}
782 }
783 
784 /*
785  * Assert that dedup allocation bits in a DIO are not set.  This operation
786  * does not require a buffer.  The DIO does not need to exist.
787  */
788 void
789 hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
790 {
791 	hammer2_io_t *dio;
792 	int isgood;
793 
794 	dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
795 			       0, &isgood);
796 	if (dio) {
797 		KASSERT((dio->dedup_alloc &
798 			  hammer2_dedup_mask(dio, data_off, bytes)) == 0,
799 			("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
800 			data_off,
801 			bytes,
802 			hammer2_dedup_mask(dio, data_off, bytes),
803 			dio->dedup_alloc));
804 		hammer2_io_putblk(&dio);
805 	}
806 }
807 
808 static
809 void
810 dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
811 {
812 	long *counterp;
813 
814 	if (bp->b_flags & B_DELWRI)
815 		return;
816 
817 	switch(dio->btype) {
818 	case 0:
819 		return;
820 	case HAMMER2_BREF_TYPE_DATA:
821 		counterp = &hammer2_iod_file_write;
822 		break;
823 	case HAMMER2_BREF_TYPE_DIRENT:
824 	case HAMMER2_BREF_TYPE_INODE:
825 		counterp = &hammer2_iod_meta_write;
826 		break;
827 	case HAMMER2_BREF_TYPE_INDIRECT:
828 		counterp = &hammer2_iod_indr_write;
829 		break;
830 	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
831 	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
832 		counterp = &hammer2_iod_fmap_write;
833 		break;
834 	default:
835 		counterp = &hammer2_iod_volu_write;
836 		break;
837 	}
838 	*counterp += dio->psize;
839 }
840 
841 void
842 hammer2_io_bkvasync(hammer2_io_t *dio)
843 {
844 	KKASSERT(dio->bp != NULL);
845 	bkvasync(dio->bp);
846 }
847 
848 /*
849  * Ref a dio that is already owned
850  */
851 void
852 hammer2_io_ref(hammer2_io_t *dio)
853 {
854 	atomic_add_64(&dio->refs, 1);
855 }
856