/*
 * Copyright (c) 2013-2018 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

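/*
 * Device I/O ops (illustrative summary of the semantics implemented by
 * hammer2_io_getblk() below):
 *
 *	DOP_READ  - read the backing block.
 *	DOP_NEW   - instantiate the block and zero it (marks the DIO dirty).
 *	DOP_NEWNZ - instantiate the block without zeroing it (marks the
 *		    DIO dirty).
 *	DOP_READQ - "quick" read, only if the DIO already exists; never
 *		    creates a new DIO.
 */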
#define HAMMER2_DOP_READ	1
#define HAMMER2_DOP_NEW		2
#define HAMMER2_DOP_NEWNZ	3
#define HAMMER2_DOP_READQ	4

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  It can be used as an OS abstraction, but the main
 * purpose is to allow larger buffers to be used against hammer2_chain
 * structures using smaller allocations, without causing deadlocks.
 *
 * The DIOs also record temporary state with limited persistence.  This
 * feature is used to keep track of dedupable blocks.
 */
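
/*
 * All DIO state lives in the single 64-bit dio->refs word: the low bits
 * (HAMMER2_DIO_MASK) hold the reference count, and the high bits hold the
 * GOOD (buffer instantiated and valid), INPROG (I/O or transition in
 * progress), WAITING (a thread is sleeping on INPROG), and DIRTY flags.
 * The state transitions below are lock-free cmpset loops on this word;
 * the authoritative bit definitions live in hammer2.h.
 */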
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
	if (io1->pbase < io2->pbase)
		return(-1);
	if (io1->pbase > io2->pbase)
		return(1);
	return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);

struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;
};

#if 0
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
#endif

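/*
 * NOTE: A device offset (data_off) encodes the block size in its low six
 *	 bits as a radix: lsize = 1 << (data_off & HAMMER2_OFF_MASK_RADIX).
 *	 Illustrative (made-up) example: data_off 0x4240C decodes to radix
 *	 12 (lsize 4096), lbase 0x42400, and a 64KB-aligned pbase of
 *	 0x40000, which keys the DIO in the iotree.
 */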
/*
 * Returns the DIO corresponding to the data|radix key, creating it if
 * necessary.
 *
 * If createit is 0, NULL can be returned, indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 */
static __inline
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_key_t lbase;
	hammer2_key_t pbase;
	hammer2_key_t pmask;
	uint64_t refs;
	int lsize;
	int psize;

	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;

	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		refs = atomic_fetchadd_64(&dio->refs, 1);
		if ((refs & HAMMER2_DIO_MASK) == 0) {
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
		hammer2_spin_unsh(&hmp->io_spin);
	} else if (createit) {
		refs = 0;
		hammer2_spin_unsh(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;
		dio->act = 5;
		hammer2_spin_ex(&hmp->io_spin);
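		/*
		 * Insert the new DIO.  If another thread raced us and
		 * inserted first, RB_INSERT returns the existing node
		 * (xio); ref that one and discard our copy.
		 */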
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			refs = atomic_fetchadd_64(&xio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			hammer2_spin_unex(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		return NULL;
	}
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}

/*
 * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
 * a buffer.  If it is set, the buffer already exists and is good to go.
 */
hammer2_io_t *
hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize, int op)
{
	hammer2_io_t *dio;
	off_t peof;
	uint64_t orefs;
	uint64_t nrefs;
	int isgood;
	int error;
	int hce;
	int bflags;

	bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
	bflags |= B_KVABIO;

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

	if (op == HAMMER2_DOP_READQ) {
		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
		if (dio == NULL)
			return NULL;
		op = HAMMER2_DOP_READ;
	} else {
		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
	}

	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Buffer is already good, handle the op and return.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			if (isgood == 0)
				cpu_mfence();
			bkvasync(dio->bp);

			switch(op) {
			case HAMMER2_DOP_NEW:
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				/* nothing to do */
				break;
			}
			return (dio);
		}

		/*
		 * Try to own the DIO
		 */
		if (orefs & HAMMER2_DIO_INPROG) {
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
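			/*
			 * tsleep_interlock() is armed before the WAITING
			 * bit is set, so the wakeup() issued when INPROG
			 * clears cannot be lost; a spurious wakeup is
			 * harmless because the loop re-tests dio->refs.
			 */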
		} else {
			nrefs = orefs | HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				break;
			}
		}
	}

	/*
	 * We break to here if GOOD is not set and we acquired INPROG for
	 * the I/O.
	 */
	KKASSERT(dio->bp == NULL);
	if (btype == HAMMER2_BREF_TYPE_DATA)
		hce = hammer2_cluster_data_read;
	else
		hce = hammer2_cluster_meta_read;
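	/*
	 * hce is the clustered read-ahead limit in HAMMER2_PBUFSIZE
	 * blocks (hammer2_cluster_data_read/_meta_read are global
	 * tunables); 0 disables clustering and falls back to breadnx().
	 */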

	error = 0;
	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
	    dio->psize == lsize) {
		switch(op) {
		case HAMMER2_DOP_NEW:
		case HAMMER2_DOP_NEWNZ:
			dio->bp = getblk(dio->hmp->devvp,
					 dio->pbase, dio->psize,
					 GETBLK_KVABIO, 0);
			if (op == HAMMER2_DOP_NEW) {
				bkvasync(dio->bp);
				bzero(dio->bp->b_data, dio->psize);
			}
			atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
			break;
		case HAMMER2_DOP_READ:
		default:
			if (hce > 0) {
				/*
				 * Synchronous cluster I/O for now.
				 */
				peof = (dio->pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				dio->bp = NULL;
				error = cluster_readx(dio->hmp->devvp,
						     peof, dio->pbase,
						     dio->psize, bflags,
						     dio->psize,
						     HAMMER2_PBUFSIZE*hce,
						     &dio->bp);
			} else {
				dio->bp = NULL;
				error = breadnx(dio->hmp->devvp, dio->pbase,
						dio->psize, bflags,
						NULL, NULL, 0, &dio->bp);
			}
		}
	} else {
		if (hce > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			error = cluster_readx(dio->hmp->devvp,
					      peof, dio->pbase, dio->psize,
					      bflags,
					      dio->psize, HAMMER2_PBUFSIZE*hce,
					      &dio->bp);
		} else {
			error = breadnx(dio->hmp->devvp, dio->pbase,
					dio->psize, bflags,
					NULL, NULL, 0, &dio->bp);
		}
		if (dio->bp) {
			/*
			 * Handle NEW flags
			 */
			switch(op) {
			case HAMMER2_DOP_NEW:
				bkvasync(dio->bp);
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				break;
			}

			/*
			 * Tell the kernel that the buffer cache is not
			 * meta-data based on the btype.  This allows
			 * swapcache to distinguish between data and
			 * meta-data.
			 */
			switch(btype) {
			case HAMMER2_BREF_TYPE_DATA:
				dio->bp->b_flags |= B_NOTMETA;
				break;
			default:
				break;
			}
		}
	}

	if (dio->bp) {
		bkvasync(dio->bp);
		BUF_KERNPROC(dio->bp);
		dio->bp->b_flags &= ~B_AGE;
	}
	dio->error = error;

	/*
	 * Clear INPROG and WAITING, set GOOD, and wake up anyone waiting.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
		if (error == 0)
			nrefs |= HAMMER2_DIO_GOOD;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/* XXX error handling */

	return dio;
}

/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	int psize;
	int dio_limit;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG is clear so we can set it
			 * ourselves.  GOOD must be cleared to prevent the
			 * getblk shortcut.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented;
	 * note that a racing accessor may decrement iofree_count, so we
	 * have to increment it regardless.
	 *
	 * We can now dispose of the buffer, and should do it before calling
	 * io_complete() in case there's a race against a new reference
	 * which causes io_complete() to chain and instantiate the bp again.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * Allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm'),
			 * will burst-write later.
			 *
			 * We normally do not allow the kernel to
			 * cluster dirty buffers because H2 already
			 * uses a large block size.
			 *
			 * NOTE: Do not use cluster_write() here.  The
			 *	 problem is that due to the way chains
			 *	 are locked, buffers are cycled in and out
			 *	 quite often so the disposal here is not
			 *	 necessarily the final disposal.  Avoid
			 *	 excessive rewriting of the same blocks
			 *	 by using bdwrite().
			 */
#if 0
			off_t peof;
			int hce;

			if ((hce = hammer2_cluster_write) > 0) {
				/*
				 * Allows write-behind to keep the buffer
				 * cache sane.
				 */
				peof = (pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(bp, peof, psize, hce);
			} else
#endif
			if (hammer2_cluster_write)
				bp->b_flags |= B_CLUSTEROK;
			else
				bp->b_flags &= ~B_CLUSTEROK;
			bdwrite(bp);
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 */
	hmp = dio->hmp;
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	dio_limit = hammer2_dio_limit;
	if (dio_limit < 256)
		dio_limit = 256;
	if (dio_limit > 1024*1024)
		dio_limit = 1024*1024;
	if (hmp->iofree_count > dio_limit) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		if (hmp->iofree_count > dio_limit) {
			info.count = hmp->iofree_count / 5;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}

/*
 * Clean up any DIOs with (INPROG | refs) == 0.
 *
 * Called via RB_SCAN when the free DIO cache exceeds its limit, and to
 * clean up cached DIOs on umount after all activity has been flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
	struct hammer2_cleanupcb_info *info = arg;
	hammer2_io_t *xio;

	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
		if (dio->act > 0) {
			int act;

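			/*
			 * Decay the activity heuristic by one step per
			 * second of inactivity (plus one for this scan)
			 * and keep the DIO cached while it stays positive.
			 */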
			act = dio->act - (ticks - dio->ticks) / hz - 1;
			if (act > 0) {
				dio->act = act;
				return 0;
			}
			dio->act = 0;
		}
		KKASSERT(dio->bp == NULL);
		if (info->count > 0) {
			RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
			xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
			KKASSERT(xio == NULL);
			--info->count;
		}
	}
	return 0;
}

void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
	hammer2_io_t *dio;

	while ((dio = RB_ROOT(tree)) != NULL) {
		RB_REMOVE(hammer2_io_tree, tree, dio);
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
		if (dio->refs & HAMMER2_DIO_DIRTY) {
			kprintf("hammer2_io_cleanup: Dirty buffer "
				"%016jx/%d (bp=%p)\n",
				dio->pbase, dio->psize, dio->bp);
		}
		kfree(dio, M_HAMMER2);
		atomic_add_int(&hammer2_dio_count, -1);
		atomic_add_int(&hmp->iofree_count, -1);
	}
}

/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
	bkvasync(bp);
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return(bp->b_data + off);
}

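/*
 * Convenience wrappers.  Typical read access pattern (illustrative
 * sketch only):
 *
 *	hammer2_io_t *dio;
 *
 *	if (hammer2_io_bread(hmp, btype, lbase, lsize, &dio) == 0)
 *		data = hammer2_io_data(dio, lbase);
 *	...
 *	hammer2_io_putblk(&dio);
 */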
int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
	return ((*diop)->error);
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
	return ((*diop)->error);
}

int
hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_READ);
	return ((*diop)->error);
}

hammer2_io_t *
hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize)
{
	hammer2_io_t *dio;

	dio = hammer2_io_getblk(hmp, 0, lbase, lsize, HAMMER2_DOP_READQ);
	return dio;
}

void
hammer2_io_bawrite(hammer2_io_t **diop)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
	return (0);	/* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}

/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP */
}

void
hammer2_io_brelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

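/*
 * Dedup state is kept in two 64-bit bitmaps in the DIO (dedup_alloc and
 * dedup_valid).  hammer2_dedup_mask() computes the bits covering
 * (data_off, bytes); judging by the disabled hammer2_io_mask() helper
 * above, each bit covers one 1KB sub-block of the 64KB DIO.
 */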
/*
 * Set dedup allocation bits in a DIO (and clear any stale validation
 * bits).  We do not need the buffer cache buffer for this.  This must be
 * done concurrently with setting bits in the freemap so as to interlock
 * with bulkfree's clearing of those bits.
 */
void
hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int lsize;
	int isgood;

	dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
	lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
	atomic_clear_64(&dio->dedup_valid, mask);
	atomic_set_64(&dio->dedup_alloc, mask);
	hammer2_io_putblk(&dio);
}

/*
 * Clear dedup validation bits in a DIO.  This is typically done when
 * a modified chain is destroyed or by the bulkfree code.  No buffer
 * is needed for this operation.  If the DIO no longer exists it is
 * equivalent to the bits not being set.
 */
void
hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
			hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int isgood;

	if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;
	if (btype != HAMMER2_BREF_TYPE_DATA)
		return;
	dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
	if (dio) {
		if (data_off < dio->pbase ||
		    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
		    dio->pbase + dio->psize) {
			panic("hammer2_dedup_delete: DATAOFF BAD "
			      "%016jx/%d %016jx\n",
			      data_off, bytes, dio->pbase);
		}
		mask = hammer2_dedup_mask(dio, data_off, bytes);
		atomic_clear_64(&dio->dedup_alloc, mask);
		atomic_clear_64(&dio->dedup_valid, mask);
		hammer2_io_putblk(&dio);
	}
}

/*
 * Assert that dedup allocation bits in a DIO are not set.  This operation
 * does not require a buffer.  The DIO does not need to exist.
 */
void
hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	int isgood;

	dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
			       0, &isgood);
	if (dio) {
		KASSERT((dio->dedup_alloc &
			  hammer2_dedup_mask(dio, data_off, bytes)) == 0,
			("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
			data_off,
			bytes,
			hammer2_dedup_mask(dio, data_off, bytes),
			dio->dedup_alloc));
		hammer2_io_putblk(&dio);
	}
}

static
void
dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
{
	long *counterp;

	if (bp->b_flags & B_DELWRI)
		return;

	switch(dio->btype) {
	case 0:
		return;
	case HAMMER2_BREF_TYPE_DATA:
		counterp = &hammer2_iod_file_write;
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_INODE:
		counterp = &hammer2_iod_meta_write;
		break;
	case HAMMER2_BREF_TYPE_INDIRECT:
		counterp = &hammer2_iod_indr_write;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		counterp = &hammer2_iod_fmap_write;
		break;
	default:
		counterp = &hammer2_iod_volu_write;
		break;
	}
	*counterp += dio->psize;
}

void
hammer2_io_bkvasync(hammer2_io_t *dio)
{
	KKASSERT(dio->bp != NULL);
	bkvasync(dio->bp);
}

/*
 * Ref a dio that is already owned
 */
void
hammer2_io_ref(hammer2_io_t *dio)
{
	atomic_add_64(&dio->refs, 1);
}