xref: /dragonfly/sys/vfs/hammer2/hammer2_io.c (revision c70d4562)
1 /*
2  * Copyright (c) 2013-2018 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include "hammer2.h"
36 
37 #define HAMMER2_DOP_READ	1
38 #define HAMMER2_DOP_NEW		2
39 #define HAMMER2_DOP_NEWNZ	3
40 #define HAMMER2_DOP_READQ	4
41 
42 /*
43  * Implements an abstraction layer for synchronous and asynchronous
44  * buffered device I/O.  Can be used as an OS-abstraction but the main
45  * purpose is to allow larger buffers to be used against hammer2_chain's
46  * using smaller allocations, without causing deadlocks.
47  *
48  * The DIOs also record temporary state with limited persistence.  This
49  * feature is used to keep track of dedupable blocks.
50  */
51 static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
52 static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);
53 
54 static int
55 hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
56 {
57 	if (io1->pbase < io2->pbase)
58 		return(-1);
59 	if (io1->pbase > io2->pbase)
60 		return(1);
61 	return(0);
62 }
63 
/*
 * Red-black tree of DIOs.  The RB_*2 variants additionally generate a
 * direct-key RB_LOOKUP() on the (off_t) pbase field.
 */
RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);
67 
/*
 * Per-scan state for hammer2_io_cleanup_callback().  Idle DIOs are moved
 * from hmp->iotree into tmptree (while holding io_spin) and destroyed
 * later by hammer2_io_cleanup() outside the spinlock.
 */
struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;		/* max number of DIOs to collect this pass */
};
72 
#if 0
/*
 * Disabled helper (kept for reference): computed a 1KB-granularity bit
 * mask covering (off, bytes) within the DIO's 64KB physical buffer.
 * Current code uses hammer2_dedup_mask() for mask generation instead.
 */
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
#endif
97 
/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 *
 * (data_off) encodes the allocation radix in its low bits; the logical
 * extent derived from it must lie entirely within one PBUFSIZE-aligned
 * physical buffer.  The returned DIO is referenced; *isgoodp is set to 1
 * if the DIO's buffer was already valid (DIO_GOOD) at lookup time.
 */
static __inline
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_key_t lbase;
	hammer2_key_t pbase;
	hammer2_key_t pmask;
	uint64_t refs;
	int lsize;
	int psize;

	/*
	 * Decode the logical extent and round down to the physical
	 * (PBUFSIZE) buffer boundary.
	 */
	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;

	/* The logical extent must not straddle a physical buffer boundary */
	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		/*
		 * 0->1 transition takes the DIO off the free accounting;
		 * the shared spinlock prevents concurrent destruction.
		 */
		refs = atomic_fetchadd_64(&dio->refs, 1);
		if ((refs & HAMMER2_DIO_MASK) == 0) {
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
		hammer2_spin_unsh(&hmp->io_spin);
	} else if (createit) {
		/*
		 * Allocate outside the spinlock, then insert under the
		 * exclusive lock.  A racing creator may have inserted the
		 * same pbase first, in which case we use its DIO.
		 */
		refs = 0;
		hammer2_spin_unsh(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;
		dio->act = 5;
		hammer2_spin_ex(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			/* Lost the race, ref the existing DIO instead */
			refs = atomic_fetchadd_64(&xio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			hammer2_spin_unex(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		return NULL;
	}

	/* Activity heuristic; aged down by the cleanup callback */
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}
179 
180 /*
181  * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
182  * a buffer.  If set the buffer already exists and is good to go.
183  */
184 hammer2_io_t *
185 hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize, int op)
186 {
187 	hammer2_io_t *dio;
188 	off_t peof;
189 	uint64_t orefs;
190 	uint64_t nrefs;
191 	int isgood;
192 	int error;
193 	int hce;
194 	int bflags;
195 
196 	bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
197 	bflags |= B_KVABIO;
198 
199 	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
200 
201 	if (op == HAMMER2_DOP_READQ) {
202 		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
203 		if (dio == NULL)
204 			return NULL;
205 		op = HAMMER2_DOP_READ;
206 	} else {
207 		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
208 	}
209 
210 	for (;;) {
211 		orefs = dio->refs;
212 		cpu_ccfence();
213 
214 		/*
215 		 * Buffer is already good, handle the op and return.
216 		 */
217 		if (orefs & HAMMER2_DIO_GOOD) {
218 			if (isgood == 0)
219 				cpu_mfence();
220 			bkvasync(dio->bp);
221 
222 			switch(op) {
223 			case HAMMER2_DOP_NEW:
224 				bzero(hammer2_io_data(dio, lbase), lsize);
225 				/* fall through */
226 			case HAMMER2_DOP_NEWNZ:
227 				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
228 				break;
229 			case HAMMER2_DOP_READ:
230 			default:
231 				/* nothing to do */
232 				break;
233 			}
234 			return (dio);
235 		}
236 
237 		/*
238 		 * Try to own the DIO
239 		 */
240 		if (orefs & HAMMER2_DIO_INPROG) {
241 			nrefs = orefs | HAMMER2_DIO_WAITING;
242 			tsleep_interlock(dio, 0);
243 			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
244 				tsleep(dio, PINTERLOCKED, "h2dio", hz);
245 			}
246 			/* retry */
247 		} else {
248 			nrefs = orefs | HAMMER2_DIO_INPROG;
249 			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
250 				break;
251 			}
252 		}
253 	}
254 
255 	/*
256 	 * We break to here if GOOD is not set and we acquired INPROG for
257 	 * the I/O.
258 	 */
259 	KKASSERT(dio->bp == NULL);
260 	if (btype == HAMMER2_BREF_TYPE_DATA)
261 		hce = hammer2_cluster_data_read;
262 	else
263 		hce = hammer2_cluster_meta_read;
264 
265 	error = 0;
266 	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
267 	    dio->psize == lsize) {
268 		switch(op) {
269 		case HAMMER2_DOP_NEW:
270 		case HAMMER2_DOP_NEWNZ:
271 			dio->bp = getblk(dio->hmp->devvp,
272 					 dio->pbase, dio->psize,
273 					 GETBLK_KVABIO, 0);
274 			if (op == HAMMER2_DOP_NEW) {
275 				bkvasync(dio->bp);
276 				bzero(dio->bp->b_data, dio->psize);
277 			}
278 			atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
279 			break;
280 		case HAMMER2_DOP_READ:
281 		default:
282 			if (hce > 0) {
283 				/*
284 				 * Synchronous cluster I/O for now.
285 				 */
286 				peof = (dio->pbase + HAMMER2_SEGMASK64) &
287 				       ~HAMMER2_SEGMASK64;
288 				dio->bp = NULL;
289 				error = cluster_readx(dio->hmp->devvp,
290 						     peof, dio->pbase,
291 						     dio->psize, bflags,
292 						     dio->psize,
293 						     HAMMER2_PBUFSIZE*hce,
294 						     &dio->bp);
295 			} else {
296 				dio->bp = NULL;
297 				error = breadnx(dio->hmp->devvp, dio->pbase,
298 						dio->psize, bflags,
299 					        NULL, NULL, 0, &dio->bp);
300 			}
301 		}
302 	} else {
303 		if (hce > 0) {
304 			/*
305 			 * Synchronous cluster I/O for now.
306 			 */
307 			peof = (dio->pbase + HAMMER2_SEGMASK64) &
308 			       ~HAMMER2_SEGMASK64;
309 			error = cluster_readx(dio->hmp->devvp,
310 					      peof, dio->pbase, dio->psize,
311 					      bflags,
312 					      dio->psize, HAMMER2_PBUFSIZE*hce,
313 					      &dio->bp);
314 		} else {
315 			error = breadnx(dio->hmp->devvp, dio->pbase,
316 				        dio->psize, bflags,
317 					NULL, NULL, 0, &dio->bp);
318 		}
319 		if (dio->bp) {
320 			/*
321 			 * Handle NEW flags
322 			 */
323 			switch(op) {
324 			case HAMMER2_DOP_NEW:
325 				bkvasync(dio->bp);
326 				bzero(hammer2_io_data(dio, lbase), lsize);
327 				/* fall through */
328 			case HAMMER2_DOP_NEWNZ:
329 				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
330 				break;
331 			case HAMMER2_DOP_READ:
332 			default:
333 				break;
334 			}
335 
336 			/*
337 			 * Tell the kernel that the buffer cache is not
338 			 * meta-data based on the btype.  This allows
339 			 * swapcache to distinguish between data and
340 			 * meta-data.
341 			 */
342 			switch(btype) {
343 			case HAMMER2_BREF_TYPE_DATA:
344 				dio->bp->b_flags |= B_NOTMETA;
345 				break;
346 			default:
347 				break;
348 			}
349 		}
350 	}
351 
352 	if (dio->bp) {
353 		bkvasync(dio->bp);
354 		BUF_KERNPROC(dio->bp);
355 		dio->bp->b_flags &= ~B_AGE;
356 	}
357 	dio->error = error;
358 
359 	/*
360 	 * Clear INPROG and WAITING, set GOOD wake up anyone waiting.
361 	 */
362 	for (;;) {
363 		orefs = dio->refs;
364 		cpu_ccfence();
365 		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
366 		if (error == 0)
367 			nrefs |= HAMMER2_DIO_GOOD;
368 		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
369 			if (orefs & HAMMER2_DIO_WAITING)
370 				wakeup(dio);
371 			break;
372 		}
373 		cpu_pause();
374 	}
375 
376 	/* XXX error handling */
377 
378 	return dio;
379 }
380 
/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 *
 * *diop is cleared to NULL before the drop so the caller cannot reuse a
 * possibly-stale pointer.  On lastdrop a dirty buffer is queued via
 * bdwrite(), otherwise it is released, and the cached-DIO pool is pruned
 * if it has grown past the configured limit.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	int psize;
	int dio_limit;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented,
	 * note that another accessor race will decrement iofree_count so
	 * we have to increment it regardless.
	 *
	 * We can now dispose of the buffer, and should do it before calling
	 * io_complete() in case there's a race against a new reference
	 * which causes io_complete() to chain and instantiate the bp again.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * Allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm'),
			 * will burst-write later.
			 *
			 * We normally do not allow the kernel to
			 * cluster dirty buffers because H2 already
			 * uses a large block size.
			 *
			 * NOTE: Do not use cluster_write() here.  The
			 *	 problem is that due to the way chains
			 *	 are locked, buffers are cycled in and out
			 *	 quite often so the disposal here is not
			 *	 necessarily the final disposal.  Avoid
			 *	 excessive rewriting of the same blocks
			 *	 by using bdwrite().
			 */
#if 0
			off_t peof;
			int hce;

			if ((hce = hammer2_cluster_write) > 0) {
				/*
				 * Allows write-behind to keep the buffer
				 * cache sane.
				 */
				peof = (pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(bp, peof, psize, hce);
			} else
#endif
			if (hammer2_cluster_write)
				bp->b_flags |= B_CLUSTEROK;
			else
				bp->b_flags &= ~B_CLUSTEROK;
			bdwrite(bp);
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 * (hmp was already loaded above; reloaded here for safety).
	 */
	hmp = dio->hmp;
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	dio_limit = hammer2_dio_limit;
	if (dio_limit < 256)
		dio_limit = 256;
	if (dio_limit > 1024*1024)
		dio_limit = 1024*1024;
	if (hmp->iofree_count > dio_limit) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		if (hmp->iofree_count > dio_limit) {
			info.count = hmp->iofree_count / 5;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}
565 
/*
 * Cleanup any dio's with (INPROG | refs) == 0.
 *
 * Called to clean up cached DIOs on umount after all activity has been
 * flushed, and by hammer2_io_putblk() when the free-DIO pool exceeds
 * its limit.  Runs under the exclusive io_spin held by the caller.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
	struct hammer2_cleanupcb_info *info = arg;
	hammer2_io_t *xio;

	/* Only completely idle DIOs (no refs, no in-progress I/O) qualify */
	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
		if (dio->act > 0) {
			int act;

			/*
			 * Decay the activity counter by elapsed time;
			 * retain the DIO while it is still "active".
			 */
			act = dio->act - (ticks - dio->ticks) / hz - 1;
			if (act > 0) {
				dio->act = act;
				return 0;
			}
			dio->act = 0;
		}
		KKASSERT(dio->bp == NULL);
		if (info->count > 0) {
			/*
			 * Move to the caller's temporary tree; the caller
			 * frees the collected DIOs outside io_spin.
			 */
			RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
			xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
			KKASSERT(xio == NULL);
			--info->count;
		}
	}
	return 0;
}
600 
601 void
602 hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
603 {
604 	hammer2_io_t *dio;
605 
606 	while ((dio = RB_ROOT(tree)) != NULL) {
607 		RB_REMOVE(hammer2_io_tree, tree, dio);
608 		KKASSERT(dio->bp == NULL &&
609 		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
610 		if (dio->refs & HAMMER2_DIO_DIRTY) {
611 			kprintf("hammer2_io_cleanup: Dirty buffer "
612 				"%016jx/%d (bp=%p)\n",
613 				dio->pbase, dio->psize, dio->bp);
614 		}
615 		kfree(dio, M_HAMMER2);
616 		atomic_add_int(&hammer2_dio_count, -1);
617 		atomic_add_int(&hmp->iofree_count, -1);
618 	}
619 }
620 
621 /*
622  * Returns a pointer to the requested data.
623  */
624 char *
625 hammer2_io_data(hammer2_io_t *dio, off_t lbase)
626 {
627 	struct buf *bp;
628 	int off;
629 
630 	bp = dio->bp;
631 	KKASSERT(bp != NULL);
632 	bkvasync(bp);
633 	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
634 	KKASSERT(off >= 0 && off < bp->b_bufsize);
635 	return(bp->b_data + off);
636 }
637 
638 int
639 hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
640 	       hammer2_io_t **diop)
641 {
642 	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
643 	return ((*diop)->error);
644 }
645 
646 int
647 hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
648 		 hammer2_io_t **diop)
649 {
650 	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
651 	return ((*diop)->error);
652 }
653 
654 int
655 hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
656 		hammer2_io_t **diop)
657 {
658 	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_READ);
659 	return ((*diop)->error);
660 }
661 
662 hammer2_io_t *
663 hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize)
664 {
665 	hammer2_io_t *dio;
666 
667 	dio = hammer2_io_getblk(hmp, 0, lbase, lsize, HAMMER2_DOP_READQ);
668 	return dio;
669 }
670 
671 void
672 hammer2_io_bawrite(hammer2_io_t **diop)
673 {
674 	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
675 	hammer2_io_putblk(diop);
676 }
677 
678 void
679 hammer2_io_bdwrite(hammer2_io_t **diop)
680 {
681 	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
682 	hammer2_io_putblk(diop);
683 }
684 
685 int
686 hammer2_io_bwrite(hammer2_io_t **diop)
687 {
688 	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
689 	hammer2_io_putblk(diop);
690 	return (0);	/* XXX */
691 }
692 
/*
 * Flag the DIO dirty.  The backing buffer is flushed (bdwrite) on the
 * final hammer2_io_putblk() disposal, not here.
 */
void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}
698 
/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP - see XXX above; sub-buffer invalidation is not implemented */
}
720 
/*
 * Release a DIO reference.  The choice of brelse/bqrelse/bdwrite for the
 * underlying buffer is made by hammer2_io_putblk() from the DIO's flags,
 * not by this entry point.
 */
void
hammer2_io_brelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}
726 
/*
 * Release a DIO reference.  Identical to hammer2_io_brelse(); buffer
 * disposal policy is centralized in hammer2_io_putblk().
 */
void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}
732 
733 /*
734  * Set dedup validation bits in a DIO.  We do not need the buffer cache
735  * buffer for this.  This must be done concurrent with setting bits in
736  * the freemap so as to interlock with bulkfree's clearing of those bits.
737  */
738 void
739 hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
740 {
741 	hammer2_io_t *dio;
742 	uint64_t mask;
743 	int lsize;
744 	int isgood;
745 
746 	dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
747 	lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
748 	mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
749 	atomic_clear_64(&dio->dedup_valid, mask);
750 	atomic_set_64(&dio->dedup_alloc, mask);
751 	hammer2_io_putblk(&dio);
752 }
753 
754 /*
755  * Clear dedup validation bits in a DIO.  This is typically done when
756  * a modified chain is destroyed or by the bulkfree code.  No buffer
757  * is needed for this operation.  If the DIO no longer exists it is
758  * equivalent to the bits not being set.
759  */
760 void
761 hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
762 			hammer2_off_t data_off, u_int bytes)
763 {
764 	hammer2_io_t *dio;
765 	uint64_t mask;
766 	int isgood;
767 
768 	if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
769 		return;
770 	if (btype != HAMMER2_BREF_TYPE_DATA)
771 		return;
772 	dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
773 	if (dio) {
774 		if (data_off < dio->pbase ||
775 		    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
776 		    dio->pbase + dio->psize) {
777 			panic("hammer2_dedup_delete: DATAOFF BAD "
778 			      "%016jx/%d %016jx\n",
779 			      data_off, bytes, dio->pbase);
780 		}
781 		mask = hammer2_dedup_mask(dio, data_off, bytes);
782 		atomic_clear_64(&dio->dedup_alloc, mask);
783 		atomic_clear_64(&dio->dedup_valid, mask);
784 		hammer2_io_putblk(&dio);
785 	}
786 }
787 
/*
 * Assert that dedup allocation bits in a DIO are not set.  This operation
 * does not require a buffer.  The DIO does not need to exist; a missing
 * DIO trivially satisfies the assertion.
 */
void
hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	int isgood;

	/* Lookup only (createit == 0); NULL means no bits can be set */
	dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
			       0, &isgood);
	if (dio) {
		KASSERT((dio->dedup_alloc &
			  hammer2_dedup_mask(dio, data_off, bytes)) == 0,
			("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
			data_off,
			bytes,
			hammer2_dedup_mask(dio, data_off, bytes),
			dio->dedup_alloc));
		hammer2_io_putblk(&dio);
	}
}
811 
812 static
813 void
814 dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
815 {
816 	long *counterp;
817 
818 	if (bp->b_flags & B_DELWRI)
819 		return;
820 
821 	switch(dio->btype) {
822 	case 0:
823 		return;
824 	case HAMMER2_BREF_TYPE_DATA:
825 		counterp = &hammer2_iod_file_write;
826 		break;
827 	case HAMMER2_BREF_TYPE_DIRENT:
828 	case HAMMER2_BREF_TYPE_INODE:
829 		counterp = &hammer2_iod_meta_write;
830 		break;
831 	case HAMMER2_BREF_TYPE_INDIRECT:
832 		counterp = &hammer2_iod_indr_write;
833 		break;
834 	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
835 	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
836 		counterp = &hammer2_iod_fmap_write;
837 		break;
838 	default:
839 		counterp = &hammer2_iod_volu_write;
840 		break;
841 	}
842 	*counterp += dio->psize;
843 }
844 
/*
 * Synchronize the buffer's KVA mapping for the current cpu (KVABIO
 * buffers may be mapped lazily).  The DIO must have an instantiated
 * buffer.
 */
void
hammer2_io_bkvasync(hammer2_io_t *dio)
{
	KKASSERT(dio->bp != NULL);
	bkvasync(dio->bp);
}
851 
/*
 * Ref a dio that is already owned
 */
void
hammer2_io_ref(hammer2_io_t *dio)
{
	/* Low bits of refs (HAMMER2_DIO_MASK) hold the reference count */
	atomic_add_64(&dio->refs, 1);
}
860