xref: /dragonfly/sys/vfs/hammer2/hammer2_io.c (revision 1fe7e945)
/*
 * Copyright (c) 2013-2017 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

#define HAMMER2_DOP_READ	1
#define HAMMER2_DOP_NEW		2
#define HAMMER2_DOP_NEWNZ	3
#define HAMMER2_DOP_READQ	4

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  It can be used as an OS abstraction, but its
 * main purpose is to allow larger device buffers to back hammer2_chain
 * structures that use smaller allocations, without causing deadlocks.
 *
 * The DIOs also record temporary state with limited persistence.  This
 * feature is used to keep track of dedupable blocks.
 */
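
/*
 * Illustrative usage sketch (editorial addition, not part of the
 * driver): a consumer typically brings a device block in through
 * hammer2_io_bread(), maps it with hammer2_io_data(), and releases it
 * with hammer2_io_bqrelse().  Those three entry points are defined
 * below; the wrapper function here and its use of a blockref are
 * hypothetical.
 */
#if 0
static int
example_read_block(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
{
	hammer2_io_t *dio;
	char *data;
	int lsize;
	int error;

	/* bref->data_off encodes both the device offset and size radix */
	lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	error = hammer2_io_bread(hmp, bref->type, bref->data_off,
				 lsize, &dio);
	if (error == 0) {
		data = hammer2_io_data(dio, bref->data_off);
		/* ... consume lsize bytes at data ... */
	}
	hammer2_io_bqrelse(&dio);

	return error;
}
#endif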
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
	if (io1->pbase < io2->pbase)
		return(-1);
	if (io1->pbase > io2->pbase)
		return(1);
	return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);

struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;
};

#if 0
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
#endif
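
/*
 * Worked example for the disabled mask computation above (editorial
 * addition), assuming HAMMER2_PBUFSIZE is 64KB: a 4KB chunk starting
 * 8KB into its DIO yields i = 8192 >> 10 = 8 and
 * mask = ((1 << (4096 >> 10)) - 1) << 8 = 0xF << 8 = 0x0F00,
 * i.e. one mask bit per 1KB sub-block of the 64KB buffer.
 */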

/*
 * Returns the DIO corresponding to the (data offset | radix) key,
 * creating it if necessary.
 *
 * If createit is 0, NULL can be returned to indicate that the DIO does
 * not exist.  (btype) is ignored when createit is 0.
 */
static __inline
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_key_t lbase;
	hammer2_key_t pbase;
	hammer2_key_t pmask;
	uint64_t refs;
	int lsize;
	int psize;

	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;

	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		refs = atomic_fetchadd_64(&dio->refs, 1);
		if ((refs & HAMMER2_DIO_MASK) == 0) {
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
		hammer2_spin_unsh(&hmp->io_spin);
	} else if (createit) {
		refs = 0;
		hammer2_spin_unsh(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;
		dio->act = 5;
		hammer2_spin_ex(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			refs = atomic_fetchadd_64(&xio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			hammer2_spin_unex(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		return NULL;
	}
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}
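
/*
 * Worked example of the data_off encoding decoded above (editorial
 * addition, illustrative values only): the low bits selected by
 * HAMMER2_OFF_MASK_RADIX hold the size radix and the remaining bits
 * hold the media offset.  A data_off of 0x0000000000410010 thus
 * decodes to lsize = 1 << 0x10 = 65536 and lbase = 0x0000000000410000,
 * and with a 64KB HAMMER2_PBUFSIZE the backing DIO sits at pbase
 * 0x410000.
 */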

/*
 * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
 * a buffer.  If it is set, the buffer already exists and is ready to use.
 */
hammer2_io_t *
hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize, int op)
{
	hammer2_io_t *dio;
	off_t peof;
	uint64_t orefs;
	uint64_t nrefs;
	int isgood;
	int error;
	int hce;
	int notmetaflag = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

	if (op == HAMMER2_DOP_READQ) {
		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
		if (dio == NULL)
			return NULL;
		op = HAMMER2_DOP_READ;
	} else {
		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
	}

	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Buffer is already good, handle the op and return.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			if (isgood == 0)
				cpu_mfence();

			switch(op) {
			case HAMMER2_DOP_NEW:
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				/* nothing to do */
				break;
			}
			return (dio);
		}

		/*
		 * Try to own the DIO
		 */
		if (orefs & HAMMER2_DIO_INPROG) {
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			nrefs = orefs | HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				break;
			}
		}
	}

	/*
	 * We break to here if GOOD is not set and we acquired INPROG for
	 * the I/O.
	 */
	KKASSERT(dio->bp == NULL);
	if (btype == HAMMER2_BREF_TYPE_DATA)
		hce = hammer2_cluster_data_read;
	else
		hce = hammer2_cluster_meta_read;

	error = 0;
	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
	    dio->psize == lsize) {
		switch(op) {
		case HAMMER2_DOP_NEW:
		case HAMMER2_DOP_NEWNZ:
			dio->bp = getblk(dio->hmp->devvp,
					 dio->pbase, dio->psize,
					 0, 0);
			if (op == HAMMER2_DOP_NEW)
				bzero(dio->bp->b_data, dio->psize);
			atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
			break;
		case HAMMER2_DOP_READ:
		default:
			if (hce > 0) {
				/*
				 * Synchronous cluster I/O for now.
				 */
				peof = (dio->pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				dio->bp = NULL;
				error = cluster_readx(dio->hmp->devvp,
						      peof, dio->pbase,
						      dio->psize, notmetaflag,
						      dio->psize,
						      HAMMER2_PBUFSIZE*hce,
						      &dio->bp);
			} else {
				dio->bp = NULL;
				error = breadnx(dio->hmp->devvp, dio->pbase,
						dio->psize, notmetaflag,
						NULL, NULL, 0, &dio->bp);
			}
		}
	} else {
		if (hce > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			error = cluster_readx(dio->hmp->devvp,
					      peof, dio->pbase, dio->psize,
					      notmetaflag,
					      dio->psize, HAMMER2_PBUFSIZE*hce,
					      &dio->bp);
		} else {
			error = breadnx(dio->hmp->devvp, dio->pbase,
					dio->psize, notmetaflag,
					NULL, NULL, 0, &dio->bp);
		}
		if (dio->bp) {
			/*
			 * Handle NEW flags
			 */
			switch(op) {
			case HAMMER2_DOP_NEW:
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				break;
			}

			/*
			 * Tell the kernel that the buffer cache is not
			 * meta-data based on the btype.  This allows
			 * swapcache to distinguish between data and
			 * meta-data.
			 */
			switch(btype) {
			case HAMMER2_BREF_TYPE_DATA:
				dio->bp->b_flags |= B_NOTMETA;
				break;
			default:
				break;
			}
		}
	}

	if (dio->bp) {
		BUF_KERNPROC(dio->bp);
		dio->bp->b_flags &= ~B_AGE;
	}
	dio->error = error;

	/*
	 * Clear INPROG and WAITING, set GOOD on success, and wake up
	 * anyone waiting.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
		if (error == 0)
			nrefs |= HAMMER2_DIO_GOOD;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/* XXX error handling */

	return dio;
}
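
/*
 * Summary of the dio->refs protocol implemented above (editorial
 * addition): the low bits (HAMMER2_DIO_MASK) count references;
 * DIO_INPROG gives its setter exclusive ownership for buffer
 * instantiation or disposal; DIO_WAITING marks sleepers to be woken
 * when INPROG clears; DIO_GOOD means dio->bp is valid; DIO_DIRTY
 * requests write-back on final release.  All transitions go through
 * atomic_cmpset_64() compare-and-swap loops.
 */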

/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	int psize;
	int limit_dio;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG is clear so we can set
			 * it ourselves and proceed with disposal.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented;
	 * note that a racing accessor will decrement iofree_count, so we
	 * have to increment it regardless.
	 *
	 * We can now dispose of the buffer, and should do it before calling
	 * io_complete() in case there's a race against a new reference
	 * which causes io_complete() to chain and instantiate the bp again.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * Allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm'),
			 * will burst-write later.  Allow the kernel
			 * to cluster the dirty buffers.
			 *
			 * NOTE: Do not use cluster_write() here.  The
			 *	 problem is that due to the way chains
			 *	 are locked, buffers are cycled in and out
			 *	 quite often so the disposal here is not
			 *	 necessarily the final disposal.  Avoid
			 *	 excessive rewriting of the same blocks
			 *	 by using bdwrite().
			 */
#if 0
			off_t peof;
			int hce;

			if ((hce = hammer2_cluster_write) > 0) {
				/*
				 * Allows write-behind to keep the buffer
				 * cache sane.
				 */
				peof = (pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(bp, peof, psize, hce);
			} else
#endif
			{
				bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 */
	hmp = dio->hmp;
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	limit_dio = hammer2_limit_dio;
	if (limit_dio < 256)
		limit_dio = 256;
	if (limit_dio > 1024*1024)
		limit_dio = 1024*1024;
	if (hmp->iofree_count > limit_dio) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		if (hmp->iofree_count > limit_dio) {
			info.count = hmp->iofree_count / 5;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}
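
/*
 * Sizing example for the clamp above (editorial addition): a
 * hammer2_limit_dio of 100 is raised to 256 and one of 2,000,000 is
 * capped at 1,048,576; once iofree_count exceeds the clamped limit,
 * roughly a fifth of the cached DIOs (iofree_count / 5) become
 * candidates for reclamation in a single scan.
 */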

/*
 * Cleanup any dio's with (INPROG | refs) == 0, moving aged-out entries
 * to the caller's temporary tree for disposal.
 *
 * The same mechanism is used to clean up cached DIOs on umount after
 * all activity has been flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
	struct hammer2_cleanupcb_info *info = arg;
	hammer2_io_t *xio;

	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
		if (dio->act > 0) {
			int act;

			act = dio->act - (ticks - dio->ticks) / hz - 1;
			if (act > 0) {
				dio->act = act;
				return 0;
			}
			dio->act = 0;
		}
		KKASSERT(dio->bp == NULL);
		if (info->count > 0) {
			RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
			xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
			KKASSERT(xio == NULL);
			--info->count;
		}
	}
	return 0;
}
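
/*
 * Aging example for the callback above (editorial addition): with
 * dio->act == 5 and the last access 3 seconds ago
 * ((ticks - dio->ticks) / hz == 3), the recomputed act is
 * 5 - 3 - 1 == 1, so the DIO survives this scan; one more elapsed
 * second would drop act to 0 and make the DIO reclaimable.
 */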

void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
	hammer2_io_t *dio;

	while ((dio = RB_ROOT(tree)) != NULL) {
		RB_REMOVE(hammer2_io_tree, tree, dio);
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
		if (dio->refs & HAMMER2_DIO_DIRTY) {
			kprintf("hammer2_io_cleanup: Dirty buffer "
				"%016jx/%d (bp=%p)\n",
				dio->pbase, dio->psize, dio->bp);
		}
		kfree(dio, M_HAMMER2);
		atomic_add_int(&hammer2_dio_count, -1);
		atomic_add_int(&hmp->iofree_count, -1);
	}
}

/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return(bp->b_data + off);
}
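
/*
 * Offset example (editorial addition, illustrative values): a 16KB
 * logical block at lbase 0x414000 backed by a 64KB buffer whose
 * b_loffset is 0x410000 resolves to bp->b_data + 0x4000.  Radix bits
 * in lbase are masked off above, so an encoded data_off may be passed
 * directly.
 */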

int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
	return ((*diop)->error);
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
	return ((*diop)->error);
}

int
hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_READ);
	return ((*diop)->error);
}

hammer2_io_t *
hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize)
{
	hammer2_io_t *dio;

	dio = hammer2_io_getblk(hmp, 0, lbase, lsize, HAMMER2_DOP_READQ);
	return dio;
}

void
hammer2_io_bawrite(hammer2_io_t **diop)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
	return (0);	/* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}

/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP */
}

void
hammer2_io_brelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

/*
 * Set dedup validation bits in a DIO.  We do not need the buffer cache
 * buffer for this.  This must be done concurrent with setting bits in
 * the freemap so as to interlock with bulkfree's clearing of those bits.
 */
void
hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int lsize;
	int isgood;

	dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
	lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
	atomic_clear_64(&dio->dedup_valid, mask);
	atomic_set_64(&dio->dedup_alloc, mask);
	hammer2_io_putblk(&dio);
}
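
/*
 * Granularity note (editorial addition): assuming hammer2_dedup_mask()
 * follows the same one-bit-per-1KB layout as the disabled
 * hammer2_io_mask() above, a 4KB data block starting 8KB into its
 * 64KB DIO would set mask 0x0F00 in dedup_alloc and clear the same
 * bits in dedup_valid, one uint64_t covering the whole buffer.
 */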

/*
 * Clear dedup validation bits in a DIO.  This is typically done when
 * a modified chain is destroyed or by the bulkfree code.  No buffer
 * is needed for this operation.  If the DIO no longer exists it is
 * equivalent to the bits not being set.
 */
void
hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
			hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int isgood;

	if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;
	if (btype != HAMMER2_BREF_TYPE_DATA)
		return;
	dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
	if (dio) {
		if (data_off < dio->pbase ||
		    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
		    dio->pbase + dio->psize) {
			panic("hammer2_dedup_delete: DATAOFF BAD "
			      "%016jx/%d %016jx\n",
			      data_off, bytes, dio->pbase);
		}
		mask = hammer2_dedup_mask(dio, data_off, bytes);
		atomic_clear_64(&dio->dedup_alloc, mask);
		atomic_clear_64(&dio->dedup_valid, mask);
		hammer2_io_putblk(&dio);
	}
}

/*
 * Assert that dedup allocation bits in a DIO are not set.  This operation
 * does not require a buffer.  The DIO does not need to exist.
 */
void
hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	int isgood;

	dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
			       0, &isgood);
	if (dio) {
		KASSERT((dio->dedup_alloc &
			  hammer2_dedup_mask(dio, data_off, bytes)) == 0,
			("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
			data_off,
			bytes,
			hammer2_dedup_mask(dio, data_off, bytes),
			dio->dedup_alloc));
		hammer2_io_putblk(&dio);
	}
}

static
void
dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
{
	long *counterp;

	if (bp->b_flags & B_DELWRI)
		return;

	switch(dio->btype) {
	case 0:
		return;
	case HAMMER2_BREF_TYPE_DATA:
		counterp = &hammer2_iod_file_write;
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_INODE:
		counterp = &hammer2_iod_meta_write;
		break;
	case HAMMER2_BREF_TYPE_INDIRECT:
		counterp = &hammer2_iod_indr_write;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		counterp = &hammer2_iod_fmap_write;
		break;
	default:
		counterp = &hammer2_iod_volu_write;
		break;
	}
	*counterp += dio->psize;
}
831