/* xref: /dragonfly/sys/vfs/hammer2/hammer2_io.c (revision 2b7dbe20) */
/*
 * Copyright (c) 2013-2018 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

#define HAMMER2_DOP_READ	1
#define HAMMER2_DOP_NEW		2
#define HAMMER2_DOP_NEWNZ	3
#define HAMMER2_DOP_READQ	4

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used as an OS-abstraction, but the main
 * purpose is to allow larger buffers to be used against hammer2_chains
 * using smaller allocations, without causing deadlocks.
 *
 * The DIOs also record temporary state with limited persistence.  This
 * feature is used to keep track of dedupable blocks.
 */
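
/*
 * Minimal usage sketch of this layer (disabled; illustration only).
 * The wrapper names are the public entry points defined later in this
 * file; the block offset is hypothetical and, as everywhere in this
 * file, carries its size radix in the low bits of data_off.
 */
#if 0
static void
hammer2_io_example(hammer2_dev_t *hmp, hammer2_off_t data_off)
{
	hammer2_io_t *dio;
	char *data;
	int lsize;

	lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	if (hammer2_io_bread(hmp, HAMMER2_BREF_TYPE_DATA, data_off,
			     lsize, &dio) == 0) {
		data = hammer2_io_data(dio, data_off);
		data[0] ^= 1;			/* modify the sub-block */
		hammer2_io_setdirty(dio);	/* written on last drop */
	}
	hammer2_io_bqrelse(&dio);		/* drop ref, delayed write */
}
#endif
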
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
	if (io1->pbase < io2->pbase)
		return(-1);
	if (io1->pbase > io2->pbase)
		return(1);
	return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);

struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;
};

#if 0
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
#endif

#ifdef HAMMER2_IO_DEBUG

static __inline void
DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	int i;

	i = atomic_fetchadd_int(&dio->debug_index, 1) & HAMMER2_IO_DEBUG_MASK;

	dio->debug_file[i] = file;
	dio->debug_line[i] = line;
	dio->debug_refs[i] = dio->refs;
	dio->debug_td[i] = curthread;
}

#else

#define DIO_RECORD(dio)

#endif

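/*
 * Sketch of how the HAMMER2_IO_DEBUG_ARGS/CALL glue is assumed to work
 * (the real definitions live in hammer2.h, so the exact expansion below
 * is an assumption).  With HAMMER2_IO_DEBUG the _hammer2_io_* functions
 * grow trailing file/line parameters, which DIO_RECORD() above stores in
 * a small per-DIO ring buffer indexed by debug_index; without it both
 * macros expand to nothing.
 */
#if 0
#ifdef HAMMER2_IO_DEBUG
#define HAMMER2_IO_DEBUG_ARGS	, const char *file, int line
#define HAMMER2_IO_DEBUG_CALL	, file, line
#else
#define HAMMER2_IO_DEBUG_ARGS
#define HAMMER2_IO_DEBUG_CALL
#endif
#endif
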
/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned, indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 */
static __inline
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_key_t lbase;
	hammer2_key_t pbase;
	hammer2_key_t pmask;
	uint64_t refs;
	int lsize;
	int psize;

	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	if ((int)(data_off & HAMMER2_OFF_MASK_RADIX))
		lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	else
		lsize = 0;
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;

	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 *
	 * If DIO_GOOD is set, the ref prevents it from being cleared out
	 * from under us; we can set *isgoodp and the caller can operate
	 * on the buffer without any further interaction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		refs = atomic_fetchadd_64(&dio->refs, 1);
		if ((refs & HAMMER2_DIO_MASK) == 0) {
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
		hammer2_spin_unsh(&hmp->io_spin);
	} else if (createit) {
		refs = 0;
		hammer2_spin_unsh(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;
		dio->act = 5;
		hammer2_spin_ex(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			refs = atomic_fetchadd_64(&xio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			hammer2_spin_unex(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		return NULL;
	}
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}

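/*
 * Worked example of the offset math above, assuming the usual constants
 * from hammer2.h (HAMMER2_OFF_MASK_RADIX == 0x3f, HAMMER2_PBUFSIZE == 64KB):
 *
 *	data_off = 0x000000000123400e	(low 6 bits: radix 14 -> 16KB)
 *	lsize    = 1 << 14 = 0x4000
 *	lbase    = 0x0000000001234000	(data_off with the radix stripped)
 *	pbase    = 0x0000000001230000	(lbase aligned to the 64KB DIO)
 *
 * The KKASSERT verifies that the logical range [lbase, lbase+lsize) does
 * not straddle the 64KB physical buffer.
 */
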
/*
 * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
 * a buffer.  If it is set, the buffer already exists and is good to go.
 */
hammer2_io_t *
_hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
		   int lsize, int op HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;
	off_t peof;
	uint64_t orefs;
	uint64_t nrefs;
	int isgood;
	int error;
	int hce;
	int bflags;

	bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
	bflags |= B_KVABIO;

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

	if (op == HAMMER2_DOP_READQ) {
		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
		if (dio == NULL)
			return NULL;
		op = HAMMER2_DOP_READ;
	} else {
		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
	}

	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Buffer is already good, handle the op and return.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			if (isgood == 0)
				cpu_mfence();
			bkvasync(dio->bp);

			switch(op) {
			case HAMMER2_DOP_NEW:
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				/* nothing to do */
				break;
			}
			DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
			return (dio);
		}

		/*
		 * Try to own the DIO
		 */
		if (orefs & HAMMER2_DIO_INPROG) {
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			nrefs = orefs | HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				break;
			}
		}
	}

	/*
	 * We break to here if GOOD is not set and we acquired INPROG for
	 * the I/O.
	 */
	KKASSERT(dio->bp == NULL);
	if (btype == HAMMER2_BREF_TYPE_DATA)
		hce = hammer2_cluster_data_read;
	else
		hce = hammer2_cluster_meta_read;

	error = 0;
	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
	    dio->psize == lsize) {
		switch(op) {
		case HAMMER2_DOP_NEW:
		case HAMMER2_DOP_NEWNZ:
			dio->bp = getblk(dio->hmp->devvp,
					 dio->pbase, dio->psize,
					 GETBLK_KVABIO, 0);
			if (op == HAMMER2_DOP_NEW) {
				bkvasync(dio->bp);
				bzero(dio->bp->b_data, dio->psize);
			}
			atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
			break;
		case HAMMER2_DOP_READ:
		default:
			KKASSERT(dio->bp == NULL);
			if (hce > 0) {
				/*
				 * Synchronous cluster I/O for now.
				 */
				peof = (dio->pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				error = cluster_readx(dio->hmp->devvp,
						      peof, dio->pbase,
						      dio->psize, bflags,
						      dio->psize,
						      HAMMER2_PBUFSIZE*hce,
						      &dio->bp);
			} else {
				error = breadnx(dio->hmp->devvp, dio->pbase,
						dio->psize, bflags,
						NULL, NULL, 0, &dio->bp);
			}
		}
	} else {
		if (hce > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			error = cluster_readx(dio->hmp->devvp,
					      peof, dio->pbase, dio->psize,
					      bflags,
					      dio->psize, HAMMER2_PBUFSIZE*hce,
					      &dio->bp);
		} else {
			error = breadnx(dio->hmp->devvp, dio->pbase,
					dio->psize, bflags,
					NULL, NULL, 0, &dio->bp);
		}
		if (dio->bp) {
			/*
			 * Handle NEW flags
			 */
			switch(op) {
			case HAMMER2_DOP_NEW:
				bkvasync(dio->bp);
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				break;
			}

			/*
			 * Tell the kernel that the buffer cache is not
			 * meta-data based on the btype.  This allows
			 * swapcache to distinguish between data and
			 * meta-data.
			 */
			switch(btype) {
			case HAMMER2_BREF_TYPE_DATA:
				dio->bp->b_flags |= B_NOTMETA;
				break;
			default:
				break;
			}
		}
	}

	if (dio->bp) {
		bkvasync(dio->bp);
		BUF_KERNPROC(dio->bp);
		dio->bp->b_flags &= ~B_AGE;
		/* dio->bp->b_debug_info2 = dio; */
	}
	dio->error = error;

	/*
	 * Clear INPROG and WAITING, set GOOD, and wake up anyone waiting.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
		if (error == 0)
			nrefs |= HAMMER2_DIO_GOOD;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/* XXX error handling */
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	return dio;
}

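/*
 * Sketch of how dio->refs is interpreted by the loops above and below.
 * The flag names come from hammer2.h; the exact bit positions are an
 * assumption, only the split between a low-bit counter and high flag
 * bits matters here:
 *
 *	refs & HAMMER2_DIO_MASK		ref count (low bits)
 *	refs & HAMMER2_DIO_GOOD		dio->bp is instantiated and valid
 *	refs & HAMMER2_DIO_INPROG	a thread owns buffer instantiation
 *	refs & HAMMER2_DIO_WAITING	somebody is tsleep()ing on INPROG
 *	refs & HAMMER2_DIO_DIRTY	buffer must be written on last drop
 *	refs & HAMMER2_DIO_FLUSH	issue the write instead of bdwrite()
 *
 * All state transitions are made with atomic_cmpset_64() so the count
 * and the flags change together or not at all.
 */
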
/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 */
void
_hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	int psize;
	int dio_limit;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.  GOOD must be
			 * cleared to prevent the getblk shortcut.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented;
	 * note that a racing accessor will decrement iofree_count, so we
	 * have to increment it regardless.
	 *
	 * We can now dispose of the buffer, and should do it before calling
	 * io_complete() in case there's a race against a new reference
	 * which causes io_complete() to chain and instantiate the bp again.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * Allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm');
			 * by default we will burst-write later.
			 *
			 * We generally do NOT want to issue an actual
			 * b[a]write() or cluster_write() here.  Due to
			 * the way chains are locked, buffers may be cycled
			 * in and out quite often and disposal here can cause
			 * multiple writes or write-read stalls.
			 *
			 * If FLUSH is set we do want to issue the actual
			 * write.  This typically occurs in the write-behind
			 * case when writing to large files.
			 */
			off_t peof;
			int hce;
			if (dio->refs & HAMMER2_DIO_FLUSH) {
				if ((hce = hammer2_cluster_write) != 0) {
					peof = (pbase + HAMMER2_SEGMASK64) &
					       ~HAMMER2_SEGMASK64;
					bp->b_flags |= B_CLUSTEROK;
					cluster_write(bp, peof, psize, hce);
				} else {
					bp->b_flags &= ~B_CLUSTEROK;
					bawrite(bp);
				}
			} else {
				bp->b_flags &= ~B_CLUSTEROK;
				bdwrite(bp);
			}
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 */
	hmp = dio->hmp;
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
	 *
	 * Also clear FLUSH as it was handled above.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING | HAMMER2_DIO_FLUSH);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	dio_limit = hammer2_dio_limit;
	if (dio_limit < 256)
		dio_limit = 256;
	if (dio_limit > 1024*1024)
		dio_limit = 1024*1024;
	if (hmp->iofree_count > dio_limit) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		if (hmp->iofree_count > dio_limit) {
			info.count = hmp->iofree_count / 5;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}

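/*
 * Example of the cleanup trigger above: if hammer2_dio_limit were 0
 * (untuned), dio_limit is clamped into [256, 1024*1024], so the scan
 * fires once more than 256 idle DIOs accumulate and moves roughly 1/5
 * of them out for destruction (info.count = iofree_count / 5).
 */
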
/*
 * Clean up any DIOs with (INPROG | refs) == 0.
 *
 * Called to clean up cached DIOs on umount after all activity has been
 * flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
	struct hammer2_cleanupcb_info *info = arg;
	hammer2_io_t *xio;

	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
		if (dio->act > 0) {
			int act;

			act = dio->act - (ticks - dio->ticks) / hz - 1;
			if (act > 0) {
				dio->act = act;
				return 0;
			}
			dio->act = 0;
		}
		KKASSERT(dio->bp == NULL);
		if (info->count > 0) {
			RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
			xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
			KKASSERT(xio == NULL);
			--info->count;
		}
	}
	return 0;
}

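/*
 * Worked example of the activity decay above, with hz = 100: a DIO
 * with act = 5 last touched 2 seconds ago (ticks - dio->ticks = 200)
 * decays to act = 5 - 200/100 - 1 = 2 and survives this scan.  Only
 * DIOs whose act reaches 0 are moved to the temporary tree, and then
 * only while the info->count quota remains.
 */
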
void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
	hammer2_io_t *dio;

	while ((dio = RB_ROOT(tree)) != NULL) {
		RB_REMOVE(hammer2_io_tree, tree, dio);
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
		if (dio->refs & HAMMER2_DIO_DIRTY) {
			kprintf("hammer2_io_cleanup: Dirty buffer "
				"%016jx/%d (bp=%p)\n",
				dio->pbase, dio->psize, dio->bp);
		}
		kfree(dio, M_HAMMER2);
		atomic_add_int(&hammer2_dio_count, -1);
		atomic_add_int(&hmp->iofree_count, -1);
	}
}

/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
	bkvasync(bp);
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return(bp->b_data + off);
}

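/*
 * Example, continuing the numbers from the hammer2_io_alloc() comment:
 * for a DIO with pbase = 0x1230000 (so bp->b_loffset == 0x1230000) and
 * lbase = 0x123400e, the radix bits are masked off and
 * off = 0x1234000 - 0x1230000 = 0x4000, i.e. the returned pointer is
 * 16KB into the 64KB buffer.
 */
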
int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
	return ((*diop)->error);
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
	return ((*diop)->error);
}

int
_hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
#ifdef HAMMER2_IO_DEBUG
	hammer2_io_t *dio;
#endif

	*diop = _hammer2_io_getblk(hmp, btype, lbase, lsize,
				   HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL);
#ifdef HAMMER2_IO_DEBUG
	if ((dio = *diop) != NULL) {
		int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK;
		dio->debug_data[i] = debug_data;
	}
#endif
	return ((*diop)->error);
}

hammer2_io_t *
_hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase,
		     int lsize HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;

	dio = _hammer2_io_getblk(hmp, 0, lbase, lsize,
				 HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL);
	return dio;
}

void
_hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
				      HAMMER2_DIO_FLUSH);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

void
_hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

int
_hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
				      HAMMER2_DIO_FLUSH);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
	return (0);	/* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}

/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP */
}

void
_hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

void
_hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

/*
 * Set dedup validation bits in a DIO.  We do not need the buffer cache
 * buffer for this.  This must be done concurrently with setting bits in
 * the freemap so as to interlock with bulkfree's clearing of those bits.
 */
void
hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int lsize;
	int isgood;

	dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
	if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
		lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	else
		lsize = 0;
	mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
	atomic_clear_64(&dio->dedup_valid, mask);
	atomic_set_64(&dio->dedup_alloc, mask);
	hammer2_io_putblk(&dio);
}

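/*
 * Sketch of the dedup mask semantics assumed above (hammer2_dedup_mask()
 * itself is defined in hammer2.h, so this is an assumption): like the
 * disabled hammer2_io_mask() earlier in this file, it returns a 64-bit
 * mask with one bit per 1KB sub-chunk of the 64KB DIO.  E.g. a 16KB
 * block at offset 0x4000 within its DIO would cover bits 16..31:
 *
 *	mask = (((uint64_t)1 << (16384 >> 10)) - 1) << (0x4000 >> 10)
 *	     = 0x00000000ffff0000
 */
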
/*
 * Clear dedup validation bits in a DIO.  This is typically done when
 * a modified chain is destroyed or by the bulkfree code.  No buffer
 * is needed for this operation.  If the DIO no longer exists, it is
 * equivalent to the bits not being set.
 */
void
hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
			hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int isgood;

	if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;
	if (btype != HAMMER2_BREF_TYPE_DATA)
		return;
	dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
	if (dio) {
		if (data_off < dio->pbase ||
		    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
		    dio->pbase + dio->psize) {
			panic("hammer2_io_dedup_delete: DATAOFF BAD "
			      "%016jx/%d %016jx\n",
			      data_off, bytes, dio->pbase);
		}
		mask = hammer2_dedup_mask(dio, data_off, bytes);
		atomic_clear_64(&dio->dedup_alloc, mask);
		atomic_clear_64(&dio->dedup_valid, mask);
		hammer2_io_putblk(&dio);
	}
}

/*
 * Assert that dedup allocation bits in a DIO are not set.  This operation
 * does not require a buffer.  The DIO does not need to exist.
 */
void
hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	int isgood;

	dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
			       0, &isgood);
	if (dio) {
		KASSERT((dio->dedup_alloc &
			  hammer2_dedup_mask(dio, data_off, bytes)) == 0,
			("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
			data_off,
			bytes,
			hammer2_dedup_mask(dio, data_off, bytes),
			dio->dedup_alloc));
		hammer2_io_putblk(&dio);
	}
}

static
void
dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
{
	if (bp->b_flags & B_DELWRI)
		return;
	hammer2_adjwritecounter(dio->btype, dio->psize);
}

void
hammer2_io_bkvasync(hammer2_io_t *dio)
{
	KKASSERT(dio->bp != NULL);
	bkvasync(dio->bp);
}

/*
 * Ref a dio that is already owned.
 */
void
_hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
	atomic_add_64(&dio->refs, 1);
}
884