/*
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
 * Copyright (c) 2011-2022 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

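/*
 * Device I/O op codes for _hammer2_io_getblk().  Summarizing the handling
 * below: DOP_READ reads the underlying buffer if it is not already good,
 * DOP_NEW instantiates the buffer and zeroes it, DOP_NEWNZ instantiates
 * the buffer without zeroing it, and DOP_READQ only returns an already
 * existing DIO (it never creates one) and is then treated as DOP_READ.
 */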
#define HAMMER2_DOP_READ	1
#define HAMMER2_DOP_NEW		2
#define HAMMER2_DOP_NEWNZ	3
#define HAMMER2_DOP_READQ	4

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  It can serve as an OS abstraction, but its main
 * purpose is to allow larger buffers to back the smaller allocations
 * used by hammer2_chain's, without causing deadlocks.
 *
 * The DIOs also record temporary state with limited persistence.  This
 * feature is used to keep track of dedupable blocks.
 */
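/*
 * Typical usage, as a minimal sketch (error handling elided; this assumes
 * the non-underscore hammer2_io_bread()/hammer2_io_bqrelse() wrappers
 * declared in hammer2.h, following the same convention as the
 * hammer2_io_getblk()/hammer2_io_putblk() wrappers used below):
 *
 *	hammer2_io_t *dio;
 *	char *data;
 *
 *	if (hammer2_io_bread(hmp, btype, data_off, lsize, &dio) == 0) {
 *		data = hammer2_io_data(dio, data_off);
 *		...read or modify lsize bytes at data...
 *	}
 *	hammer2_io_bqrelse(&dio);
 */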
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
	if (io1->pbase < io2->pbase)
		return(-1);
	if (io1->pbase > io2->pbase)
		return(1);
	return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);

struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;
};

#if 0
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
#endif
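/*
 * Worked example for the disabled mask calculation above: a 4KB chunk
 * (bytes = 4096) occupies bytes >> 10 = 4 bits, so mask starts as 0x0f;
 * if the chunk begins 8KB into the 64KB physical buffer then i = 8 and
 * the final mask is 0x0f00, i.e. one bit per 1KB sub-block.
 */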

#ifdef HAMMER2_IO_DEBUG

static __inline void
DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	int i;

	i = atomic_fetchadd_int(&dio->debug_index, 1) & HAMMER2_IO_DEBUG_MASK;

	dio->debug_file[i] = file;
	dio->debug_line[i] = line;
	dio->debug_refs[i] = dio->refs;
	dio->debug_td[i] = curthread;
}

#else

#define DIO_RECORD(dio)

#endif

/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 */
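/*
 * An illustrative example of the data_off encoding handled below: the low
 * bits (HAMMER2_OFF_MASK_RADIX) hold the size radix and the remaining bits
 * hold the byte offset, so data_off = 0x10000 | 13 describes a
 * 1 << 13 = 8KB logical buffer at offset 64KB, backed by the 64KB
 * (HAMMER2_PBUFSIZE) physical buffer based at pbase = 0x10000.
 */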
static
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_key_t lbase;
	hammer2_key_t pbase;
	hammer2_key_t pmask;
	hammer2_vfsvolume_t *vol;
	uint64_t refs;
	int lsize;
	int psize;

	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	if ((int)(data_off & HAMMER2_OFF_MASK_RADIX))
		lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	else
		lsize = 0;
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;

	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 *
	 * If DIO_GOOD is set, the ref should prevent the buffer from being
	 * cleared out from under us; we can set *isgoodp and the caller can
	 * operate on the buffer without any further interaction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		refs = atomic_fetchadd_64(&dio->refs, 1);
		if ((refs & HAMMER2_DIO_MASK) == 0) {
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
		hammer2_spin_unsh(&hmp->io_spin);
	} else if (createit) {
		refs = 0;
		hammer2_spin_unsh(&hmp->io_spin);
		vol = hammer2_get_volume(hmp, pbase);
		dio = kmalloc_obj(sizeof(*dio), hmp->mio, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->devvp = vol->dev->devvp;
		dio->dbase = vol->offset;
		KKASSERT((dio->dbase & HAMMER2_FREEMAP_LEVEL1_MASK) == 0);
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;
		dio->act = 5;
		hammer2_spin_ex(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			refs = atomic_fetchadd_64(&xio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			hammer2_spin_unex(&hmp->io_spin);
			kfree_obj(dio, hmp->mio);
			dio = xio;
		}
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		return NULL;
	}
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}

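/*
 * Summary of the dio->refs word as used throughout this file: the low
 * bits (HAMMER2_DIO_MASK) hold the reference count, while DIO_GOOD marks
 * a valid attached buffer, DIO_INPROG marks exclusive buffer
 * instantiation/disposal in progress, DIO_WAITING marks sleepers to be
 * woken when INPROG clears, and DIO_DIRTY/DIO_FLUSH control write
 * disposal in _hammer2_io_putblk().
 */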
/*
 * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
 * a buffer.  If set, the buffer already exists and is good to go.
 */
hammer2_io_t *
_hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
		   int lsize, int op HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;
	hammer2_off_t dev_pbase;
	//off_t peof;
	uint64_t orefs;
	uint64_t nrefs;
	int isgood;
	int error;
	int hce;
	//int bflags;

	//bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
	//bflags |= B_KVABIO;

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

	if (op == HAMMER2_DOP_READQ) {
		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
		if (dio == NULL)
			return NULL;
		op = HAMMER2_DOP_READ;
	} else {
		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
	}

	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Buffer is already good, handle the op and return.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			if (isgood == 0)
				cpu_mfence();
			bkvasync(dio->bp);

			switch(op) {
			case HAMMER2_DOP_NEW:
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				/* nothing to do */
				break;
			}
			DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
			return (dio);
		}

		/*
		 * Try to own the DIO
		 */
		if (orefs & HAMMER2_DIO_INPROG) {
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			nrefs = orefs | HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				break;
			}
		}
	}

	/*
	 * We break to here if GOOD is not set and we acquired INPROG for
	 * the I/O.
	 */
	KKASSERT(dio->bp == NULL);
	if (btype == HAMMER2_BREF_TYPE_DATA)
		hce = hammer2_cluster_data_read;
	else
		hce = hammer2_cluster_meta_read;

	error = 0;
	dev_pbase = dio->pbase - dio->dbase;
	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
	    dio->psize == lsize) {
		switch(op) {
		case HAMMER2_DOP_NEW:
		case HAMMER2_DOP_NEWNZ:
			dio->bp = getblkx(dio->devvp,
					 dev_pbase, dio->psize,
					 GETBLK_KVABIO, 0);
			if (op == HAMMER2_DOP_NEW) {
				bkvasync(dio->bp);
				bzero(dio->bp->b_data, dio->psize);
			}
			atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
			break;
		case HAMMER2_DOP_READ:
		default:
			KKASSERT(dio->bp == NULL);
#if 0
			if (hce > 0) {
				/*
				 * Synchronous cluster I/O for now.
				 */
				peof = (dio->pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				peof -= dio->dbase;
				error = cluster_readx(dio->devvp,
						     peof, dev_pbase,
						     dio->psize, bflags,
						     dio->psize,
						     HAMMER2_PBUFSIZE*hce,
						     &dio->bp);
			} else {
				error = breadnx(dio->devvp, dev_pbase,
						dio->psize, bflags,
					        NULL, NULL, 0, &dio->bp);
			}
#else
			error = breadx(dio->devvp, dev_pbase, dio->psize, &dio->bp);
#endif
			break;
		}
	} else {
#if 0
		if (hce > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			peof -= dio->dbase;
			error = cluster_readx(dio->devvp,
					      peof, dev_pbase, dio->psize,
					      bflags,
					      dio->psize, HAMMER2_PBUFSIZE*hce,
					      &dio->bp);
		} else {
			error = breadnx(dio->devvp, dev_pbase,
				        dio->psize, bflags,
					NULL, NULL, 0, &dio->bp);
		}
#else
		error = breadx(dio->devvp, dev_pbase, dio->psize, &dio->bp);
#endif
		if (dio->bp) {
			/*
			 * Handle NEW flags
			 */
			switch(op) {
			case HAMMER2_DOP_NEW:
				bkvasync(dio->bp);
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				break;
			}

			/*
			 * Based on the btype, tell the kernel whether the
			 * buffer holds meta-data.  This allows swapcache
			 * to distinguish between data and meta-data.
			 */
			switch(btype) {
			case HAMMER2_BREF_TYPE_DATA:
				//dio->bp->b_flags |= B_NOTMETA;
				break;
			default:
				break;
			}
		}
	}

	if (dio->bp) {
		bkvasync(dio->bp);
		BUF_KERNPROC(dio->bp);
		//dio->bp->b_flags &= ~B_AGE;
		/* dio->bp->b_debug_info2 = dio; */
	}
	dio->error = error;

	/*
	 * Clear INPROG and WAITING, set GOOD on success, and wake up
	 * anyone waiting.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
		if (error == 0)
			nrefs |= HAMMER2_DIO_GOOD;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/* XXX error handling */
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	return dio;
}

/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 */
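/*
 * The ref-drop loop below distinguishes three cases: a normal drop
 * (count > 1) simply decrements and returns; a lastdrop with INPROG
 * already set waits for the INPROG owner and retries; and a lastdrop
 * with INPROG clear claims INPROG itself, clears GOOD and DIRTY, and
 * falls through to dispose of the buffer.
 */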
void
_hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	int psize;
	int dio_limit;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.  GOOD must be
			 * cleared to prevent the getblk shortcut.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented;
	 * note that a racing accessor will decrement iofree_count, so we
	 * have to increment it regardless.
	 *
	 * We can now dispose of the buffer, and should do it before calling
	 * io_complete() in case there's a race against a new reference
	 * which causes io_complete() to chain and instantiate the bp again.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * Allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm');
			 * by default we will burst-write later.
			 *
			 * We generally do NOT want to issue an actual
			 * b[a]write() or cluster_write() here.  Due to
			 * the way chains are locked, buffers may be cycled
			 * in and out quite often and disposal here can cause
			 * multiple writes or write-read stalls.
			 *
			 * If FLUSH is set we do want to issue the actual
			 * write.  This typically occurs in the write-behind
			 * case when writing to large files.
			 */
			//off_t peof;
			//int hce;
			if (dio->refs & HAMMER2_DIO_FLUSH) {
#if 0
				if ((hce = hammer2_cluster_write) != 0) {
					peof = (pbase + HAMMER2_SEGMASK64) &
					       ~HAMMER2_SEGMASK64;
					peof -= dio->dbase;
					bp->b_flags |= B_CLUSTEROK;
					cluster_write(bp, peof, psize, hce);
				} else {
					bp->b_flags &= ~B_CLUSTEROK;
					bawrite(bp);
				}
#else
				bawrite(bp);
#endif
			} else {
				//bp->b_flags &= ~B_CLUSTEROK;
				bdwrite(bp);
			}
#if 0
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
#endif
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 */
	hmp = dio->hmp;
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
	 *
	 * Also clear FLUSH as it was handled above.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING | HAMMER2_DIO_FLUSH);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	dio_limit = hammer2_dio_limit;
	if (dio_limit < 256)
		dio_limit = 256;
	if (dio_limit > 1024*1024)
		dio_limit = 1024*1024;
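	/*
	 * Worked example: with hammer2_dio_limit tuned to, say, 100, the
	 * clamp above raises the effective limit to 256; if iofree_count
	 * then reaches 300, the scan below reclaims roughly
	 * 300 / 5 = 60 cached DIOs.
	 */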
	if (hmp->iofree_count > dio_limit) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		if (hmp->iofree_count > dio_limit) {
			info.count = hmp->iofree_count / 5;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}

/*
 * Clean up any DIOs with (INPROG | refs) == 0.
 *
 * Called via RB_SCAN from _hammer2_io_putblk() when too many free DIOs
 * accumulate, and used to clean up cached DIOs on umount after all
 * activity has been flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
	struct hammer2_cleanupcb_info *info = arg;
	hammer2_io_t *xio;

	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
		/*
		if (dio->act > 0) {
			int act;

			act = dio->act - (ticks - dio->ticks) / hz - 1;
			if (act > 0) {
				dio->act = act;
				return 0;
			}
			dio->act = 0;
		}
		*/
		KKASSERT(dio->bp == NULL);
		if (info->count > 0) {
			RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
			xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
			KKASSERT(xio == NULL);
			--info->count;
		}
	}
	return 0;
}

void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
	hammer2_io_t *dio;

	while ((dio = RB_ROOT(tree)) != NULL) {
		RB_REMOVE(hammer2_io_tree, tree, dio);
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
		if (dio->refs & HAMMER2_DIO_DIRTY) {
			kprintf("hammer2_io_cleanup: Dirty buffer "
				"%016jx/%d (bp=%p)\n",
				dio->pbase, dio->psize, dio->bp);
		}
		kfree_obj(dio, hmp->mio);
		atomic_add_int(&hammer2_dio_count, -1);
		atomic_add_int(&hmp->iofree_count, -1);
	}
}

/*
 * Returns a pointer to the requested data.
 */
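/*
 * For illustration: with dbase = 0 and the 64KB buffer loaded at
 * b_loffset = 0x10000, a request for lbase = 0x12000 (plus radix bits,
 * which are masked off below) yields off = 0x12000 - 0x10000 = 0x2000,
 * i.e. 8KB into bp->b_data.
 */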
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
	bkvasync(bp);
	lbase -= dio->dbase;
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return(bp->b_data + off);
}

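/*
 * hammer2_io_new() zeroes the logical buffer (DOP_NEW) while
 * hammer2_io_newnz() leaves it as-is (DOP_NEWNZ), presumably for callers
 * that will overwrite the entire buffer anyway; both mark the DIO dirty.
 */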
int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
	return ((*diop)->error);
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
	return ((*diop)->error);
}

int
_hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
#ifdef HAMMER2_IO_DEBUG
	hammer2_io_t *dio;
#endif

	*diop = _hammer2_io_getblk(hmp, btype, lbase, lsize,
				   HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL);
#ifdef HAMMER2_IO_DEBUG
	if ((dio = *diop) != NULL) {
#if 0
		int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK;
		dio->debug_data[i] = debug_data;
#endif
	}
#endif
	return ((*diop)->error);
}

hammer2_io_t *
_hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase,
		     int lsize HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;

	dio = _hammer2_io_getblk(hmp, 0, lbase, lsize,
				 HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL);
	return dio;
}

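/*
 * The release wrappers below do no I/O themselves; they only set
 * DIO_DIRTY (and DIO_FLUSH for the bawrite/bwrite variants) before
 * handing the DIO to _hammer2_io_putblk(), which performs the actual
 * bawrite()/bdwrite()/bqrelse() disposal on the final ref drop.
 */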
void
_hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
				      HAMMER2_DIO_FLUSH);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

void
_hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

int
_hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
				      HAMMER2_DIO_FLUSH);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
	return (0);	/* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}

/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP */
}

void
_hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

void
_hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

/*
 * Set dedup validation bits in a DIO.  We do not need the buffer cache
 * buffer for this.  This must be done concurrently with setting bits in
 * the freemap so as to interlock with bulkfree's clearing of those bits.
 */
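/*
 * Assuming hammer2_dedup_mask() follows the same one-bit-per-1KB layout
 * as the disabled hammer2_io_mask() above, a 16KB block starting 32KB
 * into the 64KB DIO would set bits 32..47, i.e. mask =
 * 0x0000ffff00000000.
 */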
void
hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int lsize;
	int isgood;

	dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
	if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
		lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	else
		lsize = 0;
	mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
	atomic_clear_64(&dio->dedup_valid, mask);
	atomic_set_64(&dio->dedup_alloc, mask);
	hammer2_io_putblk(&dio);
}

/*
 * Clear dedup validation bits in a DIO.  This is typically done when
 * a modified chain is destroyed or by the bulkfree code.  No buffer
 * is needed for this operation.  If the DIO no longer exists it is
 * equivalent to the bits not being set.
 */
void
hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
			hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int isgood;

	if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;
	if (btype != HAMMER2_BREF_TYPE_DATA)
		return;
	dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
	if (dio) {
		if (data_off < dio->pbase ||
		    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
		    dio->pbase + dio->psize) {
			panic("hammer2_io_dedup_delete: DATAOFF BAD "
			      "%016jx/%d %016jx\n",
			      data_off, bytes, dio->pbase);
		}
		mask = hammer2_dedup_mask(dio, data_off, bytes);
		atomic_clear_64(&dio->dedup_alloc, mask);
		atomic_clear_64(&dio->dedup_valid, mask);
		hammer2_io_putblk(&dio);
	}
}

/*
 * Assert that dedup allocation bits in a DIO are not set.  This operation
 * does not require a buffer.  The DIO does not need to exist.
 */
void
hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	int isgood;

	dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
			       0, &isgood);
	if (dio) {
		KASSERT((dio->dedup_alloc &
			  hammer2_dedup_mask(dio, data_off, bytes)) == 0,
			("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
			data_off,
			bytes,
			hammer2_dedup_mask(dio, data_off, bytes),
			dio->dedup_alloc));
		hammer2_io_putblk(&dio);
	}
}

static
void
dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
{
	/*
	if (bp->b_flags & B_DELWRI)
		return;
	*/
	hammer2_adjwritecounter(dio->btype, dio->psize);
}

void
hammer2_io_bkvasync(hammer2_io_t *dio)
{
	KKASSERT(dio->bp != NULL);
	bkvasync(dio->bp);
}

/*
 * Ref a dio that is already owned
 */
void
_hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
	atomic_add_64(&dio->refs, 1);
}
919