xref: /dragonfly/sys/vfs/hammer2/hammer2_io.c (revision 7d84b73d)
/*
 * Copyright (c) 2013-2023 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

#define HAMMER2_DOP_READ	1
#define HAMMER2_DOP_NEW		2
#define HAMMER2_DOP_NEWNZ	3
#define HAMMER2_DOP_READQ	4

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used as an OS-abstraction, but the main
 * purpose is to allow larger device buffers to back hammer2_chains that
 * use smaller allocations, without causing deadlocks.
 *
 * The DIOs also record temporary state with limited persistence.  This
 * feature is used to keep track of dedupable blocks.
 */
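
/*
 * Illustrative usage sketch (not part of this file; the wrapper names
 * hammer2_io_bread, hammer2_io_data and hammer2_io_bqrelse are assumed
 * to be supplied by hammer2.h).  A typical consumer reads a logical
 * block through the DIO layer, maps a pointer into the underlying 64KB
 * device buffer, and drops its ref when done:
 */
#if 0
static int
example_dio_read(hammer2_dev_t *hmp, hammer2_off_t data_off, int bytes)
{
	hammer2_io_t *dio;
	char *data;
	int error;

	/* data_off encodes the block size radix in its low bits */
	error = hammer2_io_bread(hmp, HAMMER2_BREF_TYPE_DATA,
				 data_off, bytes, &dio);
	if (error == 0) {
		data = hammer2_io_data(dio, data_off);
		/* ... consume data[0..bytes-1] ... */
	}
	hammer2_io_bqrelse(&dio);	/* drop ref, cache the buffer */
	return error;
}
#endif
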
static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);

static hammer2_io_t *hammer2_io_hash_lookup(hammer2_dev_t *hmp,
			hammer2_off_t pbase, uint64_t *refsp);
static hammer2_io_t *hammer2_io_hash_enter(hammer2_dev_t *hmp,
			hammer2_io_t *dio, uint64_t *refsp);
static void hammer2_io_hash_cleanup(hammer2_dev_t *hmp, int dio_limit);

void
hammer2_io_hash_init(hammer2_dev_t *hmp)
{
	hammer2_io_hash_t *hash;
	int i;

	for (i = 0; i < HAMMER2_IOHASH_SIZE; ++i) {
		hash = &hmp->iohash[i];
		hammer2_spin_init(&hash->spin, "h2iohash");
	}
}

#ifdef HAMMER2_IO_DEBUG

static __inline void
DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	int i;

	i = atomic_fetchadd_int(&dio->debug_index, 1) & HAMMER2_IO_DEBUG_MASK;

	dio->debug_file[i] = file;
	dio->debug_line[i] = line;
	dio->debug_refs[i] = dio->refs;
	dio->debug_td[i] = curthread;
}

#else

#define DIO_RECORD(dio)

#endif

/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 */
static __inline
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_off_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_off_t lbase;
	hammer2_off_t pbase;
	hammer2_off_t pmask;
	hammer2_volume_t *vol;
	uint64_t refs;
	int lsize;
	int psize;

	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	if ((int)(data_off & HAMMER2_OFF_MASK_RADIX))
		lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	else
		lsize = 0;
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;

	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 *
	 * If DIO_GOOD is set the ref should prevent it from being cleared
	 * out from under us, we can set *isgoodp, and the caller can operate
	 * on the buffer without any further interaction.
	 */
	dio = hammer2_io_hash_lookup(hmp, pbase, &refs);
	if (dio) {
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
	} else if (createit) {
		refs = 0;
		vol = hammer2_get_volume(hmp, pbase);
		dio = kmalloc_obj(sizeof(*dio), hmp->mio, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->devvp = vol->dev->devvp;
		dio->dbase = vol->offset;
		KKASSERT((dio->dbase & HAMMER2_FREEMAP_LEVEL1_MASK) == 0);
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;
		dio->act = 5;
		xio = hammer2_io_hash_enter(hmp, dio, &refs);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
		} else {
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			kfree_obj(dio, hmp->mio);
			dio = xio;
		}
	} else {
		return NULL;
	}
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}
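
/*
 * Worked example of the (data|radix) encoding decoded above; a sketch
 * assuming HAMMER2_OFF_MASK_RADIX covers the low 6 bits and
 * HAMMER2_PBUFSIZE is 64KB.  A 16KB block at media offset 0x10004000
 * arrives as data_off = 0x10004000 | 14 (1 << 14 == 16384):
 */
#if 0
	data_off = 0x10004000 | 14;
	lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
						/* 16384 */
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
						/* 0x10004000 */
	pbase = lbase & ~(hammer2_off_t)(HAMMER2_PBUFSIZE - 1);
						/* 0x10000000: the 64KB DIO */
#endif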

/*
 * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
 * a buffer.  If set the buffer already exists and is good to go.
 */
hammer2_io_t *
_hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
		   int lsize, int op HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;
	hammer2_off_t dev_pbase;
	off_t peof;
	uint64_t orefs;
	uint64_t nrefs;
	int isgood;
	int error;
	int hce;
	int bflags;

	bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
	bflags |= B_KVABIO;

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

	if (op == HAMMER2_DOP_READQ) {
		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
		if (dio == NULL)
			return NULL;
		op = HAMMER2_DOP_READ;
	} else {
		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
	}

	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Buffer is already good, handle the op and return.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			if (isgood == 0)
				cpu_mfence();
			bkvasync(dio->bp);

			switch(op) {
			case HAMMER2_DOP_NEW:
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				/* nothing to do */
				break;
			}
			DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
			return (dio);
		}

		/*
		 * Try to own the DIO
		 */
		if (orefs & HAMMER2_DIO_INPROG) {
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			nrefs = orefs | HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				break;
			}
		}
	}

	/*
	 * We break to here if GOOD is not set and we acquired INPROG for
	 * the I/O.
	 */
	KKASSERT(dio->bp == NULL);
	if (btype == HAMMER2_BREF_TYPE_DATA)
		hce = hammer2_cluster_data_read;
	else
		hce = hammer2_cluster_meta_read;

	error = 0;
	dev_pbase = dio->pbase - dio->dbase;
	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
	    dio->psize == lsize) {
		switch(op) {
		case HAMMER2_DOP_NEW:
		case HAMMER2_DOP_NEWNZ:
			dio->bp = getblk(dio->devvp,
					 dev_pbase, dio->psize,
					 GETBLK_KVABIO, 0);
			if (op == HAMMER2_DOP_NEW) {
				bkvasync(dio->bp);
				bzero(dio->bp->b_data, dio->psize);
			}
			atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
			break;
		case HAMMER2_DOP_READ:
		default:
			KKASSERT(dio->bp == NULL);
			if (hce > 0) {
				/*
				 * Synchronous cluster I/O for now.
				 */
				peof = (dio->pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				peof -= dio->dbase;
				error = cluster_readx(dio->devvp,
						     peof, dev_pbase,
						     dio->psize, bflags,
						     dio->psize,
						     HAMMER2_PBUFSIZE*hce,
						     &dio->bp);
			} else {
				error = breadnx(dio->devvp, dev_pbase,
						dio->psize, bflags,
					        NULL, NULL, 0, &dio->bp);
			}
			break;
		}
	} else {
		if (hce > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			peof -= dio->dbase;
			error = cluster_readx(dio->devvp,
					      peof, dev_pbase, dio->psize,
					      bflags,
					      dio->psize, HAMMER2_PBUFSIZE*hce,
					      &dio->bp);
		} else {
			error = breadnx(dio->devvp, dev_pbase,
				        dio->psize, bflags,
					NULL, NULL, 0, &dio->bp);
		}
		if (dio->bp) {
			/*
			 * Handle NEW flags
			 */
			switch(op) {
			case HAMMER2_DOP_NEW:
				bkvasync(dio->bp);
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				break;
			}

			/*
			 * Tell the kernel that the buffer cache is not
			 * meta-data based on the btype.  This allows
			 * swapcache to distinguish between data and
			 * meta-data.
			 */
			switch(btype) {
			case HAMMER2_BREF_TYPE_DATA:
				dio->bp->b_flags |= B_NOTMETA;
				break;
			default:
				break;
			}
		}
	}

	if (dio->bp) {
		bkvasync(dio->bp);
		BUF_KERNPROC(dio->bp);
		dio->bp->b_flags &= ~B_AGE;
		/* dio->bp->b_debug_info2 = dio; */
	}
	dio->error = error;

	/*
	 * Clear INPROG and WAITING, set GOOD, and wake up anyone waiting.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
		if (error == 0)
			nrefs |= HAMMER2_DIO_GOOD;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/* XXX error handling */
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	return dio;
}
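
/*
 * Illustrative write-path sketch (not part of this file; the
 * hammer2_io_new and hammer2_io_bdwrite wrappers are assumed to be
 * supplied by hammer2.h).  HAMMER2_DOP_NEW instantiates the buffer
 * without a device read, zeroes the block, and sets DIO_DIRTY:
 */
#if 0
static int
example_dio_write(hammer2_dev_t *hmp, hammer2_off_t data_off, int bytes,
		  const void *src)
{
	hammer2_io_t *dio;
	int error;

	error = hammer2_io_new(hmp, HAMMER2_BREF_TYPE_DATA,
			       data_off, bytes, &dio);
	if (error == 0)
		bcopy(src, hammer2_io_data(dio, data_off), bytes);
	hammer2_io_bdwrite(&dio);	/* mark dirty, drop our ref */
	return error;
}
#endif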

/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 */
void
_hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	int psize;
	int dio_limit;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG is not set and we can set
			 * it ourselves.  GOOD must be cleared to prevent
			 * the getblk shortcut.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented;
	 * note that a racing accessor picking the DIO back up will
	 * decrement iofree_count, so we have to increment it regardless.
	 *
	 * We can now dispose of the buffer.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * Allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm'),
			 * by default we will burst-write later.
			 *
			 * We generally do NOT want to issue an actual
			 * b[a]write() or cluster_write() here.  Due to
			 * the way chains are locked, buffers may be cycled
			 * in and out quite often and disposal here can cause
			 * multiple writes or write-read stalls.
			 *
			 * If FLUSH is set we do want to issue the actual
			 * write.  This typically occurs in the write-behind
			 * case when writing to large files.
			 */
			off_t peof;
			int hce;
			if (dio->refs & HAMMER2_DIO_FLUSH) {
				if ((hce = hammer2_cluster_write) != 0) {
					peof = (pbase + HAMMER2_SEGMASK64) &
					       ~HAMMER2_SEGMASK64;
					peof -= dio->dbase;
					bp->b_flags |= B_CLUSTEROK;
					cluster_write(bp, peof, psize, hce);
				} else {
					bp->b_flags &= ~B_CLUSTEROK;
					bawrite(bp);
				}
			} else {
				bp->b_flags &= ~B_CLUSTEROK;
				bdwrite(bp);
			}
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 */
	hmp = dio->hmp;
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
	 *
	 * Also clear FLUSH as it was handled above.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING | HAMMER2_DIO_FLUSH);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free DIOs so re-use cases can look them up with a
	 * shared lock, but if too many build up we have to clean them out.
	 */
	dio_limit = hammer2_dio_limit;
	if (dio_limit < 256)
		dio_limit = 256;
	if (dio_limit > 1024*1024)
		dio_limit = 1024*1024;
	if (hmp->iofree_count > dio_limit)
		hammer2_io_hash_cleanup(hmp, dio_limit);
}
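
/*
 * Illustrative sketch of how callers choose a disposal flavor (the
 * wrapper names below are assumed to be supplied by hammer2.h).  Both
 * set DIO_DIRTY and then come through _hammer2_io_putblk() above; only
 * the FLUSH variant forces the actual write at lastdrop:
 */
#if 0
	/* typical: let dirty buffers accumulate for a later burst write */
	hammer2_io_bdwrite(&dio);

	/* or, write-behind on large files: DIO_FLUSH forces the write */
	hammer2_io_bawrite(&dio);
#endif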

/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
	bkvasync(bp);
	lbase -= dio->dbase;
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return(bp->b_data + off);
}
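
/*
 * Worked example, assuming a single volume (dio->dbase == 0): with the
 * 64KB buffer at bp->b_loffset 0x10000 and a caller lbase of
 * 0x14000 | 12 (a 4KB block), the radix bits are masked off and
 * off = 0x14000 - 0x10000 = 0x4000, so the returned pointer is
 * bp->b_data + 0x4000.
 */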

int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
	return ((*diop)->error);
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
	return ((*diop)->error);
}

int
_hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
#ifdef HAMMER2_IO_DEBUG
	hammer2_io_t *dio;
#endif

	*diop = _hammer2_io_getblk(hmp, btype, lbase, lsize,
				   HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL);
#ifdef HAMMER2_IO_DEBUG
	if ((dio = *diop) != NULL) {
#if 0
		int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK;
		dio->debug_data[i] = debug_data;
#endif
	}
#endif
	return ((*diop)->error);
}

hammer2_io_t *
_hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase,
		     int lsize HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;

	dio = _hammer2_io_getblk(hmp, 0, lbase, lsize,
				 HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL);
	return dio;
}

void
_hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
				      HAMMER2_DIO_FLUSH);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

void
_hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

int
_hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
				      HAMMER2_DIO_FLUSH);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
	return (0);	/* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}

/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP */
}

void
_hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

void
_hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

/*
 * Set dedup validation bits in a DIO.  We do not need the buffer cache
 * buffer for this.  This must be done concurrently with setting bits in
 * the freemap so as to interlock with bulkfree's clearing of those bits.
 */
void
hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int lsize;
	int isgood;

	dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
	if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
		lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	else
		lsize = 0;
	mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
	atomic_clear_64(&dio->dedup_valid, mask);
	atomic_set_64(&dio->dedup_alloc, mask);
	hammer2_io_putblk(&dio);
}
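
/*
 * Worked example of the dedup masks; a sketch assuming the 1KB-per-bit
 * granularity implied by a 64-bit mask covering a 64KB DIO (what
 * hammer2_dedup_mask() computes).  A 16KB block at offset 0x4000
 * within the DIO covers bits 16..31:
 *
 *	mask == 0x00000000FFFF0000
 *
 * hammer2_io_dedup_set() then marks those 1KB units allocated
 * (dedup_alloc set) and not yet validated (dedup_valid cleared).
 */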

/*
 * Clear dedup validation bits in a DIO.  This is typically done when
 * a modified chain is destroyed or by the bulkfree code.  No buffer
 * is needed for this operation.  If the DIO no longer exists it is
 * equivalent to the bits not being set.
 */
void
hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
			hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int isgood;

	if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;
	if (btype != HAMMER2_BREF_TYPE_DATA)
		return;
	dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
	if (dio) {
		if (data_off < dio->pbase ||
		    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
		    dio->pbase + dio->psize) {
			panic("hammer2_io_dedup_delete: DATAOFF BAD "
			      "%016jx/%d %016jx\n",
			      data_off, bytes, dio->pbase);
		}
		mask = hammer2_dedup_mask(dio, data_off, bytes);
		atomic_clear_64(&dio->dedup_alloc, mask);
		atomic_clear_64(&dio->dedup_valid, mask);
		hammer2_io_putblk(&dio);
	}
}

/*
 * Assert that dedup allocation bits in a DIO are not set.  This operation
 * does not require a buffer.  The DIO does not need to exist.
 */
void
hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	int isgood;

	dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
			       0, &isgood);
	if (dio) {
		KASSERT((dio->dedup_alloc &
			  hammer2_dedup_mask(dio, data_off, bytes)) == 0,
			("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
			data_off,
			bytes,
			hammer2_dedup_mask(dio, data_off, bytes),
			dio->dedup_alloc));
		hammer2_io_putblk(&dio);
	}
}

static
void
dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
{
	if (bp->b_flags & B_DELWRI)
		return;
	hammer2_adjwritecounter(dio->btype, dio->psize);
}

void
hammer2_io_bkvasync(hammer2_io_t *dio)
{
	KKASSERT(dio->bp != NULL);
	bkvasync(dio->bp);
}

/*
 * Ref a dio that is already owned
 */
void
_hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
	atomic_add_64(&dio->refs, 1);
}

static __inline hammer2_io_hash_t *
hammer2_io_hashv(hammer2_dev_t *hmp, hammer2_off_t pbase)
{
	int hv;

	hv = (int)pbase + (int)(pbase >> 16);
	return (&hmp->iohash[hv & HAMMER2_IOHASH_MASK]);
}

/*
 * Lookup and reference the requested dio
 */
static hammer2_io_t *
hammer2_io_hash_lookup(hammer2_dev_t *hmp, hammer2_off_t pbase, uint64_t *refsp)
{
	hammer2_io_hash_t *hash;
	hammer2_io_t *dio;
	uint64_t refs;

	*refsp = 0;
	hash = hammer2_io_hashv(hmp, pbase);
	hammer2_spin_sh(&hash->spin);
	for (dio = hash->base; dio; dio = dio->next) {
		if (dio->pbase == pbase) {
			refs = atomic_fetchadd_64(&dio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&dio->hmp->iofree_count, -1);
			*refsp = refs;
			break;
		}
	}
	hammer2_spin_unsh(&hash->spin);

	return dio;
}

/*
 * Enter a dio into the hash.  If the pbase already exists in the hash,
 * the xio in the hash is referenced and returned.  If dio is successfully
 * entered into the hash, NULL is returned.
 */
static hammer2_io_t *
hammer2_io_hash_enter(hammer2_dev_t *hmp, hammer2_io_t *dio, uint64_t *refsp)
{
	hammer2_io_t *xio;
	hammer2_io_t **xiop;
	hammer2_io_hash_t *hash;
	uint64_t refs;

	*refsp = 0;
	hash = hammer2_io_hashv(hmp, dio->pbase);
	hammer2_spin_ex(&hash->spin);
	for (xiop = &hash->base; (xio = *xiop) != NULL; xiop = &xio->next) {
		if (xio->pbase == dio->pbase) {
			refs = atomic_fetchadd_64(&xio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			*refsp = refs;
			goto done;
		}
	}
	dio->next = NULL;
	*xiop = dio;
done:
	hammer2_spin_unex(&hash->spin);

	return xio;
}
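
/*
 * Illustrative sketch (mirroring hammer2_io_alloc() above) of the
 * lookup-then-enter race resolution these two routines provide:
 */
#if 0
	dio = hammer2_io_hash_lookup(hmp, pbase, &refs);
	if (dio == NULL) {
		dio = kmalloc_obj(sizeof(*dio), hmp->mio,
				  M_INTWAIT | M_ZERO);
		/* ... initialize dio, dio->refs = 1 ... */
		xio = hammer2_io_hash_enter(hmp, dio, &refs);
		if (xio) {
			/* lost the race; xio was referenced for us */
			kfree_obj(dio, hmp->mio);
			dio = xio;
		}
	}
#endif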

/*
 * Clean out a limited number of freeable DIOs
 */
static void
hammer2_io_hash_cleanup(hammer2_dev_t *hmp, int dio_limit)
{
	hammer2_io_hash_t *hash;
	hammer2_io_t *dio;
	hammer2_io_t **diop;
	hammer2_io_t **cleanapp;
	hammer2_io_t *cleanbase;
	int count;
	int maxscan;
	int i;

	count = hmp->iofree_count - dio_limit + 32;
	if (count <= 0)
		return;
	cleanbase = NULL;
	cleanapp = &cleanbase;

	i = hmp->io_iterator++;
	maxscan = HAMMER2_IOHASH_SIZE;
	while (count > 0 && maxscan--) {
		hash = &hmp->iohash[i & HAMMER2_IOHASH_MASK];
		hammer2_spin_ex(&hash->spin);
		diop = &hash->base;
		while ((dio = *diop) != NULL) {
			if ((dio->refs & (HAMMER2_DIO_MASK |
					  HAMMER2_DIO_INPROG)) != 0)
			{
				diop = &dio->next;
				continue;
			}
			if (dio->act > 0) {
				int act;

				act = dio->act - (ticks - dio->ticks) / hz - 1;
				dio->act = (act < 0) ? 0 : act;
			}
			if (dio->act) {
				diop = &dio->next;
				continue;
			}
			KKASSERT(dio->bp == NULL);
			*diop = dio->next;
			dio->next = NULL;
			*cleanapp = dio;
			cleanapp = &dio->next;
			--count;
			/* diop remains unchanged */
			atomic_add_int(&hmp->iofree_count, -1);
		}
		hammer2_spin_unex(&hash->spin);
		i = hmp->io_iterator++;
	}

	/*
	 * Get rid of dios on clean list without holding any locks
	 */
	while ((dio = cleanbase) != NULL) {
		cleanbase = dio->next;
		dio->next = NULL;
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK |
				  HAMMER2_DIO_INPROG)) == 0);
		if (dio->refs & HAMMER2_DIO_DIRTY) {
			kprintf("hammer2_io_cleanup: Dirty buffer "
				"%016jx/%d (bp=%p)\n",
				dio->pbase, dio->psize, dio->bp);
		}
		kfree_obj(dio, hmp->mio);
		atomic_add_int(&hammer2_dio_count, -1);
	}
}
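
/*
 * Worked example of the activity decay above: a DIO starts with act 5
 * and is bumped (capped at 10) on each access.  A cleanup pass reduces
 * act by one plus the number of whole seconds since the last access,
 * e.g. an unreferenced DIO with act 5 last touched 3 seconds ago decays
 * to 5 - 3 - 1 = 1 and survives the pass; once idle for 4 or more
 * seconds it reaches 0 and becomes freeable.
 */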

/*
 * Destroy all DIOs associated with the media
 */
void
hammer2_io_hash_cleanup_all(hammer2_dev_t *hmp)
{
	hammer2_io_hash_t *hash;
	hammer2_io_t *dio;
	int i;

	for (i = 0; i < HAMMER2_IOHASH_SIZE; ++i) {
		hash = &hmp->iohash[i];

		while ((dio = hash->base) != NULL) {
			hash->base = dio->next;
			dio->next = NULL;
			KKASSERT(dio->bp == NULL &&
			    (dio->refs & (HAMMER2_DIO_MASK |
					  HAMMER2_DIO_INPROG)) == 0);
			if (dio->refs & HAMMER2_DIO_DIRTY) {
				kprintf("hammer2_io_cleanup: Dirty buffer "
					"%016jx/%d (bp=%p)\n",
					dio->pbase, dio->psize, dio->bp);
			}
			kfree_obj(dio, hmp->mio);
			atomic_add_int(&hammer2_dio_count, -1);
			atomic_add_int(&hmp->iofree_count, -1);
		}
	}
}
964