1 /*
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
5 * Copyright (c) 2013-2023 The DragonFly Project. All rights reserved.
6 *
7 * This code is derived from software contributed to The DragonFly Project
8 * by Matthew Dillon <dillon@dragonflybsd.org>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in
18 * the documentation and/or other materials provided with the
19 * distribution.
20 * 3. Neither the name of The DragonFly Project nor the names of its
21 * contributors may be used to endorse or promote products derived
22 * from this software without specific, prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
28 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
34 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38 #include "hammer2.h"
39
40 #define HAMMER2_DOP_READ 1
41 #define HAMMER2_DOP_NEW 2
42 #define HAMMER2_DOP_NEWNZ 3
43 #define HAMMER2_DOP_READQ 4
44
45 /*
46 * Implements an abstraction layer for synchronous and asynchronous
47 * buffered device I/O. Can be used as an OS-abstraction but the main
48 * purpose is to allow larger buffers to be used against hammer2_chain's
49 * using smaller allocations, without causing deadlocks.
50 *
51 * The DIOs also record temporary state with limited persistence. This
52 * feature is used to keep track of dedupable blocks.
53 */
54 static void dio_write_stats_update(hammer2_io_t *dio, struct m_buf *bp);
55
56 static hammer2_io_t *hammer2_io_hash_lookup(hammer2_dev_t *hmp,
57 hammer2_off_t pbase, uint64_t *refsp);
58 static hammer2_io_t *hammer2_io_hash_enter(hammer2_dev_t *hmp,
59 hammer2_io_t *dio, uint64_t *refsp);
60 static void hammer2_io_hash_cleanup(hammer2_dev_t *hmp, int dio_limit);
61
62 void
hammer2_io_hash_init(hammer2_dev_t * hmp)63 hammer2_io_hash_init(hammer2_dev_t *hmp)
64 {
65 hammer2_io_hash_t *hash;
66 int i;
67
68 for (i = 0; i < HAMMER2_IOHASH_SIZE; ++i) {
69 hash = &hmp->iohash[i];
70 hammer2_spin_init(&hash->spin, "h2iohash");
71 }
72 }
73
#ifdef HAMMER2_IO_DEBUG

/*
 * Record one debug-trace entry for (dio): the caller's file/line (supplied
 * via HAMMER2_IO_DEBUG_ARGS), the current ref field, and the current thread.
 * Entries live in a small ring buffer indexed by debug_index.
 *
 * Compiles to nothing when HAMMER2_IO_DEBUG is not defined.
 */
static __inline void
DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	int i;

	/* atomically advance the ring index; mask implements the wrap */
	i = atomic_fetchadd_int(&dio->debug_index, 1) & HAMMER2_IO_DEBUG_MASK;

	dio->debug_file[i] = file;
	dio->debug_line[i] = line;
	dio->debug_refs[i] = dio->refs;
	dio->debug_td[i] = curthread;
}

#else

#define DIO_RECORD(dio)

#endif
94
/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 *
 * On return the DIO is referenced (ref prevents destruction) and *isgoodp
 * is set to 1 if the DIO's buffer was observed with DIO_GOOD set.
 */
static
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_off_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_off_t lbase;
	hammer2_off_t pbase;
	hammer2_off_t pmask;
	hammer2_vfsvolume_t *vol;
	uint64_t refs;
	int lsize;
	int psize;

	/*
	 * Decode data_off: the low OFF_MASK_RADIX bits hold the radix of
	 * the logical buffer size (0 radix -> lsize 0), the remaining bits
	 * are the logical media offset.  The DIO always covers the full
	 * PBUFSIZE-aligned physical buffer containing that range.
	 */
	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	if ((int)(data_off & HAMMER2_OFF_MASK_RADIX))
		lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	else
		lsize = 0;
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;

	/* the logical range must fit entirely within one physical buffer */
	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 *
	 * If DIO_GOOD is set the ref should prevent it from being cleared
	 * out from under us, we can set *isgoodp, and the caller can operate
	 * on the buffer without any further interaction.
	 */
	dio = hammer2_io_hash_lookup(hmp, pbase, &refs);
	if (dio) {
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
	} else if (createit) {
		/*
		 * Not found: allocate a fresh DIO and try to enter it into
		 * the hash.  If another thread raced us and entered the same
		 * pbase first, hammer2_io_hash_enter() returns that entry
		 * (already referenced) and we discard ours.
		 */
		refs = 0;
		vol = hammer2_get_volume_from_hmp(hmp, pbase);
		dio = kmalloc_obj(sizeof(*dio), hmp->mio, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->devvp = vol->dev->devvp;
		dio->dbase = vol->offset;	/* volume base offset */
		KKASSERT((dio->dbase & HAMMER2_FREEMAP_LEVEL1_MASK) == 0);
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;		/* our initial reference */
		dio->act = 5;			/* initial activity heuristic */
		xio = hammer2_io_hash_enter(hmp, dio, &refs);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
		} else {
			/* lost the race; use the pre-existing DIO */
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			kfree_obj(dio, hmp->mio);
			dio = xio;
		}
	} else {
		return NULL;
	}

	/* refresh the aging info, bumping the activity counter (cap 10) */
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}
174
/*
 * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
 * a buffer.  If set the buffer already exists and is good to go.
 *
 * (op) is one of the HAMMER2_DOP_* codes: READ brings the buffer in from
 * media, NEW instantiates and zeros it, NEWNZ instantiates without zeroing,
 * READQ only succeeds if the DIO already exists (and degrades to READ).
 */
hammer2_io_t *
_hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
		   int lsize, int op HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;
	hammer2_off_t dev_pbase;
	//off_t peof;
	uint64_t orefs;
	uint64_t nrefs;
	int isgood;
	int error;
	int hce;
	//int bflags;

	//bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
	//bflags |= B_KVABIO;

	/* (lbase)'s embedded radix must encode exactly lsize */
	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

	if (op == HAMMER2_DOP_READQ) {
		/* quick path: do not create a missing DIO */
		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
		if (dio == NULL)
			return NULL;
		op = HAMMER2_DOP_READ;
	} else {
		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
	}

	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Buffer is already good, handle the op and return.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			if (isgood == 0)
				cpu_mfence();	/* sync with GOOD setter */
			bkvasync(dio->bp);

			switch(op) {
			case HAMMER2_DOP_NEW:
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				/*
				 * NOTE(review): atomic_set_long on the
				 * 64-bit refs field assumes long is 64-bit
				 * here (consistent with atomic_cmpset_64
				 * use elsewhere) -- confirm on 32-bit
				 * targets.
				 */
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				/* nothing to do */
				break;
			}
			DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
			return (dio);
		}

		/*
		 * Try to own the DIO
		 */
		if (orefs & HAMMER2_DIO_INPROG) {
			/*
			 * Another thread is instantiating the buffer.
			 * Sleep until it finishes, interlocked against its
			 * wakeup; retry the whole loop afterwards.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			nrefs = orefs | HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				break;
			}
		}
	}

	/*
	 * We break to here if GOOD is not set and we acquired INPROG for
	 * the I/O.
	 */
	KKASSERT(dio->bp == NULL);
	/*
	 * Cluster-read heuristic; currently only consumed by the #if 0
	 * cluster_readx() paths below.
	 */
	if (btype == HAMMER2_BREF_TYPE_DATA)
		hce = hammer2_cluster_data_read;
	else
		hce = hammer2_cluster_meta_read;

	error = 0;
	dev_pbase = dio->pbase - dio->dbase;	/* volume-relative offset */
	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
	    dio->psize == lsize) {
		/*
		 * The request covers the entire physical buffer, so a NEW
		 * op can instantiate it without reading from media.
		 */
		switch(op) {
		case HAMMER2_DOP_NEW:
		case HAMMER2_DOP_NEWNZ:
			dio->bp = getblkx(dio->devvp,
					  dev_pbase, dio->psize,
					  GETBLK_KVABIO, 0);
			if (op == HAMMER2_DOP_NEW) {
				bkvasync(dio->bp);
				bzero(dio->bp->b_data, dio->psize);
			}
			atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
			break;
		case HAMMER2_DOP_READ:
		default:
			KKASSERT(dio->bp == NULL);
#if 0
			if (hce > 0) {
				/*
				 * Synchronous cluster I/O for now.
				 */
				peof = (dio->pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				peof -= dio->dbase;
				error = cluster_readx(dio->devvp,
						     peof, dev_pbase,
						     dio->psize, bflags,
						     dio->psize,
						     HAMMER2_PBUFSIZE*hce,
						     &dio->bp);
			} else {
				error = breadnx(dio->devvp, dev_pbase,
						dio->psize, bflags,
						NULL, NULL, 0, &dio->bp);
			}
#else
			error = breadx(dio->devvp, dev_pbase, dio->psize, &dio->bp);
#endif
			break;
		}
	} else {
		/*
		 * Partial-buffer case: always read the full physical buffer
		 * from media, then apply the NEW semantics to the requested
		 * sub-range only.
		 */
#if 0
		if (hce > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			peof -= dio->dbase;
			error = cluster_readx(dio->devvp,
					      peof, dev_pbase, dio->psize,
					      bflags,
					      dio->psize, HAMMER2_PBUFSIZE*hce,
					      &dio->bp);
		} else {
			error = breadnx(dio->devvp, dev_pbase,
					dio->psize, bflags,
					NULL, NULL, 0, &dio->bp);
		}
#else
		error = breadx(dio->devvp, dev_pbase, dio->psize, &dio->bp);
#endif
		if (dio->bp) {
			/*
			 * Handle NEW flags
			 */
			switch(op) {
			case HAMMER2_DOP_NEW:
				bkvasync(dio->bp);
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				break;
			}

			/*
			 * Tell the kernel that the buffer cache is not
			 * meta-data based on the btype.  This allows
			 * swapcache to distinguish between data and
			 * meta-data.
			 */
			switch(btype) {
			case HAMMER2_BREF_TYPE_DATA:
				//dio->bp->b_flags |= B_NOTMETA;
				break;
			default:
				break;
			}
		}
	}

	if (dio->bp) {
		bkvasync(dio->bp);
		BUF_KERNPROC(dio->bp);
		//dio->bp->b_flags &= ~B_AGE;
		/* dio->bp->b_debug_info2 = dio; */
	}
	dio->error = error;

	/*
	 * Clear INPROG and WAITING, set GOOD wake up anyone waiting.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
		if (error == 0)
			nrefs |= HAMMER2_DIO_GOOD;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/* XXX error handling */
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	return dio;
}
391
/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 *
 * The caller's pointer is always consumed (*diop is NULLed out).
 */
void
_hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct m_buf *bp;
	off_t pbase;
	int psize;
	int dio_limit;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;		/* consume the caller's reference */
	hmp = dio->hmp;
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.  GOOD must be
			 * cleared to prevent the getblk shortcut.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented,
	 * note that another accessor race will decrement iofree_count so
	 * we have to increment it regardless.
	 * We can now dispose of the buffer.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * Allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm'),
			 * by default we will burst-write later.
			 *
			 * We generally do NOT want to issue an actual
			 * b[a]write() or cluster_write() here.  Due to
			 * the way chains are locked, buffers may be cycled
			 * in and out quite often and disposal here can cause
			 * multiple writes or write-read stalls.
			 *
			 * If FLUSH is set we do want to issue the actual
			 * write.  This typically occurs in the write-behind
			 * case when writing to large files.
			 */
			//off_t peof;
			//int hce;
			if (dio->refs & HAMMER2_DIO_FLUSH) {
#if 0
				if ((hce = hammer2_cluster_write) != 0) {
					peof = (pbase + HAMMER2_SEGMASK64) &
					       ~HAMMER2_SEGMASK64;
					peof -= dio->dbase;
					bp->b_flags |= B_CLUSTEROK;
					cluster_write(bp, peof, psize, hce);
				} else {
					bp->b_flags &= ~B_CLUSTEROK;
					bawrite(bp);
				}
#else
				bawrite(bp);	/* immediate async write */
#endif
			} else {
				//bp->b_flags &= ~B_CLUSTEROK;
				bdwrite(bp);	/* delayed write */
			}
#if 0
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
#endif
		} else {
			bqrelse(bp);	/* clean buffer, keep cached */
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 */
	hmp = dio->hmp;	/* NOTE(review): redundant re-fetch, hmp set above */
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
	 *
	 * Also clear FLUSH as it was handled above.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING | HAMMER2_DIO_FLUSH);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	dio_limit = hammer2_dio_limit;
	if (dio_limit < 256)
		dio_limit = 256;
	if (dio_limit > 1024*1024)
		dio_limit = 1024*1024;
	if (hmp->iofree_count > dio_limit)
		hammer2_io_hash_cleanup(hmp, dio_limit);
}
567
568 /*
569 * Returns a pointer to the requested data.
570 */
571 char *
hammer2_io_data(hammer2_io_t * dio,off_t lbase)572 hammer2_io_data(hammer2_io_t *dio, off_t lbase)
573 {
574 struct m_buf *bp;
575 int off;
576
577 bp = dio->bp;
578 KKASSERT(bp != NULL);
579 bkvasync(bp);
580 lbase -= dio->dbase;
581 off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
582 KKASSERT(off >= 0 && off < bp->b_bufsize);
583 return(bp->b_data + off);
584 }
585
586 int
hammer2_io_new(hammer2_dev_t * hmp,int btype,off_t lbase,int lsize,hammer2_io_t ** diop)587 hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
588 hammer2_io_t **diop)
589 {
590 *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
591 return ((*diop)->error);
592 }
593
594 int
hammer2_io_newnz(hammer2_dev_t * hmp,int btype,off_t lbase,int lsize,hammer2_io_t ** diop)595 hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
596 hammer2_io_t **diop)
597 {
598 *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
599 return ((*diop)->error);
600 }
601
/*
 * Read the buffer covering (lbase,lsize) from media.  The referenced dio
 * is returned in *diop; the return value is its error code.
 */
int
_hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		  hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
#ifdef HAMMER2_IO_DEBUG
	hammer2_io_t *dio;
#endif

	*diop = _hammer2_io_getblk(hmp, btype, lbase, lsize,
				   HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL);
#ifdef HAMMER2_IO_DEBUG
	/* debug-build hook; payload recording currently disabled */
	if ((dio = *diop) != NULL) {
#if 0
		int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK;
		dio->debug_data[i] = debug_data;
#endif
	}
#endif
	return ((*diop)->error);
}
622
623 hammer2_io_t *
_hammer2_io_getquick(hammer2_dev_t * hmp,off_t lbase,int lsize HAMMER2_IO_DEBUG_ARGS)624 _hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase,
625 int lsize HAMMER2_IO_DEBUG_ARGS)
626 {
627 hammer2_io_t *dio;
628
629 dio = _hammer2_io_getblk(hmp, 0, lbase, lsize,
630 HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL);
631 return dio;
632 }
633
634 void
_hammer2_io_bawrite(hammer2_io_t ** diop HAMMER2_IO_DEBUG_ARGS)635 _hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
636 {
637 atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
638 HAMMER2_DIO_FLUSH);
639 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
640 }
641
642 void
_hammer2_io_bdwrite(hammer2_io_t ** diop HAMMER2_IO_DEBUG_ARGS)643 _hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
644 {
645 atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
646 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
647 }
648
649 int
_hammer2_io_bwrite(hammer2_io_t ** diop HAMMER2_IO_DEBUG_ARGS)650 _hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
651 {
652 atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
653 HAMMER2_DIO_FLUSH);
654 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
655 return (0); /* XXX */
656 }
657
658 void
hammer2_io_setdirty(hammer2_io_t * dio)659 hammer2_io_setdirty(hammer2_io_t *dio)
660 {
661 atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
662 }
663
/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP -- intentionally does nothing, see XXX above */
}
685
686 void
_hammer2_io_brelse(hammer2_io_t ** diop HAMMER2_IO_DEBUG_ARGS)687 _hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
688 {
689 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
690 }
691
692 void
_hammer2_io_bqrelse(hammer2_io_t ** diop HAMMER2_IO_DEBUG_ARGS)693 _hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
694 {
695 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
696 }
697
698 /*
699 * Set dedup validation bits in a DIO. We do not need the buffer cache
700 * buffer for this. This must be done concurrent with setting bits in
701 * the freemap so as to interlock with bulkfree's clearing of those bits.
702 */
703 void
hammer2_io_dedup_set(hammer2_dev_t * hmp,hammer2_blockref_t * bref)704 hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
705 {
706 hammer2_io_t *dio;
707 uint64_t mask;
708 int lsize;
709 int isgood;
710
711 dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
712 if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
713 lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
714 else
715 lsize = 0;
716 mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
717 atomic_clear_64(&dio->dedup_valid, mask);
718 atomic_set_64(&dio->dedup_alloc, mask);
719 hammer2_io_putblk(&dio);
720 }
721
/*
 * Clear dedup validation bits in a DIO.  This is typically done when
 * a modified chain is destroyed or by the bulkfree code.  No buffer
 * is needed for this operation.  If the DIO no longer exists it is
 * equivalent to the bits not being set.
 */
void
hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
			hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int isgood;

	/* ignore null offsets and non-data blockrefs */
	if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;
	if (btype != HAMMER2_BREF_TYPE_DATA)
		return;
	/* lookup only (createit=0); absent DIO means nothing to clear */
	dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
	if (dio) {
		/* the range must lie entirely within the DIO's buffer */
		if (data_off < dio->pbase ||
		    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
		    dio->pbase + dio->psize) {
			panic("hammer2_io_dedup_delete: DATAOFF BAD "
			      "%016jx/%d %016jx\n",
			      data_off, bytes, dio->pbase);
		}
		mask = hammer2_dedup_mask(dio, data_off, bytes);
		atomic_clear_64(&dio->dedup_alloc, mask);
		atomic_clear_64(&dio->dedup_valid, mask);
		hammer2_io_putblk(&dio);
	}
}
755
/*
 * Assert that dedup allocation bits in a DIO are not set.  This operation
 * does not require a buffer.  The DIO does not need to exist.
 */
void
hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	int isgood;

	/* lookup only (createit=0); absent DIO trivially satisfies assert */
	dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
			       0, &isgood);
	if (dio) {
		KASSERT((dio->dedup_alloc &
			 hammer2_dedup_mask(dio, data_off, bytes)) == 0,
			("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
			data_off,
			bytes,
			hammer2_dedup_mask(dio, data_off, bytes),
			dio->dedup_alloc));
		hammer2_io_putblk(&dio);
	}
}
779
780 static
781 void
dio_write_stats_update(hammer2_io_t * dio,struct m_buf * bp)782 dio_write_stats_update(hammer2_io_t *dio, struct m_buf *bp)
783 {
784 /*
785 if (bp->b_flags & B_DELWRI)
786 return;
787 */
788 hammer2_adjwritecounter(dio->btype, dio->psize);
789 }
790
791 void
hammer2_io_bkvasync(hammer2_io_t * dio)792 hammer2_io_bkvasync(hammer2_io_t *dio)
793 {
794 KKASSERT(dio->bp != NULL);
795 bkvasync(dio->bp);
796 }
797
798 /*
799 * Ref a dio that is already owned
800 */
801 void
_hammer2_io_ref(hammer2_io_t * dio HAMMER2_IO_DEBUG_ARGS)802 _hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
803 {
804 DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
805 atomic_add_64(&dio->refs, 1);
806 }
807
808 static __inline hammer2_io_hash_t *
hammer2_io_hashv(hammer2_dev_t * hmp,hammer2_off_t pbase)809 hammer2_io_hashv(hammer2_dev_t *hmp, hammer2_off_t pbase)
810 {
811 int hv;
812
813 hv = (int)pbase + (int)(pbase >> 16);
814 return (&hmp->iohash[hv & HAMMER2_IOHASH_MASK]);
815 }
816
/*
 * Lookup and reference the requested dio
 *
 * Scans the hash bucket under a shared spinlock.  On a hit the dio's ref
 * count is bumped; if the dio was cached at zero refs it is leaving the
 * free pool, so iofree_count is decremented.  *refsp receives the
 * pre-increment refs field (flags included), or 0 when not found.
 */
static hammer2_io_t *
hammer2_io_hash_lookup(hammer2_dev_t *hmp, hammer2_off_t pbase, uint64_t *refsp)
{
	hammer2_io_hash_t *hash;
	hammer2_io_t *dio;
	uint64_t refs;

	*refsp = 0;
	hash = hammer2_io_hashv(hmp, pbase);
	hammer2_spin_sh(&hash->spin);
	for (dio = hash->base; dio; dio = dio->next) {
		if (dio->pbase == pbase) {
			refs = atomic_fetchadd_64(&dio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&dio->hmp->iofree_count, -1);
			*refsp = refs;	/* pre-increment value */
			break;
		}
	}
	hammer2_spin_unsh(&hash->spin);

	return dio;
}
843
/*
 * Enter a dio into the hash.  If the pbase already exists in the hash,
 * the xio in the hash is referenced and returned.  If dio is successfully
 * entered into the hash, NULL is returned.
 */
static hammer2_io_t *
hammer2_io_hash_enter(hammer2_dev_t *hmp, hammer2_io_t *dio, uint64_t *refsp)
{
	hammer2_io_t *xio;
	hammer2_io_t **xiop;
	hammer2_io_hash_t *hash;
	uint64_t refs;

	*refsp = 0;
	hash = hammer2_io_hashv(hmp, dio->pbase);
	hammer2_spin_ex(&hash->spin);
	for (xiop = &hash->base; (xio = *xiop) != NULL; xiop = &xio->next) {
		if (xio->pbase == dio->pbase) {
			/*
			 * Collision: reference the existing entry.  If it
			 * was cached at zero refs it leaves the free pool,
			 * so adjust iofree_count.
			 */
			refs = atomic_fetchadd_64(&xio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			*refsp = refs;	/* pre-increment value */
			goto done;
		}
	}
	/* no collision; append dio at the bucket's tail */
	dio->next = NULL;
	*xiop = dio;
done:
	hammer2_spin_unex(&hash->spin);

	return xio;
}
876
/*
 * Clean out a limited number of freeable DIOs
 *
 * Phase 1 unlinks idle zero-ref DIOs from the hash buckets onto a private
 * list, holding only one bucket's exclusive spinlock at a time.  Phase 2
 * frees the collected DIOs with no locks held.
 */
static void
hammer2_io_hash_cleanup(hammer2_dev_t *hmp, int dio_limit)
{
	hammer2_io_hash_t *hash;
	hammer2_io_t *dio;
	hammer2_io_t **diop;
	hammer2_io_t **cleanapp;
	hammer2_io_t *cleanbase;
	int count;
	int maxscan;
	int i;

	/* number of DIOs to reclaim, with a small hysteresis margin */
	count = hmp->iofree_count - dio_limit + 32;
	if (count <= 0)
		return;
	cleanbase = NULL;
	cleanapp = &cleanbase;

	/* rotate the starting bucket across calls via io_iterator */
	i = hmp->io_iterator++;
	maxscan = HAMMER2_IOHASH_SIZE;
	while (count > 0 && maxscan--) {
		hash = &hmp->iohash[i & HAMMER2_IOHASH_MASK];
		hammer2_spin_ex(&hash->spin);
		diop = &hash->base;
		while ((dio = *diop) != NULL) {
			/* skip referenced or in-progress DIOs */
			if ((dio->refs & (HAMMER2_DIO_MASK |
					  HAMMER2_DIO_INPROG)) != 0)
			{
				diop = &dio->next;
				continue;
			}
			/* age the activity counter by elapsed seconds */
			if (dio->act > 0) {
				int act;

				act = dio->act - (ticks - dio->ticks) / hz - 1;
				dio->act = (act < 0) ? 0 : act;
			}
			/* still recently active: keep it cached */
			if (dio->act) {
				diop = &dio->next;
				continue;
			}
			/* unlink and append to the private clean list */
			KKASSERT(dio->bp == NULL);
			*diop = dio->next;
			dio->next = NULL;
			*cleanapp = dio;
			cleanapp = &dio->next;
			--count;
			/* diop remains unchanged */
			atomic_add_int(&hmp->iofree_count, -1);
		}
		hammer2_spin_unex(&hash->spin);
		i = hmp->io_iterator++;
	}

	/*
	 * Get rid of dios on clean list without holding any locks
	 */
	while ((dio = cleanbase) != NULL) {
		cleanbase = dio->next;
		dio->next = NULL;
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK |
				  HAMMER2_DIO_INPROG)) == 0);
		if (dio->refs & HAMMER2_DIO_DIRTY) {
			kprintf("hammer2_io_cleanup: Dirty buffer "
				"%016jx/%d (bp=%p)\n",
				dio->pbase, dio->psize, dio->bp);
		}
		kfree_obj(dio, hmp->mio);
		atomic_add_int(&hammer2_dio_count, -1);
	}
}
952
/*
 * Destroy all DIOs associated with the media
 *
 * Called at teardown; no other accessors are expected, so the buckets
 * are drained without taking their spinlocks.  All DIOs must already be
 * unreferenced, buffer-less, and not in-progress.
 */
void
hammer2_io_hash_cleanup_all(hammer2_dev_t *hmp)
{
	hammer2_io_hash_t *hash;
	hammer2_io_t *dio;
	int i;

	for (i = 0; i < HAMMER2_IOHASH_SIZE; ++i) {
		hash = &hmp->iohash[i];

		while ((dio = hash->base) != NULL) {
			hash->base = dio->next;
			dio->next = NULL;
			KKASSERT(dio->bp == NULL &&
			    (dio->refs & (HAMMER2_DIO_MASK |
					  HAMMER2_DIO_INPROG)) == 0);
			/* a dirty DIO here indicates lost writes */
			if (dio->refs & HAMMER2_DIO_DIRTY) {
				kprintf("hammer2_io_cleanup: Dirty buffer "
					"%016jx/%d (bp=%p)\n",
					dio->pbase, dio->psize, dio->bp);
			}
			kfree_obj(dio, hmp->mio);
			atomic_add_int(&hammer2_dio_count, -1);
			atomic_add_int(&hmp->iofree_count, -1);
		}
	}
}
983