/* xref: /dragonfly/sys/vfs/hammer2/hammer2_io.c (revision cfd1aba3) */
/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used for OS-abstraction but the main
 * purpose is to allow larger buffers to be used against hammer2_chain's
 * using smaller allocations, without causing deadlocks.
 */
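
/*
 * Typical call pattern (an illustrative sketch only, not copied verbatim
 * from any caller in this revision; hmp, lbase, and lsize are assumed to
 * be supplied by the caller): acquire a dio covering the device block
 * underlying a logical offset via one of the new/bread wrappers below,
 * access the data, then release the reference:
 *
 *	hammer2_io_t *dio;
 *	char *data;
 *	int error;
 *
 *	error = hammer2_io_bread(hmp, lbase, lsize, &dio);
 *	if (error == 0) {
 *		data = hammer2_io_data(dio, lbase);
 *		// ... read or modify data, hammer2_io_setdirty() if modified
 *		hammer2_io_bqrelse(&dio);  // or bdwrite/bawrite if dirtied
 *	} else if (dio) {
 *		hammer2_io_brelse(&dio);
 *	}
 */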
static void hammer2_io_callback(struct bio *bio);
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);

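/*
 * RB-tree comparison function, keyed on each dio's physical device
 * offset (pbase).
 */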
static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
	if (io2->pbase < io1->pbase)
		return(-1);
	if (io2->pbase > io1->pbase)
		return(1);
	return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);

struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;
};

#define HAMMER2_DIO_INPROG	0x80000000
#define HAMMER2_DIO_GOOD	0x40000000
#define HAMMER2_DIO_WAITING	0x20000000
#define HAMMER2_DIO_DIRTY	0x10000000

#define HAMMER2_DIO_MASK	0x0FFFFFFF

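/*
 * dio->refs packs the state flags above into the top four bits and keeps
 * the actual reference count in the low 28 bits (HAMMER2_DIO_MASK).  All
 * state transitions are made with atomic compare-and-set loops, so no lock
 * is needed for normal acquisition and release.
 */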
/*
 * Acquire the requested dio, set *ownerp based on state.  If state is good
 * *ownerp is set to 0, otherwise *ownerp is set to DIO_INPROG and the
 * caller must resolve the buffer.
 */
hammer2_io_t *
hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase, int lsize, int *ownerp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	off_t pbase;
	off_t pmask;
	int psize = hammer2_devblksize(lsize);
	int refs;

	pmask = ~(hammer2_off_t)(psize - 1);

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
	lbase &= ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

	/*
	 * Access/Allocate the DIO
	 */
	spin_lock_shared(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		if ((atomic_fetchadd_int(&dio->refs, 1) &
		     HAMMER2_DIO_MASK) == 0) {
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		spin_unlock_shared(&hmp->io_spin);
	} else {
		spin_unlock_shared(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->refs = 1;
		spin_lock(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			spin_unlock(&hmp->io_spin);
		} else {
			if ((atomic_fetchadd_int(&xio->refs, 1) &
			     HAMMER2_DIO_MASK) == 0) {
				atomic_add_int(&xio->hmp->iofree_count, -1);
			}
			spin_unlock(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	}

	/*
	 * Obtain/Validate the buffer.
	 */
	for (;;) {
		refs = dio->refs;
		cpu_ccfence();

		/*
		 * Stop if the buffer is good.  Once set GOOD the flag cannot
		 * be cleared until refs drops to 0.
		 */
		if (refs & HAMMER2_DIO_GOOD) {
			*ownerp = 0;
			goto done;
		}

		/*
		 * We need to acquire the in-progress lock on the buffer
		 */
		if (refs & HAMMER2_DIO_INPROG) {
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_int(&dio->refs, refs,
					      refs | HAMMER2_DIO_WAITING)) {
				tsleep(dio, PINTERLOCKED, "h2dio", 0);
			}
			/* retry */
		} else {
			if (atomic_cmpset_int(&dio->refs, refs,
					      refs | HAMMER2_DIO_INPROG)) {
				break;
			}
		}
		/* retry */
	}

	/*
	 * We need to do more work before the buffer is usable
	 */
	*ownerp = HAMMER2_DIO_INPROG;
done:
	if (dio->act < 5)
		++dio->act;
	return(dio);
}

/*
 * If the dio is part of an asynchronous I/O, the asynchronous I/O is
 * biodone()'d.
 *
 * If the caller owned INPROG then the dio will be set GOOD or not
 * depending on whether the caller disposed of dio->bp or not.
 */
static
void
hammer2_io_complete(hammer2_io_t *dio, int owner)
{
	int refs;
	int good;

	while (owner & HAMMER2_DIO_INPROG) {
		refs = dio->refs;
		cpu_ccfence();
		good = dio->bp ? HAMMER2_DIO_GOOD : 0;
		if (atomic_cmpset_int(&dio->refs, refs,
				      (refs & ~(HAMMER2_DIO_WAITING |
					        HAMMER2_DIO_INPROG)) |
				      good)) {
			if (refs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			if (good)
				BUF_KERNPROC(dio->bp);
			break;
		}
		/* retry */
	}
}

/*
 * Release our ref on *diop, dispose of the underlying buffer.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
	hammer2_mount_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t peof;
	off_t pbase;
	int psize;
	int refs;

	dio = *diop;
	*diop = NULL;

	for (;;) {
		refs = dio->refs;

		if ((refs & HAMMER2_DIO_MASK) == 1) {
			KKASSERT((refs & HAMMER2_DIO_INPROG) == 0);
			if (atomic_cmpset_int(&dio->refs, refs,
					      ((refs - 1) &
					       ~(HAMMER2_DIO_GOOD |
						 HAMMER2_DIO_DIRTY)) |
					      HAMMER2_DIO_INPROG)) {
				break;
			}
			/* retry */
		} else {
			if (atomic_cmpset_int(&dio->refs, refs, refs - 1))
				return;
			/* retry */
		}
		/* retry */
	}

	/*
	 * Locked INPROG on 1->0 transition and we cleared DIO_GOOD (which is
	 * legal only on the last ref).  This allows us to dispose of the
	 * buffer.  refs is now 0.
	 *
	 * The instant we call io_complete dio is a free agent again and
	 * can be ripped out from under us.  Acquisition of the dio after
	 * this point will require a shared or exclusive spinlock.
	 */
	hmp = dio->hmp;
	bp = dio->bp;
	dio->bp = NULL;
	pbase = dio->pbase;
	psize = dio->psize;
	atomic_add_int(&hmp->iofree_count, 1);
	hammer2_io_complete(dio, HAMMER2_DIO_INPROG);	/* clears INPROG */
	dio = NULL;	/* dio stale */

	if (refs & HAMMER2_DIO_GOOD) {
		KKASSERT(bp != NULL);
		if (refs & HAMMER2_DIO_DIRTY) {
			if (hammer2_cluster_enable) {
				peof = (pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				cluster_write(bp, peof, psize, 4);
			} else {
				bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	if (hmp->iofree_count > 1000) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		spin_lock(&hmp->io_spin);
		if (hmp->iofree_count > 1000) {
			info.count = hmp->iofree_count / 2;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		spin_unlock(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}

/*
 * Cleanup any dio's with no references which are not in-progress.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
	struct hammer2_cleanupcb_info *info = arg;
	hammer2_io_t *xio;

	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
		if (dio->act > 0) {
			--dio->act;
			return 0;
		}
		KKASSERT(dio->bp == NULL);
		RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
		xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
		KKASSERT(xio == NULL);
		if (--info->count <= 0)	/* limit scan */
			return(-1);
	}
	return 0;
}

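/*
 * Free all dio's on the supplied tree.  Each must be idle: no buffer,
 * no references, and not in-progress.
 */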
void
hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree)
{
	hammer2_io_t *dio;

	while ((dio = RB_ROOT(tree)) != NULL) {
		RB_REMOVE(hammer2_io_tree, tree, dio);
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
		kfree(dio, M_HAMMER2);
		atomic_add_int(&hmp->iofree_count, -1);
	}
}

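/*
 * Return a pointer to the requested logical offset within the dio's
 * underlying buffer.  The buffer must already be resolved (dio->bp != NULL).
 */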
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return(bp->b_data + off);
}

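/*
 * Common code for the hammer2_io_new*() wrappers below.  Acquires the dio
 * and, if the caller winds up owning INPROG, instantiates the underlying
 * buffer: a request covering the whole device buffer uses getblk() and
 * clears it, otherwise the device buffer is bread() in (skipped entirely
 * in the 'quick' case).  The data is optionally zeroed and the dio is
 * marked dirty if a buffer was obtained.
 */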
static
int
_hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
	        hammer2_io_t **diop, int dozero, int quick)
{
	hammer2_io_t *dio;
	int owner;
	int error;

	dio = *diop = hammer2_io_getblk(hmp, lbase, lsize, &owner);
	if (owner) {
		if (lsize == dio->psize) {
			dio->bp = getblk(hmp->devvp,
					     dio->pbase, dio->psize,
					     (quick ? GETBLK_NOWAIT : 0),
					     0);
			if (dio->bp) {
				vfs_bio_clrbuf(dio->bp);
				if (quick) {
					dio->bp->b_flags |= B_CACHE;
					bqrelse(dio->bp);
					dio->bp = NULL;
				}
			}
			error = 0;
		} else if (quick) {
			/* do nothing */
			error = 0;
		} else {
			error = bread(hmp->devvp, dio->pbase,
				      dio->psize, &dio->bp);
		}
		if (error) {
			brelse(dio->bp);
			dio->bp = NULL;
		}
		hammer2_io_complete(dio, owner);
	} else {
		error = 0;
	}
	if (dio->bp) {
		if (dozero)
			bzero(hammer2_io_data(dio, lbase), lsize);
		atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
	}
	return error;
}

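/*
 * Wrappers around _hammer2_io_new():  hammer2_io_new() zeros the logical
 * range, hammer2_io_newnz() does not, and hammer2_io_newq() is the quick
 * (non-blocking, no-read) variant.
 */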
int
hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, lbase, lsize, diop, 1, 0));
}

int
hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, lbase, lsize, diop, 0, 0));
}

int
hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, lbase, lsize, diop, 0, 1));
}

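/*
 * Acquire the dio for the given logical range and read the underlying
 * device buffer if we own INPROG, using cluster_read() when read
 * clustering is enabled.
 */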
int
hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
		hammer2_io_t **diop)
{
	hammer2_io_t *dio;
	off_t peof;
	int owner;
	int error;

	dio = *diop = hammer2_io_getblk(hmp, lbase, lsize, &owner);
	if (owner) {
		if (hammer2_cluster_enable) {
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			error = cluster_read(hmp->devvp, peof, dio->pbase,
					     dio->psize,
					     dio->psize, HAMMER2_PBUFSIZE*4,
					     &dio->bp);
		} else {
			error = bread(hmp->devvp, dio->pbase,
				      dio->psize, &dio->bp);
		}
		if (error) {
			brelse(dio->bp);
			dio->bp = NULL;
		}
		hammer2_io_complete(dio, owner);
	} else {
		error = 0;
	}
	return error;
}

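/*
 * Asynchronous version of hammer2_io_bread().  If we own INPROG the read
 * is issued via breadcb() and the supplied callback fires from
 * hammer2_io_callback() when it completes; otherwise the buffer is already
 * good and the callback is invoked synchronously.
 */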
void
hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize,
		  void (*callback)(hammer2_io_t *dio,
				   hammer2_cluster_t *arg_l,
				   hammer2_chain_t *arg_c,
				   void *arg_p, off_t arg_o),
		  hammer2_cluster_t *arg_l, hammer2_chain_t *arg_c,
		  void *arg_p, off_t arg_o)
{
	hammer2_io_t *dio;
	int owner;
	int error;

	dio = hammer2_io_getblk(hmp, lbase, lsize, &owner);
	if (owner) {
		dio->callback = callback;
		dio->arg_l = arg_l;
		dio->arg_c = arg_c;
		dio->arg_p = arg_p;
		dio->arg_o = arg_o;
		breadcb(hmp->devvp, dio->pbase, dio->psize,
			hammer2_io_callback, dio);
	} else {
		error = 0;
		callback(dio, arg_l, arg_c, arg_p, arg_o);
		hammer2_io_bqrelse(&dio);
	}
}

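/*
 * I/O completion callback for breadcb(), run when the asynchronous read
 * finishes.  Attach the buffer to the dio, clear INPROG (setting GOOD),
 * invoke the consumer's callback, and release our reference.
 */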
static void
hammer2_io_callback(struct bio *bio)
{
	struct buf *dbp = bio->bio_buf;
	hammer2_io_t *dio = bio->bio_caller_info1.ptr;

	if ((bio->bio_flags & BIO_DONE) == 0)
		bpdone(dbp, 0);
	bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
	dio->bp = bio->bio_buf;
	KKASSERT((dio->bp->b_flags & B_ERROR) == 0); /* XXX */
	hammer2_io_complete(dio, HAMMER2_DIO_INPROG);

	/*
	 * We still have the ref and DIO_GOOD is now set so nothing else
	 * should mess with the callback fields until we release the dio.
	 */
	dio->callback(dio, dio->arg_l, dio->arg_c, dio->arg_p, dio->arg_o);
	hammer2_io_bqrelse(&dio);
	/* TODO: async load meta-data and assign chain->dio */
}

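/*
 * The write-and-release wrappers below mark the dio dirty and drop the
 * caller's reference; the actual write strategy (clustered or delayed
 * write) is chosen in hammer2_io_putblk() when the last reference goes
 * away.
 */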
void
hammer2_io_bawrite(hammer2_io_t **diop)
{
	atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
	atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
	atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
	return (0);	/* XXX */
}

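/*
 * Mark the dio dirty without releasing it.
 */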
void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
}

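/*
 * Invalidate the dio's buffer, but only if the invalidation covers the
 * entire underlying device buffer.
 */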
void
hammer2_io_setinval(hammer2_io_t *dio, u_int bytes)
{
	if ((u_int)dio->psize == bytes)
		dio->bp->b_flags |= B_INVAL | B_RELBUF;
}

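/*
 * Release the caller's reference on the dio.  Currently a simple wrapper
 * around hammer2_io_putblk().
 */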
void
hammer2_io_brelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

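/*
 * Release the caller's reference on the dio, intending to leave the
 * buffer cached.  Currently identical to hammer2_io_brelse().
 */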
void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

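/*
 * Return non-zero if the dio is marked dirty.
 */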
int
hammer2_io_isdirty(hammer2_io_t *dio)
{
	return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
}