/*	$OpenBSD: kern_bufq.c,v 1.33 2019/12/19 17:40:10 mpi Exp $	*/
/*
 * Copyright (c) 2010 Thordur I. Bjornsson <thib@openbsd.org>
 * Copyright (c) 2010 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/queue.h>

SLIST_HEAD(, bufq)	bufqs = SLIST_HEAD_INITIALIZER(bufqs);
struct mutex		bufqs_mtx = MUTEX_INITIALIZER(IPL_NONE);
int			bufqs_stop;

/* Per-discipline method table. */
struct bufq_impl {
	void		*(*impl_create)(void);
	void		 (*impl_destroy)(void *);

	void		 (*impl_queue)(void *, struct buf *);
	struct buf	*(*impl_dequeue)(void *);
	void		 (*impl_requeue)(void *, struct buf *);
	int		 (*impl_peek)(void *);
};

void		*bufq_fifo_create(void);
void		 bufq_fifo_destroy(void *);
void		 bufq_fifo_queue(void *, struct buf *);
struct buf	*bufq_fifo_dequeue(void *);
void		 bufq_fifo_requeue(void *, struct buf *);
int		 bufq_fifo_peek(void *);

void		*bufq_nscan_create(void);
void		 bufq_nscan_destroy(void *);
void		 bufq_nscan_queue(void *, struct buf *);
struct buf	*bufq_nscan_dequeue(void *);
void		 bufq_nscan_requeue(void *, struct buf *);
int		 bufq_nscan_peek(void *);

/* Method tables, indexed by BUFQ_* discipline type. */
const struct bufq_impl bufq_impls[BUFQ_HOWMANY] = {
	{
		bufq_fifo_create,
		bufq_fifo_destroy,
		bufq_fifo_queue,
		bufq_fifo_dequeue,
		bufq_fifo_requeue,
		bufq_fifo_peek
	},
	{
		bufq_nscan_create,
		bufq_nscan_destroy,
		bufq_nscan_queue,
		bufq_nscan_dequeue,
		bufq_nscan_requeue,
		bufq_nscan_peek
	}
};
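
/*
 * Example (not compiled): a hypothetical third discipline would supply
 * the same six hooks and get an entry appended to bufq_impls[] above,
 * with a matching BUFQ_* constant and a bumped BUFQ_HOWMANY in
 * <sys/buf.h>.  The "prio" names below are made up for illustration.
 */
#if 0
void		*bufq_prio_create(void);
void		 bufq_prio_destroy(void *);
void		 bufq_prio_queue(void *, struct buf *);
struct buf	*bufq_prio_dequeue(void *);
void		 bufq_prio_requeue(void *, struct buf *);
int		 bufq_prio_peek(void *);
#endif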

/*
 * Initialize bq to use the given discipline and register it on the
 * global list of bufqs.
 */
int
bufq_init(struct bufq *bq, int type)
{
	u_int hi = BUFQ_HI, low = BUFQ_LOW;

	if (type >= BUFQ_HOWMANY)
		panic("bufq_init: type %i unknown", type);

	/*
	 * Ensure that writes can't consume the entire amount of kva
	 * available to the buffer cache if we only have a limited
	 * amount of kva available to us.
	 */
	if (hi >= (bcstats.kvaslots / 16)) {
		hi = bcstats.kvaslots / 16;
		if (hi < 2)
			hi = 2;
		low = hi / 2;
	}

	mtx_init(&bq->bufq_mtx, IPL_BIO);
	bq->bufq_hi = hi;
	bq->bufq_low = low;
	bq->bufq_type = type;
	bq->bufq_impl = &bufq_impls[type];
	bq->bufq_data = bq->bufq_impl->impl_create();
	if (bq->bufq_data == NULL) {
		/*
		 * We should actually return failure here, so that disks
		 * attaching after boot in low memory situations don't
		 * panic the system.
		 */
		panic("bufq init fail");
	}

	mtx_enter(&bufqs_mtx);
	while (bufqs_stop) {
		msleep_nsec(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqinit", INFSLP);
	}
	SLIST_INSERT_HEAD(&bufqs, bq, bufq_entries);
	mtx_leave(&bufqs_mtx);

	return (0);
}
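
/*
 * Example (not compiled): how a disk driver typically sets up its bufq
 * at attach time.  The "xx" softc and names are illustrative only;
 * BUFQ_DEFAULT is the usual discipline choice from <sys/buf.h>.
 */
#if 0
struct xx_softc {
	struct device	sc_dev;
	struct bufq	sc_bufq;
};

int
xxattach(struct xx_softc *sc)
{
	/* One bufq per device; currently bufq_init() cannot fail. */
	if (bufq_init(&sc->sc_bufq, BUFQ_DEFAULT) != 0)
		return (ENOMEM);

	return (0);
}
#endif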

/*
 * Switch bq to a different discipline, moving any queued buffers over
 * to the new one.
 */
int
bufq_switch(struct bufq *bq, int type)
{
	void		*data;
	void		*odata;
	int		otype;
	struct buf	*bp;
	int		ret;

	mtx_enter(&bq->bufq_mtx);
	ret = (bq->bufq_type == type);
	mtx_leave(&bq->bufq_mtx);
	if (ret)
		return (0);

	data = bufq_impls[type].impl_create();
	if (data == NULL)
		return (ENOMEM);

	mtx_enter(&bq->bufq_mtx);
	if (bq->bufq_type != type) { /* might have changed during create */
		odata = bq->bufq_data;
		otype = bq->bufq_type;

		while ((bp = bufq_impls[otype].impl_dequeue(odata)) != NULL)
			bufq_impls[type].impl_queue(data, bp);

		bq->bufq_data = data;
		bq->bufq_type = type;
		bq->bufq_impl = &bufq_impls[type];
	} else {
		otype = type;
		odata = data;
	}
	mtx_leave(&bq->bufq_mtx);

	bufq_impls[otype].impl_destroy(odata);

	return (0);
}
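
/*
 * Example (not compiled): switching an existing queue to plain FIFO,
 * e.g. for a device where seek ordering does not matter.  "sc" is the
 * hypothetical softc from the attach sketch above.
 */
#if 0
void
xx_set_fifo(struct xx_softc *sc)
{
	if (bufq_switch(&sc->sc_bufq, BUFQ_FIFO) != 0)
		printf("%s: could not switch bufq discipline\n",
		    sc->sc_dev.dv_xname);
}
#endif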

/*
 * Drain bq, free its per-discipline state and take it off the global
 * list of bufqs.
 */
void
bufq_destroy(struct bufq *bq)
{
	bufq_drain(bq);

	bq->bufq_impl->impl_destroy(bq->bufq_data);
	bq->bufq_data = NULL;

	mtx_enter(&bufqs_mtx);
	while (bufqs_stop) {
		msleep_nsec(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqdest", INFSLP);
	}
	SLIST_REMOVE(&bufqs, bq, bufq, bufq_entries);
	mtx_leave(&bufqs_mtx);
}

/*
 * Hand bp to the discipline and account for it as outstanding; blocks
 * while the queue is stopped.
 */
void
bufq_queue(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	while (bq->bufq_stop) {
		msleep_nsec(&bq->bufq_stop, &bq->bufq_mtx, PRIBIO, "bqqueue",
		    INFSLP);
	}

	bp->b_bq = bq;
	bq->bufq_outstanding++;
	bq->bufq_impl->impl_queue(bq->bufq_data, bp);
	mtx_leave(&bq->bufq_mtx);
}

/*
 * Return the next buffer chosen by the discipline, or NULL if the
 * queue is empty.
 */
struct buf *
bufq_dequeue(struct bufq *bq)
{
	struct buf	*bp;

	mtx_enter(&bq->bufq_mtx);
	bp = bq->bufq_impl->impl_dequeue(bq->bufq_data);
	mtx_leave(&bq->bufq_mtx);

	return (bp);
}

/*
 * Give bp back to the discipline so it can be dequeued again later.
 */
void
bufq_requeue(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	bq->bufq_impl->impl_requeue(bq->bufq_data, bp);
	mtx_leave(&bq->bufq_mtx);
}

/*
 * Return non-zero if bq has buffers waiting to be dequeued.
 */
int
bufq_peek(struct bufq *bq)
{
	int		rv;

	mtx_enter(&bq->bufq_mtx);
	rv = bq->bufq_impl->impl_peek(bq->bufq_data);
	mtx_leave(&bq->bufq_mtx);

	return (rv);
}

/*
 * Fail every queued buffer with ENXIO.
 */
void
bufq_drain(struct bufq *bq)
{
	struct buf	*bp;
	int		 s;

	while ((bp = bufq_dequeue(bq)) != NULL) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		s = splbio();
		biodone(bp);
		splx(s);
	}
}

/*
 * Throttle the caller: sleep while the number of outstanding buffers
 * is at or above the high water mark.
 */
void
bufq_wait(struct bufq *bq)
{
	if (bq->bufq_hi) {
		assertwaitok();
		mtx_enter(&bq->bufq_mtx);
		while (bq->bufq_outstanding >= bq->bufq_hi) {
			bq->bufq_waiting++;
			msleep_nsec(&bq->bufq_waiting, &bq->bufq_mtx,
			    PRIBIO, "bqwait", INFSLP);
			bq->bufq_waiting--;
		}
		mtx_leave(&bq->bufq_mtx);
	}
}

/*
 * Account for the completion of bp and wake anyone throttled in
 * bufq_wait() or waiting in bufq_quiesce().
 */
void
bufq_done(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	KASSERT(bq->bufq_outstanding > 0);
	bq->bufq_outstanding--;
	if (bq->bufq_stop && bq->bufq_outstanding == 0)
		wakeup(&bq->bufq_outstanding);
	if (bq->bufq_waiting && bq->bufq_outstanding < bq->bufq_low)
		wakeup(&bq->bufq_waiting);
	mtx_leave(&bq->bufq_mtx);
	bp->b_bq = NULL;
}
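
/*
 * Example (not compiled): the usual producer/consumer cycle as seen
 * from a disk driver.  xxstrategy/xxstart/xx_done, xx_lookup() and
 * xx_issue() are hypothetical; only the bufq_* calls are real.
 */
#if 0
void
xxstrategy(struct buf *bp)
{
	struct xx_softc *sc = xx_lookup(bp->b_dev);

	bufq_queue(&sc->sc_bufq, bp);	/* counts bp as outstanding */
	xxstart(sc);
}

void
xxstart(struct xx_softc *sc)
{
	struct buf *bp;

	while ((bp = bufq_dequeue(&sc->sc_bufq)) != NULL) {
		if (xx_issue(sc, bp) != 0) {
			/* no room on the controller; retry this bp later */
			bufq_requeue(&sc->sc_bufq, bp);
			break;
		}
	}
}

void
xx_done(struct xx_softc *sc, struct buf *bp)
{
	bufq_done(&sc->sc_bufq, bp);	/* may wake bufq_wait() callers */
	biodone(bp);
	xxstart(sc);
}
#endif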

/*
 * Stop every bufq in the system and wait until all of their
 * outstanding buffers have completed.
 */
void
bufq_quiesce(void)
{
	struct bufq		*bq;

	mtx_enter(&bufqs_mtx);
	bufqs_stop = 1;
	mtx_leave(&bufqs_mtx);
	/*
	 * We can safely walk the list since it can't be modified as
	 * long as bufqs_stop is non-zero.
	 */
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 1;
		while (bq->bufq_outstanding) {
			msleep_nsec(&bq->bufq_outstanding, &bq->bufq_mtx,
			    PRIBIO, "bqquies", INFSLP);
		}
		mtx_leave(&bq->bufq_mtx);
	}
}

/*
 * Let every bufq accept and issue work again.
 */
void
bufq_restart(void)
{
	struct bufq		*bq;

	mtx_enter(&bufqs_mtx);
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 0;
		wakeup(&bq->bufq_stop);
		mtx_leave(&bq->bufq_mtx);
	}
	bufqs_stop = 0;
	wakeup(&bufqs_stop);
	mtx_leave(&bufqs_mtx);
}
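
/*
 * Example (not compiled): the intended pairing of the two calls above,
 * e.g. around a suspend path.  xx_machdep_suspend() and
 * xx_suspend_devices() are hypothetical names; the pairing is what
 * matters.
 */
#if 0
int
xx_machdep_suspend(void)
{
	int error;

	bufq_quiesce();		/* stop new I/O and drain what is in flight */
	error = xx_suspend_devices();
	bufq_restart();		/* release callers stalled in bufq_queue() */

	return (error);
}
#endif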

/*
 * fifo implementation
 */

void *
bufq_fifo_create(void)
{
	struct bufq_fifo_head	*head;

	head = malloc(sizeof(*head), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (head == NULL)
		return (NULL);

	SIMPLEQ_INIT(head);

	return (head);
}

void
bufq_fifo_destroy(void *data)
{
	struct bufq_fifo_head	*head = data;

	free(head, M_DEVBUF, sizeof(*head));
}

void
bufq_fifo_queue(void *data, struct buf *bp)
{
	struct bufq_fifo_head	*head = data;

	SIMPLEQ_INSERT_TAIL(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
}

struct buf *
bufq_fifo_dequeue(void *data)
{
	struct bufq_fifo_head	*head = data;
	struct buf		*bp;

	bp = SIMPLEQ_FIRST(head);
	if (bp != NULL)
		SIMPLEQ_REMOVE_HEAD(head, b_bufq.bufq_data_fifo.bqf_entries);

	return (bp);
}

void
bufq_fifo_requeue(void *data, struct buf *bp)
{
	struct bufq_fifo_head	*head = data;

	SIMPLEQ_INSERT_HEAD(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
}

int
bufq_fifo_peek(void *data)
{
	struct bufq_fifo_head	*head = data;

	return (SIMPLEQ_FIRST(head) != NULL);
}

/*
 * nscan implementation
 *
 * Buffers are collected on a fifo queue and moved to a sorted queue in
 * batches of up to BUFQ_NSCAN_N, from which they are issued in
 * ascending b_blkno order; a batched, one-way scan of the disk.
 */

#define BUF_INORDER(ba, bb) ((ba)->b_blkno < (bb)->b_blkno)

#define dsentries b_bufq.bufq_data_nscan.bqf_entries

struct bufq_nscan_data {
	struct bufq_nscan_head sorted;
	struct bufq_nscan_head fifo;
	int leftoverroom; /* Remaining number of buffer inserts allowed */
};

void bufq_nscan_resort(struct bufq_nscan_data *data);
void bufq_simple_nscan(struct bufq_nscan_head *, struct buf *);

void
bufq_simple_nscan(struct bufq_nscan_head *head, struct buf *bp)
{
	struct buf *cur, *prev;

	prev = NULL;
	/*
	 * We look for the first slot where we would fit, then insert
	 * after the element we just passed.
	 */
	SIMPLEQ_FOREACH(cur, head, dsentries) {
		if (BUF_INORDER(bp, cur))
			break;
		prev = cur;
	}
	if (prev)
		SIMPLEQ_INSERT_AFTER(head, prev, bp, dsentries);
	else
		SIMPLEQ_INSERT_HEAD(head, bp, dsentries);
}

/*
 * Move up to BUFQ_NSCAN_N buffers from the fifo queue into the sorted
 * queue, remembering how much of the batch went unused.
 */
void
bufq_nscan_resort(struct bufq_nscan_data *data)
{
	struct bufq_nscan_head *fifo = &data->fifo;
	struct bufq_nscan_head *sorted = &data->sorted;
	int count, segmentsize = BUFQ_NSCAN_N;
	struct buf *bp;

	for (count = 0; count < segmentsize; count++) {
		bp = SIMPLEQ_FIRST(fifo);
		if (!bp)
			break;
		SIMPLEQ_REMOVE_HEAD(fifo, dsentries);
		bufq_simple_nscan(sorted, bp);
	}
	data->leftoverroom = segmentsize - count;
}

void *
bufq_nscan_create(void)
{
	struct bufq_nscan_data *data;

	data = malloc(sizeof(*data), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (!data)
		return NULL;
	SIMPLEQ_INIT(&data->sorted);
	SIMPLEQ_INIT(&data->fifo);

	return data;
}

void
bufq_nscan_destroy(void *vdata)
{
	struct bufq_nscan_data *data = vdata;

	free(data, M_DEVBUF, sizeof(*data));
}

void
bufq_nscan_queue(void *vdata, struct buf *bp)
{
	struct bufq_nscan_data *data = vdata;

	/*
	 * If the previous sorted segment was small, we will continue
	 * packing in bufs as long as they're in order.
	 */
	if (data->leftoverroom) {
		struct buf *next = SIMPLEQ_FIRST(&data->sorted);
		if (next && BUF_INORDER(next, bp)) {
			bufq_simple_nscan(&data->sorted, bp);
			data->leftoverroom--;
			return;
		}
	}

	SIMPLEQ_INSERT_TAIL(&data->fifo, bp, dsentries);
}

struct buf *
bufq_nscan_dequeue(void *vdata)
{
	struct bufq_nscan_data *data = vdata;
	struct bufq_nscan_head *sorted = &data->sorted;
	struct buf	*bp;

	if (SIMPLEQ_FIRST(sorted) == NULL)
		bufq_nscan_resort(data);

	bp = SIMPLEQ_FIRST(sorted);
	if (bp != NULL)
		SIMPLEQ_REMOVE_HEAD(sorted, dsentries);

	return (bp);
}
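
/*
 * Example: starting from an empty queue, queueing buffers with b_blkno
 * 9, 3, 7 and 12 appends all four to the fifo queue.  The first
 * dequeue finds the sorted queue empty, so bufq_nscan_resort() moves
 * the batch over as 3, 7, 9, 12, and the buffers come back in that
 * order.  While the batch still has room left over, queueing, say,
 * blkno 20 slots it straight into the sorted queue because it lies
 * beyond the current head.
 */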

void
bufq_nscan_requeue(void *vdata, struct buf *bp)
{
	struct bufq_nscan_data *data = vdata;

	SIMPLEQ_INSERT_HEAD(&data->fifo, bp, dsentries);
}

int
bufq_nscan_peek(void *vdata)
{
	struct bufq_nscan_data *data = vdata;

	return (SIMPLEQ_FIRST(&data->sorted) != NULL) ||
	    (SIMPLEQ_FIRST(&data->fifo) != NULL);
}
511