1 /* $OpenBSD: kern_bufq.c,v 1.35 2022/12/05 23:18:37 deraadt Exp $ */
2 /*
3 * Copyright (c) 2010 Thordur I. Bjornsson <thib@openbsd.org>
4 * Copyright (c) 2010 David Gwynne <dlg@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/malloc.h>
22 #include <sys/mount.h>
23 #include <sys/mutex.h>
24 #include <sys/buf.h>
25 #include <sys/errno.h>
26 #include <sys/queue.h>
27
/*
 * Global list of every bufq in the system, protected by bufqs_mtx.
 * bufq_quiesce() sets bufqs_stop while it walks the list without the
 * mutex held; bufq_init()/bufq_destroy() must not modify the list
 * while bufqs_stop is non-zero.
 */
SLIST_HEAD(, bufq) bufqs = SLIST_HEAD_INITIALIZER(bufqs);
struct mutex bufqs_mtx = MUTEX_INITIALIZER(IPL_NONE);
int bufqs_stop;		/* non-zero while bufq_quiesce() is in progress */
31
/*
 * Method table for a bufq strategy.  Each bufq carries a pointer to
 * one of these plus an opaque per-queue state blob (bufq_data) that
 * is passed back into every method.
 */
struct bufq_impl {
	void *(*impl_create)(void);		/* allocate per-queue state; NULL on failure */
	void (*impl_destroy)(void *);		/* free per-queue state */

	void (*impl_queue)(void *, struct buf *);	/* enqueue a buf */
	struct buf *(*impl_dequeue)(void *);		/* dequeue next buf or NULL */
	void (*impl_requeue)(void *, struct buf *);	/* put a buf back at the front */
	int (*impl_peek)(void *);			/* non-zero if a buf is queued */
};
41
/* fifo strategy: plain FIFO ordering (implementation below) */
void *bufq_fifo_create(void);
void bufq_fifo_destroy(void *);
void bufq_fifo_queue(void *, struct buf *);
struct buf *bufq_fifo_dequeue(void *);
void bufq_fifo_requeue(void *, struct buf *);
int bufq_fifo_peek(void *);

/* nscan strategy: batches of bufs sorted by block number (implementation below) */
void *bufq_nscan_create(void);
void bufq_nscan_destroy(void *);
void bufq_nscan_queue(void *, struct buf *);
struct buf *bufq_nscan_dequeue(void *);
void bufq_nscan_requeue(void *, struct buf *);
int bufq_nscan_peek(void *);
55
56 const struct bufq_impl bufq_impls[BUFQ_HOWMANY] = {
57 {
58 bufq_fifo_create,
59 bufq_fifo_destroy,
60 bufq_fifo_queue,
61 bufq_fifo_dequeue,
62 bufq_fifo_requeue,
63 bufq_fifo_peek
64 },
65 {
66 bufq_nscan_create,
67 bufq_nscan_destroy,
68 bufq_nscan_queue,
69 bufq_nscan_dequeue,
70 bufq_nscan_requeue,
71 bufq_nscan_peek
72 }
73 };
74
75 int
bufq_init(struct bufq * bq,int type)76 bufq_init(struct bufq *bq, int type)
77 {
78 u_int hi = BUFQ_HI, low = BUFQ_LOW;
79
80 if (type >= BUFQ_HOWMANY)
81 panic("bufq_init: type %i unknown", type);
82
83 /*
84 * Ensure that writes can't consume the entire amount of kva
85 * available the buffer cache if we only have a limited amount
86 * of kva available to us.
87 */
88 if (hi >= (bcstats.kvaslots / 16)) {
89 hi = bcstats.kvaslots / 16;
90 if (hi < 2)
91 hi = 2;
92 low = hi / 2;
93 }
94
95 mtx_init(&bq->bufq_mtx, IPL_BIO);
96 bq->bufq_hi = hi;
97 bq->bufq_low = low;
98 bq->bufq_type = type;
99 bq->bufq_impl = &bufq_impls[type];
100 bq->bufq_data = bq->bufq_impl->impl_create();
101 if (bq->bufq_data == NULL) {
102 /*
103 * we should actually return failure so disks attaching after
104 * boot in low memory situations dont panic the system.
105 */
106 panic("bufq init fail");
107 }
108
109 mtx_enter(&bufqs_mtx);
110 while (bufqs_stop) {
111 msleep_nsec(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqinit", INFSLP);
112 }
113 SLIST_INSERT_HEAD(&bufqs, bq, bufq_entries);
114 mtx_leave(&bufqs_mtx);
115
116 return (0);
117 }
118
119 int
bufq_switch(struct bufq * bq,int type)120 bufq_switch(struct bufq *bq, int type)
121 {
122 void *data;
123 void *odata;
124 int otype;
125 struct buf *bp;
126 int ret;
127
128 mtx_enter(&bq->bufq_mtx);
129 ret = (bq->bufq_type == type);
130 mtx_leave(&bq->bufq_mtx);
131 if (ret)
132 return (0);
133
134 data = bufq_impls[type].impl_create();
135 if (data == NULL)
136 return (ENOMEM);
137
138 mtx_enter(&bq->bufq_mtx);
139 if (bq->bufq_type != type) { /* might have changed during create */
140 odata = bq->bufq_data;
141 otype = bq->bufq_type;
142
143 while ((bp = bufq_impls[otype].impl_dequeue(odata)) != NULL)
144 bufq_impls[type].impl_queue(data, bp);
145
146 bq->bufq_data = data;
147 bq->bufq_type = type;
148 bq->bufq_impl = &bufq_impls[type];
149 } else {
150 otype = type;
151 odata = data;
152 }
153 mtx_leave(&bq->bufq_mtx);
154
155 bufq_impls[otype].impl_destroy(odata);
156
157 return (0);
158 }
159
/*
 * Tear down `bq': fail any still-queued bufs, destroy the per-type
 * state, and unlink the bufq from the global list.  Like bufq_init(),
 * the unlink must wait while bufqs_stop is set so that
 * bufq_quiesce() can safely walk the list.
 */
void
bufq_destroy(struct bufq *bq)
{
	/* Complete all queued bufs with ENXIO before freeing state. */
	bufq_drain(bq);

	bq->bufq_impl->impl_destroy(bq->bufq_data);
	bq->bufq_data = NULL;

	mtx_enter(&bufqs_mtx);
	while (bufqs_stop) {
		msleep_nsec(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqdest", INFSLP);
	}
	SLIST_REMOVE(&bufqs, bq, bufq, bufq_entries);
	mtx_leave(&bufqs_mtx);
}
175
176
177 void
bufq_queue(struct bufq * bq,struct buf * bp)178 bufq_queue(struct bufq *bq, struct buf *bp)
179 {
180 mtx_enter(&bq->bufq_mtx);
181 while (bq->bufq_stop) {
182 msleep_nsec(&bq->bufq_stop, &bq->bufq_mtx, PRIBIO, "bqqueue",
183 INFSLP);
184 }
185
186 bp->b_bq = bq;
187 bq->bufq_outstanding++;
188 bq->bufq_impl->impl_queue(bq->bufq_data, bp);
189 mtx_leave(&bq->bufq_mtx);
190 }
191
192 struct buf *
bufq_dequeue(struct bufq * bq)193 bufq_dequeue(struct bufq *bq)
194 {
195 struct buf *bp;
196
197 mtx_enter(&bq->bufq_mtx);
198 bp = bq->bufq_impl->impl_dequeue(bq->bufq_data);
199 mtx_leave(&bq->bufq_mtx);
200
201 return (bp);
202 }
203
204 void
bufq_requeue(struct bufq * bq,struct buf * bp)205 bufq_requeue(struct bufq *bq, struct buf *bp)
206 {
207 mtx_enter(&bq->bufq_mtx);
208 bq->bufq_impl->impl_requeue(bq->bufq_data, bp);
209 mtx_leave(&bq->bufq_mtx);
210 }
211
212 int
bufq_peek(struct bufq * bq)213 bufq_peek(struct bufq *bq)
214 {
215 int rv;
216
217 mtx_enter(&bq->bufq_mtx);
218 rv = bq->bufq_impl->impl_peek(bq->bufq_data);
219 mtx_leave(&bq->bufq_mtx);
220
221 return (rv);
222 }
223
224 void
bufq_drain(struct bufq * bq)225 bufq_drain(struct bufq *bq)
226 {
227 struct buf *bp;
228 int s;
229
230 while ((bp = bufq_dequeue(bq)) != NULL) {
231 bp->b_error = ENXIO;
232 bp->b_flags |= B_ERROR;
233 s = splbio();
234 biodone(bp);
235 splx(s);
236 }
237 }
238
/*
 * Throttle writers: sleep until the number of outstanding bufs on
 * `bq' falls below the high water mark.  Queues created with no high
 * water mark (bufq_hi == 0) never throttle.  The matching wakeup is
 * in bufq_done(), once bufq_outstanding drops below bufq_low.
 */
void
bufq_wait(struct bufq *bq)
{
	if (bq->bufq_hi) {
		assertwaitok();
		mtx_enter(&bq->bufq_mtx);
		while (bq->bufq_outstanding >= bq->bufq_hi) {
			/* bufq_waiting tells bufq_done() a wakeup is needed */
			bq->bufq_waiting++;
			msleep_nsec(&bq->bufq_waiting, &bq->bufq_mtx,
			    PRIBIO, "bqwait", INFSLP);
			bq->bufq_waiting--;
		}
		mtx_leave(&bq->bufq_mtx);
	}
}
254
/*
 * Account for the completion of `bp' on `bq'.  Wakes bufq_quiesce()
 * when a stopped queue drains to zero, and wakes throttled writers in
 * bufq_wait() once the outstanding count falls below the low water
 * mark.  bp->b_bq is cleared after dropping the mutex: the buf no
 * longer belongs to this queue.
 */
void
bufq_done(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	KASSERT(bq->bufq_outstanding > 0);
	bq->bufq_outstanding--;
	if (bq->bufq_stop && bq->bufq_outstanding == 0)
		wakeup(&bq->bufq_outstanding);
	if (bq->bufq_waiting && bq->bufq_outstanding < bq->bufq_low)
		wakeup(&bq->bufq_waiting);
	mtx_leave(&bq->bufq_mtx);
	bp->b_bq = NULL;
}
268
/*
 * Stop all bufqs (e.g. around suspend): forbid new queueing on every
 * bufq and wait for each queue's outstanding bufs to complete.
 * Setting bufqs_stop first also freezes the global list so it can be
 * walked below without holding bufqs_mtx.
 */
void
bufq_quiesce(void)
{
	struct bufq *bq;

	mtx_enter(&bufqs_mtx);
	bufqs_stop = 1;
	mtx_leave(&bufqs_mtx);
	/*
	 * We can safely walk the list since it can't be modified as
	 * long as bufqs_stop is non-zero.
	 */
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 1;
		while (bq->bufq_outstanding) {
			/* bufq_done() wakes us when the count hits zero */
			msleep_nsec(&bq->bufq_outstanding, &bq->bufq_mtx,
			    PRIBIO, "bqquies", INFSLP);
		}
		mtx_leave(&bq->bufq_mtx);
	}
}
291
/*
 * Undo bufq_quiesce(): release every stopped bufq and wake anyone
 * sleeping in bufq_queue(), then clear bufqs_stop and wake threads
 * waiting in bufq_init()/bufq_destroy() to modify the global list.
 */
void
bufq_restart(void)
{
	struct bufq *bq;

	mtx_enter(&bufqs_mtx);
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 0;
		wakeup(&bq->bufq_stop);
		mtx_leave(&bq->bufq_mtx);
	}
	bufqs_stop = 0;
	wakeup(&bufqs_stop);
	mtx_leave(&bufqs_mtx);
}
308
309
310 /*
311 * fifo implementation
312 */
313
314 void *
bufq_fifo_create(void)315 bufq_fifo_create(void)
316 {
317 struct bufq_fifo_head *head;
318
319 head = malloc(sizeof(*head), M_DEVBUF, M_NOWAIT | M_ZERO);
320 if (head == NULL)
321 return (NULL);
322
323 SIMPLEQ_INIT(head);
324
325 return (head);
326 }
327
328 void
bufq_fifo_destroy(void * data)329 bufq_fifo_destroy(void *data)
330 {
331 struct bufq_fifo_head *head = data;
332
333 free(head, M_DEVBUF, sizeof(*head));
334 }
335
336 void
bufq_fifo_queue(void * data,struct buf * bp)337 bufq_fifo_queue(void *data, struct buf *bp)
338 {
339 struct bufq_fifo_head *head = data;
340
341 SIMPLEQ_INSERT_TAIL(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
342 }
343
344 struct buf *
bufq_fifo_dequeue(void * data)345 bufq_fifo_dequeue(void *data)
346 {
347 struct bufq_fifo_head *head = data;
348 struct buf *bp;
349
350 bp = SIMPLEQ_FIRST(head);
351 if (bp != NULL)
352 SIMPLEQ_REMOVE_HEAD(head, b_bufq.bufq_data_fifo.bqf_entries);
353
354 return (bp);
355 }
356
357 void
bufq_fifo_requeue(void * data,struct buf * bp)358 bufq_fifo_requeue(void *data, struct buf *bp)
359 {
360 struct bufq_fifo_head *head = data;
361
362 SIMPLEQ_INSERT_HEAD(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
363 }
364
/* Return non-zero if the fifo holds at least one buf. */
int
bufq_fifo_peek(void *data)
{
	struct bufq_fifo_head *hp = data;

	return (!SIMPLEQ_EMPTY(hp));
}
372
373 /*
374 * nscan implementation
375 */
376
/* Ordering predicate: does `ba' sort before `bb' by block number? */
#define BUF_INORDER(ba, bb) ((ba)->b_blkno < (bb)->b_blkno)

/* shorthand for the per-buf queue-entry field used by nscan */
#define dsentries b_bufq.bufq_data_nscan.bqf_entries

/*
 * nscan per-queue state: bufs arrive on `fifo' in submission order
 * and are moved to `sorted' in batches of up to BUFQ_NSCAN_N,
 * sorted by block number (see bufq_nscan_resort()).
 */
struct bufq_nscan_data {
	struct bufq_nscan_head sorted;
	struct bufq_nscan_head fifo;
	int leftoverroom; /* Remaining number of buffer inserts allowed */
};

void bufq_nscan_resort(struct bufq_nscan_data *data);
void bufq_simple_nscan(struct bufq_nscan_head *, struct buf *);
389
390 void
bufq_simple_nscan(struct bufq_nscan_head * head,struct buf * bp)391 bufq_simple_nscan(struct bufq_nscan_head *head, struct buf *bp)
392 {
393 struct buf *cur, *prev;
394
395 prev = NULL;
396 /*
397 * We look for the first slot where we would fit, then insert
398 * after the element we just passed.
399 */
400 SIMPLEQ_FOREACH(cur, head, dsentries) {
401 if (BUF_INORDER(bp, cur))
402 break;
403 prev = cur;
404 }
405 if (prev)
406 SIMPLEQ_INSERT_AFTER(head, prev, bp, dsentries);
407 else
408 SIMPLEQ_INSERT_HEAD(head, bp, dsentries);
409
410 }
411
412 /*
413 * Take N elements from the fifo queue and sort them
414 */
415 void
bufq_nscan_resort(struct bufq_nscan_data * data)416 bufq_nscan_resort(struct bufq_nscan_data *data)
417 {
418 struct bufq_nscan_head *fifo = &data->fifo;
419 struct bufq_nscan_head *sorted = &data->sorted;
420 int count, segmentsize = BUFQ_NSCAN_N;
421 struct buf *bp;
422
423 for (count = 0; count < segmentsize; count++) {
424 bp = SIMPLEQ_FIRST(fifo);
425 if (!bp)
426 break;
427 SIMPLEQ_REMOVE_HEAD(fifo, dsentries);
428 bufq_simple_nscan(sorted, bp);
429 }
430 data->leftoverroom = segmentsize - count;
431 }
432
433 void *
bufq_nscan_create(void)434 bufq_nscan_create(void)
435 {
436 struct bufq_nscan_data *data;
437
438 data = malloc(sizeof(*data), M_DEVBUF, M_NOWAIT | M_ZERO);
439 if (!data)
440 return NULL;
441 SIMPLEQ_INIT(&data->sorted);
442 SIMPLEQ_INIT(&data->fifo);
443
444 return data;
445 }
446
447 void
bufq_nscan_destroy(void * vdata)448 bufq_nscan_destroy(void *vdata)
449 {
450 struct bufq_nscan_data *data = vdata;
451
452 free(data, M_DEVBUF, sizeof(*data));
453 }
454
455 void
bufq_nscan_queue(void * vdata,struct buf * bp)456 bufq_nscan_queue(void *vdata, struct buf *bp)
457 {
458 struct bufq_nscan_data *data = vdata;
459
460 /*
461 * If the previous sorted segment was small, we will continue
462 * packing in bufs as long as they're in order.
463 */
464 if (data->leftoverroom) {
465 struct buf *next = SIMPLEQ_FIRST(&data->sorted);
466 if (next && BUF_INORDER(next, bp)) {
467 bufq_simple_nscan(&data->sorted, bp);
468 data->leftoverroom--;
469 return;
470 }
471 }
472
473 SIMPLEQ_INSERT_TAIL(&data->fifo, bp, dsentries);
474
475 }
476
477 struct buf *
bufq_nscan_dequeue(void * vdata)478 bufq_nscan_dequeue(void *vdata)
479 {
480 struct bufq_nscan_data *data = vdata;
481 struct bufq_nscan_head *sorted = &data->sorted;
482 struct buf *bp;
483
484 if (SIMPLEQ_FIRST(sorted) == NULL)
485 bufq_nscan_resort(data);
486
487 bp = SIMPLEQ_FIRST(sorted);
488 if (bp != NULL)
489 SIMPLEQ_REMOVE_HEAD(sorted, dsentries);
490
491 return (bp);
492 }
493
494 void
bufq_nscan_requeue(void * vdata,struct buf * bp)495 bufq_nscan_requeue(void *vdata, struct buf *bp)
496 {
497 struct bufq_nscan_data *data = vdata;
498
499 SIMPLEQ_INSERT_HEAD(&data->fifo, bp, dsentries);
500 }
501
502 int
bufq_nscan_peek(void * vdata)503 bufq_nscan_peek(void *vdata)
504 {
505 struct bufq_nscan_data *data = vdata;
506
507 return (SIMPLEQ_FIRST(&data->sorted) != NULL) ||
508 (SIMPLEQ_FIRST(&data->fifo) != NULL);
509 }
510