/*	$OpenBSD: kern_bufq.c,v 1.35 2022/12/05 23:18:37 deraadt Exp $	*/
/*
 * Copyright (c) 2010 Thordur I. Bjornsson <thib@openbsd.org>
 * Copyright (c) 2010 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/queue.h>

SLIST_HEAD(, bufq)	bufqs = SLIST_HEAD_INITIALIZER(bufqs);
struct mutex		bufqs_mtx = MUTEX_INITIALIZER(IPL_NONE);
int			bufqs_stop;

struct bufq_impl {
	void		*(*impl_create)(void);
	void		 (*impl_destroy)(void *);

	void		 (*impl_queue)(void *, struct buf *);
	struct buf	*(*impl_dequeue)(void *);
	void		 (*impl_requeue)(void *, struct buf *);
	int		 (*impl_peek)(void *);
};

void		*bufq_fifo_create(void);
void		 bufq_fifo_destroy(void *);
void		 bufq_fifo_queue(void *, struct buf *);
struct buf	*bufq_fifo_dequeue(void *);
void		 bufq_fifo_requeue(void *, struct buf *);
int		 bufq_fifo_peek(void *);

void		*bufq_nscan_create(void);
void		 bufq_nscan_destroy(void *);
void		 bufq_nscan_queue(void *, struct buf *);
struct buf	*bufq_nscan_dequeue(void *);
void		 bufq_nscan_requeue(void *, struct buf *);
int		 bufq_nscan_peek(void *);

const struct bufq_impl bufq_impls[BUFQ_HOWMANY] = {
	{
		bufq_fifo_create,
		bufq_fifo_destroy,
		bufq_fifo_queue,
		bufq_fifo_dequeue,
		bufq_fifo_requeue,
		bufq_fifo_peek
	},
	{
		bufq_nscan_create,
		bufq_nscan_destroy,
		bufq_nscan_queue,
		bufq_nscan_dequeue,
		bufq_nscan_requeue,
		bufq_nscan_peek
	}
};

int
bufq_init(struct bufq *bq, int type)
{
	u_int hi = BUFQ_HI, low = BUFQ_LOW;

	if (type >= BUFQ_HOWMANY)
		panic("bufq_init: type %i unknown", type);

	/*
	 * Ensure that writes can't consume the entire amount of kva
	 * available to the buffer cache if we only have a limited amount
	 * of kva available to us.
	 */
	if (hi >= (bcstats.kvaslots / 16)) {
		hi = bcstats.kvaslots / 16;
		if (hi < 2)
			hi = 2;
		low = hi / 2;
	}

	mtx_init(&bq->bufq_mtx, IPL_BIO);
	bq->bufq_hi = hi;
	bq->bufq_low = low;
	bq->bufq_type = type;
	bq->bufq_impl = &bufq_impls[type];
	bq->bufq_data = bq->bufq_impl->impl_create();
	if (bq->bufq_data == NULL) {
		/*
		 * We should actually return failure so disks attaching
		 * after boot in low-memory situations don't panic the
		 * system.
		 */
		panic("bufq init fail");
	}

	mtx_enter(&bufqs_mtx);
	while (bufqs_stop) {
		msleep_nsec(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqinit", INFSLP);
	}
	SLIST_INSERT_HEAD(&bufqs, bq, bufq_entries);
	mtx_leave(&bufqs_mtx);

	return (0);
}

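/*
 * Switch a bufq to a different queueing discipline at runtime.  The
 * new implementation's state is allocated with the bufq unlocked, so
 * the type may change underneath us; once the lock is retaken, any
 * bufs still on the old queue are migrated to the new one before the
 * now-unused state is torn down.
 */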
int
bufq_switch(struct bufq *bq, int type)
{
	void *data;
	void *odata;
	int otype;
	struct buf *bp;
	int ret;

	mtx_enter(&bq->bufq_mtx);
	ret = (bq->bufq_type == type);
	mtx_leave(&bq->bufq_mtx);
	if (ret)
		return (0);

	data = bufq_impls[type].impl_create();
	if (data == NULL)
		return (ENOMEM);

	mtx_enter(&bq->bufq_mtx);
	if (bq->bufq_type != type) {	/* might have changed during create */
		odata = bq->bufq_data;
		otype = bq->bufq_type;

		while ((bp = bufq_impls[otype].impl_dequeue(odata)) != NULL)
			bufq_impls[type].impl_queue(data, bp);

		bq->bufq_data = data;
		bq->bufq_type = type;
		bq->bufq_impl = &bufq_impls[type];
	} else {
		otype = type;
		odata = data;
	}
	mtx_leave(&bq->bufq_mtx);

	bufq_impls[otype].impl_destroy(odata);

	return (0);
}

void
bufq_destroy(struct bufq *bq)
{
	bufq_drain(bq);

	bq->bufq_impl->impl_destroy(bq->bufq_data);
	bq->bufq_data = NULL;

	mtx_enter(&bufqs_mtx);
	while (bufqs_stop) {
		msleep_nsec(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqdest", INFSLP);
	}
	SLIST_REMOVE(&bufqs, bq, bufq, bufq_entries);
	mtx_leave(&bufqs_mtx);
}

void
bufq_queue(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	while (bq->bufq_stop) {
		msleep_nsec(&bq->bufq_stop, &bq->bufq_mtx, PRIBIO, "bqqueue",
		    INFSLP);
	}

	bp->b_bq = bq;
	bq->bufq_outstanding++;
	bq->bufq_impl->impl_queue(bq->bufq_data, bp);
	mtx_leave(&bq->bufq_mtx);
}

struct buf *
bufq_dequeue(struct bufq *bq)
{
	struct buf *bp;

	mtx_enter(&bq->bufq_mtx);
	bp = bq->bufq_impl->impl_dequeue(bq->bufq_data);
	mtx_leave(&bq->bufq_mtx);

	return (bp);
}

void
bufq_requeue(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	bq->bufq_impl->impl_requeue(bq->bufq_data, bp);
	mtx_leave(&bq->bufq_mtx);
}

int
bufq_peek(struct bufq *bq)
{
	int rv;

	mtx_enter(&bq->bufq_mtx);
	rv = bq->bufq_impl->impl_peek(bq->bufq_data);
	mtx_leave(&bq->bufq_mtx);

	return (rv);
}

void
bufq_drain(struct bufq *bq)
{
	struct buf *bp;
	int s;

	while ((bp = bufq_dequeue(bq)) != NULL) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		s = splbio();
		biodone(bp);
		splx(s);
	}
}

void
bufq_wait(struct bufq *bq)
{
	if (bq->bufq_hi) {
		assertwaitok();
		mtx_enter(&bq->bufq_mtx);
		while (bq->bufq_outstanding >= bq->bufq_hi) {
			bq->bufq_waiting++;
			msleep_nsec(&bq->bufq_waiting, &bq->bufq_mtx,
			    PRIBIO, "bqwait", INFSLP);
			bq->bufq_waiting--;
		}
		mtx_leave(&bq->bufq_mtx);
	}
}

void
bufq_done(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	KASSERT(bq->bufq_outstanding > 0);
	bq->bufq_outstanding--;
	if (bq->bufq_stop && bq->bufq_outstanding == 0)
		wakeup(&bq->bufq_outstanding);
	if (bq->bufq_waiting && bq->bufq_outstanding < bq->bufq_low)
		wakeup(&bq->bufq_waiting);
	mtx_leave(&bq->bufq_mtx);
	bp->b_bq = NULL;
}

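/*
 * bufq_quiesce() stops all bufqs and waits for their outstanding I/O
 * to complete, so the whole I/O subsystem can be paused (e.g. around
 * suspend).  bufq_restart() undoes this and wakes any producers that
 * blocked in bufq_queue() or bufq_init() in the meantime.
 */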
void
bufq_quiesce(void)
{
	struct bufq *bq;

	mtx_enter(&bufqs_mtx);
	bufqs_stop = 1;
	mtx_leave(&bufqs_mtx);
	/*
	 * We can safely walk the list since it can't be modified as
	 * long as bufqs_stop is non-zero.
	 */
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 1;
		while (bq->bufq_outstanding) {
			msleep_nsec(&bq->bufq_outstanding, &bq->bufq_mtx,
			    PRIBIO, "bqquies", INFSLP);
		}
		mtx_leave(&bq->bufq_mtx);
	}
}

void
bufq_restart(void)
{
	struct bufq *bq;

	mtx_enter(&bufqs_mtx);
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 0;
		wakeup(&bq->bufq_stop);
		mtx_leave(&bq->bufq_mtx);
	}
	bufqs_stop = 0;
	wakeup(&bufqs_stop);
	mtx_leave(&bufqs_mtx);
}

/*
 * fifo implementation
 */

void *
bufq_fifo_create(void)
{
	struct bufq_fifo_head *head;

	head = malloc(sizeof(*head), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (head == NULL)
		return (NULL);

	SIMPLEQ_INIT(head);

	return (head);
}

void
bufq_fifo_destroy(void *data)
{
	struct bufq_fifo_head *head = data;

	free(head, M_DEVBUF, sizeof(*head));
}

void
bufq_fifo_queue(void *data, struct buf *bp)
{
	struct bufq_fifo_head *head = data;

	SIMPLEQ_INSERT_TAIL(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
}

struct buf *
bufq_fifo_dequeue(void *data)
{
	struct bufq_fifo_head *head = data;
	struct buf *bp;

	bp = SIMPLEQ_FIRST(head);
	if (bp != NULL)
		SIMPLEQ_REMOVE_HEAD(head, b_bufq.bufq_data_fifo.bqf_entries);

	return (bp);
}

void
bufq_fifo_requeue(void *data, struct buf *bp)
{
	struct bufq_fifo_head *head = data;

	SIMPLEQ_INSERT_HEAD(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
}

int
bufq_fifo_peek(void *data)
{
	struct bufq_fifo_head *head = data;

	return (SIMPLEQ_FIRST(head) != NULL);
}

/*
 * nscan implementation
 */

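/*
 * The nscan discipline approximates N-step SCAN.  Incoming bufs
 * collect on an unsorted fifo; whenever the sorted queue runs dry,
 * the next BUFQ_NSCAN_N of them are moved over and sorted by block
 * number, so each batch is serviced in ascending disk order (bufs
 * queued at blocks 40, 10, 30 within one batch dequeue as 10, 30,
 * 40).  If a batch comes up short, leftoverroom lets later arrivals
 * that still sort after the current head join the batch.
 */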
#define BUF_INORDER(ba, bb)	((ba)->b_blkno < (bb)->b_blkno)

#define dsentries b_bufq.bufq_data_nscan.bqf_entries

struct bufq_nscan_data {
	struct bufq_nscan_head sorted;
	struct bufq_nscan_head fifo;
	int leftoverroom;	/* Remaining number of buffer inserts allowed */
};

void	bufq_nscan_resort(struct bufq_nscan_data *data);
void	bufq_simple_nscan(struct bufq_nscan_head *, struct buf *);

void
bufq_simple_nscan(struct bufq_nscan_head *head, struct buf *bp)
{
	struct buf *cur, *prev;

	prev = NULL;
	/*
	 * We look for the first slot where we would fit, then insert
	 * after the element we just passed.
	 */
	SIMPLEQ_FOREACH(cur, head, dsentries) {
		if (BUF_INORDER(bp, cur))
			break;
		prev = cur;
	}
	if (prev != NULL)
		SIMPLEQ_INSERT_AFTER(head, prev, bp, dsentries);
	else
		SIMPLEQ_INSERT_HEAD(head, bp, dsentries);
}

/*
 * Take N elements from the fifo queue and sort them.
 */
void
bufq_nscan_resort(struct bufq_nscan_data *data)
{
	struct bufq_nscan_head *fifo = &data->fifo;
	struct bufq_nscan_head *sorted = &data->sorted;
	int count, segmentsize = BUFQ_NSCAN_N;
	struct buf *bp;

	for (count = 0; count < segmentsize; count++) {
		bp = SIMPLEQ_FIRST(fifo);
		if (bp == NULL)
			break;
		SIMPLEQ_REMOVE_HEAD(fifo, dsentries);
		bufq_simple_nscan(sorted, bp);
	}
	data->leftoverroom = segmentsize - count;
}

void *
bufq_nscan_create(void)
{
	struct bufq_nscan_data *data;

	data = malloc(sizeof(*data), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (data == NULL)
		return (NULL);
	SIMPLEQ_INIT(&data->sorted);
	SIMPLEQ_INIT(&data->fifo);

	return (data);
}

void
bufq_nscan_destroy(void *vdata)
{
	struct bufq_nscan_data *data = vdata;

	free(data, M_DEVBUF, sizeof(*data));
}

void
bufq_nscan_queue(void *vdata, struct buf *bp)
{
	struct bufq_nscan_data *data = vdata;

	/*
	 * If the previous sorted segment was small, we will continue
	 * packing in bufs as long as they're in order.
	 */
	if (data->leftoverroom) {
		struct buf *next = SIMPLEQ_FIRST(&data->sorted);
		if (next != NULL && BUF_INORDER(next, bp)) {
			bufq_simple_nscan(&data->sorted, bp);
			data->leftoverroom--;
			return;
		}
	}

	SIMPLEQ_INSERT_TAIL(&data->fifo, bp, dsentries);
}

struct buf *
bufq_nscan_dequeue(void *vdata)
{
	struct bufq_nscan_data *data = vdata;
	struct bufq_nscan_head *sorted = &data->sorted;
	struct buf *bp;

	if (SIMPLEQ_FIRST(sorted) == NULL)
		bufq_nscan_resort(data);

	bp = SIMPLEQ_FIRST(sorted);
	if (bp != NULL)
		SIMPLEQ_REMOVE_HEAD(sorted, dsentries);

	return (bp);
}

void
bufq_nscan_requeue(void *vdata, struct buf *bp)
{
	struct bufq_nscan_data *data = vdata;

	SIMPLEQ_INSERT_HEAD(&data->fifo, bp, dsentries);
}

int
bufq_nscan_peek(void *vdata)
{
	struct bufq_nscan_data *data = vdata;

	return (SIMPLEQ_FIRST(&data->sorted) != NULL) ||
	    (SIMPLEQ_FIRST(&data->fifo) != NULL);
}
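
/*
 * Usage sketch (illustrative only; the "mydev" driver, its softc, and
 * its helper routines are hypothetical): a disk driver pairs this API
 * roughly as follows.
 *
 *	At attach, pick a discipline (BUFQ_FIFO or BUFQ_DEFAULT):
 *		bufq_init(&sc->sc_bufq, BUFQ_DEFAULT);
 *
 *	In the strategy routine, enqueue and kick the hardware:
 *		bufq_queue(&sc->sc_bufq, bp);
 *		mydev_start(sc);
 *
 *	In the start routine, pull work until the hardware is busy,
 *	putting back anything it cannot take yet:
 *		while ((bp = bufq_dequeue(&sc->sc_bufq)) != NULL) {
 *			if (mydev_submit(sc, bp) != 0) {
 *				bufq_requeue(&sc->sc_bufq, bp);
 *				break;
 *			}
 *		}
 *
 *	On completion, release the slot before finishing the buf:
 *		bufq_done(&sc->sc_bufq, bp);
 *		biodone(bp);
 *
 *	At detach, fail anything still queued and free the state
 *	(bufq_destroy() drains internally):
 *		bufq_destroy(&sc->sc_bufq);
 */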