/*	$OpenBSD: kern_bufq.c,v 1.33 2019/12/19 17:40:10 mpi Exp $	*/
/*
 * Copyright (c) 2010 Thordur I. Bjornsson <thib@openbsd.org>
 * Copyright (c) 2010 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/queue.h>

SLIST_HEAD(, bufq)	bufqs = SLIST_HEAD_INITIALIZER(bufqs);
struct mutex		bufqs_mtx = MUTEX_INITIALIZER(IPL_NONE);
int			bufqs_stop;

struct bufq_impl {
	void		*(*impl_create)(void);
	void		 (*impl_destroy)(void *);

	void		 (*impl_queue)(void *, struct buf *);
	struct buf	*(*impl_dequeue)(void *);
	void		 (*impl_requeue)(void *, struct buf *);
	int		 (*impl_peek)(void *);
};

void		*bufq_fifo_create(void);
void		 bufq_fifo_destroy(void *);
void		 bufq_fifo_queue(void *, struct buf *);
struct buf	*bufq_fifo_dequeue(void *);
void		 bufq_fifo_requeue(void *, struct buf *);
int		 bufq_fifo_peek(void *);

void		*bufq_nscan_create(void);
void		 bufq_nscan_destroy(void *);
void		 bufq_nscan_queue(void *, struct buf *);
struct buf	*bufq_nscan_dequeue(void *);
void		 bufq_nscan_requeue(void *, struct buf *);
int		 bufq_nscan_peek(void *);

const struct bufq_impl bufq_impls[BUFQ_HOWMANY] = {
	{
		bufq_fifo_create,
		bufq_fifo_destroy,
		bufq_fifo_queue,
		bufq_fifo_dequeue,
		bufq_fifo_requeue,
		bufq_fifo_peek
	},
	{
		bufq_nscan_create,
		bufq_nscan_destroy,
		bufq_nscan_queue,
		bufq_nscan_dequeue,
		bufq_nscan_requeue,
		bufq_nscan_peek
	}
};

int
bufq_init(struct bufq *bq, int type)
{
	u_int hi = BUFQ_HI, low = BUFQ_LOW;

	if (type >= BUFQ_HOWMANY)
		panic("bufq_init: type %i unknown", type);

	/*
	 * Ensure that writes can't consume the entire amount of kva
	 * available to the buffer cache if we only have a limited amount
	 * of kva available to us.
	 */
	if (hi >= (bcstats.kvaslots / 16)) {
		hi = bcstats.kvaslots / 16;
		if (hi < 2)
			hi = 2;
		low = hi / 2;
	}

	mtx_init(&bq->bufq_mtx, IPL_BIO);
	bq->bufq_hi = hi;
	bq->bufq_low = low;
	bq->bufq_type = type;
	bq->bufq_impl = &bufq_impls[type];
	bq->bufq_data = bq->bufq_impl->impl_create();
	if (bq->bufq_data == NULL) {
		/*
		 * We should actually return failure so disks attaching
		 * after boot in low memory situations don't panic the
		 * system.
		 */
		panic("bufq init fail");
	}

	mtx_enter(&bufqs_mtx);
	while (bufqs_stop) {
		msleep_nsec(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqinit", INFSLP);
	}
	SLIST_INSERT_HEAD(&bufqs, bq, bufq_entries);
	mtx_leave(&bufqs_mtx);

	return (0);
}
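/*
 * A typical consumer (a sketch only; the softc layout and routine
 * names below are hypothetical, not part of this file) initializes
 * a bufq once at attach time and then moves bufs through it from its
 * strategy and start routines. Completion runs through biodone(),
 * which hands the buf back via bufq_done() when bp->b_bq is set.
 * BUFQ_DEFAULT is assumed to come from <sys/buf.h>.
 *
 *	bufq_init(&sc->sc_bufq, BUFQ_DEFAULT);	at attach
 *
 *	bufq_queue(&sc->sc_bufq, bp);		from strategy, then
 *						kick the start routine
 *
 *	while ((bp = bufq_dequeue(&sc->sc_bufq)) != NULL)
 *		...issue the I/O in bp...	from start
 *
 *	bufq_destroy(&sc->sc_bufq);		at detach
 *
 * Writers that want to respect the hi/low watermarks set up above
 * call bufq_wait() before queueing.
 */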
int
bufq_switch(struct bufq *bq, int type)
{
	void *data;
	void *odata;
	int otype;
	struct buf *bp;
	int ret;

	mtx_enter(&bq->bufq_mtx);
	ret = (bq->bufq_type == type);
	mtx_leave(&bq->bufq_mtx);
	if (ret)
		return (0);

	data = bufq_impls[type].impl_create();
	if (data == NULL)
		return (ENOMEM);

	mtx_enter(&bq->bufq_mtx);
	if (bq->bufq_type != type) {	/* might have changed during create */
		odata = bq->bufq_data;
		otype = bq->bufq_type;

		while ((bp = bufq_impls[otype].impl_dequeue(odata)) != NULL)
			bufq_impls[type].impl_queue(data, bp);

		bq->bufq_data = data;
		bq->bufq_type = type;
		bq->bufq_impl = &bufq_impls[type];
	} else {
		otype = type;
		odata = data;
	}
	mtx_leave(&bq->bufq_mtx);

	bufq_impls[otype].impl_destroy(odata);

	return (0);
}

void
bufq_destroy(struct bufq *bq)
{
	bufq_drain(bq);

	bq->bufq_impl->impl_destroy(bq->bufq_data);
	bq->bufq_data = NULL;

	mtx_enter(&bufqs_mtx);
	while (bufqs_stop) {
		msleep_nsec(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqdest", INFSLP);
	}
	SLIST_REMOVE(&bufqs, bq, bufq, bufq_entries);
	mtx_leave(&bufqs_mtx);
}

void
bufq_queue(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	while (bq->bufq_stop) {
		msleep_nsec(&bq->bufq_stop, &bq->bufq_mtx, PRIBIO, "bqqueue",
		    INFSLP);
	}

	bp->b_bq = bq;
	bq->bufq_outstanding++;
	bq->bufq_impl->impl_queue(bq->bufq_data, bp);
	mtx_leave(&bq->bufq_mtx);
}

struct buf *
bufq_dequeue(struct bufq *bq)
{
	struct buf *bp;

	mtx_enter(&bq->bufq_mtx);
	bp = bq->bufq_impl->impl_dequeue(bq->bufq_data);
	mtx_leave(&bq->bufq_mtx);

	return (bp);
}

void
bufq_requeue(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	bq->bufq_impl->impl_requeue(bq->bufq_data, bp);
	mtx_leave(&bq->bufq_mtx);
}

int
bufq_peek(struct bufq *bq)
{
	int rv;

	mtx_enter(&bq->bufq_mtx);
	rv = bq->bufq_impl->impl_peek(bq->bufq_data);
	mtx_leave(&bq->bufq_mtx);

	return (rv);
}

void
bufq_drain(struct bufq *bq)
{
	struct buf *bp;
	int s;

	while ((bp = bufq_dequeue(bq)) != NULL) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		s = splbio();
		biodone(bp);
		splx(s);
	}
}

void
bufq_wait(struct bufq *bq)
{
	if (bq->bufq_hi) {
		assertwaitok();
		mtx_enter(&bq->bufq_mtx);
		while (bq->bufq_outstanding >= bq->bufq_hi) {
			bq->bufq_waiting++;
			msleep_nsec(&bq->bufq_waiting, &bq->bufq_mtx,
			    PRIBIO, "bqwait", INFSLP);
			bq->bufq_waiting--;
		}
		mtx_leave(&bq->bufq_mtx);
	}
}

void
bufq_done(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	KASSERT(bq->bufq_outstanding > 0);
	bq->bufq_outstanding--;
	if (bq->bufq_stop && bq->bufq_outstanding == 0)
		wakeup(&bq->bufq_outstanding);
	if (bq->bufq_waiting && bq->bufq_outstanding < bq->bufq_low)
		wakeup(&bq->bufq_waiting);
	mtx_leave(&bq->bufq_mtx);
	bp->b_bq = NULL;
}
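/*
 * bufq_quiesce() and bufq_restart() below bracket a machine-wide stall
 * of disk I/O, e.g. around suspend. A sketch of the expected call
 * pattern (the surrounding suspend machinery is hypothetical):
 *
 *	bufq_quiesce();		stop producers, wait until every bufq
 *				has zero outstanding bufs
 *	...machine sleeps...
 *	bufq_restart();		wake queueing threads, plus any thread
 *				blocked in bufq_init()/bufq_destroy()
 */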
void
bufq_quiesce(void)
{
	struct bufq *bq;

	mtx_enter(&bufqs_mtx);
	bufqs_stop = 1;
	mtx_leave(&bufqs_mtx);
	/*
	 * We can safely walk the list since it can't be modified as
	 * long as bufqs_stop is non-zero.
	 */
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 1;
		while (bq->bufq_outstanding) {
			msleep_nsec(&bq->bufq_outstanding, &bq->bufq_mtx,
			    PRIBIO, "bqquies", INFSLP);
		}
		mtx_leave(&bq->bufq_mtx);
	}
}

void
bufq_restart(void)
{
	struct bufq *bq;

	mtx_enter(&bufqs_mtx);
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 0;
		wakeup(&bq->bufq_stop);
		mtx_leave(&bq->bufq_mtx);
	}
	bufqs_stop = 0;
	wakeup(&bufqs_stop);
	mtx_leave(&bufqs_mtx);
}

/*
 * fifo implementation
 */

void *
bufq_fifo_create(void)
{
	struct bufq_fifo_head *head;

	head = malloc(sizeof(*head), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (head == NULL)
		return (NULL);

	SIMPLEQ_INIT(head);

	return (head);
}

void
bufq_fifo_destroy(void *data)
{
	struct bufq_fifo_head *head = data;

	free(head, M_DEVBUF, sizeof(*head));
}

void
bufq_fifo_queue(void *data, struct buf *bp)
{
	struct bufq_fifo_head *head = data;

	SIMPLEQ_INSERT_TAIL(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
}

struct buf *
bufq_fifo_dequeue(void *data)
{
	struct bufq_fifo_head *head = data;
	struct buf *bp;

	bp = SIMPLEQ_FIRST(head);
	if (bp != NULL)
		SIMPLEQ_REMOVE_HEAD(head, b_bufq.bufq_data_fifo.bqf_entries);

	return (bp);
}

void
bufq_fifo_requeue(void *data, struct buf *bp)
{
	struct bufq_fifo_head *head = data;

	SIMPLEQ_INSERT_HEAD(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
}

int
bufq_fifo_peek(void *data)
{
	struct bufq_fifo_head *head = data;

	return (SIMPLEQ_FIRST(head) != NULL);
}
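/*
 * The nscan ("N-step SCAN") discipline below keeps two internal
 * queues: incoming bufs collect in a fifo, and dequeue drains a
 * segment of up to BUFQ_NSCAN_N bufs that has been sorted by block
 * number. A sketch of the resulting order (block numbers invented):
 * bufs queued for blocks 7, 3, 9, 5 within one segment are dequeued
 * as 3, 5, 7, 9 once the fifo is resorted. Sorting only a bounded
 * segment at a time limits seeking without starving late arrivals
 * the way a pure elevator could.
 */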
/*
 * nscan implementation
 */

#define BUF_INORDER(ba, bb) ((ba)->b_blkno < (bb)->b_blkno)

#define dsentries b_bufq.bufq_data_nscan.bqf_entries

struct bufq_nscan_data {
	struct bufq_nscan_head sorted;
	struct bufq_nscan_head fifo;
	int leftoverroom;	/* Remaining number of buffer inserts allowed */
};

void bufq_nscan_resort(struct bufq_nscan_data *data);
void bufq_simple_nscan(struct bufq_nscan_head *, struct buf *);

void
bufq_simple_nscan(struct bufq_nscan_head *head, struct buf *bp)
{
	struct buf *cur, *prev;

	prev = NULL;
	/*
	 * We look for the first slot where we would fit, then insert
	 * after the element we just passed.
	 */
	SIMPLEQ_FOREACH(cur, head, dsentries) {
		if (BUF_INORDER(bp, cur))
			break;
		prev = cur;
	}
	if (prev)
		SIMPLEQ_INSERT_AFTER(head, prev, bp, dsentries);
	else
		SIMPLEQ_INSERT_HEAD(head, bp, dsentries);
}

/*
 * Take N elements from the fifo queue and sort them.
 */
void
bufq_nscan_resort(struct bufq_nscan_data *data)
{
	struct bufq_nscan_head *fifo = &data->fifo;
	struct bufq_nscan_head *sorted = &data->sorted;
	int count, segmentsize = BUFQ_NSCAN_N;
	struct buf *bp;

	for (count = 0; count < segmentsize; count++) {
		bp = SIMPLEQ_FIRST(fifo);
		if (!bp)
			break;
		SIMPLEQ_REMOVE_HEAD(fifo, dsentries);
		bufq_simple_nscan(sorted, bp);
	}
	data->leftoverroom = segmentsize - count;
}

void *
bufq_nscan_create(void)
{
	struct bufq_nscan_data *data;

	data = malloc(sizeof(*data), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (!data)
		return (NULL);
	SIMPLEQ_INIT(&data->sorted);
	SIMPLEQ_INIT(&data->fifo);

	return (data);
}

void
bufq_nscan_destroy(void *vdata)
{
	struct bufq_nscan_data *data = vdata;

	free(data, M_DEVBUF, sizeof(*data));
}

void
bufq_nscan_queue(void *vdata, struct buf *bp)
{
	struct bufq_nscan_data *data = vdata;

	/*
	 * If the previous sorted segment was small, we will continue
	 * packing in bufs as long as they're in order.
	 */
	if (data->leftoverroom) {
		struct buf *next = SIMPLEQ_FIRST(&data->sorted);
		if (next && BUF_INORDER(next, bp)) {
			bufq_simple_nscan(&data->sorted, bp);
			data->leftoverroom--;
			return;
		}
	}

	SIMPLEQ_INSERT_TAIL(&data->fifo, bp, dsentries);
}

struct buf *
bufq_nscan_dequeue(void *vdata)
{
	struct bufq_nscan_data *data = vdata;
	struct bufq_nscan_head *sorted = &data->sorted;
	struct buf *bp;

	if (SIMPLEQ_FIRST(sorted) == NULL)
		bufq_nscan_resort(data);

	bp = SIMPLEQ_FIRST(sorted);
	if (bp != NULL)
		SIMPLEQ_REMOVE_HEAD(sorted, dsentries);

	return (bp);
}

void
bufq_nscan_requeue(void *vdata, struct buf *bp)
{
	struct bufq_nscan_data *data = vdata;

	SIMPLEQ_INSERT_HEAD(&data->fifo, bp, dsentries);
}

int
bufq_nscan_peek(void *vdata)
{
	struct bufq_nscan_data *data = vdata;

	return (SIMPLEQ_FIRST(&data->sorted) != NULL) ||
	    (SIMPLEQ_FIRST(&data->fifo) != NULL);
}
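/*
 * Adding another discipline is a matter of providing the six impl_*
 * callbacks and a new slot in bufq_impls[] above; a sketch (the
 * "example" name and its BUFQ_* index are hypothetical, and
 * BUFQ_HOWMANY in <sys/buf.h> would have to grow to match):
 *
 *	{
 *		bufq_example_create,
 *		bufq_example_destroy,
 *		bufq_example_queue,
 *		bufq_example_dequeue,
 *		bufq_example_requeue,
 *		bufq_example_peek
 *	}
 */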