1 /* $OpenBSD: uvm_swap.c,v 1.167 2023/10/27 19:18:53 mpi Exp $ */ 2 /* $NetBSD: uvm_swap.c,v 1.40 2000/11/17 11:39:39 mrg Exp $ */ 3 4 /* 5 * Copyright (c) 1995, 1996, 1997 Matthew R. Green 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 24 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * from: NetBSD: vm_swap.c,v 1.52 1997/12/02 13:47:37 pk Exp 30 * from: Id: uvm_swap.c,v 1.1.2.42 1998/02/02 20:38:06 chuck Exp 31 */ 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/buf.h> 36 #include <sys/conf.h> 37 #include <sys/proc.h> 38 #include <sys/namei.h> 39 #include <sys/disklabel.h> 40 #include <sys/errno.h> 41 #include <sys/kernel.h> 42 #include <sys/malloc.h> 43 #include <sys/vnode.h> 44 #include <sys/fcntl.h> 45 #include <sys/extent.h> 46 #include <sys/blist.h> 47 #include <sys/mount.h> 48 #include <sys/mutex.h> 49 #include <sys/pool.h> 50 #include <sys/syscallargs.h> 51 #include <sys/swap.h> 52 #include <sys/disk.h> 53 #include <sys/task.h> 54 #include <sys/pledge.h> 55 #if defined(NFSCLIENT) 56 #include <sys/socket.h> 57 #include <netinet/in.h> 58 #include <nfs/nfsproto.h> 59 #include <nfs/nfsdiskless.h> 60 #endif 61 62 #include <uvm/uvm.h> 63 #ifdef UVM_SWAP_ENCRYPT 64 #include <uvm/uvm_swap_encrypt.h> 65 #endif 66 67 #include <sys/specdev.h> 68 69 #include "vnd.h" 70 71 /* 72 * uvm_swap.c: manage configuration and i/o to swap space. 73 */ 74 75 /* 76 * swap space is managed in the following way: 77 * 78 * each swap partition or file is described by a "swapdev" structure. 79 * each "swapdev" structure contains a "swapent" structure which contains 80 * information that is passed up to the user (via system calls). 81 * 82 * each swap partition is assigned a "priority" (int) which controls 83 * swap partition usage. 84 * 85 * the system maintains a global data structure describing all swap 86 * partitions/files. there is a sorted LIST of "swappri" structures 87 * which describe "swapdev"'s at that priority. this LIST is headed 88 * by the "swap_priority" global var. each "swappri" contains a 89 * TAILQ of "swapdev" structures at that priority. 90 * 91 * locking: 92 * - swap_syscall_lock (sleep lock): this lock serializes the swapctl 93 * system call and prevents the swap priority list from changing 94 * while we are in the middle of a system call (e.g. SWAP_STATS). 
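 *   swap_syscall_lock is always taken before uvm_swap_data_lock.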
95 * - uvm_swap_data_lock (mutex): this lock protects all swap data 96 * structures including the priority list, the swapdev structures, 97 * and the swapmap arena. 98 * 99 * each swap device has the following info: 100 * - swap device in use (could be disabled, preventing future use) 101 * - swap enabled (allows new allocations on swap) 102 * - map info in /dev/drum 103 * - vnode pointer 104 * for swap files only: 105 * - block size 106 * - max byte count in buffer 107 * - buffer 108 * - credentials to use when doing i/o to file 109 * 110 * userland controls and configures swap with the swapctl(2) system call. 111 * the sys_swapctl performs the following operations: 112 * [1] SWAP_NSWAP: returns the number of swap devices currently configured 113 * [2] SWAP_STATS: given a pointer to an array of swapent structures 114 * (passed in via "arg") of a size passed in via "misc" ... we load 115 * the current swap config into the array. 116 * [3] SWAP_ON: given a pathname in arg (could be device or file) and a 117 * priority in "misc", start swapping on it. 118 * [4] SWAP_OFF: as SWAP_ON, but stops swapping to a device 119 * [5] SWAP_CTL: changes the priority of a swap device (new priority in 120 * "misc") 121 */ 122 123 /* 124 * swapdev: describes a single swap partition/file 125 * 126 * note the following should be true: 127 * swd_inuse <= swd_nblks [number of blocks in use is <= total blocks] 128 * swd_nblks <= swd_mapsize [because mapsize includes disklabel] 129 */ 130 struct swapdev { 131 struct swapent swd_se; 132 #define swd_dev swd_se.se_dev /* device id */ 133 #define swd_flags swd_se.se_flags /* flags:inuse/enable/fake */ 134 #define swd_priority swd_se.se_priority /* our priority */ 135 #define swd_inuse swd_se.se_inuse /* blocks used */ 136 #define swd_nblks swd_se.se_nblks /* total blocks */ 137 char *swd_path; /* saved pathname of device */ 138 int swd_pathlen; /* length of pathname */ 139 int swd_npages; /* #pages we can use */ 140 int swd_npginuse; /* #pages in use */ 141 int swd_npgbad; /* #pages bad */ 142 int swd_drumoffset; /* page0 offset in drum */ 143 int swd_drumsize; /* #pages in drum */ 144 blist_t swd_blist; /* blist for this swapdev */ 145 struct vnode *swd_vp; /* backing vnode */ 146 TAILQ_ENTRY(swapdev) swd_next; /* priority tailq */ 147 148 int swd_bsize; /* blocksize (bytes) */ 149 int swd_maxactive; /* max active i/o reqs */ 150 int swd_active; /* # of active i/o reqs */ 151 struct bufq swd_bufq; 152 struct ucred *swd_cred; /* cred for file access */ 153 #ifdef UVM_SWAP_ENCRYPT 154 #define SWD_KEY_SHIFT 7 /* One key per 0.5 MByte */ 155 #define SWD_KEY(x,y) &((x)->swd_keys[((y) - (x)->swd_drumoffset) >> SWD_KEY_SHIFT]) 156 #define SWD_KEY_SIZE(x) (((x) + (1 << SWD_KEY_SHIFT) - 1) >> SWD_KEY_SHIFT) 157 158 #define SWD_DCRYPT_SHIFT 5 159 #define SWD_DCRYPT_BITS 32 160 #define SWD_DCRYPT_MASK (SWD_DCRYPT_BITS - 1) 161 #define SWD_DCRYPT_OFF(x) ((x) >> SWD_DCRYPT_SHIFT) 162 #define SWD_DCRYPT_BIT(x) ((x) & SWD_DCRYPT_MASK) 163 #define SWD_DCRYPT_SIZE(x) (SWD_DCRYPT_OFF((x) + SWD_DCRYPT_MASK) * sizeof(u_int32_t)) 164 u_int32_t *swd_decrypt; /* bitmap for decryption */ 165 struct swap_key *swd_keys; /* keys for different parts */ 166 #endif 167 }; 168 169 /* 170 * swap device priority entry; the list is kept sorted on `spi_priority'. 
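 * a lower numeric priority is preferred; uvm_swap_alloc() walks this
 * list in ascending priority order and rotates the tailq within a
 * priority to round-robin among swapdevs of equal priority.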
 */
struct swappri {
	int			spi_priority;	/* priority */
	TAILQ_HEAD(spi_swapdev, swapdev)	spi_swapdev;
	/* tailq of swapdevs at this priority */
	LIST_ENTRY(swappri)	spi_swappri;	/* global list of pri's */
};

/*
 * The following two structures are used to keep track of data transfers
 * on swap devices associated with regular files.
 * NOTE: this code is more or less a copy of vnd.c; we use the same
 * structure names here to ease porting.
 */
struct vndxfer {
	struct buf	*vx_bp;		/* Pointer to parent buffer */
	struct swapdev	*vx_sdp;
	int		vx_error;
	int		vx_pending;	/* # of pending aux buffers */
	int		vx_flags;
#define VX_BUSY		1
#define VX_DEAD		2
};

struct vndbuf {
	struct buf	vb_buf;
	struct vndxfer	*vb_vnx;
	struct task	vb_task;
};

/*
 * We keep a pool of vndbuf and vndxfer structures.
 */
struct pool vndxfer_pool;
struct pool vndbuf_pool;

/*
 * local variables
 */
struct extent *swapmap;		/* controls the mapping of /dev/drum */

/* list of all active swap devices [by priority] */
LIST_HEAD(swap_priority, swappri);
struct swap_priority swap_priority;	/* [S] */

/* locks */
struct mutex uvm_swap_data_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
struct rwlock swap_syscall_lock = RWLOCK_INITIALIZER("swplk");

struct mutex oommtx = MUTEX_INITIALIZER(IPL_VM);
struct vm_page *oompps[SWCLUSTPAGES];
int oom = 0;

/*
 * prototypes
 */
void		 swapdrum_add(struct swapdev *, int);
struct swapdev	*swapdrum_getsdp(int);

struct swapdev	*swaplist_find(struct vnode *, int);
void		 swaplist_insert(struct swapdev *,
		     struct swappri *, int);
void		 swaplist_trim(void);

int		 swap_on(struct proc *, struct swapdev *);
int		 swap_off(struct proc *, struct swapdev *);

void		 sw_reg_strategy(struct swapdev *, struct buf *, int);
void		 sw_reg_iodone(struct buf *);
void		 sw_reg_iodone_internal(void *);
void		 sw_reg_start(struct swapdev *);

int		 uvm_swap_io(struct vm_page **, int, int, int);

void		 swapmount(void);
int		 uvm_swap_allocpages(struct vm_page **, int, int);

#ifdef UVM_SWAP_ENCRYPT
/* for swap encrypt */
void		 uvm_swap_markdecrypt(struct swapdev *, int, int, int);
boolean_t	 uvm_swap_needdecrypt(struct swapdev *, int);
void		 uvm_swap_initcrypt(struct swapdev *, int);
#endif

/*
 * uvm_swap_init: init the swap system data structures and locks
 *
 * => called at boot time from init_main.c after the filesystems
 *	are brought up (which happens after uvm_init())
 */
void
uvm_swap_init(void)
{
	int error;

	/*
	 * first, init the swap list, its counter, and its lock.
	 * then get a handle on the vnode for /dev/drum by using
	 * its dev_t number ("swapdev", from MD conf.c).
	 */
	LIST_INIT(&swap_priority);
	uvmexp.nswapdev = 0;

	if (!swapdev_vp && bdevvp(swapdev, &swapdev_vp))
		panic("uvm_swap_init: can't get vnode for swap device");

	/*
	 * create swap block extent to map /dev/drum. The extent spans
	 * 1 to INT_MAX, which allows up to 2 gigablocks of swap space.
	 * Note that block 0 is reserved (used to indicate an allocation
	 * failure, or no allocation).
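 * a drum slot is therefore always a strictly positive page number, so
 * the 0 returned by uvm_swap_alloc() on failure can never collide with
 * a real allocation.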
283 */ 284 swapmap = extent_create("swapmap", 1, INT_MAX, 285 M_VMSWAP, 0, 0, EX_NOWAIT); 286 if (swapmap == 0) 287 panic("uvm_swap_init: extent_create failed"); 288 289 /* allocate pools for structures used for swapping to files. */ 290 pool_init(&vndxfer_pool, sizeof(struct vndxfer), 0, IPL_BIO, 0, 291 "swp vnx", NULL); 292 pool_init(&vndbuf_pool, sizeof(struct vndbuf), 0, IPL_BIO, 0, 293 "swp vnd", NULL); 294 295 /* allocate pages for OOM situations. */ 296 error = uvm_swap_allocpages(oompps, SWCLUSTPAGES, UVM_PLA_NOWAIT); 297 KASSERT(error == 0); 298 299 /* Setup the initial swap partition */ 300 swapmount(); 301 } 302 303 #ifdef UVM_SWAP_ENCRYPT 304 void 305 uvm_swap_initcrypt_all(void) 306 { 307 struct swapdev *sdp; 308 struct swappri *spp; 309 int npages; 310 311 312 LIST_FOREACH(spp, &swap_priority, spi_swappri) { 313 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) { 314 if (sdp->swd_decrypt == NULL) { 315 npages = dbtob((uint64_t)sdp->swd_nblks) >> 316 PAGE_SHIFT; 317 uvm_swap_initcrypt(sdp, npages); 318 } 319 } 320 } 321 } 322 323 void 324 uvm_swap_initcrypt(struct swapdev *sdp, int npages) 325 { 326 /* 327 * keep information if a page needs to be decrypted when we get it 328 * from the swap device. 329 * We cannot chance a malloc later, if we are doing ASYNC puts, 330 * we may not call malloc with M_WAITOK. This consumes only 331 * 8KB memory for a 256MB swap partition. 332 */ 333 sdp->swd_decrypt = malloc(SWD_DCRYPT_SIZE(npages), M_VMSWAP, 334 M_WAITOK|M_ZERO); 335 sdp->swd_keys = mallocarray(SWD_KEY_SIZE(npages), 336 sizeof(struct swap_key), M_VMSWAP, M_WAITOK|M_ZERO); 337 } 338 339 #endif /* UVM_SWAP_ENCRYPT */ 340 341 int 342 uvm_swap_allocpages(struct vm_page **pps, int npages, int flags) 343 { 344 struct pglist pgl; 345 int error, i; 346 347 KASSERT(npages <= SWCLUSTPAGES); 348 349 TAILQ_INIT(&pgl); 350 again: 351 error = uvm_pglistalloc(npages * PAGE_SIZE, dma_constraint.ucr_low, 352 dma_constraint.ucr_high, 0, 0, &pgl, npages, flags); 353 if (error && (curproc == uvm.pagedaemon_proc)) { 354 mtx_enter(&oommtx); 355 if (oom) { 356 msleep_nsec(&oom, &oommtx, PVM | PNORELOCK, 357 "oom", INFSLP); 358 goto again; 359 } 360 oom = 1; 361 for (i = 0; i < npages; i++) { 362 pps[i] = oompps[i]; 363 atomic_setbits_int(&pps[i]->pg_flags, PG_BUSY); 364 } 365 mtx_leave(&oommtx); 366 return 0; 367 } 368 if (error) 369 return error; 370 371 for (i = 0; i < npages; i++) { 372 pps[i] = TAILQ_FIRST(&pgl); 373 /* *sigh* */ 374 atomic_setbits_int(&pps[i]->pg_flags, PG_BUSY); 375 TAILQ_REMOVE(&pgl, pps[i], pageq); 376 } 377 378 return 0; 379 } 380 381 void 382 uvm_swap_freepages(struct vm_page **pps, int npages) 383 { 384 int i; 385 386 if (pps[0] == oompps[0]) { 387 for (i = 0; i < npages; i++) 388 uvm_pageclean(pps[i]); 389 390 mtx_enter(&oommtx); 391 KASSERT(oom == 1); 392 oom = 0; 393 mtx_leave(&oommtx); 394 wakeup(&oom); 395 return; 396 } 397 398 uvm_lock_pageq(); 399 for (i = 0; i < npages; i++) 400 uvm_pagefree(pps[i]); 401 uvm_unlock_pageq(); 402 403 } 404 405 #ifdef UVM_SWAP_ENCRYPT 406 /* 407 * Mark pages on the swap device for later decryption 408 */ 409 410 void 411 uvm_swap_markdecrypt(struct swapdev *sdp, int startslot, int npages, 412 int decrypt) 413 { 414 int pagestart, i; 415 int off, bit; 416 417 if (!sdp) 418 return; 419 420 pagestart = startslot - sdp->swd_drumoffset; 421 for (i = 0; i < npages; i++, pagestart++) { 422 off = SWD_DCRYPT_OFF(pagestart); 423 bit = SWD_DCRYPT_BIT(pagestart); 424 if (decrypt) 425 /* pages read need decryption */ 426 sdp->swd_decrypt[off] |= 1 
<< bit; 427 else 428 /* pages read do not need decryption */ 429 sdp->swd_decrypt[off] &= ~(1 << bit); 430 } 431 } 432 433 /* 434 * Check if the page that we got from disk needs to be decrypted 435 */ 436 437 boolean_t 438 uvm_swap_needdecrypt(struct swapdev *sdp, int off) 439 { 440 if (!sdp) 441 return FALSE; 442 443 off -= sdp->swd_drumoffset; 444 return sdp->swd_decrypt[SWD_DCRYPT_OFF(off)] & (1 << SWD_DCRYPT_BIT(off)) ? 445 TRUE : FALSE; 446 } 447 448 void 449 uvm_swap_finicrypt_all(void) 450 { 451 struct swapdev *sdp; 452 struct swappri *spp; 453 struct swap_key *key; 454 unsigned int nkeys; 455 456 LIST_FOREACH(spp, &swap_priority, spi_swappri) { 457 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) { 458 if (sdp->swd_decrypt == NULL) 459 continue; 460 461 nkeys = dbtob((uint64_t)sdp->swd_nblks) >> PAGE_SHIFT; 462 key = sdp->swd_keys + (SWD_KEY_SIZE(nkeys) - 1); 463 do { 464 if (key->refcount != 0) 465 swap_key_delete(key); 466 } while (key-- != sdp->swd_keys); 467 } 468 } 469 } 470 #endif /* UVM_SWAP_ENCRYPT */ 471 472 /* 473 * swaplist functions: functions that operate on the list of swap 474 * devices on the system. 475 */ 476 477 /* 478 * swaplist_insert: insert swap device "sdp" into the global list 479 * 480 * => caller must hold both swap_syscall_lock and uvm_swap_data_lock 481 * => caller must provide a newly allocated swappri structure (we will 482 * FREE it if we don't need it... this it to prevent allocation 483 * blocking here while adding swap) 484 */ 485 void 486 swaplist_insert(struct swapdev *sdp, struct swappri *newspp, int priority) 487 { 488 struct swappri *spp, *pspp; 489 490 KASSERT(rw_write_held(&swap_syscall_lock)); 491 MUTEX_ASSERT_LOCKED(&uvm_swap_data_lock); 492 493 /* 494 * find entry at or after which to insert the new device. 495 */ 496 pspp = NULL; 497 LIST_FOREACH(spp, &swap_priority, spi_swappri) { 498 if (priority <= spp->spi_priority) 499 break; 500 pspp = spp; 501 } 502 503 /* 504 * new priority? 505 */ 506 if (spp == NULL || spp->spi_priority != priority) { 507 spp = newspp; /* use newspp! */ 508 509 spp->spi_priority = priority; 510 TAILQ_INIT(&spp->spi_swapdev); 511 512 if (pspp) 513 LIST_INSERT_AFTER(pspp, spp, spi_swappri); 514 else 515 LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri); 516 } else { 517 /* we don't need a new priority structure, free it */ 518 free(newspp, M_VMSWAP, sizeof(*newspp)); 519 } 520 521 /* 522 * priority found (or created). now insert on the priority's 523 * tailq list and bump the total number of swapdevs. 524 */ 525 sdp->swd_priority = priority; 526 TAILQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next); 527 uvmexp.nswapdev++; 528 } 529 530 /* 531 * swaplist_find: find and optionally remove a swap device from the 532 * global list. 
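 * every priority bucket is searched; the match is made on the backing
 * vnode ("vp").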
533 * 534 * => caller must hold both swap_syscall_lock and uvm_swap_data_lock 535 * => we return the swapdev we found (and removed) 536 */ 537 struct swapdev * 538 swaplist_find(struct vnode *vp, boolean_t remove) 539 { 540 struct swapdev *sdp; 541 struct swappri *spp; 542 543 KASSERT(rw_write_held(&swap_syscall_lock)); 544 MUTEX_ASSERT_LOCKED(&uvm_swap_data_lock); 545 546 /* 547 * search the lists for the requested vp 548 */ 549 LIST_FOREACH(spp, &swap_priority, spi_swappri) { 550 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) { 551 if (sdp->swd_vp != vp) 552 continue; 553 if (remove) { 554 TAILQ_REMOVE(&spp->spi_swapdev, sdp, swd_next); 555 uvmexp.nswapdev--; 556 } 557 return (sdp); 558 } 559 } 560 return (NULL); 561 } 562 563 564 /* 565 * swaplist_trim: scan priority list for empty priority entries and kill 566 * them. 567 * 568 * => caller must hold both swap_syscall_lock and uvm_swap_data_lock 569 */ 570 void 571 swaplist_trim(void) 572 { 573 struct swappri *spp, *nextspp; 574 575 KASSERT(rw_write_held(&swap_syscall_lock)); 576 MUTEX_ASSERT_LOCKED(&uvm_swap_data_lock); 577 578 LIST_FOREACH_SAFE(spp, &swap_priority, spi_swappri, nextspp) { 579 if (!TAILQ_EMPTY(&spp->spi_swapdev)) 580 continue; 581 LIST_REMOVE(spp, spi_swappri); 582 free(spp, M_VMSWAP, sizeof(*spp)); 583 } 584 } 585 586 /* 587 * swapdrum_add: add a "swapdev"'s blocks into /dev/drum's area. 588 * 589 * => caller must hold swap_syscall_lock 590 * => uvm_swap_data_lock should be unlocked (we may sleep) 591 */ 592 void 593 swapdrum_add(struct swapdev *sdp, int npages) 594 { 595 u_long result; 596 597 if (extent_alloc(swapmap, npages, EX_NOALIGN, 0, EX_NOBOUNDARY, 598 EX_WAITOK, &result)) 599 panic("swapdrum_add"); 600 601 sdp->swd_drumoffset = result; 602 sdp->swd_drumsize = npages; 603 } 604 605 /* 606 * swapdrum_getsdp: given a page offset in /dev/drum, convert it back 607 * to the "swapdev" that maps that section of the drum. 608 * 609 * => each swapdev takes one big contig chunk of the drum 610 * => caller must hold uvm_swap_data_lock 611 */ 612 struct swapdev * 613 swapdrum_getsdp(int pgno) 614 { 615 struct swapdev *sdp; 616 struct swappri *spp; 617 618 MUTEX_ASSERT_LOCKED(&uvm_swap_data_lock); 619 620 LIST_FOREACH(spp, &swap_priority, spi_swappri) { 621 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) { 622 if (pgno >= sdp->swd_drumoffset && 623 pgno < (sdp->swd_drumoffset + sdp->swd_drumsize)) { 624 return sdp; 625 } 626 } 627 } 628 return NULL; 629 } 630 631 632 /* 633 * sys_swapctl: main entry point for swapctl(2) system call 634 * [with two helper functions: swap_on and swap_off] 635 */ 636 int 637 sys_swapctl(struct proc *p, void *v, register_t *retval) 638 { 639 struct sys_swapctl_args /* { 640 syscallarg(int) cmd; 641 syscallarg(void *) arg; 642 syscallarg(int) misc; 643 } */ *uap = (struct sys_swapctl_args *)v; 644 struct vnode *vp; 645 struct nameidata nd; 646 struct swappri *spp; 647 struct swapdev *sdp; 648 struct swapent *sep; 649 char userpath[MAXPATHLEN]; 650 size_t len; 651 int count, error, misc; 652 int priority; 653 654 misc = SCARG(uap, misc); 655 656 if ((error = pledge_swapctl(p, SCARG(uap, cmd)))) 657 return error; 658 659 /* 660 * ensure serialized syscall access by grabbing the swap_syscall_lock 661 */ 662 rw_enter_write(&swap_syscall_lock); 663 664 /* 665 * we handle the non-priv NSWAP and STATS request first. 
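 * (these only read the current configuration, so the suser() check
 * below is not applied to them)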
666 * 667 * SWAP_NSWAP: return number of config'd swap devices 668 * [can also be obtained with uvmexp sysctl] 669 */ 670 if (SCARG(uap, cmd) == SWAP_NSWAP) { 671 *retval = uvmexp.nswapdev; 672 error = 0; 673 goto out; 674 } 675 676 /* 677 * SWAP_STATS: get stats on current # of configured swap devs 678 * 679 * note that the swap_priority list can't change as long 680 * as we are holding the swap_syscall_lock. we don't want 681 * to grab the uvm_swap_data_lock because we may fault&sleep during 682 * copyout() and we don't want to be holding that lock then! 683 */ 684 if (SCARG(uap, cmd) == SWAP_STATS) { 685 sep = (struct swapent *)SCARG(uap, arg); 686 count = 0; 687 688 LIST_FOREACH(spp, &swap_priority, spi_swappri) { 689 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) { 690 if (count >= misc) 691 continue; 692 693 sdp->swd_inuse = 694 btodb((u_int64_t)sdp->swd_npginuse << 695 PAGE_SHIFT); 696 error = copyout(&sdp->swd_se, sep, 697 sizeof(struct swapent)); 698 if (error) 699 goto out; 700 701 /* now copy out the path if necessary */ 702 error = copyoutstr(sdp->swd_path, 703 sep->se_path, sizeof(sep->se_path), NULL); 704 if (error) 705 goto out; 706 707 count++; 708 sep++; 709 } 710 } 711 712 *retval = count; 713 error = 0; 714 goto out; 715 } 716 717 /* all other requests require superuser privs. verify. */ 718 if ((error = suser(p))) 719 goto out; 720 721 /* 722 * at this point we expect a path name in arg. we will 723 * use namei() to gain a vnode reference (vref), and lock 724 * the vnode (VOP_LOCK). 725 */ 726 error = copyinstr(SCARG(uap, arg), userpath, sizeof(userpath), &len); 727 if (error) 728 goto out; 729 disk_map(userpath, userpath, sizeof(userpath), DM_OPENBLCK); 730 NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, userpath, p); 731 if ((error = namei(&nd))) 732 goto out; 733 vp = nd.ni_vp; 734 /* note: "vp" is referenced and locked */ 735 736 error = 0; /* assume no error */ 737 switch(SCARG(uap, cmd)) { 738 case SWAP_DUMPDEV: 739 if (vp->v_type != VBLK) { 740 error = ENOTBLK; 741 break; 742 } 743 dumpdev = vp->v_rdev; 744 break; 745 case SWAP_CTL: 746 /* 747 * get new priority, remove old entry (if any) and then 748 * reinsert it in the correct place. finally, prune out 749 * any empty priority structures. 750 */ 751 priority = SCARG(uap, misc); 752 spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK); 753 mtx_enter(&uvm_swap_data_lock); 754 if ((sdp = swaplist_find(vp, 1)) == NULL) { 755 error = ENOENT; 756 } else { 757 swaplist_insert(sdp, spp, priority); 758 swaplist_trim(); 759 } 760 mtx_leave(&uvm_swap_data_lock); 761 if (error) 762 free(spp, M_VMSWAP, sizeof(*spp)); 763 break; 764 case SWAP_ON: 765 /* 766 * If the device is a regular file, make sure the filesystem 767 * can be used for swapping. 768 */ 769 if (vp->v_type == VREG && 770 (vp->v_mount->mnt_flag & MNT_SWAPPABLE) == 0) { 771 error = ENOTSUP; 772 break; 773 } 774 775 /* 776 * check for duplicates. if none found, then insert a 777 * dummy entry on the list to prevent someone else from 778 * trying to enable this device while we are working on 779 * it. 780 */ 781 782 priority = SCARG(uap, misc); 783 sdp = malloc(sizeof *sdp, M_VMSWAP, M_WAITOK|M_ZERO); 784 spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK); 785 sdp->swd_flags = SWF_FAKE; /* placeholder only */ 786 sdp->swd_vp = vp; 787 sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV; 788 789 /* 790 * XXX Is NFS elaboration necessary? 
791 */ 792 if (vp->v_type == VREG) { 793 sdp->swd_cred = crdup(p->p_ucred); 794 } 795 796 mtx_enter(&uvm_swap_data_lock); 797 if (swaplist_find(vp, 0) != NULL) { 798 error = EBUSY; 799 mtx_leave(&uvm_swap_data_lock); 800 if (vp->v_type == VREG) { 801 crfree(sdp->swd_cred); 802 } 803 free(sdp, M_VMSWAP, sizeof *sdp); 804 free(spp, M_VMSWAP, sizeof *spp); 805 break; 806 } 807 swaplist_insert(sdp, spp, priority); 808 mtx_leave(&uvm_swap_data_lock); 809 810 sdp->swd_pathlen = len; 811 sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK); 812 strlcpy(sdp->swd_path, userpath, len); 813 814 /* 815 * we've now got a FAKE placeholder in the swap list. 816 * now attempt to enable swap on it. if we fail, undo 817 * what we've done and kill the fake entry we just inserted. 818 * if swap_on is a success, it will clear the SWF_FAKE flag 819 */ 820 821 if ((error = swap_on(p, sdp)) != 0) { 822 mtx_enter(&uvm_swap_data_lock); 823 (void) swaplist_find(vp, 1); /* kill fake entry */ 824 swaplist_trim(); 825 mtx_leave(&uvm_swap_data_lock); 826 if (vp->v_type == VREG) { 827 crfree(sdp->swd_cred); 828 } 829 free(sdp->swd_path, M_VMSWAP, sdp->swd_pathlen); 830 free(sdp, M_VMSWAP, sizeof(*sdp)); 831 break; 832 } 833 break; 834 case SWAP_OFF: 835 mtx_enter(&uvm_swap_data_lock); 836 if ((sdp = swaplist_find(vp, 0)) == NULL) { 837 mtx_leave(&uvm_swap_data_lock); 838 error = ENXIO; 839 break; 840 } 841 842 /* 843 * If a device isn't in use or enabled, we 844 * can't stop swapping from it (again). 845 */ 846 if ((sdp->swd_flags & (SWF_INUSE|SWF_ENABLE)) == 0) { 847 mtx_leave(&uvm_swap_data_lock); 848 error = EBUSY; 849 break; 850 } 851 852 /* 853 * do the real work. 854 */ 855 error = swap_off(p, sdp); 856 break; 857 default: 858 error = EINVAL; 859 } 860 861 /* done! release the ref gained by namei() and unlock. */ 862 vput(vp); 863 864 out: 865 rw_exit_write(&swap_syscall_lock); 866 867 return (error); 868 } 869 870 /* 871 * swap_on: attempt to enable a swapdev for swapping. note that the 872 * swapdev is already on the global list, but disabled (marked 873 * SWF_FAKE). 874 * 875 * => we avoid the start of the disk (to protect disk labels) 876 * => caller should leave uvm_swap_data_lock unlocked, we may lock it 877 * if needed. 878 */ 879 int 880 swap_on(struct proc *p, struct swapdev *sdp) 881 { 882 struct vnode *vp; 883 int error, npages, nblocks, size; 884 long addr; 885 struct vattr va; 886 #if defined(NFSCLIENT) 887 extern const struct vops nfs_vops; 888 #endif /* defined(NFSCLIENT) */ 889 dev_t dev; 890 891 /* 892 * we want to enable swapping on sdp. the swd_vp contains 893 * the vnode we want (locked and ref'd), and the swd_dev 894 * contains the dev_t of the file, if it a block device. 895 */ 896 897 vp = sdp->swd_vp; 898 dev = sdp->swd_dev; 899 900 #if NVND > 0 901 /* no swapping to vnds. */ 902 if (bdevsw[major(dev)].d_strategy == vndstrategy) 903 return (EOPNOTSUPP); 904 #endif 905 906 /* 907 * open the swap file (mostly useful for block device files to 908 * let device driver know what is up). 909 * 910 * we skip the open/close for root on swap because the root 911 * has already been opened when root was mounted (mountroot). 912 */ 913 if (vp != rootvp) { 914 if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p))) 915 return (error); 916 } 917 918 /* XXX this only works for block devices */ 919 /* 920 * we now need to determine the size of the swap area. for 921 * block specials we can call the d_psize function. 922 * for normal files, we must stat [get attrs]. 
 *
 * we put the result in nblks.
 * for normal files, we also want the filesystem block size
 * (which we get with statfs).
 */
	switch (vp->v_type) {
	case VBLK:
		if (bdevsw[major(dev)].d_psize == 0 ||
		    (nblocks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) {
			error = ENXIO;
			goto bad;
		}
		break;

	case VREG:
		if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)))
			goto bad;
		nblocks = (int)btodb(va.va_size);
		if ((error =
		    VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0)
			goto bad;

		sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize;
		/*
		 * limit the max # of outstanding I/O requests we issue
		 * at any one time.  take it easy on NFS servers.
		 */
#if defined(NFSCLIENT)
		if (vp->v_op == &nfs_vops)
			sdp->swd_maxactive = 2; /* XXX */
		else
#endif /* defined(NFSCLIENT) */
			sdp->swd_maxactive = 8; /* XXX */
		bufq_init(&sdp->swd_bufq, BUFQ_FIFO);
		break;

	default:
		error = ENXIO;
		goto bad;
	}

	/*
	 * save nblocks in a safe place and convert to pages.
	 */
	sdp->swd_nblks = nblocks;
	npages = dbtob((u_int64_t)nblocks) >> PAGE_SHIFT;

	/*
	 * for block special files, we want to make sure that we leave
	 * the disklabel and bootblocks alone, so we arrange to skip
	 * over them (arbitrarily choosing to skip PAGE_SIZE bytes).
	 * note that because of this the "size" can be less than the
	 * actual number of blocks on the device.
	 */
	if (vp->v_type == VBLK) {
		/* we use pages 1 to (size - 1) [inclusive] */
		size = npages - 1;
		addr = 1;
	} else {
		/* we use pages 0 to (size - 1) [inclusive] */
		size = npages;
		addr = 0;
	}

	/*
	 * make sure we have enough blocks for a reasonable sized swap
	 * area.  we want at least one page.
	 */
	if (size < 1) {
		error = EINVAL;
		goto bad;
	}

	/*
	 * now we need to allocate a blist to manage this swap device
	 */
	sdp->swd_blist = blist_create(npages);
	/* mark all except the `saved' region free. */
	blist_free(sdp->swd_blist, addr, size);

#ifdef HIBERNATE
	/*
	 * Lock down the last region of primary disk swap, in case
	 * hibernate needs to place a signature there.
	 */
	if (dev == swdevt[0].sw_dev && vp->v_type == VBLK && size > 3) {
		if (blist_fill(sdp->swd_blist, npages - 1, 1) != 1)
			panic("hibernate reserve");
	}
#endif

	/* add a ref to vp to reflect usage as a swap device. */
	vref(vp);

#ifdef UVM_SWAP_ENCRYPT
	if (uvm_doswapencrypt)
		uvm_swap_initcrypt(sdp, npages);
#endif
	/* now add the new swapdev to the drum and enable. */
	swapdrum_add(sdp, npages);
	sdp->swd_npages = size;
	mtx_enter(&uvm_swap_data_lock);
	sdp->swd_flags &= ~SWF_FAKE;	/* going live */
	sdp->swd_flags |= (SWF_INUSE|SWF_ENABLE);
	uvmexp.swpages += size;
	mtx_leave(&uvm_swap_data_lock);
	return (0);

	/*
	 * failure: clean up and return error.
	 */
bad:
	if (vp != rootvp)
		(void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
	return (error);
}

/*
 * swap_off: stop swapping on swapdev
 *
 * => swap data should be locked, we will unlock.
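 * => also called with swap_syscall_lock held (both are asserted below)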
1047 */ 1048 int 1049 swap_off(struct proc *p, struct swapdev *sdp) 1050 { 1051 int npages = sdp->swd_npages; 1052 int error = 0; 1053 1054 KASSERT(rw_write_held(&swap_syscall_lock)); 1055 MUTEX_ASSERT_LOCKED(&uvm_swap_data_lock); 1056 1057 /* disable the swap area being removed */ 1058 sdp->swd_flags &= ~SWF_ENABLE; 1059 mtx_leave(&uvm_swap_data_lock); 1060 1061 /* 1062 * the idea is to find all the pages that are paged out to this 1063 * device, and page them all in. in uvm, swap-backed pageable 1064 * memory can take two forms: aobjs and anons. call the 1065 * swapoff hook for each subsystem to bring in pages. 1066 */ 1067 1068 if (uao_swap_off(sdp->swd_drumoffset, 1069 sdp->swd_drumoffset + sdp->swd_drumsize) || 1070 amap_swap_off(sdp->swd_drumoffset, 1071 sdp->swd_drumoffset + sdp->swd_drumsize)) { 1072 error = ENOMEM; 1073 } else if (sdp->swd_npginuse > sdp->swd_npgbad) { 1074 error = EBUSY; 1075 } 1076 1077 if (error) { 1078 mtx_enter(&uvm_swap_data_lock); 1079 sdp->swd_flags |= SWF_ENABLE; 1080 mtx_leave(&uvm_swap_data_lock); 1081 return error; 1082 } 1083 1084 /* 1085 * done with the vnode and saved creds. 1086 * drop our ref on the vnode before calling VOP_CLOSE() 1087 * so that spec_close() can tell if this is the last close. 1088 */ 1089 if (sdp->swd_vp->v_type == VREG) { 1090 crfree(sdp->swd_cred); 1091 } 1092 vrele(sdp->swd_vp); 1093 if (sdp->swd_vp != rootvp) { 1094 (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p); 1095 } 1096 1097 mtx_enter(&uvm_swap_data_lock); 1098 uvmexp.swpages -= npages; 1099 1100 if (swaplist_find(sdp->swd_vp, 1) == NULL) 1101 panic("swap_off: swapdev not in list"); 1102 swaplist_trim(); 1103 mtx_leave(&uvm_swap_data_lock); 1104 1105 /* 1106 * free all resources! 1107 */ 1108 extent_free(swapmap, sdp->swd_drumoffset, sdp->swd_drumsize, 1109 EX_WAITOK); 1110 blist_destroy(sdp->swd_blist); 1111 /* free sdp->swd_path ? */ 1112 free(sdp, M_VMSWAP, sizeof(*sdp)); 1113 return (0); 1114 } 1115 1116 /* 1117 * /dev/drum interface and i/o functions 1118 */ 1119 1120 /* 1121 * swstrategy: perform I/O on the drum 1122 * 1123 * => we must map the i/o request from the drum to the correct swapdev. 1124 */ 1125 void 1126 swstrategy(struct buf *bp) 1127 { 1128 struct swapdev *sdp; 1129 int s, pageno, bn; 1130 1131 /* 1132 * convert block number to swapdev. note that swapdev can't 1133 * be yanked out from under us because we are holding resources 1134 * in it (i.e. the blocks we are doing I/O on). 1135 */ 1136 pageno = dbtob((u_int64_t)bp->b_blkno) >> PAGE_SHIFT; 1137 mtx_enter(&uvm_swap_data_lock); 1138 sdp = swapdrum_getsdp(pageno); 1139 mtx_leave(&uvm_swap_data_lock); 1140 if (sdp == NULL) { 1141 bp->b_error = EINVAL; 1142 bp->b_flags |= B_ERROR; 1143 s = splbio(); 1144 biodone(bp); 1145 splx(s); 1146 return; 1147 } 1148 1149 /* convert drum page number to block number on this swapdev. */ 1150 pageno -= sdp->swd_drumoffset; /* page # on swapdev */ 1151 bn = btodb((u_int64_t)pageno << PAGE_SHIFT); /* convert to diskblock */ 1152 1153 /* 1154 * for block devices we finish up here. 1155 * for regular files we have to do more work which we delegate 1156 * to sw_reg_strategy(). 1157 */ 1158 switch (sdp->swd_vp->v_type) { 1159 default: 1160 panic("swstrategy: vnode type 0x%x", sdp->swd_vp->v_type); 1161 case VBLK: 1162 /* 1163 * must convert "bp" from an I/O on /dev/drum to an I/O 1164 * on the swapdev (sdp). 
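 * buf_replacevnode() moves the buf onto the swapdev's vnode; the
 * block number was already rebased from the drum to this swapdev
 * ("bn") above.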
1165 */ 1166 s = splbio(); 1167 buf_replacevnode(bp, sdp->swd_vp); 1168 1169 bp->b_blkno = bn; 1170 splx(s); 1171 VOP_STRATEGY(bp->b_vp, bp); 1172 return; 1173 case VREG: 1174 /* delegate to sw_reg_strategy function. */ 1175 sw_reg_strategy(sdp, bp, bn); 1176 return; 1177 } 1178 /* NOTREACHED */ 1179 } 1180 1181 /* 1182 * sw_reg_strategy: handle swap i/o to regular files 1183 */ 1184 void 1185 sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn) 1186 { 1187 struct vnode *vp; 1188 struct vndxfer *vnx; 1189 daddr_t nbn; 1190 caddr_t addr; 1191 off_t byteoff; 1192 int s, off, nra, error, sz, resid; 1193 1194 /* 1195 * allocate a vndxfer head for this transfer and point it to 1196 * our buffer. 1197 */ 1198 vnx = pool_get(&vndxfer_pool, PR_WAITOK); 1199 vnx->vx_flags = VX_BUSY; 1200 vnx->vx_error = 0; 1201 vnx->vx_pending = 0; 1202 vnx->vx_bp = bp; 1203 vnx->vx_sdp = sdp; 1204 1205 /* 1206 * setup for main loop where we read filesystem blocks into 1207 * our buffer. 1208 */ 1209 error = 0; 1210 bp->b_resid = bp->b_bcount; /* nothing transferred yet! */ 1211 addr = bp->b_data; /* current position in buffer */ 1212 byteoff = dbtob((u_int64_t)bn); 1213 1214 for (resid = bp->b_resid; resid; resid -= sz) { 1215 struct vndbuf *nbp; 1216 /* 1217 * translate byteoffset into block number. return values: 1218 * vp = vnode of underlying device 1219 * nbn = new block number (on underlying vnode dev) 1220 * nra = num blocks we can read-ahead (excludes requested 1221 * block) 1222 */ 1223 nra = 0; 1224 error = VOP_BMAP(sdp->swd_vp, byteoff / sdp->swd_bsize, 1225 &vp, &nbn, &nra); 1226 1227 if (error == 0 && nbn == -1) { 1228 /* 1229 * this used to just set error, but that doesn't 1230 * do the right thing. Instead, it causes random 1231 * memory errors. The panic() should remain until 1232 * this condition doesn't destabilize the system. 1233 */ 1234 #if 1 1235 panic("sw_reg_strategy: swap to sparse file"); 1236 #else 1237 error = EIO; /* failure */ 1238 #endif 1239 } 1240 1241 /* 1242 * punt if there was an error or a hole in the file. 1243 * we must wait for any i/o ops we have already started 1244 * to finish before returning. 1245 * 1246 * XXX we could deal with holes here but it would be 1247 * a hassle (in the write case). 1248 */ 1249 if (error) { 1250 s = splbio(); 1251 vnx->vx_error = error; /* pass error up */ 1252 goto out; 1253 } 1254 1255 /* 1256 * compute the size ("sz") of this transfer (in bytes). 1257 */ 1258 off = byteoff % sdp->swd_bsize; 1259 sz = (1 + nra) * sdp->swd_bsize - off; 1260 if (sz > resid) 1261 sz = resid; 1262 1263 /* 1264 * now get a buf structure. note that the vb_buf is 1265 * at the front of the nbp structure so that you can 1266 * cast pointers between the two structure easily. 1267 */ 1268 nbp = pool_get(&vndbuf_pool, PR_WAITOK); 1269 nbp->vb_buf.b_flags = bp->b_flags | B_CALL; 1270 nbp->vb_buf.b_bcount = sz; 1271 nbp->vb_buf.b_bufsize = sz; 1272 nbp->vb_buf.b_error = 0; 1273 nbp->vb_buf.b_data = addr; 1274 nbp->vb_buf.b_bq = NULL; 1275 nbp->vb_buf.b_blkno = nbn + btodb(off); 1276 nbp->vb_buf.b_proc = bp->b_proc; 1277 nbp->vb_buf.b_iodone = sw_reg_iodone; 1278 nbp->vb_buf.b_vp = NULLVP; 1279 nbp->vb_buf.b_vnbufs.le_next = NOLIST; 1280 LIST_INIT(&nbp->vb_buf.b_dep); 1281 1282 /* 1283 * set b_dirtyoff/end and b_validoff/end. this is 1284 * required by the NFS client code (otherwise it will 1285 * just discard our I/O request). 
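 * the offsets are relative to this chunk: the parent's range is
 * shifted down by the number of bytes already issued
 * (bp->b_bcount - resid) and clamped to [0, sz].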
1286 */ 1287 if (bp->b_dirtyend == 0) { 1288 nbp->vb_buf.b_dirtyoff = 0; 1289 nbp->vb_buf.b_dirtyend = sz; 1290 } else { 1291 nbp->vb_buf.b_dirtyoff = 1292 max(0, bp->b_dirtyoff - (bp->b_bcount-resid)); 1293 nbp->vb_buf.b_dirtyend = 1294 min(sz, 1295 max(0, bp->b_dirtyend - (bp->b_bcount-resid))); 1296 } 1297 if (bp->b_validend == 0) { 1298 nbp->vb_buf.b_validoff = 0; 1299 nbp->vb_buf.b_validend = sz; 1300 } else { 1301 nbp->vb_buf.b_validoff = 1302 max(0, bp->b_validoff - (bp->b_bcount-resid)); 1303 nbp->vb_buf.b_validend = 1304 min(sz, 1305 max(0, bp->b_validend - (bp->b_bcount-resid))); 1306 } 1307 1308 /* patch it back to the vnx */ 1309 nbp->vb_vnx = vnx; 1310 task_set(&nbp->vb_task, sw_reg_iodone_internal, nbp); 1311 1312 s = splbio(); 1313 if (vnx->vx_error != 0) { 1314 pool_put(&vndbuf_pool, nbp); 1315 goto out; 1316 } 1317 vnx->vx_pending++; 1318 1319 /* assoc new buffer with underlying vnode */ 1320 bgetvp(vp, &nbp->vb_buf); 1321 1322 /* start I/O if we are not over our limit */ 1323 bufq_queue(&sdp->swd_bufq, &nbp->vb_buf); 1324 sw_reg_start(sdp); 1325 splx(s); 1326 1327 /* 1328 * advance to the next I/O 1329 */ 1330 byteoff += sz; 1331 addr += sz; 1332 } 1333 1334 s = splbio(); 1335 1336 out: /* Arrive here at splbio */ 1337 vnx->vx_flags &= ~VX_BUSY; 1338 if (vnx->vx_pending == 0) { 1339 if (vnx->vx_error != 0) { 1340 bp->b_error = vnx->vx_error; 1341 bp->b_flags |= B_ERROR; 1342 } 1343 pool_put(&vndxfer_pool, vnx); 1344 biodone(bp); 1345 } 1346 splx(s); 1347 } 1348 1349 /* sw_reg_start: start an I/O request on the requested swapdev. */ 1350 void 1351 sw_reg_start(struct swapdev *sdp) 1352 { 1353 struct buf *bp; 1354 1355 /* XXX: recursion control */ 1356 if ((sdp->swd_flags & SWF_BUSY) != 0) 1357 return; 1358 1359 sdp->swd_flags |= SWF_BUSY; 1360 1361 while (sdp->swd_active < sdp->swd_maxactive) { 1362 bp = bufq_dequeue(&sdp->swd_bufq); 1363 if (bp == NULL) 1364 break; 1365 1366 sdp->swd_active++; 1367 1368 if ((bp->b_flags & B_READ) == 0) 1369 bp->b_vp->v_numoutput++; 1370 1371 VOP_STRATEGY(bp->b_vp, bp); 1372 } 1373 sdp->swd_flags &= ~SWF_BUSY; 1374 } 1375 1376 /* 1377 * sw_reg_iodone: one of our i/o's has completed and needs post-i/o cleanup 1378 * 1379 * => note that we can recover the vndbuf struct by casting the buf ptr 1380 * 1381 * XXX: 1382 * We only put this onto a taskq here, because of the maxactive game since 1383 * it basically requires us to call back into VOP_STRATEGY() (where we must 1384 * be able to sleep) via sw_reg_start(). 1385 */ 1386 void 1387 sw_reg_iodone(struct buf *bp) 1388 { 1389 struct vndbuf *vbp = (struct vndbuf *)bp; 1390 task_add(systq, &vbp->vb_task); 1391 } 1392 1393 void 1394 sw_reg_iodone_internal(void *xvbp) 1395 { 1396 struct vndbuf *vbp = xvbp; 1397 struct vndxfer *vnx = vbp->vb_vnx; 1398 struct buf *pbp = vnx->vx_bp; /* parent buffer */ 1399 struct swapdev *sdp = vnx->vx_sdp; 1400 int resid, s; 1401 1402 s = splbio(); 1403 1404 resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid; 1405 pbp->b_resid -= resid; 1406 vnx->vx_pending--; 1407 1408 /* pass error upward */ 1409 if (vbp->vb_buf.b_error) 1410 vnx->vx_error = vbp->vb_buf.b_error; 1411 1412 /* disassociate this buffer from the vnode (if any). */ 1413 if (vbp->vb_buf.b_vp != NULL) { 1414 brelvp(&vbp->vb_buf); 1415 } 1416 1417 /* kill vbp structure */ 1418 pool_put(&vndbuf_pool, vbp); 1419 1420 /* 1421 * wrap up this transaction if it has run to completion or, in 1422 * case of an error, when all auxiliary buffers have returned. 
1423 */ 1424 if (vnx->vx_error != 0) { 1425 /* pass error upward */ 1426 pbp->b_flags |= B_ERROR; 1427 pbp->b_error = vnx->vx_error; 1428 if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { 1429 pool_put(&vndxfer_pool, vnx); 1430 biodone(pbp); 1431 } 1432 } else if (pbp->b_resid == 0) { 1433 KASSERT(vnx->vx_pending == 0); 1434 if ((vnx->vx_flags & VX_BUSY) == 0) { 1435 pool_put(&vndxfer_pool, vnx); 1436 biodone(pbp); 1437 } 1438 } 1439 1440 /* 1441 * done! start next swapdev I/O if one is pending 1442 */ 1443 sdp->swd_active--; 1444 sw_reg_start(sdp); 1445 splx(s); 1446 } 1447 1448 1449 /* 1450 * uvm_swap_alloc: allocate space on swap 1451 * 1452 * => allocation is done "round robin" down the priority list, as we 1453 * allocate in a priority we "rotate" the tail queue. 1454 * => space can be freed with uvm_swap_free 1455 * => we return the page slot number in /dev/drum (0 == invalid slot) 1456 * => we lock uvm_swap_data_lock 1457 * => XXXMRG: "LESSOK" INTERFACE NEEDED TO EXTENT SYSTEM 1458 */ 1459 int 1460 uvm_swap_alloc(int *nslots, boolean_t lessok) 1461 { 1462 struct swapdev *sdp; 1463 struct swappri *spp; 1464 1465 /* 1466 * no swap devices configured yet? definite failure. 1467 */ 1468 if (uvmexp.nswapdev < 1) 1469 return 0; 1470 1471 /* 1472 * lock data lock, convert slots into blocks, and enter loop 1473 */ 1474 KERNEL_ASSERT_LOCKED(); 1475 mtx_enter(&uvm_swap_data_lock); 1476 1477 ReTry: /* XXXMRG */ 1478 LIST_FOREACH(spp, &swap_priority, spi_swappri) { 1479 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) { 1480 swblk_t result; 1481 1482 /* if it's not enabled, then we can't swap from it */ 1483 if ((sdp->swd_flags & SWF_ENABLE) == 0) 1484 continue; 1485 if (sdp->swd_npginuse + *nslots > sdp->swd_npages) 1486 continue; 1487 result = blist_alloc(sdp->swd_blist, *nslots); 1488 if (result == SWAPBLK_NONE) { 1489 continue; 1490 } 1491 KASSERT(result < sdp->swd_drumsize); 1492 1493 /* 1494 * successful allocation! now rotate the tailq. 1495 */ 1496 TAILQ_REMOVE(&spp->spi_swapdev, sdp, swd_next); 1497 TAILQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next); 1498 sdp->swd_npginuse += *nslots; 1499 uvmexp.swpginuse += *nslots; 1500 mtx_leave(&uvm_swap_data_lock); 1501 /* done! return drum slot number */ 1502 return result + sdp->swd_drumoffset; 1503 } 1504 } 1505 1506 /* XXXMRG: BEGIN HACK */ 1507 if (*nslots > 1 && lessok) { 1508 *nslots = 1; 1509 /* XXXMRG: ugh! blist should support this for us */ 1510 goto ReTry; 1511 } 1512 /* XXXMRG: END HACK */ 1513 1514 mtx_leave(&uvm_swap_data_lock); 1515 return 0; /* failed */ 1516 } 1517 1518 /* 1519 * uvm_swapisfilled: return true if the amount of free space in swap is 1520 * smaller than the size of a cluster. 1521 * 1522 * As long as some swap slots are being used by pages currently in memory, 1523 * it is possible to reuse them. Even if the swap space has been completly 1524 * filled we do not consider it full. 1525 */ 1526 int 1527 uvm_swapisfilled(void) 1528 { 1529 int result; 1530 1531 mtx_enter(&uvm_swap_data_lock); 1532 KASSERT(uvmexp.swpginuse <= uvmexp.swpages); 1533 result = (uvmexp.swpginuse + SWCLUSTPAGES) >= uvmexp.swpages; 1534 mtx_leave(&uvm_swap_data_lock); 1535 1536 return result; 1537 } 1538 1539 /* 1540 * uvm_swapisfull: return true if the amount of pages only in swap 1541 * accounts for more than 99% of the total swap space. 
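 * "only in swap" (swpgonly) excludes pages that still have a copy in
 * memory, so swap can be fully allocated without being "full" here.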
1542 * 1543 */ 1544 int 1545 uvm_swapisfull(void) 1546 { 1547 int result; 1548 1549 mtx_enter(&uvm_swap_data_lock); 1550 KASSERT(uvmexp.swpgonly <= uvmexp.swpages); 1551 result = (uvmexp.swpgonly >= (uvmexp.swpages * 99 / 100)); 1552 mtx_leave(&uvm_swap_data_lock); 1553 1554 return result; 1555 } 1556 1557 /* 1558 * uvm_swap_markbad: keep track of swap ranges where we've had i/o errors 1559 * 1560 * => we lock uvm_swap_data_lock 1561 */ 1562 void 1563 uvm_swap_markbad(int startslot, int nslots) 1564 { 1565 struct swapdev *sdp; 1566 1567 mtx_enter(&uvm_swap_data_lock); 1568 sdp = swapdrum_getsdp(startslot); 1569 if (sdp != NULL) { 1570 /* 1571 * we just keep track of how many pages have been marked bad 1572 * in this device, to make everything add up in swap_off(). 1573 * we assume here that the range of slots will all be within 1574 * one swap device. 1575 */ 1576 sdp->swd_npgbad += nslots; 1577 } 1578 mtx_leave(&uvm_swap_data_lock); 1579 } 1580 1581 /* 1582 * uvm_swap_free: free swap slots 1583 * 1584 * => this can be all or part of an allocation made by uvm_swap_alloc 1585 * => we lock uvm_swap_data_lock 1586 */ 1587 void 1588 uvm_swap_free(int startslot, int nslots) 1589 { 1590 struct swapdev *sdp; 1591 1592 /* 1593 * ignore attempts to free the "bad" slot. 1594 */ 1595 1596 if (startslot == SWSLOT_BAD) { 1597 return; 1598 } 1599 1600 /* 1601 * convert drum slot offset back to sdp, free the blocks 1602 * in the extent, and return. must hold pri lock to do 1603 * lookup and access the extent. 1604 */ 1605 KERNEL_LOCK(); 1606 mtx_enter(&uvm_swap_data_lock); 1607 sdp = swapdrum_getsdp(startslot); 1608 KASSERT(uvmexp.nswapdev >= 1); 1609 KASSERT(sdp != NULL); 1610 KASSERT(sdp->swd_npginuse >= nslots); 1611 blist_free(sdp->swd_blist, startslot - sdp->swd_drumoffset, nslots); 1612 sdp->swd_npginuse -= nslots; 1613 uvmexp.swpginuse -= nslots; 1614 mtx_leave(&uvm_swap_data_lock); 1615 1616 #ifdef UVM_SWAP_ENCRYPT 1617 { 1618 int i; 1619 if (swap_encrypt_initialized) { 1620 /* Dereference keys */ 1621 for (i = 0; i < nslots; i++) 1622 if (uvm_swap_needdecrypt(sdp, startslot + i)) { 1623 struct swap_key *key; 1624 1625 key = SWD_KEY(sdp, startslot + i); 1626 if (key->refcount != 0) 1627 SWAP_KEY_PUT(sdp, key); 1628 } 1629 1630 /* Mark range as not decrypt */ 1631 uvm_swap_markdecrypt(sdp, startslot, nslots, 0); 1632 } 1633 } 1634 #endif /* UVM_SWAP_ENCRYPT */ 1635 KERNEL_UNLOCK(); 1636 } 1637 1638 /* 1639 * uvm_swap_put: put any number of pages into a contig place on swap 1640 * 1641 * => can be sync or async 1642 */ 1643 int 1644 uvm_swap_put(int swslot, struct vm_page **ppsp, int npages, int flags) 1645 { 1646 int result; 1647 1648 result = uvm_swap_io(ppsp, swslot, npages, B_WRITE | 1649 ((flags & PGO_SYNCIO) ? 0 : B_ASYNC)); 1650 1651 return (result); 1652 } 1653 1654 /* 1655 * uvm_swap_get: get a single page from swap 1656 * 1657 * => usually a sync op (from fault) 1658 */ 1659 int 1660 uvm_swap_get(struct vm_page *page, int swslot, int flags) 1661 { 1662 int result; 1663 1664 atomic_inc_int(&uvmexp.nswget); 1665 KASSERT(flags & PGO_SYNCIO); 1666 if (swslot == SWSLOT_BAD) { 1667 return VM_PAGER_ERROR; 1668 } 1669 1670 KERNEL_LOCK(); 1671 result = uvm_swap_io(&page, swslot, 1, B_READ); 1672 KERNEL_UNLOCK(); 1673 1674 if (result == VM_PAGER_OK || result == VM_PAGER_PEND) { 1675 /* 1676 * this page is no longer only in swap. 
1677 */ 1678 atomic_dec_int(&uvmexp.swpgonly); 1679 } 1680 return (result); 1681 } 1682 1683 /* 1684 * uvm_swap_io: do an i/o operation to swap 1685 */ 1686 1687 int 1688 uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags) 1689 { 1690 daddr_t startblk; 1691 struct buf *bp; 1692 vaddr_t kva; 1693 int result, s, mapinflags, pflag, bounce = 0, i; 1694 boolean_t write, async; 1695 vaddr_t bouncekva; 1696 struct vm_page *tpps[SWCLUSTPAGES]; 1697 int pdaemon = (curproc == uvm.pagedaemon_proc); 1698 #ifdef UVM_SWAP_ENCRYPT 1699 struct swapdev *sdp; 1700 int encrypt = 0; 1701 #endif 1702 1703 KERNEL_ASSERT_LOCKED(); 1704 1705 write = (flags & B_READ) == 0; 1706 async = (flags & B_ASYNC) != 0; 1707 1708 /* convert starting drum slot to block number */ 1709 startblk = btodb((u_int64_t)startslot << PAGE_SHIFT); 1710 1711 pflag = (async || pdaemon) ? PR_NOWAIT : PR_WAITOK; 1712 bp = pool_get(&bufpool, pflag | PR_ZERO); 1713 if (bp == NULL) 1714 return (VM_PAGER_AGAIN); 1715 1716 /* 1717 * map the pages into the kernel (XXX: currently required 1718 * by buffer system). 1719 */ 1720 mapinflags = !write ? UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE; 1721 if (!async) 1722 mapinflags |= UVMPAGER_MAPIN_WAITOK; 1723 kva = uvm_pagermapin(pps, npages, mapinflags); 1724 if (kva == 0) { 1725 pool_put(&bufpool, bp); 1726 return (VM_PAGER_AGAIN); 1727 } 1728 1729 #ifdef UVM_SWAP_ENCRYPT 1730 if (write) { 1731 /* 1732 * Check if we need to do swap encryption on old pages. 1733 * Later we need a different scheme, that swap encrypts 1734 * all pages of a process that had at least one page swap 1735 * encrypted. Then we might not need to copy all pages 1736 * in the cluster, and avoid the memory overheard in 1737 * swapping. 1738 */ 1739 if (uvm_doswapencrypt) 1740 encrypt = 1; 1741 } 1742 1743 if (swap_encrypt_initialized || encrypt) { 1744 /* 1745 * we need to know the swap device that we are swapping to/from 1746 * to see if the pages need to be marked for decryption or 1747 * actually need to be decrypted. 1748 * XXX - does this information stay the same over the whole 1749 * execution of this function? 1750 */ 1751 mtx_enter(&uvm_swap_data_lock); 1752 sdp = swapdrum_getsdp(startslot); 1753 mtx_leave(&uvm_swap_data_lock); 1754 } 1755 1756 /* 1757 * Check that we are dma capable for read (write always bounces 1758 * through the swapencrypt anyway... 1759 */ 1760 if (write && encrypt) { 1761 bounce = 1; /* bounce through swapencrypt always */ 1762 } else { 1763 #else 1764 { 1765 #endif 1766 1767 for (i = 0; i < npages; i++) { 1768 if (VM_PAGE_TO_PHYS(pps[i]) < dma_constraint.ucr_low || 1769 VM_PAGE_TO_PHYS(pps[i]) > dma_constraint.ucr_high) { 1770 bounce = 1; 1771 break; 1772 } 1773 } 1774 } 1775 1776 if (bounce) { 1777 int swmapflags, plaflags; 1778 1779 /* We always need write access. 
*/ 1780 swmapflags = UVMPAGER_MAPIN_READ; 1781 plaflags = UVM_PLA_NOWAIT; 1782 if (!async) { 1783 swmapflags |= UVMPAGER_MAPIN_WAITOK; 1784 plaflags = UVM_PLA_WAITOK; 1785 } 1786 if (uvm_swap_allocpages(tpps, npages, plaflags)) { 1787 pool_put(&bufpool, bp); 1788 uvm_pagermapout(kva, npages); 1789 return (VM_PAGER_AGAIN); 1790 } 1791 1792 bouncekva = uvm_pagermapin(tpps, npages, swmapflags); 1793 if (bouncekva == 0) { 1794 pool_put(&bufpool, bp); 1795 uvm_pagermapout(kva, npages); 1796 uvm_swap_freepages(tpps, npages); 1797 return (VM_PAGER_AGAIN); 1798 } 1799 } 1800 1801 /* encrypt to swap */ 1802 if (write && bounce) { 1803 int i, opages; 1804 caddr_t src, dst; 1805 u_int64_t block; 1806 1807 src = (caddr_t) kva; 1808 dst = (caddr_t) bouncekva; 1809 block = startblk; 1810 for (i = 0; i < npages; i++) { 1811 #ifdef UVM_SWAP_ENCRYPT 1812 struct swap_key *key; 1813 1814 if (encrypt) { 1815 key = SWD_KEY(sdp, startslot + i); 1816 SWAP_KEY_GET(sdp, key); /* add reference */ 1817 1818 swap_encrypt(key, src, dst, block, PAGE_SIZE); 1819 block += btodb(PAGE_SIZE); 1820 } else { 1821 #else 1822 { 1823 #endif /* UVM_SWAP_ENCRYPT */ 1824 memcpy(dst, src, PAGE_SIZE); 1825 } 1826 /* this just tells async callbacks to free */ 1827 atomic_setbits_int(&tpps[i]->pg_flags, PQ_ENCRYPT); 1828 src += PAGE_SIZE; 1829 dst += PAGE_SIZE; 1830 } 1831 1832 uvm_pagermapout(kva, npages); 1833 1834 /* dispose of pages we dont use anymore */ 1835 opages = npages; 1836 uvm_pager_dropcluster(NULL, NULL, pps, &opages, 1837 PGO_PDFREECLUST); 1838 1839 kva = bouncekva; 1840 } 1841 1842 /* 1843 * prevent ASYNC reads. 1844 * uvm_swap_io is only called from uvm_swap_get, uvm_swap_get 1845 * assumes that all gets are SYNCIO. Just make sure here. 1846 * XXXARTUBC - might not be true anymore. 1847 */ 1848 if (!write) { 1849 flags &= ~B_ASYNC; 1850 async = 0; 1851 } 1852 1853 /* 1854 * fill in the bp. we currently route our i/o through 1855 * /dev/drum's vnode [swapdev_vp]. 1856 */ 1857 bp->b_flags = B_BUSY | B_NOCACHE | B_RAW | (flags & (B_READ|B_ASYNC)); 1858 bp->b_proc = &proc0; /* XXX */ 1859 bp->b_vnbufs.le_next = NOLIST; 1860 if (bounce) 1861 bp->b_data = (caddr_t)bouncekva; 1862 else 1863 bp->b_data = (caddr_t)kva; 1864 bp->b_bq = NULL; 1865 bp->b_blkno = startblk; 1866 LIST_INIT(&bp->b_dep); 1867 s = splbio(); 1868 bp->b_vp = NULL; 1869 buf_replacevnode(bp, swapdev_vp); 1870 splx(s); 1871 bp->b_bufsize = bp->b_bcount = (long)npages << PAGE_SHIFT; 1872 1873 /* 1874 * for pageouts we must set "dirtyoff" [NFS client code needs it]. 1875 * and we bump v_numoutput (counter of number of active outputs). 1876 */ 1877 if (write) { 1878 bp->b_dirtyoff = 0; 1879 bp->b_dirtyend = npages << PAGE_SHIFT; 1880 #ifdef UVM_SWAP_ENCRYPT 1881 /* mark the pages in the drum for decryption */ 1882 if (swap_encrypt_initialized) 1883 uvm_swap_markdecrypt(sdp, startslot, npages, encrypt); 1884 #endif 1885 s = splbio(); 1886 swapdev_vp->v_numoutput++; 1887 splx(s); 1888 } 1889 1890 /* for async ops we must set up the iodone handler. */ 1891 if (async) { 1892 bp->b_flags |= B_CALL | (pdaemon ? B_PDAEMON : 0); 1893 bp->b_iodone = uvm_aio_biodone; 1894 } 1895 1896 /* now we start the I/O, and if async, return. */ 1897 VOP_STRATEGY(bp->b_vp, bp); 1898 if (async) 1899 return (VM_PAGER_PEND); 1900 1901 /* must be sync i/o. wait for it to finish */ 1902 (void) biowait(bp); 1903 result = (bp->b_flags & B_ERROR) ? 
VM_PAGER_ERROR : VM_PAGER_OK; 1904 1905 /* decrypt swap */ 1906 if (!write && !(bp->b_flags & B_ERROR)) { 1907 int i; 1908 caddr_t data = (caddr_t)kva; 1909 caddr_t dst = (caddr_t)kva; 1910 u_int64_t block = startblk; 1911 1912 if (bounce) 1913 data = (caddr_t)bouncekva; 1914 1915 for (i = 0; i < npages; i++) { 1916 #ifdef UVM_SWAP_ENCRYPT 1917 struct swap_key *key; 1918 1919 /* Check if we need to decrypt */ 1920 if (swap_encrypt_initialized && 1921 uvm_swap_needdecrypt(sdp, startslot + i)) { 1922 key = SWD_KEY(sdp, startslot + i); 1923 if (key->refcount == 0) { 1924 result = VM_PAGER_ERROR; 1925 break; 1926 } 1927 swap_decrypt(key, data, dst, block, PAGE_SIZE); 1928 } else if (bounce) { 1929 #else 1930 if (bounce) { 1931 #endif 1932 memcpy(dst, data, PAGE_SIZE); 1933 } 1934 data += PAGE_SIZE; 1935 dst += PAGE_SIZE; 1936 block += btodb(PAGE_SIZE); 1937 } 1938 if (bounce) 1939 uvm_pagermapout(bouncekva, npages); 1940 } 1941 /* kill the pager mapping */ 1942 uvm_pagermapout(kva, npages); 1943 1944 /* Not anymore needed, free after encryption/bouncing */ 1945 if (!write && bounce) 1946 uvm_swap_freepages(tpps, npages); 1947 1948 /* now dispose of the buf */ 1949 s = splbio(); 1950 if (bp->b_vp) 1951 brelvp(bp); 1952 1953 if (write && bp->b_vp) 1954 vwakeup(bp->b_vp); 1955 pool_put(&bufpool, bp); 1956 splx(s); 1957 1958 /* finally return. */ 1959 return (result); 1960 } 1961 1962 void 1963 swapmount(void) 1964 { 1965 struct swapdev *sdp; 1966 struct swappri *spp; 1967 struct vnode *vp; 1968 dev_t swap_dev = swdevt[0].sw_dev; 1969 char *nam; 1970 char path[MNAMELEN + 1]; 1971 1972 if (swap_dev == NODEV) 1973 return; 1974 1975 rw_enter_write(&swap_syscall_lock); 1976 1977 #if defined(NFSCLIENT) 1978 if (swap_dev == NETDEV) { 1979 extern struct nfs_diskless nfs_diskless; 1980 1981 snprintf(path, sizeof(path), "%s", 1982 nfs_diskless.nd_swap.ndm_host); 1983 vp = nfs_diskless.sw_vp; 1984 goto gotit; 1985 } else 1986 #endif 1987 if (bdevvp(swap_dev, &vp)) { 1988 rw_exit_write(&swap_syscall_lock); 1989 return; 1990 } 1991 1992 /* Construct a potential path to swap */ 1993 if ((nam = findblkname(major(swap_dev)))) 1994 snprintf(path, sizeof(path), "/dev/%s%d%c", nam, 1995 DISKUNIT(swap_dev), 'a' + DISKPART(swap_dev)); 1996 else 1997 snprintf(path, sizeof(path), "blkdev0x%x", 1998 swap_dev); 1999 2000 #if defined(NFSCLIENT) 2001 gotit: 2002 #endif 2003 sdp = malloc(sizeof(*sdp), M_VMSWAP, M_WAITOK|M_ZERO); 2004 spp = malloc(sizeof(*spp), M_VMSWAP, M_WAITOK); 2005 2006 sdp->swd_flags = SWF_FAKE; 2007 sdp->swd_dev = swap_dev; 2008 2009 sdp->swd_pathlen = strlen(path) + 1; 2010 sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK | M_ZERO); 2011 strlcpy(sdp->swd_path, path, sdp->swd_pathlen); 2012 2013 sdp->swd_vp = vp; 2014 2015 mtx_enter(&uvm_swap_data_lock); 2016 swaplist_insert(sdp, spp, 0); 2017 mtx_leave(&uvm_swap_data_lock); 2018 2019 if (swap_on(curproc, sdp)) { 2020 mtx_enter(&uvm_swap_data_lock); 2021 swaplist_find(vp, 1); 2022 swaplist_trim(); 2023 vput(sdp->swd_vp); 2024 mtx_leave(&uvm_swap_data_lock); 2025 rw_exit_write(&swap_syscall_lock); 2026 free(sdp->swd_path, M_VMSWAP, sdp->swd_pathlen); 2027 free(sdp, M_VMSWAP, sizeof(*sdp)); 2028 return; 2029 } 2030 rw_exit_write(&swap_syscall_lock); 2031 } 2032 2033 #ifdef HIBERNATE 2034 int 2035 uvm_hibswap(dev_t dev, u_long *sp, u_long *ep) 2036 { 2037 struct swapdev *sdp, *swd = NULL; 2038 struct swappri *spp; 2039 2040 /* no swap devices configured yet? 
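 * only the primary swap device (swdevt[0]) is considered here;
 * anything else is rejected up front.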
*/ 2041 if (uvmexp.nswapdev < 1 || dev != swdevt[0].sw_dev) 2042 return (1); 2043 2044 LIST_FOREACH(spp, &swap_priority, spi_swappri) { 2045 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) { 2046 if (sdp->swd_dev == dev) 2047 swd = sdp; 2048 } 2049 } 2050 2051 if (swd == NULL || (swd->swd_flags & SWF_ENABLE) == 0) 2052 return (1); 2053 2054 blist_gapfind(swd->swd_blist, sp, ep); 2055 2056 if (*ep - *sp == 0) 2057 /* no gap found */ 2058 return (1); 2059 2060 /* 2061 * blist_gapfind returns the gap as [sp,ep[ , 2062 * whereas [sp,ep] is expected from uvm_hibswap(). 2063 */ 2064 *ep -= 1; 2065 2066 return (0); 2067 } 2068 #endif /* HIBERNATE */ 2069 2070 #ifdef DDB 2071 void 2072 swap_print_all(int (*pr)(const char *, ...)) 2073 { 2074 struct swappri *spp; 2075 struct swapdev *sdp; 2076 2077 LIST_FOREACH(spp, &swap_priority, spi_swappri) { 2078 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) { 2079 #ifdef HIBERNATE 2080 u_long bgap = 0, egap = 0; 2081 #endif 2082 2083 pr("swap %p path \"%s\" flags 0x%x\n", sdp, 2084 sdp->swd_path, sdp->swd_flags); 2085 2086 blist_print(sdp->swd_blist); 2087 2088 #ifdef HIBERNATE 2089 if (!uvm_hibswap(sdp->swd_dev, &bgap, &egap)) 2090 pr("hibernate gap: [0x%lx, 0x%lx] size=%lu\n", 2091 bgap, egap, (egap - bgap + 1)); 2092 else 2093 pr("hibernate gap: not found\n"); 2094 #endif 2095 } 2096 } 2097 } 2098 #endif /* DDB */ 2099
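
#if 0
/*
 * Illustrative userland sketch (not part of the kernel build): how the
 * swapctl(2) interface documented at the top of this file is typically
 * driven.  SWAP_NSWAP is asked for the number of configured devices and
 * SWAP_STATS then fills an array of swapent structures, much like
 * swapctl(8) does.  Error handling is intentionally minimal.
 */
#include <sys/types.h>
#include <sys/swap.h>

#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	struct swapent *sep;
	int i, nswap;

	/* [1] SWAP_NSWAP: number of configured swap devices */
	if ((nswap = swapctl(SWAP_NSWAP, NULL, 0)) == -1)
		err(1, "SWAP_NSWAP");
	if (nswap == 0)
		return 0;

	if ((sep = calloc(nswap, sizeof(*sep))) == NULL)
		err(1, "calloc");

	/* [2] SWAP_STATS: copy out one swapent per device */
	if ((nswap = swapctl(SWAP_STATS, sep, nswap)) == -1)
		err(1, "SWAP_STATS");

	for (i = 0; i < nswap; i++)
		printf("%s: %d/%d blocks in use, priority %d\n",
		    sep[i].se_path, sep[i].se_inuse, sep[i].se_nblks,
		    sep[i].se_priority);

	free(sep);
	return 0;
}
#endif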