/*	$OpenBSD: uvm_pdaemon.c,v 1.114 2024/05/01 12:54:27 mpi Exp $	*/
/*	$NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/atomic.h>

#ifdef HIBERNATE
#include <sys/hibernate.h>
#endif

#include <uvm/uvm.h>

#include "drm.h"

#if NDRM > 0
extern void drmbackoff(long);
#endif
/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define UVMPD_NUMDIRTYREACTS	16


/*
 * local prototypes
 */

struct rwlock	*uvmpd_trylockowner(struct vm_page *);
void		uvmpd_scan(struct uvm_pmalloc *, struct uvm_constraint_range *);
void		uvmpd_scan_inactive(struct uvm_pmalloc *,
		    struct uvm_constraint_range *, struct pglist *);
void		uvmpd_tune(void);
void		uvmpd_drop(struct pglist *);
int		uvmpd_dropswap(struct vm_page *);

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(const char *wmsg)
{
	uint64_t timo = INFSLP;

#ifdef DIAGNOSTIC
	if (curproc == &proc0)
		panic("%s: cannot sleep for memory during boot", __func__);
#endif

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */
	if (curproc == uvm.pagedaemon_proc) {
		printf("uvm_wait emergency bufbackoff\n");
		if (bufbackoff(NULL, 4) == 0)
			return;
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = MSEC_TO_NSEC(125);	/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_lock_fpageq();
	wakeup(&uvm.pagedaemon);		/* wake the daemon! */
	msleep_nsec(&uvmexp.free, &uvm.fpageqlock, PVM | PNORELOCK, wmsg, timo);
}
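
/*
 * Illustrative sketch (not compiled in; an assumption-laden outline, not a
 * real call site): a typical uvm_wait() caller is an allocation loop that
 * drops its locks, sleeps until the daemon frees memory, and retries.
 */
#if 0
	struct vm_page *pg;

	for (;;) {
		pg = uvm_pagealloc(NULL, 0, NULL, 0);
		if (pg != NULL)
			break;			/* got a page */
		/* per the contract above, no locks may be held here */
		uvm_wait("exwait");		/* sleeps on uvmexp.free */
	}
#endif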

/*
 * uvmpd_tune: tune paging parameters
 */
void
uvmpd_tune(void)
{
	int val;

	val = uvmexp.npages / 30;

	/* XXX:  what are these values good for? */
	val = max(val, (16*1024) >> PAGE_SHIFT);

	/* Make sure there's always a user page free. */
	if (val < uvmexp.reserve_kernel + 1)
		val = uvmexp.reserve_kernel + 1;
	uvmexp.freemin = val;

	/* Calculate free target. */
	val = (uvmexp.freemin * 4) / 3;
	if (val <= uvmexp.freemin)
		val = uvmexp.freemin + 1;
	uvmexp.freetarg = val;

	uvmexp.wiredmax = uvmexp.npages / 3;
}
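
/*
 * Worked example (illustrative, assuming 4 KB pages): with 4 GB of managed
 * memory, npages is about 1048576, so freemin = 1048576 / 30 = 34952 pages
 * (~136 MiB); the (16*1024) >> PAGE_SHIFT floor (4 pages) only matters on
 * very small configurations.  freetarg = (34952 * 4) / 3 = 46602 pages.
 */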

/*
 * Indicate to the page daemon that a nowait call failed and it should
 * recover at least some memory in the most restricted region (assumed
 * to be dma_constraint).
 */
volatile int uvm_nowait_failed;

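/*
 * Illustrative sketch (not compiled in): how the allocator side is expected
 * to use this flag.  The exact call site lives in the physical memory
 * allocator, and `allocation_failed' below is a placeholder, not a real
 * variable.
 */
#if 0
	if ((flags & UVM_PLA_NOWAIT) && allocation_failed) {
		uvm_nowait_failed = 1;		/* request dma_constraint recovery */
		wakeup(&uvm.pagedaemon);	/* kick the page daemon */
	}
#endif
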
/*
 * uvm_pageout: the main loop for the pagedaemon
 */
void
uvm_pageout(void *arg)
{
	struct uvm_constraint_range constraint;
	struct uvm_pmalloc *pma;
	int free;

	/* ensure correct priority and set paging parameters... */
	uvm.pagedaemon_proc = curproc;
	(void) spl0();
	uvmpd_tune();

	for (;;) {
		long size;

		uvm_lock_fpageq();
		if (!uvm_nowait_failed && TAILQ_EMPTY(&uvm.pmr_control.allocs)) {
			msleep_nsec(&uvm.pagedaemon, &uvm.fpageqlock, PVM,
			    "pgdaemon", INFSLP);
			uvmexp.pdwoke++;
		}

		if ((pma = TAILQ_FIRST(&uvm.pmr_control.allocs)) != NULL) {
			pma->pm_flags |= UVM_PMA_BUSY;
			constraint = pma->pm_constraint;
		} else {
			if (uvm_nowait_failed) {
				/*
				 * XXX realistically, this is what our
				 * nowait callers probably care about
				 */
				constraint = dma_constraint;
				uvm_nowait_failed = 0;
			} else
				constraint = no_constraint;
		}
		free = uvmexp.free - BUFPAGES_DEFICIT;
		uvm_unlock_fpageq();

		/*
		 * now lock page queues and recompute inactive count
		 */
		uvm_lock_pageq();
		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
		if (uvmexp.inactarg <= uvmexp.freetarg) {
			uvmexp.inactarg = uvmexp.freetarg + 1;
		}
		uvm_unlock_pageq();

		/* Reclaim pages from the buffer cache if possible. */
		size = 0;
		if (pma != NULL)
			size += pma->pm_size >> PAGE_SHIFT;
		if (free < uvmexp.freetarg)
			size += uvmexp.freetarg - free;
		if (size == 0)
			size = 16; /* XXX */

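		/*
		 * Ask the buffer cache (and DRM, if configured) to release
		 * twice the shortfall computed above; the factor of two
		 * presumably gives a single pass some slack toward the
		 * target.
		 */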
		(void) bufbackoff(&constraint, size * 2);
#if NDRM > 0
		drmbackoff(size * 2);
#endif
		uvm_pmr_cache_drain();

		/*
		 * scan if needed
		 */
		uvm_lock_pageq();
		free = uvmexp.free - BUFPAGES_DEFICIT;
		if (pma != NULL || (free < uvmexp.freetarg) ||
		    ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
			uvmpd_scan(pma, &constraint);
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		uvm_lock_fpageq();
		if (uvmexp.free > uvmexp.reserve_kernel ||
		    uvmexp.paging == 0) {
			wakeup(&uvmexp.free);
		}

		if (pma != NULL) {
			/*
			 * XXX If UVM_PMA_FREED isn't set, no pages
			 * were freed.  Should we set UVM_PMA_FAIL in
			 * that case?
			 */
			pma->pm_flags &= ~UVM_PMA_BUSY;
			if (pma->pm_flags & UVM_PMA_FREED) {
				pma->pm_flags &= ~UVM_PMA_LINKED;
				TAILQ_REMOVE(&uvm.pmr_control.allocs, pma,
				    pmq);
				wakeup(pma);
			}
		}
		uvm_unlock_fpageq();

		/*
		 * scan done.  unlock page queues (the only lock we are holding)
		 */
		uvm_unlock_pageq();

		sched_pause(yield);
	}
	/*NOTREACHED*/
}


/*
 * uvm_aiodone_daemon: main loop for the aiodone daemon.
 */
void
uvm_aiodone_daemon(void *arg)
{
	int s, free;
	struct buf *bp, *nbp;

	uvm.aiodoned_proc = curproc;

	for (;;) {
		/*
		 * Check for done aio structures. If we've got structures to
		 * process, do so. Otherwise sleep while avoiding races.
		 */
		mtx_enter(&uvm.aiodoned_lock);
		while ((bp = TAILQ_FIRST(&uvm.aio_done)) == NULL)
			msleep_nsec(&uvm.aiodoned, &uvm.aiodoned_lock,
			    PVM, "aiodoned", INFSLP);
		/* Take the list for ourselves. */
		TAILQ_INIT(&uvm.aio_done);
		mtx_leave(&uvm.aiodoned_lock);

		/* process each i/o that's done. */
		free = uvmexp.free;
		while (bp != NULL) {
			if (bp->b_flags & B_PDAEMON) {
				uvmexp.paging -= bp->b_bufsize >> PAGE_SHIFT;
			}
			nbp = TAILQ_NEXT(bp, b_freelist);
			s = splbio();	/* b_iodone must be called at splbio */
			(*bp->b_iodone)(bp);
			splx(s);
			bp = nbp;

			sched_pause(yield);
		}
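
		/*
		 * Wake whoever is more useful to wake: if free memory is
		 * still at or below the kernel reserve, kick the page daemon
		 * so it keeps paging; otherwise wake the processes sleeping
		 * in uvm_wait() on uvmexp.free.
		 */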
		uvm_lock_fpageq();
		wakeup(free <= uvmexp.reserve_kernel ? &uvm.pagedaemon :
		    &uvmexp.free);
		uvm_unlock_fpageq();
	}
}

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => return the locked rwlock on success. otherwise, return NULL.
 */
struct rwlock *
uvmpd_trylockowner(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;
	struct rwlock *slock;

	if (uobj != NULL) {
		slock = uobj->vmobjlock;
	} else {
		struct vm_anon *anon = pg->uanon;

		KASSERT(anon != NULL);
		slock = anon->an_lock;
	}

	if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
		return NULL;
	}

	return slock;
}
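
/*
 * Illustrative sketch (not compiled in): the canonical caller pattern.
 * The daemon holds the page queue lock and must take the owner's lock
 * second, the reverse of the normal order, so it can only try-lock and
 * must skip the page on failure:
 */
#if 0
	slock = uvmpd_trylockowner(pg);
	if (slock == NULL)
		continue;	/* wrong lock order; skip this page */
	/* ... examine or clean pg with its owner locked ... */
	rw_exit(slock);
#endif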

/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 * => return 1 if a page had an associated slot.
 */
int
uvmpd_dropswap(struct vm_page *pg)
{
	struct vm_anon *anon = pg->uanon;
	int slot, result = 0;

	if ((pg->pg_flags & PQ_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
		result = 1;
	} else if (pg->pg_flags & PQ_AOBJ) {
		slot = uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
		if (slot)
			result = 1;
	}

	return result;
}

/*
 * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 * => we stop early once we have met our target (or run out of pages)
 */
void
uvmpd_scan_inactive(struct uvm_pmalloc *pma,
    struct uvm_constraint_range *constraint, struct pglist *pglst)
{
	int free, result;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	struct vm_page *pps[SWCLUSTPAGES], **ppsp;
	int npages;
	struct vm_page *swpps[SWCLUSTPAGES];	/* XXX: see below */
	struct rwlock *slock;
	int swnpages, swcpages;			/* XXX: see below */
	int swslot;
	struct vm_anon *anon;
	boolean_t swap_backed;
	vaddr_t start;
	int dirtyreacts;
	paddr_t paddr;

	/*
	 * swslot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */
	swslot = 0;
	swnpages = swcpages = 0;
	dirtyreacts = 0;
	p = NULL;

	/* Start with the first page on the list that fits in `constraint' */
	TAILQ_FOREACH(p, pglst, pageq) {
		paddr = atop(VM_PAGE_TO_PHYS(p));
		if (paddr >= constraint->ucr_low &&
		    paddr < constraint->ucr_high)
			break;
	}

	for (; p != NULL || swslot != 0; p = nextpg) {
		/*
		 * note that p can be NULL iff we have traversed the whole
		 * list and need to do one final swap-backed clustered pageout.
		 */
		uobj = NULL;
		anon = NULL;
		if (p) {
			/*
			 * see if we've met our target
			 */
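			/*
			 * "met our target" means: free pages plus pageouts
			 * already in flight reach four times freetarg (the
			 * << 2 below), or we have reactivated enough dirty
			 * pages (UVMPD_NUMDIRTYREACTS) to conclude that swap
			 * is full and further scanning is pointless.
			 */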
			free = uvmexp.free - BUFPAGES_DEFICIT;
			if (((pma == NULL || (pma->pm_flags & UVM_PMA_FREED)) &&
			    (free + uvmexp.paging >= uvmexp.freetarg << 2)) ||
			    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
				if (swslot == 0) {
					/* exit now if no swap-i/o pending */
					break;
				}

				/* set p to null to signal final swap i/o */
				p = NULL;
				nextpg = NULL;
			}
		}
		if (p) {	/* if (we have a new page to consider) */
			/*
			 * we are below target and have a new page to consider.
			 */
			uvmexp.pdscans++;
			nextpg = TAILQ_NEXT(p, pageq);

			anon = p->uanon;
			uobj = p->uobject;

			/*
			 * first we attempt to lock the object that this page
			 * belongs to.  if our attempt fails we skip on to
			 * the next page (no harm done).  it is important to
			 * "try" locking the object as we are locking in the
			 * wrong order (pageq -> object) and we don't want to
			 * deadlock.
			 */
			slock = uvmpd_trylockowner(p);
			if (slock == NULL) {
				continue;
			}

			/*
			 * move referenced pages back to active queue
			 * and skip to next page.
			 */
			if (pmap_is_referenced(p)) {
				uvm_pageactivate(p);
				rw_exit(slock);
				uvmexp.pdreact++;
				continue;
			}

			if (p->pg_flags & PG_BUSY) {
				rw_exit(slock);
				uvmexp.pdbusy++;
				continue;
			}

			/* does the page belong to an object? */
			if (uobj != NULL) {
				uvmexp.pdobscan++;
			} else {
				KASSERT(anon != NULL);
				uvmexp.pdanscan++;
			}

			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
				uvmexp.pdfreed++;

				if (anon) {
					/*
					 * an anonymous page can only be clean
					 * if it has backing store assigned.
					 */
					KASSERT(anon->an_swslot != 0);

					/* remove from object */
					anon->an_page = NULL;
				}
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, skip it if we'll have met our
			 * free target when all the current pageouts complete.
			 */
			if ((pma == NULL || (pma->pm_flags & UVM_PMA_FREED)) &&
			    (free + uvmexp.paging > uvmexp.freetarg << 2)) {
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, but we can't page it out:
			 * swap is full and every page already in swap lives
			 * only there, so freeing swap slots won't help.
			 * reactivate this page so that we eventually cycle
			 * all pages thru the inactive queue.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
				dirtyreacts++;
				uvm_pageactivate(p);
				rw_exit(slock);
				continue;
			}

			/*
			 * if the page is swap-backed and dirty and swap space
			 * is filled (no free slots left), free any swap
			 * allocated to the page so that other pages can be
			 * paged out.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
				uvmpd_dropswap(p);

			/*
			 * the page we are looking at is dirty.  we must
			 * clean it before it can be freed.  to do this we
			 * first mark the page busy so that no one else will
			 * touch the page.  we write protect all the mappings
			 * of the page so that no one touches it while it is
			 * in I/O.
			 */

			swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
			atomic_setbits_int(&p->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(p, "scan_inactive");
			pmap_page_protect(p, PROT_READ);
			uvmexp.pgswapout++;

			/*
			 * for swap-backed pages we need to (re)allocate
			 * swap space.
			 */
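			/*
			 * Cluster bookkeeping, for reference: swslot is the
			 * base swap slot of the cluster being built, swnpages
			 * is its capacity (up to SWCLUSTPAGES slots reserved
			 * via uvm_swap_alloc()), and swcpages counts the pages
			 * gathered in swpps[] so far.  The cluster is written
			 * out once swcpages == swnpages, or when the list runs
			 * out (p == NULL).
			 */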
			if (swap_backed) {
				/* free old swap slot (if any) */
				uvmpd_dropswap(p);

				/* start new cluster (if necessary) */
				if (swslot == 0) {
					swnpages = SWCLUSTPAGES;
					swslot = uvm_swap_alloc(&swnpages,
					    TRUE);
					if (swslot == 0) {
						/* no swap?  give up! */
						atomic_clearbits_int(
						    &p->pg_flags,
						    PG_BUSY);
						UVM_PAGE_OWN(p, NULL);
						rw_exit(slock);
						continue;
					}
					swcpages = 0;	/* cluster is empty */
				}

				/* add block to cluster */
				swpps[swcpages] = p;
				if (anon)
					anon->an_swslot = swslot + swcpages;
				else
					uao_set_swslot(uobj,
					    p->offset >> PAGE_SHIFT,
					    swslot + swcpages);
				swcpages++;
				rw_exit(slock);

				/* cluster not full yet? */
				if (swcpages < swnpages)
					continue;
			}
		} else {
			/* if p == NULL we must be doing a last swap i/o */
			swap_backed = TRUE;
		}

		/*
		 * now consider doing the pageout.
		 *
		 * for swap-backed pages, we do the pageout if we have either
		 * filled the cluster (in which case swnpages == swcpages), or
		 * run out of pages (p == NULL).
		 *
		 * for object pages, we always do the pageout.
		 */
		if (swap_backed) {
			/* starting I/O now... set up for it */
			npages = swcpages;
			ppsp = swpps;
			/* for swap-backed pages only */
			start = (vaddr_t) swslot;

			/* if this is final pageout we could have a few
			 * extra swap blocks */
			if (swcpages < swnpages) {
				uvm_swap_free(swslot + swcpages,
				    (swnpages - swcpages));
			}
		} else {
			/* normal object pageout */
			ppsp = pps;
			npages = sizeof(pps) / sizeof(struct vm_page *);
			/* not looked at because PGO_ALLPAGES is set */
			start = 0;
		}

		/*
		 * now do the pageout.
		 *
		 * for swap_backed pages we have already built the cluster.
		 * for !swap_backed pages, uvm_pager_put will call the object's
		 * "make put cluster" function to build a cluster on our behalf.
		 *
		 * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
		 * it to free the cluster pages for us on a successful I/O (it
		 * always does this for un-successful I/O requests).  this
		 * allows us to do clustered pageout without having to deal
		 * with cluster pages at this level.
		 *
		 * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
		 *  IN:  locked: page queues
		 *  OUT: locked: nothing (the page queues are unlocked
		 *       on return)
		 */

		uvmexp.pdpageouts++;
		result = uvm_pager_put(swap_backed ? NULL : uobj, p,
		    &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);

		/*
		 * if we did i/o to swap, zero swslot to indicate that we are
		 * no longer building a swap-backed cluster.
		 */

		if (swap_backed)
			swslot = 0;	/* done with this cluster */

		/*
		 * first, we check for VM_PAGER_PEND which means that the
		 * async I/O is in progress and the async I/O done routine
		 * will clean up after us.  in this case we move on to the
		 * next page.
		 *
		 * there is a very remote chance that the pending async i/o can
		 * finish _before_ we get here.  if that happens, our page "p"
		 * may no longer be on the inactive queue.  so we verify this
		 * when determining the next page (starting over at the head if
		 * we've lost our inactive page).
		 */
		if (result == VM_PAGER_PEND) {
			uvmexp.paging += npages;
			uvm_lock_pageq();
			uvmexp.pdpending++;
			if (p) {
				if (p->pg_flags & PQ_INACTIVE)
					nextpg = TAILQ_NEXT(p, pageq);
				else
					nextpg = TAILQ_FIRST(pglst);
			} else {
				nextpg = NULL;
			}
			continue;
		}

		/* clean up "p" if we have one */
		if (p) {
			/*
			 * the I/O request to "p" is done and uvm_pager_put
			 * has freed any cluster pages it may have allocated
			 * during I/O.  all that is left for us to do is
			 * clean up page "p" (which is still PG_BUSY).
			 *
			 * our result could be one of the following:
			 *   VM_PAGER_OK: successful pageout
			 *   VM_PAGER_AGAIN: tmp resource shortage, we skip
			 *     to next page
			 *   VM_PAGER_{FAIL,ERROR,BAD}: an error.  we
			 *     "reactivate" page to get it out of the way (it
			 *     will eventually drift back into the inactive
			 *     queue for a retry).
			 *   VM_PAGER_UNLOCK: should never see this as it is
			 *     only valid for "get" operations
			 */

			/* relock p's object: page queues not locked yet, so
			 * no need for "try" */

			/* !swap_backed case: already locked... */
			if (swap_backed) {
				rw_enter(slock, RW_WRITE);
			}

#ifdef DIAGNOSTIC
			if (result == VM_PAGER_UNLOCK)
				panic("pagedaemon: pageout returned "
				    "invalid 'unlock' code");
#endif

			/* handle PG_WANTED now */
			if (p->pg_flags & PG_WANTED)
				wakeup(p);

			atomic_clearbits_int(&p->pg_flags, PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(p, NULL);

			/* released during I/O? Can only happen for anons */
			if (p->pg_flags & PG_RELEASED) {
				KASSERT(anon != NULL);
				/*
				 * remove page so we can get nextpg,
				 * also zero out anon so we don't use
				 * it after the free.
				 */
				anon->an_page = NULL;
				p->uanon = NULL;

				rw_exit(anon->an_lock);
				uvm_anfree(anon);	/* kills anon */
				pmap_page_protect(p, PROT_NONE);
				anon = NULL;
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				/* free released page */
				uvm_pagefree(p);
			} else {	/* page was not released during I/O */
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				if (result != VM_PAGER_OK) {
					/* pageout was a failure... */
					if (result != VM_PAGER_AGAIN)
						uvm_pageactivate(p);
					pmap_clear_reference(p);
					/* XXXCDC: if (swap_backed) FREE p's
					 * swap block? */
				} else {
					/* pageout was a success... */
					pmap_clear_reference(p);
					pmap_clear_modify(p);
					atomic_setbits_int(&p->pg_flags,
					    PG_CLEAN);
				}
			}

			/*
			 * drop object lock (if there is an object left).  do
			 * a safety check of nextpg to make sure it is on the
			 * inactive queue (it should be since PG_BUSY pages on
			 * the inactive queue can't be re-queued [note: not
			 * true for active queue]).
			 */
			rw_exit(slock);

			if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
				nextpg = TAILQ_FIRST(pglst);	/* reload! */
			}
		} else {
			/*
			 * if p is null in this loop, make sure it stays null
			 * in the next loop.
			 */
			nextpg = NULL;

			/*
			 * lock page queues here just so they're always locked
			 * at the end of the loop.
			 */
			uvm_lock_pageq();
		}
	}
}

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */

void
uvmpd_scan(struct uvm_pmalloc *pma, struct uvm_constraint_range *constraint)
{
	int free, inactive_shortage, swap_shortage, pages_freed;
	struct vm_page *p, *nextpg;
	struct rwlock *slock;
	paddr_t paddr;

	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);

	uvmexp.pdrevs++;		/* counter */

	/*
	 * get current "free" page count
	 */
	free = uvmexp.free - BUFPAGES_DEFICIT;

#ifdef __HAVE_PMAP_COLLECT
	/*
	 * swap out some processes if we are below our free target.
	 * we need to unlock the page queues for this.
	 */
	if (free < uvmexp.freetarg) {
		uvmexp.pdswout++;
		uvm_unlock_pageq();
		uvm_swapout_threads();
		uvm_lock_pageq();
	}
#endif

	/*
	 * now we want to work on meeting our targets.  first we work on our
	 * free target by converting inactive pages into free pages.  then
	 * we work on meeting our inactive target by converting active pages
	 * to inactive ones.
	 */

	pages_freed = uvmexp.pdfreed;
	uvmpd_scan_inactive(pma, constraint, &uvm.page_inactive);
	pages_freed = uvmexp.pdfreed - pages_freed;

	/*
	 * we have done the scan to get free pages.  now we work on meeting
	 * our inactive target.
	 */
	inactive_shortage = uvmexp.inactarg - uvmexp.inactive - BUFPAGES_INACT;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */
	free = uvmexp.free - BUFPAGES_DEFICIT;
	swap_shortage = 0;
	if (free < uvmexp.freetarg && uvm_swapisfilled() && !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - free;
	}
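
	/*
	 * Note the distinction relied on above: uvm_swapisfilled() means no
	 * free swap slots remain, while uvm_swapisfull() means every slot
	 * holds a page that lives only in swap.  Dropping swap from pages
	 * that are also resident (done in the loop below) only helps in the
	 * former case, hence the !uvm_swapisfull() test.
	 */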

	for (p = TAILQ_FIRST(&uvm.page_active);
	    p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
	    p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);
		if (p->pg_flags & PG_BUSY) {
			continue;
		}

		/*
		 * skip this page if it doesn't match the constraint
		 * (the match range is ucr_low <= paddr < ucr_high,
		 * as in uvmpd_scan_inactive() above).
		 */
		paddr = atop(VM_PAGE_TO_PHYS(p));
		if (paddr < constraint->ucr_low ||
		    paddr >= constraint->ucr_high)
			continue;

		/*
		 * lock the page's owner.
		 */
		slock = uvmpd_trylockowner(p);
		if (slock == NULL) {
			continue;
		}

		/*
		 * skip this page if it's busy.
		 */
		if ((p->pg_flags & PG_BUSY) != 0) {
			rw_exit(slock);
			continue;
		}

		/*
		 * if there's a shortage of swap, free any swap allocated
		 * to this page so that other pages can be paged out.
		 */
		if (swap_shortage > 0) {
			if (uvmpd_dropswap(p)) {
				atomic_clearbits_int(&p->pg_flags, PG_CLEAN);
				swap_shortage--;
			}
		}

		/*
		 * deactivate this page if there's a shortage of
		 * inactive pages.
		 */
		if (inactive_shortage > 0) {
			pmap_page_protect(p, PROT_NONE);
			/* no need to check wire_count as pg is "active" */
			uvm_pagedeactivate(p);
			uvmexp.pddeact++;
			inactive_shortage--;
		}

		/*
		 * we're done with this page.
		 */
		rw_exit(slock);
	}
}

#ifdef HIBERNATE

/*
 * uvmpd_drop: drop clean pages from list
 */
void
uvmpd_drop(struct pglist *pglst)
{
	struct vm_page *p, *nextpg;

	for (p = TAILQ_FIRST(pglst); p != NULL; p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);

		if (p->pg_flags & PQ_ANON || p->uobject == NULL)
			continue;

		if (p->pg_flags & PG_BUSY)
			continue;

		if (p->pg_flags & PG_CLEAN) {
			struct uvm_object *uobj = p->uobject;

			rw_enter(uobj->vmobjlock, RW_WRITE);
			uvm_lock_pageq();
			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
			}
			uvm_unlock_pageq();
			rw_exit(uobj->vmobjlock);
		}
	}
}

void
uvmpd_hibernate(void)
{
	uvmpd_drop(&uvm.page_inactive);
	uvmpd_drop(&uvm.page_active);
}

#endif