/*	$OpenBSD: uvm_pdaemon.c,v 1.134 2025/01/25 08:55:52 mpi Exp $	*/
/*	$NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/atomic.h>

#ifdef HIBERNATE
#include <sys/hibernate.h>
#endif

#include <uvm/uvm.h>

#include "drm.h"

#if NDRM > 0
extern unsigned long drmbackoff(long);
#endif
/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define UVMPD_NUMDIRTYREACTS	16


/*
 * local prototypes
 */

struct rwlock	*uvmpd_trylockowner(struct vm_page *);
void		uvmpd_scan(struct uvm_pmalloc *, int, int);
int		uvmpd_scan_inactive(struct uvm_pmalloc *, int);
void		uvmpd_scan_active(struct uvm_pmalloc *, int, int);
void		uvmpd_tune(void);
void		uvmpd_drop(struct pglist *);
int		uvmpd_dropswap(struct vm_page *);

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(const char *wmsg)
{
	uint64_t timo = INFSLP;

#ifdef DIAGNOSTIC
	if (curproc == &proc0)
		panic("%s: cannot sleep for memory during boot", __func__);
#endif

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */
	if (curproc == uvm.pagedaemon_proc) {
		printf("uvm_wait emergency bufbackoff\n");
		if (bufbackoff(NULL, 4) >= 4)
			return;
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = MSEC_TO_NSEC(125);	/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_lock_fpageq();
	wakeup(&uvm.pagedaemon);		/* wake the daemon! */
	msleep_nsec(&uvmexp.free, &uvm.fpageqlock, PVM | PNORELOCK, wmsg, timo);
}
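
/*
 * Typical caller pattern (illustrative sketch only; the real callers live
 * elsewhere in UVM and the pmaps): retry a failed page allocation after
 * giving the page daemon a chance to free memory.
 *
 *	while ((pg = uvm_pagealloc(uobj, off, NULL, 0)) == NULL)
 *		uvm_wait("examplewait");
 */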

/*
 * uvmpd_tune: tune paging parameters
 */
void
uvmpd_tune(void)
{
	int val;

	val = uvmexp.npages / 30;

	/* XXX: what are these values good for? */
	val = max(val, (16*1024) >> PAGE_SHIFT);

	/* Make sure there's always a user page free. */
	if (val < uvmexp.reserve_kernel + 1)
		val = uvmexp.reserve_kernel + 1;
	uvmexp.freemin = val;

	/* Calculate free target. */
	val = (uvmexp.freemin * 4) / 3;
	if (val <= uvmexp.freemin)
		val = uvmexp.freemin + 1;
	uvmexp.freetarg = val;

	uvmexp.wiredmax = uvmexp.npages / 3;
}
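
/*
 * Worked example (hypothetical machine, for illustration only): with 1GB of
 * RAM and 4KB pages, uvmexp.npages is 262144, so (assuming
 * uvmexp.reserve_kernel is well below these values):
 *
 *	freemin  = max(262144 / 30, 16384 >> 12) = 8738 pages (~34MB)
 *	freetarg = (8738 * 4) / 3                = 11650 pages (~45MB)
 *	wiredmax = 262144 / 3                    = 87381 pages
 */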

/*
 * Indicate to the page daemon that a nowait call failed and it should
 * recover at least some memory in the most restricted region (assumed
 * to be dma_constraint).
 */
struct uvm_pmalloc nowait_pma;

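/*
 * uvmpd_pma_done: check whether a pmalloc request still needs work.
 *
 * A request is done once at least one page in its constraint range has been
 * freed (UVM_PMA_FREED), or trivially when no request is pending.
 */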
static inline int
uvmpd_pma_done(struct uvm_pmalloc *pma)
{
	if (pma == NULL || (pma->pm_flags & UVM_PMA_FREED))
		return 1;
	return 0;
}

/*
 * uvm_pageout: the main loop for the pagedaemon
 */
void
uvm_pageout(void *arg)
{
	struct uvm_constraint_range constraint;
	struct uvm_pmalloc *pma;
	int shortage, inactive_shortage;

	/* ensure correct priority and set paging parameters... */
	uvm.pagedaemon_proc = curproc;
	(void) spl0();
	uvmpd_tune();

	/*
	 * XXX realistically, this is what our nowait callers probably
	 * care about.
	 */
	nowait_pma.pm_constraint = dma_constraint;
	nowait_pma.pm_size = (16 << PAGE_SHIFT); /* XXX */
	nowait_pma.pm_flags = 0;

	for (;;) {
		long size;

		uvm_lock_fpageq();
		if (TAILQ_EMPTY(&uvm.pmr_control.allocs) || uvmexp.paging > 0) {
			msleep_nsec(&uvm.pagedaemon, &uvm.fpageqlock, PVM,
			    "pgdaemon", INFSLP);
			uvmexp.pdwoke++;
		}

		if ((pma = TAILQ_FIRST(&uvm.pmr_control.allocs)) != NULL) {
			pma->pm_flags |= UVM_PMA_BUSY;
			constraint = pma->pm_constraint;
		} else {
			constraint = no_constraint;
		}
		/* How many pages do we need to free during this round? */
		shortage = uvmexp.freetarg -
		    (uvmexp.free + uvmexp.paging) + BUFPAGES_DEFICIT;
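		/*
		 * Illustrative numbers only: with freetarg = 512, free = 200,
		 * paging = 50 and no buffer cache deficit, shortage is
		 * 512 - (200 + 50) + 0 = 262 pages to reclaim this round.
		 */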
		uvm_unlock_fpageq();

		/*
		 * now lock page queues and recompute inactive count
		 */
		uvm_lock_pageq();
		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
		if (uvmexp.inactarg <= uvmexp.freetarg) {
			uvmexp.inactarg = uvmexp.freetarg + 1;
		}
		inactive_shortage =
		    uvmexp.inactarg - uvmexp.inactive - BUFPAGES_INACT;
		uvm_unlock_pageq();

		size = 0;
		if (pma != NULL)
			size += pma->pm_size >> PAGE_SHIFT;
		if (shortage > 0)
			size += shortage;

		if (size == 0) {
			/*
			 * Since the inactive target just got updated
			 * above, both `size' and `inactive_shortage' can
			 * be 0.
			 */
			if (inactive_shortage) {
				uvm_lock_pageq();
				uvmpd_scan_active(NULL, 0, inactive_shortage);
				uvm_unlock_pageq();
			}
			continue;
		}

		/* Reclaim pages from the buffer cache if possible. */
		shortage -= bufbackoff(&constraint, size * 2);
#if NDRM > 0
		shortage -= drmbackoff(size * 2);
#endif
		if (shortage > 0)
			shortage -= uvm_pmr_cache_drain();

		/*
		 * scan if needed
		 */
		uvm_lock_pageq();
		if (!uvmpd_pma_done(pma) ||
		    (shortage > 0) || (inactive_shortage > 0)) {
			uvmpd_scan(pma, shortage, inactive_shortage);
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		uvm_lock_fpageq();
		if (uvmexp.free > uvmexp.reserve_kernel || uvmexp.paging == 0) {
			wakeup(&uvmexp.free);
		}

		if (pma != NULL) {
			/*
			 * XXX If UVM_PMA_FREED isn't set, no pages
			 * were freed.  Should we set UVM_PMA_FAIL in
			 * that case?
			 */
			pma->pm_flags &= ~UVM_PMA_BUSY;
			if (pma->pm_flags & UVM_PMA_FREED) {
				pma->pm_flags &= ~UVM_PMA_LINKED;
				TAILQ_REMOVE(&uvm.pmr_control.allocs, pma, pmq);
				wakeup(pma);
			}
		}
		uvm_unlock_fpageq();

		/*
		 * scan done.  unlock page queues (the only lock we are holding)
		 */
		uvm_unlock_pageq();

		sched_pause(yield);
	}
	/*NOTREACHED*/
}


/*
 * uvm_aiodone_daemon: main loop for the aiodone daemon.
 */
void
uvm_aiodone_daemon(void *arg)
{
	int s, npages;
	struct buf *bp, *nbp;

	uvm.aiodoned_proc = curproc;
	KERNEL_UNLOCK();

	for (;;) {
		/*
		 * Check for done aio structures. If we've got structures to
		 * process, do so. Otherwise sleep while avoiding races.
		 */
		mtx_enter(&uvm.aiodoned_lock);
		while ((bp = TAILQ_FIRST(&uvm.aio_done)) == NULL)
			msleep_nsec(&uvm.aiodoned, &uvm.aiodoned_lock,
			    PVM, "aiodoned", INFSLP);
		/* Take the list for ourselves. */
		TAILQ_INIT(&uvm.aio_done);
		mtx_leave(&uvm.aiodoned_lock);

		/* process each i/o that's done. */
		npages = 0;
		KERNEL_LOCK();
		while (bp != NULL) {
			if (bp->b_flags & B_PDAEMON) {
				npages += bp->b_bufsize >> PAGE_SHIFT;
			}
			nbp = TAILQ_NEXT(bp, b_freelist);
			s = splbio();	/* b_iodone must be called at splbio */
			(*bp->b_iodone)(bp);
			splx(s);
			bp = nbp;

			sched_pause(yield);
		}
		KERNEL_UNLOCK();

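		/*
		 * Account for the pageouts that just completed and wake up
		 * whoever is waiting on memory: the page daemon itself if
		 * free pages are still at or below the kernel reserve,
		 * otherwise any thread sleeping in uvm_wait().
		 */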
		uvm_lock_fpageq();
		atomic_sub_int(&uvmexp.paging, npages);
		wakeup(uvmexp.free <= uvmexp.reserve_kernel ? &uvm.pagedaemon :
		    &uvmexp.free);
		uvm_unlock_fpageq();
	}
}

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => return the locked rwlock on success.  otherwise, return NULL.
 */
struct rwlock *
uvmpd_trylockowner(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;
	struct rwlock *slock;

	if (uobj != NULL) {
		slock = uobj->vmobjlock;
	} else {
		struct vm_anon *anon = pg->uanon;

		KASSERT(anon != NULL);
		slock = anon->an_lock;
	}

	if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
		return NULL;
	}

	return slock;
}
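
/*
 * Usage sketch: the scan functions below hold the page queue lock and take
 * the owner lock in the "wrong" order (pageq -> object/anon), so the owner
 * must be locked without sleeping and the page simply skipped on failure:
 *
 *	slock = uvmpd_trylockowner(p);
 *	if (slock == NULL)
 *		continue;	(skip this page, revisit it later)
 *	...
 *	rw_exit(slock);
 */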

/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 * => return 1 if a page had an associated slot.
 */
int
uvmpd_dropswap(struct vm_page *pg)
{
	struct vm_anon *anon = pg->uanon;
	int slot, result = 0;

	if ((pg->pg_flags & PQ_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
		result = 1;
	} else if (pg->pg_flags & PQ_AOBJ) {
		slot = uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
		if (slot)
			result = 1;
	}

	return result;
}
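
/*
 * The return value lets uvmpd_scan_active() count how many swap slots were
 * actually released when it is trying to relieve a swap shortage; callers
 * that only want the side effect (uvmpd_scan_inactive) ignore it.
 */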

/*
 * Return 1 if the page `p' belongs to the memory range described by
 * `constraint', 0 otherwise.
 */
static inline int
uvmpd_match_constraint(struct vm_page *p,
    struct uvm_constraint_range *constraint)
{
	paddr_t paddr;

	paddr = atop(VM_PAGE_TO_PHYS(p));
	if (paddr >= constraint->ucr_low && paddr < constraint->ucr_high)
		return 1;

	return 0;
}

/*
 * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 * => we return the number of pages freed
 */
int
uvmpd_scan_inactive(struct uvm_pmalloc *pma, int shortage)
{
	struct pglist *pglst = &uvm.page_inactive;
	int result, freed = 0;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	struct vm_page *pps[SWCLUSTPAGES], **ppsp;
	int npages;
	struct vm_page *swpps[SWCLUSTPAGES];	/* XXX: see below */
	struct rwlock *slock;
	int swnpages, swcpages;			/* XXX: see below */
	int swslot;
	struct vm_anon *anon;
	boolean_t swap_backed;
	vaddr_t start;
	int dirtyreacts;

	/*
	 * swslot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */
	swslot = 0;
	swnpages = swcpages = 0;
	dirtyreacts = 0;
	p = NULL;

	/*
	 * If a thread is waiting for us to release memory from a specific
	 * memory range start with the first page on the list that fits in
	 * it.
	 */
	TAILQ_FOREACH(p, pglst, pageq) {
		if (uvmpd_pma_done(pma) ||
		    uvmpd_match_constraint(p, &pma->pm_constraint))
			break;
	}

	for (; p != NULL || swslot != 0; p = nextpg) {
		/*
		 * note that p can be NULL iff we have traversed the whole
		 * list and need to do one final swap-backed clustered pageout.
		 */
		uobj = NULL;
		anon = NULL;
		if (p) {
			/*
			 * see if we've met our target
			 */
			if ((uvmpd_pma_done(pma) &&
			    (uvmexp.paging >= (shortage - freed))) ||
			    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
				if (swslot == 0) {
					/* exit now if no swap-i/o pending */
					break;
				}

				/* set p to null to signal final swap i/o */
				p = NULL;
				nextpg = NULL;
			}
		}
		if (p) {	/* if (we have a new page to consider) */
			/*
			 * we are below target and have a new page to consider.
			 */
			uvmexp.pdscans++;
			nextpg = TAILQ_NEXT(p, pageq);

			/*
			 * If we are not short on memory and only interested
			 * in releasing pages from a given memory range, do not
			 * bother with other pages.
			 */
			if (uvmexp.paging >= (shortage - freed) &&
			    !uvmpd_pma_done(pma) &&
			    !uvmpd_match_constraint(p, &pma->pm_constraint))
				continue;

			anon = p->uanon;
			uobj = p->uobject;

			/*
			 * first we attempt to lock the object that this page
			 * belongs to.  if our attempt fails we skip on to
			 * the next page (no harm done).  it is important to
			 * "try" locking the object as we are locking in the
			 * wrong order (pageq -> object) and we don't want to
			 * deadlock.
			 */
			slock = uvmpd_trylockowner(p);
			if (slock == NULL) {
				continue;
			}

			/*
			 * move referenced pages back to active queue
			 * and skip to next page.
			 */
			if (pmap_is_referenced(p)) {
				uvm_pageactivate(p);
				rw_exit(slock);
				uvmexp.pdreact++;
				continue;
			}

			if (p->pg_flags & PG_BUSY) {
				rw_exit(slock);
				uvmexp.pdbusy++;
				continue;
			}

			/* does the page belong to an object? */
			if (uobj != NULL) {
				uvmexp.pdobscan++;
			} else {
				KASSERT(anon != NULL);
				uvmexp.pdanscan++;
			}

			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
				freed++;

				if (anon) {

					/*
					 * an anonymous page can only be clean
					 * if it has backing store assigned.
					 */

					KASSERT(anon->an_swslot != 0);

					/* remove from object */
					anon->an_page = NULL;
				}
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, skip it if we'll have met our
			 * free target when all the current pageouts complete.
			 */
			if (uvmpd_pma_done(pma) &&
			    (uvmexp.paging > (shortage - freed))) {
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, but we can't page it out:
			 * swap is full and every slot holds data that lives
			 * only in swap, so no slot can be freed for it.
			 * reactivate it so that we eventually cycle
			 * all pages thru the inactive queue.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
				dirtyreacts++;
				uvm_pageactivate(p);
				rw_exit(slock);
				continue;
			}

			/*
			 * if the page is swap-backed and dirty and swap space
			 * is full, free any swap allocated to the page
			 * so that other pages can be paged out.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
				uvmpd_dropswap(p);

			/*
			 * the page we are looking at is dirty.  we must
			 * clean it before it can be freed.  to do this we
			 * first mark the page busy so that no one else will
			 * touch the page.  we write protect all the mappings
			 * of the page so that no one touches it while it is
			 * in I/O.
			 */

			swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
			atomic_setbits_int(&p->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(p, "scan_inactive");
			pmap_page_protect(p, PROT_READ);
			uvmexp.pgswapout++;

			/*
			 * for swap-backed pages we need to (re)allocate
			 * swap space.
			 */
			if (swap_backed) {
				/* free old swap slot (if any) */
				uvmpd_dropswap(p);

				/* start new cluster (if necessary) */
				if (swslot == 0) {
					swnpages = SWCLUSTPAGES;
					swslot = uvm_swap_alloc(&swnpages,
					    TRUE);
					if (swslot == 0) {
						/* no swap?  give up! */
						atomic_clearbits_int(
						    &p->pg_flags,
						    PG_BUSY);
						UVM_PAGE_OWN(p, NULL);
						rw_exit(slock);
						continue;
					}
					swcpages = 0;	/* cluster is empty */
				}

				/* add block to cluster */
				swpps[swcpages] = p;
				if (anon)
					anon->an_swslot = swslot + swcpages;
				else
					uao_set_swslot(uobj,
					    p->offset >> PAGE_SHIFT,
					    swslot + swcpages);
				swcpages++;
				rw_exit(slock);

				/* cluster not full yet? */
				if (swcpages < swnpages)
					continue;
			}
		} else {
			/* if p == NULL we must be doing a last swap i/o */
			swap_backed = TRUE;
		}

		/*
		 * now consider doing the pageout.
		 *
		 * for swap-backed pages, we do the pageout if we have either
		 * filled the cluster (in which case swnpages == swcpages) or
		 * run out of pages (p == NULL).
		 *
		 * for object pages, we always do the pageout.
		 */
		if (swap_backed) {
			/* starting I/O now... set up for it */
			npages = swcpages;
			ppsp = swpps;
			/* for swap-backed pages only */
			start = (vaddr_t) swslot;

			/* if this is final pageout we could have a few
			 * extra swap blocks */
			if (swcpages < swnpages) {
				uvm_swap_free(swslot + swcpages,
				    (swnpages - swcpages));
			}
		} else {
			/* normal object pageout */
			ppsp = pps;
			npages = sizeof(pps) / sizeof(struct vm_page *);
			/* not looked at because PGO_ALLPAGES is set */
			start = 0;
		}

		/*
		 * now do the pageout.
		 *
		 * for swap_backed pages we have already built the cluster.
		 * for !swap_backed pages, uvm_pager_put will call the object's
		 * "make put cluster" function to build a cluster on our behalf.
		 *
		 * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
		 * it to free the cluster pages for us on a successful I/O (it
		 * always does this for unsuccessful I/O requests).  this
		 * allows us to do clustered pageout without having to deal
		 * with cluster pages at this level.
		 *
		 * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
		 *  IN: locked: page queues
		 * OUT: locked:
		 *     !locked: pageqs
		 */

		uvmexp.pdpageouts++;
		result = uvm_pager_put(swap_backed ? NULL : uobj, p,
		    &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);

		/*
		 * if we did i/o to swap, zero swslot to indicate that we are
		 * no longer building a swap-backed cluster.
		 */

		if (swap_backed)
			swslot = 0;		/* done with this cluster */

		/*
		 * first, we check for VM_PAGER_PEND which means that the
		 * async I/O is in progress and the async I/O done routine
		 * will clean up after us.  in this case we move on to the
		 * next page.
		 *
		 * there is a very remote chance that the pending async i/o can
		 * finish _before_ we get here.  if that happens, our page "p"
		 * may no longer be on the inactive queue.  so we verify this
		 * when determining the next page (starting over at the head if
		 * we've lost our inactive page).
		 */

		if (result == VM_PAGER_PEND) {
			atomic_add_int(&uvmexp.paging, npages);
			uvm_lock_pageq();
			uvmexp.pdpending++;
			if (p) {
				if (p->pg_flags & PQ_INACTIVE)
					nextpg = TAILQ_NEXT(p, pageq);
				else
					nextpg = TAILQ_FIRST(pglst);
			} else {
				nextpg = NULL;
			}
			continue;
		}

		/* clean up "p" if we have one */
		if (p) {
			/*
			 * the I/O request to "p" is done and uvm_pager_put
			 * has freed any cluster pages it may have allocated
			 * during I/O.  all that is left for us to do is
			 * clean up page "p" (which is still PG_BUSY).
			 *
			 * our result could be one of the following:
			 *   VM_PAGER_OK: successful pageout
			 *
			 *   VM_PAGER_AGAIN: tmp resource shortage, we skip
			 *     to next page
			 *   VM_PAGER_{FAIL,ERROR,BAD}: an error.  we
			 *     "reactivate" page to get it out of the way (it
			 *     will eventually drift back into the inactive
			 *     queue for a retry).
			 *   VM_PAGER_UNLOCK: should never see this as it is
			 *     only valid for "get" operations
			 */

			/* relock p's object: page queues not locked yet, so
			 * no need for "try" */

			/* !swap_backed case: already locked... */
			if (swap_backed) {
				rw_enter(slock, RW_WRITE);
			}

#ifdef DIAGNOSTIC
			if (result == VM_PAGER_UNLOCK)
				panic("pagedaemon: pageout returned "
				    "invalid 'unlock' code");
#endif

			/* handle PG_WANTED now */
			if (p->pg_flags & PG_WANTED)
				wakeup(p);

			atomic_clearbits_int(&p->pg_flags, PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(p, NULL);

			/* released during I/O? Can only happen for anons */
			if (p->pg_flags & PG_RELEASED) {
				KASSERT(anon != NULL);
				/*
				 * remove page so we can get nextpg,
				 * also zero out anon so we don't use
				 * it after the free.
				 */
				anon->an_page = NULL;
				p->uanon = NULL;

				uvm_anfree(anon);	/* kills anon */
				pmap_page_protect(p, PROT_NONE);
				anon = NULL;
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				/* free released page */
				uvm_pagefree(p);
			} else {	/* page was not released during I/O */
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				if (result != VM_PAGER_OK) {
					/* pageout was a failure... */
					if (result != VM_PAGER_AGAIN)
						uvm_pageactivate(p);
					pmap_clear_reference(p);
				} else {
					/* pageout was a success... */
					pmap_clear_reference(p);
					pmap_clear_modify(p);
					atomic_setbits_int(&p->pg_flags,
					    PG_CLEAN);
				}
			}

			/*
			 * drop object lock (if there is an object left).  do
			 * a safety check of nextpg to make sure it is on the
			 * inactive queue (it should be since PG_BUSY pages on
			 * the inactive queue can't be re-queued [note: not
			 * true for active queue]).
			 */
			rw_exit(slock);

			if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
				nextpg = TAILQ_FIRST(pglst);	/* reload! */
			}
		} else {
			/*
			 * if p is null in this loop, make sure it stays null
			 * in the next loop.
			 */
			nextpg = NULL;

			/*
			 * lock page queues here just so they're always locked
			 * at the end of the loop.
			 */
			uvm_lock_pageq();
		}
	}

	return freed;
}

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */

void
uvmpd_scan(struct uvm_pmalloc *pma, int shortage, int inactive_shortage)
{
	int swap_shortage, pages_freed;

	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);

	uvmexp.pdrevs++;		/* counter */

#ifdef __HAVE_PMAP_COLLECT
	/*
	 * swap out some processes if we are below our free target.
	 * we need to unlock the page queues for this.
	 */
	if (shortage > 0) {
		uvmexp.pdswout++;
		uvm_unlock_pageq();
		shortage -= uvm_swapout_threads();
		uvm_lock_pageq();
	}
#endif

	/*
	 * now we want to work on meeting our targets.  first we work on our
	 * free target by converting inactive pages into free pages.  then
	 * we work on meeting our inactive target by converting active pages
	 * to inactive ones.
	 */
	pages_freed = uvmpd_scan_inactive(pma, shortage);
	uvmexp.pdfreed += pages_freed;
	shortage -= pages_freed;

	/*
	 * we have done the scan to get free pages.  now we work on meeting
	 * our inactive target.
	 *
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */
	swap_shortage = 0;
	if ((shortage > 0) && uvm_swapisfilled() && !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = shortage;
	}

	uvmpd_scan_active(pma, swap_shortage, inactive_shortage);
}

void
uvmpd_scan_active(struct uvm_pmalloc *pma, int swap_shortage,
    int inactive_shortage)
{
	struct vm_page *p, *nextpg;
	struct rwlock *slock;

	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);

	for (p = TAILQ_FIRST(&uvm.page_active);
	    p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
	    p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);
		if (p->pg_flags & PG_BUSY) {
			continue;
		}

		/*
		 * If we couldn't release enough pages from a given memory
		 * range try to deactivate them first...
		 *
		 * ...unless we are low on swap slots, in such case we are
		 * probably OOM and want to release swap resources as quickly
		 * as possible.
		 */
		if (inactive_shortage > 0 && swap_shortage == 0 &&
		    !uvmpd_pma_done(pma) &&
		    !uvmpd_match_constraint(p, &pma->pm_constraint))
			continue;

		/*
		 * lock the page's owner.
		 */
		slock = uvmpd_trylockowner(p);
		if (slock == NULL) {
			continue;
		}

		/*
		 * skip this page if it's busy.
		 */
		if ((p->pg_flags & PG_BUSY) != 0) {
			rw_exit(slock);
			continue;
		}

		/*
		 * if there's a shortage of swap, free any swap allocated
		 * to this page so that other pages can be paged out.
		 */
		if (swap_shortage > 0) {
			if (uvmpd_dropswap(p)) {
				atomic_clearbits_int(&p->pg_flags, PG_CLEAN);
				swap_shortage--;
			}
		}

		/*
		 * deactivate this page if there's a shortage of
		 * inactive pages.
		 */
		if (inactive_shortage > 0) {
			/* no need to check wire_count as pg is "active" */
			uvm_pagedeactivate(p);
			uvmexp.pddeact++;
			inactive_shortage--;
		}

		/*
		 * we're done with this page.
		 */
		rw_exit(slock);
	}
}

#ifdef HIBERNATE

/*
 * uvmpd_drop: drop clean pages from list
 */
void
uvmpd_drop(struct pglist *pglst)
{
	struct vm_page *p, *nextpg;

	for (p = TAILQ_FIRST(pglst); p != NULL; p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);

		if (p->pg_flags & PQ_ANON || p->uobject == NULL)
			continue;

		if (p->pg_flags & PG_BUSY)
			continue;

		if (p->pg_flags & PG_CLEAN) {
			struct uvm_object *uobj = p->uobject;

			rw_enter(uobj->vmobjlock, RW_WRITE);
			uvm_lock_pageq();
			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
			}
			uvm_unlock_pageq();
			rw_exit(uobj->vmobjlock);
		}
	}
}

void
uvmpd_hibernate(void)
{
	uvmpd_drop(&uvm.page_inactive);
	uvmpd_drop(&uvm.page_active);
}

#endif