xref: /original-bsd/sys/vm/vm_pageout.c (revision a79d9c15)
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vm_pageout.c	8.7 (Berkeley) 06/19/95
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	The proverbial page-out daemon.
 */

#include <sys/param.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

#ifndef VM_PAGE_FREE_MIN
#define VM_PAGE_FREE_MIN	(cnt.v_free_count / 20)
#endif

#ifndef VM_PAGE_FREE_TARGET
#define VM_PAGE_FREE_TARGET	((cnt.v_free_min * 4) / 3)
#endif

int	vm_page_free_min_min = 16 * 1024;
int	vm_page_free_min_max = 256 * 1024;
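
/*
 * Illustrative example: with 4K pages and 1024 pages free when the daemon
 * starts, VM_PAGE_FREE_MIN yields 1024 / 20 = 51 pages.  The byte limits
 * above convert to 4 and 64 pages, so the clamp in vm_pageout() leaves
 * free_min at 51, and VM_PAGE_FREE_TARGET then becomes (51 * 4) / 3 = 68
 * pages.
 */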

int	vm_pages_needed;	/* Event on which pageout daemon sleeps */

int	vm_page_max_wired = 0;	/* XXX max # of wired pages system-wide */

#ifdef CLUSTERED_PAGEOUT
#define MAXPOCLUSTER		(MAXPHYS/NBPG)	/* XXX */
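/*
 * For example, with a MAXPHYS of 64K and 4K pages this limits a pageout
 * cluster to 16 pages; both values are machine-dependent.
 */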
int doclustered_pageout = 1;
#endif

/*
 *	vm_pageout_scan does the dirty work for the pageout daemon.
 */
void
vm_pageout_scan()
{
	register vm_page_t	m, next;
	register int		page_shortage;
	register int		s;
	register int		pages_freed;
	int			free;
	vm_object_t		object;

	/*
	 *	Only continue when we want more pages to be "free"
	 */

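	/*
	 * One more pass ("revolution") of the pageout scan; cnt.v_scan
	 * below counts the individual pages examined.
	 */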
	cnt.v_rev++;

	s = splimp();
	simple_lock(&vm_page_queue_free_lock);
	free = cnt.v_free_count;
	simple_unlock(&vm_page_queue_free_lock);
	splx(s);

	if (free < cnt.v_free_target) {
		swapout_threads();

		/*
		 *	Be sure the pmap system is updated so
		 *	we can scan the inactive queue.
		 */

		pmap_update();
	}

	/*
	 *	Acquire the resident page system lock,
	 *	as we may be changing what's resident quite a bit.
	 */
	vm_page_lock_queues();

	/*
	 *	Start scanning the inactive queue for pages we can free.
	 *	We keep scanning until we have enough free pages or
	 *	we have scanned through the entire queue.  If we
	 *	encounter dirty pages, we start cleaning them.
	 */

	pages_freed = 0;
	for (m = vm_page_queue_inactive.tqh_first; m != NULL; m = next) {
		s = splimp();
		simple_lock(&vm_page_queue_free_lock);
		free = cnt.v_free_count;
		simple_unlock(&vm_page_queue_free_lock);
		splx(s);
		if (free >= cnt.v_free_target)
			break;

		cnt.v_scan++;
		next = m->pageq.tqe_next;

		/*
		 * If the page has been referenced, move it back to the
		 * active queue.
		 */
		if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
			vm_page_activate(m);
			cnt.v_reactivated++;
			continue;
		}

		/*
		 * If the page is clean, free it up.
		 */
		if (m->flags & PG_CLEAN) {
			object = m->object;
			if (vm_object_lock_try(object)) {
				pmap_page_protect(VM_PAGE_TO_PHYS(m),
						  VM_PROT_NONE);
				vm_page_free(m);
				pages_freed++;
				cnt.v_dfree++;
				vm_object_unlock(object);
			}
			continue;
		}

		/*
		 * If the page is dirty but already being washed, skip it.
		 */
		if ((m->flags & PG_LAUNDRY) == 0)
			continue;

		/*
		 * Otherwise the page is dirty and still in the laundry,
		 * so we start the cleaning operation and remove it from
		 * the laundry.
		 */
		object = m->object;
		if (!vm_object_lock_try(object))
			continue;
		cnt.v_pageouts++;
#ifdef CLUSTERED_PAGEOUT
		if (object->pager &&
		    vm_pager_cancluster(object->pager, PG_CLUSTERPUT))
			vm_pageout_cluster(m, object);
		else
#endif
		vm_pageout_page(m, object);
		thread_wakeup(object);
		vm_object_unlock(object);
		/*
		 * The former "next" page may no longer be on the inactive
		 * queue (the pager may have blocked with the queues
		 * unlocked).  If it isn't, just start over from the head
		 * of the queue.
		 */
		if (next && (next->flags & PG_INACTIVE) == 0)
			next = vm_page_queue_inactive.tqh_first;
	}

	/*
	 *	Compute the page shortage.  If we are still very low on
	 *	memory, be sure that we move at least a minimal number of
	 *	pages from the active queue to the inactive queue.
	 */

	page_shortage = cnt.v_inactive_target - cnt.v_inactive_count;
	if (page_shortage <= 0 && pages_freed == 0)
		page_shortage = 1;

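	/*
	 * For example, with an inactive target of 30 and 24 pages currently
	 * inactive, 6 pages are deactivated below.  If the target is already
	 * met but nothing was freed above, one page is still deactivated so
	 * that every scan makes some forward progress.
	 */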
	while (page_shortage > 0) {
		/*
		 *	Move some more pages from active to inactive.
		 */

		if ((m = vm_page_queue_active.tqh_first) == NULL)
			break;
		vm_page_deactivate(m);
		page_shortage--;
	}

	vm_page_unlock_queues();
}

/*
 * Called with object and page queues locked.
 * A pager failure or error causes the page to be put back on the
 * active queue; otherwise it is left on the inactive queue.
 */
void
vm_pageout_page(m, object)
	vm_page_t m;
	vm_object_t object;
{
	vm_pager_t pager;
	int pageout_status;

	/*
	 * We set the busy bit to cause potential page faults on
	 * this page to block.
	 *
	 * We also set pageout-in-progress to keep the object from
	 * disappearing during pageout.  This guarantees that the
	 * page won't move from the inactive queue.  (However, any
	 * other page on the inactive queue may move!)
	 */
	pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
	m->flags |= PG_BUSY;

	/*
	 * Try to collapse the object before making a pager for it.
	 * We must unlock the page queues first.
	 */
	vm_page_unlock_queues();
	if (object->pager == NULL)
		vm_object_collapse(object);

	object->paging_in_progress++;
	vm_object_unlock(object);

	/*
	 * Do a wakeup here in case the following operations block.
	 */
	thread_wakeup(&cnt.v_free_count);

	/*
	 * If there is no pager for the page, use the default pager.
	 * If there is no place to put the page at the moment,
	 * leave it in the laundry and hope that there will be
	 * paging space later.
	 */
	if ((pager = object->pager) == NULL) {
		pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, object->size,
					  VM_PROT_ALL, (vm_offset_t)0);
		if (pager != NULL)
			vm_object_setpager(object, pager, 0, FALSE);
	}
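	/*
	 * Hand the page to the pager.  If no pager could be allocated,
	 * report VM_PAGER_FAIL so that the page is reactivated below and
	 * remains in the laundry for a later attempt.
	 */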
	pageout_status = pager ? vm_pager_put(pager, m, FALSE) : VM_PAGER_FAIL;
	vm_object_lock(object);
	vm_page_lock_queues();

	switch (pageout_status) {
	case VM_PAGER_OK:
	case VM_PAGER_PEND:
		cnt.v_pgpgout++;
		m->flags &= ~PG_LAUNDRY;
		break;
	case VM_PAGER_BAD:
		/*
		 * Page is outside the range of the object.  Right now we
		 * essentially lose the changes by pretending it worked.
		 *
		 * XXX dubious, what should we do?
		 */
		m->flags &= ~PG_LAUNDRY;
		m->flags |= PG_CLEAN;
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
		break;
	case VM_PAGER_AGAIN:
	{
		extern int lbolt;

		/*
		 * AGAIN on a write is interpreted to mean a temporary
		 * resource shortage, so we pause for a while and try
		 * again.  XXX could get stuck here.
		 */
		vm_page_unlock_queues();
		vm_object_unlock(object);
		(void) tsleep((caddr_t)&lbolt, PZERO|PCATCH, "pageout", 0);
		vm_object_lock(object);
		vm_page_lock_queues();
		break;
	}
	case VM_PAGER_FAIL:
	case VM_PAGER_ERROR:
		/*
		 * If the page couldn't be paged out, reactivate it
		 * so it doesn't clog the inactive list.  (We will try
		 * paging it out again later.)
		 */
		vm_page_activate(m);
		cnt.v_reactivated++;
		break;
	}

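	/*
	 * Clear the hardware reference bit so that any later use of the
	 * page is detected afresh by subsequent scans.
	 */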
	pmap_clear_reference(VM_PAGE_TO_PHYS(m));

	/*
	 * If the operation is still going, leave the page busy
	 * to block all other accesses.  Also, leave the paging
	 * in progress indicator set so that we don't attempt an
	 * object collapse.
	 */
	if (pageout_status != VM_PAGER_PEND) {
		m->flags &= ~PG_BUSY;
		PAGE_WAKEUP(m);
		object->paging_in_progress--;
	}
}

#ifdef CLUSTERED_PAGEOUT
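/*
 * A page qualifies for inclusion in a cluster if it is on the inactive
 * queue, dirty (PG_CLEAN not set), still in the laundry, and has not
 * been referenced since it was deactivated.
 */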
#define PAGEOUTABLE(p) \
	((((p)->flags & (PG_INACTIVE|PG_CLEAN|PG_LAUNDRY)) == \
	  (PG_INACTIVE|PG_LAUNDRY)) && !pmap_is_referenced(VM_PAGE_TO_PHYS(p)))

/*
 * Attempt to page out as many dirty pages contiguous to ``m'' as possible
 * from ``object''.  Using information returned from the pager, we assemble
 * a sorted list of contiguous dirty pages and feed them to the pager in one
 * chunk.  Called with paging queues and object locked.  Also, object must
 * already have a pager.
 */
void
vm_pageout_cluster(m, object)
	vm_page_t m;
	vm_object_t object;
{
	vm_offset_t offset, loff, hoff;
	vm_page_t plist[MAXPOCLUSTER], *plistp, p;
	int postatus, ix, count;

	/*
	 * Determine the range of pages that can be part of a cluster
	 * for this object/offset.  If it is only our single page, just
	 * do it normally.
	 */
	vm_pager_cluster(object->pager, m->offset, &loff, &hoff);
	if (hoff - loff == PAGE_SIZE) {
		vm_pageout_page(m, object);
		return;
	}

	plistp = plist;

	/*
	 * Target page is always part of the cluster.
	 */
	pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
	m->flags |= PG_BUSY;
	plistp[atop(m->offset - loff)] = m;
	count = 1;

	/*
	 * Back up from the given page until we find one not fulfilling
	 * the pageout criteria or we hit the lower bound for the
	 * cluster.  For each page determined to be part of the
	 * cluster, unmap it and busy it out so it won't change.
	 */
	ix = atop(m->offset - loff);
	offset = m->offset;
	while (offset > loff && count < MAXPOCLUSTER-1) {
		p = vm_page_lookup(object, offset - PAGE_SIZE);
		if (p == NULL || !PAGEOUTABLE(p))
			break;
		pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
		p->flags |= PG_BUSY;
		plistp[--ix] = p;
		offset -= PAGE_SIZE;
		count++;
	}
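	/*
	 * Slide the list pointer and the lower bound forward so that
	 * plistp[0] refers to the first page actually gathered (the
	 * backward walk may have stopped short of the pager's low offset).
	 */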
	plistp += atop(offset - loff);
	loff = offset;

	/*
	 * Now do the same moving forward from the target.
	 */
	ix = atop(m->offset - loff) + 1;
	offset = m->offset + PAGE_SIZE;
	while (offset < hoff && count < MAXPOCLUSTER) {
		p = vm_page_lookup(object, offset);
		if (p == NULL || !PAGEOUTABLE(p))
			break;
		pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
		p->flags |= PG_BUSY;
		plistp[ix++] = p;
		offset += PAGE_SIZE;
		count++;
	}
	hoff = offset;
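	/*
	 * hoff is now one page past the last page gathered; the cluster
	 * covers [loff, hoff) and holds exactly `count' pages.
	 */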

	/*
	 * Page out the cluster.
	 * Unlock everything and do a wakeup prior to the pager call
	 * in case it blocks.
	 */
	vm_page_unlock_queues();
	object->paging_in_progress++;
	vm_object_unlock(object);
again:
	thread_wakeup(&cnt.v_free_count);
	postatus = vm_pager_put_pages(object->pager, plistp, count, FALSE);
	/*
	 * XXX rethink this
	 */
	if (postatus == VM_PAGER_AGAIN) {
		extern int lbolt;

		(void) tsleep((caddr_t)&lbolt, PZERO|PCATCH, "pageout", 0);
		goto again;
	} else if (postatus == VM_PAGER_BAD)
		panic("vm_pageout_cluster: VM_PAGER_BAD");
	vm_object_lock(object);
	vm_page_lock_queues();

	/*
	 * Loop through the affected pages, reflecting the outcome of
	 * the operation.
	 */
	for (ix = 0; ix < count; ix++) {
		p = *plistp++;
		switch (postatus) {
		case VM_PAGER_OK:
		case VM_PAGER_PEND:
			cnt.v_pgpgout++;
			p->flags &= ~PG_LAUNDRY;
			break;
		case VM_PAGER_FAIL:
		case VM_PAGER_ERROR:
			/*
			 * Pageout failed; reactivate the target page so it
			 * doesn't clog the inactive list.  Other pages are
			 * left as they are.
			 */
			if (p == m) {
				vm_page_activate(p);
				cnt.v_reactivated++;
			}
			break;
		}
		pmap_clear_reference(VM_PAGE_TO_PHYS(p));
		/*
		 * If the operation is still going, leave the page busy
		 * to block all other accesses.
		 */
		if (postatus != VM_PAGER_PEND) {
			p->flags &= ~PG_BUSY;
			PAGE_WAKEUP(p);
		}
	}
	/*
	 * If the operation is still going, leave the paging in progress
	 * indicator set so that we don't attempt an object collapse.
	 */
	if (postatus != VM_PAGER_PEND)
		object->paging_in_progress--;

}
#endif

/*
 *	vm_pageout is the high level pageout daemon.
 */

void
vm_pageout()
{
	(void) spl0();

	/*
	 *	Initialize some paging parameters.
	 */

	if (cnt.v_free_min == 0) {
		cnt.v_free_min = VM_PAGE_FREE_MIN;
		vm_page_free_min_min /= cnt.v_page_size;
		vm_page_free_min_max /= cnt.v_page_size;
		if (cnt.v_free_min < vm_page_free_min_min)
			cnt.v_free_min = vm_page_free_min_min;
		if (cnt.v_free_min > vm_page_free_min_max)
			cnt.v_free_min = vm_page_free_min_max;
	}

	if (cnt.v_free_target == 0)
		cnt.v_free_target = VM_PAGE_FREE_TARGET;

	if (cnt.v_free_target <= cnt.v_free_min)
		cnt.v_free_target = cnt.v_free_min + 1;

	/* XXX does not really belong here */
	if (vm_page_max_wired == 0)
		vm_page_max_wired = cnt.v_free_count / 3;

	/*
	 *	The pageout daemon is never done, so loop
	 *	forever.
	 */

	simple_lock(&vm_pages_needed_lock);
	while (TRUE) {
		thread_sleep(&vm_pages_needed, &vm_pages_needed_lock, FALSE);
		/*
		 * Compute the inactive target for this scan.
		 * We need to keep a reasonable amount of memory in the
		 * inactive list to better simulate LRU behavior.
		 */
		cnt.v_inactive_target =
			(cnt.v_active_count + cnt.v_inactive_count) / 3;
		if (cnt.v_inactive_target <= cnt.v_free_target)
			cnt.v_inactive_target = cnt.v_free_target + 1;
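		/*
		 * For example, with 900 active and 300 inactive pages the
		 * inactive target is (900 + 300) / 3 = 400; it is raised to
		 * free_target + 1 whenever that is larger.
		 */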

		/*
		 * Only make a scan if we are likely to do something.
		 * Otherwise we might have been awakened by a pager
		 * to clean up async pageouts.
		 */
		if (cnt.v_free_count < cnt.v_free_target ||
		    cnt.v_inactive_count < cnt.v_inactive_target)
			vm_pageout_scan();
		vm_pager_sync();
		simple_lock(&vm_pages_needed_lock);
		thread_wakeup(&cnt.v_free_count);
	}
}