xref: /original-bsd/sys/vm/vm_pageout.c (revision 0997b878)
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vm_pageout.c	8.6 (Berkeley) 01/09/95
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	The proverbial page-out daemon.
 */
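
/*
 * Overview (added commentary):  the daemon sleeps until the rest of the
 * VM system decides that free memory is short.  Each pass of
 * vm_pageout_scan() walks the inactive queue: referenced pages get a
 * second chance and go back to the active queue, clean pages are freed
 * outright, and dirty pages still in the laundry are handed to their
 * object's pager.  If the scan still leaves the inactive queue short,
 * pages are moved from the active queue to refill it.
 */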

#include <sys/param.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

#ifndef VM_PAGE_FREE_MIN
#define VM_PAGE_FREE_MIN	(cnt.v_free_count / 20)
#endif

#ifndef VM_PAGE_FREE_TARGET
#define VM_PAGE_FREE_TARGET	((cnt.v_free_min * 4) / 3)
#endif

int	vm_page_free_min_min = 16 * 1024;
int	vm_page_free_min_max = 256 * 1024;
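
/*
 * Illustrative numbers (added commentary, not from the original file):
 * on a machine with 4KB pages and 2048 free pages (8MB), vm_pageout()
 * below turns vm_page_free_min_min into 4 pages and vm_page_free_min_max
 * into 64 pages, so cnt.v_free_min (2048 / 20 = 102) is clamped down to
 * 64 pages and cnt.v_free_target becomes (64 * 4) / 3 = 85 pages.
 */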

int	vm_pages_needed;	/* Event on which pageout daemon sleeps */

int	vm_page_max_wired = 0;	/* XXX max # of wired pages system-wide */

#ifdef CLUSTERED_PAGEOUT
#define MAXPOCLUSTER		(MAXPHYS/NBPG)	/* XXX */
int doclustered_pageout = 1;
#endif
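
/*
 * Added note:  MAXPOCLUSTER bounds the number of pages handed to the
 * pager in one clustered write.  With typical 4.4BSD values of
 * MAXPHYS = 64KB and NBPG = 4KB (values assumed here for illustration),
 * that is a cluster of at most 16 pages.
 */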

/*
 *	vm_pageout_scan does the dirty work for the pageout daemon.
 */
void
vm_pageout_scan()
{
	register vm_page_t	m, next;
	register int		page_shortage;
	register int		s;
	register int		pages_freed;
	int			free;
	vm_object_t		object;

	/*
	 *	Only continue when we want more pages to be "free"
	 */

	cnt.v_rev++;

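	/*
	 * Added note:  the free count is sampled under splimp() and the
	 * free-queue simple lock, presumably because the free list can also
	 * be manipulated from interrupt level; only a consistent snapshot
	 * is needed here.
	 */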
	s = splimp();
	simple_lock(&vm_page_queue_free_lock);
	free = cnt.v_free_count;
	simple_unlock(&vm_page_queue_free_lock);
	splx(s);

	if (free < cnt.v_free_target) {
		swapout_threads();

		/*
		 *	Be sure the pmap system is updated so
		 *	we can scan the inactive queue.
		 */

		pmap_update();
	}

	/*
	 *	Acquire the resident page system lock,
	 *	as we may be changing what's resident quite a bit.
	 */
	vm_page_lock_queues();

	/*
	 *	Start scanning the inactive queue for pages we can free.
	 *	We keep scanning until we have enough free pages or
	 *	we have scanned through the entire queue.  If we
	 *	encounter dirty pages, we start cleaning them.
	 */

	pages_freed = 0;
	for (m = vm_page_queue_inactive.tqh_first; m != NULL; m = next) {
		s = splimp();
		simple_lock(&vm_page_queue_free_lock);
		free = cnt.v_free_count;
		simple_unlock(&vm_page_queue_free_lock);
		splx(s);
		if (free >= cnt.v_free_target)
			break;

		cnt.v_scan++;
		next = m->pageq.tqe_next;

		/*
		 * If the page has been referenced, move it back to the
		 * active queue.
		 */
		if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
			vm_page_activate(m);
			cnt.v_reactivated++;
			continue;
		}

		/*
		 * If the page is clean, free it up.
		 */
		if (m->flags & PG_CLEAN) {
			object = m->object;
			if (vm_object_lock_try(object)) {
				pmap_page_protect(VM_PAGE_TO_PHYS(m),
						  VM_PROT_NONE);
				vm_page_free(m);
				pages_freed++;
				cnt.v_dfree++;
				vm_object_unlock(object);
			}
			continue;
		}

		/*
		 * If the page is dirty but already being washed, skip it.
		 */
		if ((m->flags & PG_LAUNDRY) == 0)
			continue;

		/*
		 * Otherwise the page is dirty and still in the laundry,
		 * so we start the cleaning operation and remove it from
		 * the laundry.
		 */
		object = m->object;
		if (!vm_object_lock_try(object))
			continue;
		cnt.v_pageouts++;
#ifdef CLUSTERED_PAGEOUT
		if (object->pager &&
		    vm_pager_cancluster(object->pager, PG_CLUSTERPUT))
			vm_pageout_cluster(m, object);
		else
#endif
		vm_pageout_page(m, object);
		thread_wakeup(object);
		vm_object_unlock(object);
		/*
		 * Former next page may no longer even be on the inactive
		 * queue (due to potential blocking in the pager with the
		 * queues unlocked).  If it isn't, we just start over.
		 */
		if (next && (next->flags & PG_INACTIVE) == 0)
			next = vm_page_queue_inactive.tqh_first;
	}

	/*
	 *	Compute the page shortage.  If we are still very low on memory,
	 *	make sure that we move at least a minimal number of pages from
	 *	the active queue to the inactive queue.
	 */

	page_shortage = cnt.v_inactive_target - cnt.v_inactive_count;
	if (page_shortage <= 0 && pages_freed == 0)
		page_shortage = 1;

	while (page_shortage > 0) {
		/*
		 *	Move some more pages from active to inactive.
		 */

		if ((m = vm_page_queue_active.tqh_first) == NULL)
			break;
		vm_page_deactivate(m);
		page_shortage--;
	}

	vm_page_unlock_queues();
}

/*
 * Called with object and page queues locked.
 * On a pager error the page is put back on the active queue;
 * otherwise it is left on the inactive queue.
 */
void
vm_pageout_page(m, object)
	vm_page_t m;
	vm_object_t object;
{
	vm_pager_t pager;
	int pageout_status;

	/*
	 * We set the busy bit to cause potential page faults on
	 * this page to block.
	 *
	 * We also set pageout-in-progress to keep the object from
	 * disappearing during pageout.  This guarantees that the
	 * page won't move from the inactive queue.  (However, any
	 * other page on the inactive queue may move!)
	 */
	pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
	m->flags |= PG_BUSY;

	/*
	 * Try to collapse the object before making a pager for it.
	 * We must unlock the page queues first.
	 */
	vm_page_unlock_queues();
	if (object->pager == NULL)
		vm_object_collapse(object);

	object->paging_in_progress++;
	vm_object_unlock(object);

	/*
	 * Do a wakeup here in case the following operations block.
	 */
	thread_wakeup(&cnt.v_free_count);

	/*
	 * If there is no pager for the page, use the default pager.
	 * If there is no place to put the page at the moment,
	 * leave it in the laundry and hope that there will be
	 * paging space later.
	 */
	if ((pager = object->pager) == NULL) {
		pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, object->size,
					  VM_PROT_ALL, (vm_offset_t)0);
		if (pager != NULL)
			vm_object_setpager(object, pager, 0, FALSE);
	}
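	/*
	 * Added note:  the FALSE argument below requests an asynchronous
	 * write where the pager supports it, which is why VM_PAGER_PEND
	 * is a possible result.
	 */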
	pageout_status = pager ? vm_pager_put(pager, m, FALSE) : VM_PAGER_FAIL;
	vm_object_lock(object);
	vm_page_lock_queues();

	switch (pageout_status) {
	case VM_PAGER_OK:
	case VM_PAGER_PEND:
		cnt.v_pgpgout++;
		m->flags &= ~PG_LAUNDRY;
		break;
	case VM_PAGER_BAD:
		/*
		 * Page outside of range of object.  Right now we
		 * essentially lose the changes by pretending it
		 * worked.
		 *
		 * XXX dubious, what should we do?
		 */
		m->flags &= ~PG_LAUNDRY;
		m->flags |= PG_CLEAN;
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
		break;
	case VM_PAGER_AGAIN:
	{
		extern int lbolt;

		/*
		 * AGAIN on a write is interpreted to mean a resource
		 * shortage, so we pause for a while and try again.
		 * XXX could get stuck here.
		 */
		(void) tsleep((caddr_t)&lbolt, PZERO|PCATCH, "pageout", 0);
		break;
	}
	case VM_PAGER_FAIL:
	case VM_PAGER_ERROR:
		/*
		 * If the page couldn't be paged out, reactivate it so it
		 * doesn't clog the inactive list.  (We will try paging it
		 * out again later.)
		 */
		vm_page_activate(m);
		cnt.v_reactivated++;
		break;
	}

	pmap_clear_reference(VM_PAGE_TO_PHYS(m));

	/*
	 * If the operation is still going, leave the page busy
	 * to block all other accesses.  Also, leave the paging
	 * in progress indicator set so that we don't attempt an
	 * object collapse.
	 */
	if (pageout_status != VM_PAGER_PEND) {
		m->flags &= ~PG_BUSY;
		PAGE_WAKEUP(m);
		object->paging_in_progress--;
	}
}
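
/*
 * Added note:  when the pager returns VM_PAGER_PEND the write is still in
 * flight, so the page is deliberately left busy and paging_in_progress
 * stays elevated; they are expected to be cleared when the asynchronous
 * pageout completes (the daemon's vm_pager_sync() call below gives pagers
 * a chance to finish such work).
 */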

#ifdef CLUSTERED_PAGEOUT
#define PAGEOUTABLE(p) \
	((((p)->flags & (PG_INACTIVE|PG_CLEAN|PG_LAUNDRY)) == \
	  (PG_INACTIVE|PG_LAUNDRY)) && !pmap_is_referenced(VM_PAGE_TO_PHYS(p)))
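
/*
 * Added note:  a page qualifies for clustered pageout only if it is still
 * on the inactive queue, dirty (PG_CLEAN clear), still in the laundry,
 * and not currently marked referenced in the pmap.
 */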

/*
 * Attempt to pageout as many contiguous (to ``m'') dirty pages as possible
 * from ``object''.  Using information returned from the pager, we assemble
 * a sorted list of contiguous dirty pages and feed them to the pager in one
 * chunk.  Called with paging queues and object locked.  Also, object must
 * already have a pager.
 */
void
vm_pageout_cluster(m, object)
	vm_page_t m;
	vm_object_t object;
{
	vm_offset_t offset, loff, hoff;
	vm_page_t plist[MAXPOCLUSTER], *plistp, p;
	int postatus, ix, count;

	/*
	 * Determine the range of pages that can be part of a cluster
	 * for this object/offset.  If it is only our single page, just
	 * do it normally.
	 */
	vm_pager_cluster(object->pager, m->offset, &loff, &hoff);
	if (hoff - loff == PAGE_SIZE) {
		vm_pageout_page(m, object);
		return;
	}

	plistp = plist;

	/*
	 * Target page is always part of the cluster.
	 */
	pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
	m->flags |= PG_BUSY;
	plistp[atop(m->offset - loff)] = m;
	count = 1;

	/*
	 * Back up from the given page until we find one not fulfilling
	 * the pageout criteria or we hit the lower bound for the
	 * cluster.  For each page determined to be part of the
	 * cluster, unmap it and busy it out so it won't change.
	 */
	ix = atop(m->offset - loff);
	offset = m->offset;
	while (offset > loff && count < MAXPOCLUSTER-1) {
		p = vm_page_lookup(object, offset - PAGE_SIZE);
		if (p == NULL || !PAGEOUTABLE(p))
			break;
		pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
		p->flags |= PG_BUSY;
		plistp[--ix] = p;
		offset -= PAGE_SIZE;
		count++;
	}
	plistp += atop(offset - loff);
	loff = offset;

	/*
	 * Now do the same moving forward from the target.
	 */
	ix = atop(m->offset - loff) + 1;
	offset = m->offset + PAGE_SIZE;
	while (offset < hoff && count < MAXPOCLUSTER) {
		p = vm_page_lookup(object, offset);
		if (p == NULL || !PAGEOUTABLE(p))
			break;
		pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
		p->flags |= PG_BUSY;
		plistp[ix++] = p;
		offset += PAGE_SIZE;
		count++;
	}
	hoff = offset;

	/*
	 * Pageout the page.
	 * Unlock everything and do a wakeup prior to the pager call
	 * in case it blocks.
	 */
	vm_page_unlock_queues();
	object->paging_in_progress++;
	vm_object_unlock(object);
again:
	thread_wakeup(&cnt.v_free_count);
	postatus = vm_pager_put_pages(object->pager, plistp, count, FALSE);
	/*
	 * XXX rethink this
	 */
	if (postatus == VM_PAGER_AGAIN) {
		extern int lbolt;

		(void) tsleep((caddr_t)&lbolt, PZERO|PCATCH, "pageout", 0);
		goto again;
	} else if (postatus == VM_PAGER_BAD)
		panic("vm_pageout_cluster: VM_PAGER_BAD");
	vm_object_lock(object);
	vm_page_lock_queues();

	/*
	 * Loop through the affected pages, reflecting the outcome of
	 * the operation.
	 */
	for (ix = 0; ix < count; ix++) {
		p = *plistp++;
		switch (postatus) {
		case VM_PAGER_OK:
		case VM_PAGER_PEND:
			cnt.v_pgpgout++;
			p->flags &= ~PG_LAUNDRY;
			break;
		case VM_PAGER_FAIL:
		case VM_PAGER_ERROR:
			/*
			 * Pageout failed, reactivate the target page so it
			 * doesn't clog the inactive list.  Other pages are
			 * left as they are.
			 */
			if (p == m) {
				vm_page_activate(p);
				cnt.v_reactivated++;
			}
			break;
		}
		pmap_clear_reference(VM_PAGE_TO_PHYS(p));
		/*
		 * If the operation is still going, leave the page busy
		 * to block all other accesses.
		 */
		if (postatus != VM_PAGER_PEND) {
			p->flags &= ~PG_BUSY;
			PAGE_WAKEUP(p);

		}
	}
	/*
	 * If the operation is still going, leave the paging in progress
	 * indicator set so that we don't attempt an object collapse.
	 */
	if (postatus != VM_PAGER_PEND)
		object->paging_in_progress--;

}
#endif
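
/*
 * Added example (not from the original file):  suppose the pager reports a
 * cluster window of eight pages, loff .. loff + 8 * PAGE_SIZE, and the
 * target page m sits in slot 4 of that window.  The backward scan might
 * add the dirty, unreferenced pages in slots 3 and 2, the forward scan the
 * ones in slots 5 and 6, after which plistp points at slot 2 and the
 * five-page run is written with a single vm_pager_put_pages() call.
 */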

/*
 *	vm_pageout is the high level pageout daemon.
 */

void
vm_pageout()
{
	(void) spl0();

	/*
	 *	Initialize some paging parameters.
	 */

	if (cnt.v_free_min == 0) {
		cnt.v_free_min = VM_PAGE_FREE_MIN;
		vm_page_free_min_min /= cnt.v_page_size;
		vm_page_free_min_max /= cnt.v_page_size;
		if (cnt.v_free_min < vm_page_free_min_min)
			cnt.v_free_min = vm_page_free_min_min;
		if (cnt.v_free_min > vm_page_free_min_max)
			cnt.v_free_min = vm_page_free_min_max;
	}

	if (cnt.v_free_target == 0)
		cnt.v_free_target = VM_PAGE_FREE_TARGET;

	if (cnt.v_free_target <= cnt.v_free_min)
		cnt.v_free_target = cnt.v_free_min + 1;

	/* XXX does not really belong here */
	if (vm_page_max_wired == 0)
		vm_page_max_wired = cnt.v_free_count / 3;

	/*
	 *	The pageout daemon is never done, so loop
	 *	forever.
	 */

	simple_lock(&vm_pages_needed_lock);
	while (TRUE) {
		thread_sleep(&vm_pages_needed, &vm_pages_needed_lock, FALSE);
		/*
		 * Compute the inactive target for this scan.
		 * We need to keep a reasonable amount of memory in the
		 * inactive list to better simulate LRU behavior.
		 */
		cnt.v_inactive_target =
			(cnt.v_active_count + cnt.v_inactive_count) / 3;
		if (cnt.v_inactive_target <= cnt.v_free_target)
			cnt.v_inactive_target = cnt.v_free_target + 1;
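
		/*
		 * Added example (illustrative):  with 600 active and 300
		 * inactive pages, cnt.v_inactive_target becomes
		 * (600 + 300) / 3 = 300, i.e. roughly a third of the
		 * pageable pages are kept on the inactive queue.
		 */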

		/*
		 * Only make a scan if we are likely to do something.
		 * Otherwise we might have been awakened by a pager
		 * to clean up async pageouts.
		 */
		if (cnt.v_free_count < cnt.v_free_target ||
		    cnt.v_inactive_count < cnt.v_inactive_target)
			vm_pageout_scan();
		vm_pager_sync();
		simple_lock(&vm_pages_needed_lock);
		thread_wakeup(&cnt.v_free_count);
	}
}
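
/*
 * Illustrative sketch (added, not part of the original file):  the daemon
 * above sleeps on &vm_pages_needed and is normally awakened by the page
 * allocator when free memory runs low, roughly along the lines below.
 * The function name and the exact test are hypothetical; the real wakeup
 * logic lives in the page-allocation path and may differ.
 */
#ifdef notdef
static void
example_wake_pagedaemon()
{
	/* Ask the daemon for help once free pages drop below the target. */
	if (cnt.v_free_count < cnt.v_free_target)
		thread_wakeup(&vm_pages_needed);
}
#endif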