xref: /original-bsd/sys/vm/vm_object.c (revision 3b6250d9)
1 /*
2  * Copyright (c) 1991 Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * %sccs.include.redist.c%
9  *
10  *	@(#)vm_object.c	7.11 (Berkeley) 05/04/92
11  *
12  *
13  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
14  * All rights reserved.
15  *
16  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
17  *
18  * Permission to use, copy, modify and distribute this software and
19  * its documentation is hereby granted, provided that both the copyright
20  * notice and this permission notice appear in all copies of the
21  * software, derivative works or modified versions, and any portions
22  * thereof, and that both notices appear in supporting documentation.
23  *
24  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
25  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
26  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
27  *
28  * Carnegie Mellon requests users of this software to return to
29  *
30  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
31  *  School of Computer Science
32  *  Carnegie Mellon University
33  *  Pittsburgh PA 15213-3890
34  *
35  * any improvements or extensions that they make and grant Carnegie the
36  * rights to redistribute these changes.
37  */
38 
39 /*
40  *	Virtual memory object module.
41  */
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/malloc.h>
46 
47 #include <vm/vm.h>
48 #include <vm/vm_page.h>
49 
50 /*
51  *	Virtual memory objects maintain the actual data
52  *	associated with allocated virtual memory.  A given
53  *	page of memory exists within exactly one object.
54  *
55  *	An object is only deallocated when all "references"
56  *	are given up.  Only one "reference" to a given
57  *	region of an object should be writeable.
58  *
59  *	Associated with each object is a list of all resident
60  *	memory pages belonging to that object; this list is
61  *	maintained by the "vm_page" module, and locked by the object's
62  *	lock.
63  *
64  *	Each object also records a "pager" routine which is
65  *	used to retrieve (and store) pages to the proper backing
66  *	storage.  In addition, objects may be backed by other
67  *	objects from which they were virtual-copied.
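 *	(Such a chain of shadow objects is created by vm_object_shadow()
 *	and folded back together, when possible, by vm_object_collapse()
 *	below.)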
68  *
69  *	The only items within the object structure which are
70  *	modified after time of creation are:
71  *		reference count		locked by object's lock
72  *		pager routine		locked by object's lock
73  *
74  */
75 
76 struct vm_object	kernel_object_store;
77 struct vm_object	kmem_object_store;
78 
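/*
 *	157 is prime; presumably chosen so that the simple pointer-modulus
 *	hash in vm_object_hash() below spreads entries across the buckets.
 */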
79 #define	VM_OBJECT_HASH_COUNT	157
80 
81 int		vm_cache_max = 100;	/* can patch if necessary */
82 queue_head_t	vm_object_hashtable[VM_OBJECT_HASH_COUNT];
83 
84 long	object_collapses = 0;
85 long	object_bypasses  = 0;
86 
87 static void _vm_object_allocate __P((vm_size_t, vm_object_t));
88 
89 /*
90  *	vm_object_init:
91  *
92  *	Initialize the VM objects module.
93  */
94 void vm_object_init(size)
95 	vm_size_t	size;
96 {
97 	register int	i;
98 
99 	queue_init(&vm_object_cached_list);
100 	queue_init(&vm_object_list);
101 	vm_object_count = 0;
102 	simple_lock_init(&vm_cache_lock);
103 	simple_lock_init(&vm_object_list_lock);
104 
105 	for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
106 		queue_init(&vm_object_hashtable[i]);
107 
108 	kernel_object = &kernel_object_store;
109 	_vm_object_allocate(size, kernel_object);
110 
111 	kmem_object = &kmem_object_store;
112 	_vm_object_allocate(VM_KMEM_SIZE + VM_MBUF_SIZE, kmem_object);
113 }
114 
115 /*
116  *	vm_object_allocate:
117  *
118  *	Returns a new object with the given size.
119  */
120 
121 vm_object_t vm_object_allocate(size)
122 	vm_size_t	size;
123 {
124 	register vm_object_t	result;
125 
126 	result = (vm_object_t)
127 		malloc((u_long)sizeof *result, M_VMOBJ, M_WAITOK);
128 
129 	_vm_object_allocate(size, result);
130 
131 	return(result);
132 }
133 
134 static void
135 _vm_object_allocate(size, object)
136 	vm_size_t		size;
137 	register vm_object_t	object;
138 {
139 	queue_init(&object->memq);
140 	vm_object_lock_init(object);
141 	object->ref_count = 1;
142 	object->resident_page_count = 0;
143 	object->size = size;
144 	object->flags = OBJ_INTERNAL;	/* vm_allocate_with_pager will reset */
145 	object->paging_in_progress = 0;
146 	object->copy = NULL;
147 
148 	/*
149 	 *	Object starts out read-write, with no pager.
150 	 */
151 
152 	object->pager = NULL;
153 	object->paging_offset = 0;
154 	object->shadow = NULL;
155 	object->shadow_offset = (vm_offset_t) 0;
156 
157 	simple_lock(&vm_object_list_lock);
158 	queue_enter(&vm_object_list, object, vm_object_t, object_list);
159 	vm_object_count++;
160 	simple_unlock(&vm_object_list_lock);
161 }
162 
163 /*
164  *	vm_object_reference:
165  *
166  *	Gets another reference to the given object.
167  */
168 void vm_object_reference(object)
169 	register vm_object_t	object;
170 {
171 	if (object == NULL)
172 		return;
173 
174 	vm_object_lock(object);
175 	object->ref_count++;
176 	vm_object_unlock(object);
177 }
178 
179 /*
180  *	vm_object_deallocate:
181  *
182  *	Release a reference to the specified object,
183  *	gained either through a vm_object_allocate
184  *	or a vm_object_reference call.  When all references
185  *	are gone, storage associated with this object
186  *	may be relinquished.
187  *
188  *	No object may be locked.
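 *
 *	When an object is destroyed, the reference it held on its shadow
 *	must also be released; the loop below does this iteratively,
 *	walking down the shadow chain rather than recursing.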
189  */
190 void vm_object_deallocate(object)
191 	register vm_object_t	object;
192 {
193 	vm_object_t	temp;
194 
195 	while (object != NULL) {
196 
197 		/*
198 		 *	The cache holds a reference (uncounted) to
199 		 *	the object; we must lock it before removing
200 		 *	the object.
201 		 */
202 
203 		vm_object_cache_lock();
204 
205 		/*
206 		 *	Lose the reference
207 		 */
208 		vm_object_lock(object);
209 		if (--(object->ref_count) != 0) {
210 
211 			/*
212 			 *	If there are still references, then
213 			 *	we are done.
214 			 */
215 			vm_object_unlock(object);
216 			vm_object_cache_unlock();
217 			return;
218 		}
219 
220 		/*
221 		 *	See if this object can persist.  If so, enter
222 		 *	it in the cache, then deactivate all of its
223 		 *	pages.
224 		 */
225 
226 		if (object->flags & OBJ_CANPERSIST) {
227 
228 			queue_enter(&vm_object_cached_list, object,
229 				vm_object_t, cached_list);
230 			vm_object_cached++;
231 			vm_object_cache_unlock();
232 
233 			vm_object_deactivate_pages(object);
234 			vm_object_unlock(object);
235 
236 			vm_object_cache_trim();
237 			return;
238 		}
239 
240 		/*
241 		 *	Make sure no one can look us up now.
242 		 */
243 		vm_object_remove(object->pager);
244 		vm_object_cache_unlock();
245 
246 		temp = object->shadow;
247 		vm_object_terminate(object);
248 			/* unlocks and deallocates object */
249 		object = temp;
250 	}
251 }
252 
253 
254 /*
255  *	vm_object_terminate actually destroys the specified object, freeing
256  *	up all previously used resources.
257  *
258  *	The object must be locked.
259  */
260 void vm_object_terminate(object)
261 	register vm_object_t	object;
262 {
263 	register vm_page_t	p;
264 	vm_object_t		shadow_object;
265 
266 	/*
267 	 *	Detach the object from its shadow if we are the shadow's
268 	 *	copy.
269 	 */
270 	if ((shadow_object = object->shadow) != NULL) {
271 		vm_object_lock(shadow_object);
272 		if (shadow_object->copy == object)
273 			shadow_object->copy = NULL;
274 #if 0
275 		else if (shadow_object->copy != NULL)
276 			panic("vm_object_terminate: copy/shadow inconsistency");
277 #endif
278 		vm_object_unlock(shadow_object);
279 	}
280 
281 	/*
282 	 * Wait until the pageout daemon is through with the object.
283 	 */
284 	while (object->paging_in_progress) {
285 		vm_object_sleep((int)object, object, FALSE);
286 		vm_object_lock(object);
287 	}
288 
289 	/*
290 	 * If not an internal object clean all the pages, removing them
291 	 * from paging queues as we go.
292 	 */
293 	if ((object->flags & OBJ_INTERNAL) == 0) {
294 		vm_object_page_clean(object, 0, 0, TRUE);
295 		vm_object_unlock(object);
296 	}
297 
298 	/*
299 	 * Now free the pages.
300 	 * For internal objects, this also removes them from paging queues.
301 	 */
302 	while (!queue_empty(&object->memq)) {
303 		p = (vm_page_t) queue_first(&object->memq);
304 		VM_PAGE_CHECK(p);
305 		vm_page_lock_queues();
306 		vm_page_free(p);
307 		vm_page_unlock_queues();
308 	}
309 	if ((object->flags & OBJ_INTERNAL) == 0)
310 		vm_object_unlock(object);
311 
312 	/*
313 	 * Let the pager know object is dead.
314 	 */
315 	if (object->pager != NULL)
316 		vm_pager_deallocate(object->pager);
317 
318 	simple_lock(&vm_object_list_lock);
319 	queue_remove(&vm_object_list, object, vm_object_t, object_list);
320 	vm_object_count--;
321 	simple_unlock(&vm_object_list_lock);
322 
323 	/*
324 	 * Free the space for the object.
325 	 */
326 	free((caddr_t)object, M_VMOBJ);
327 }
328 
329 /*
330  *	vm_object_page_clean
331  *
332  *	Clean all dirty pages in the specified range of object.
333  *	If de_queue is TRUE, pages are removed from any paging queue
334  *	they were on, otherwise they are left on whatever queue they
335  *	were on before the cleaning operation began.
336  *
337  *	Odd semantics: if start == end, we clean everything.
338  *
339  *	The object must be locked.
340  */
341 void
342 vm_object_page_clean(object, start, end, de_queue)
343 	register vm_object_t	object;
344 	register vm_offset_t	start;
345 	register vm_offset_t	end;
346 	boolean_t		de_queue;
347 {
348 	register vm_page_t	p;
349 	int onqueue;
350 
351 	if (object->pager == NULL)
352 		return;
353 
354 again:
355 	/*
356 	 * Wait until the pageout daemon is through with the object.
357 	 */
358 	while (object->paging_in_progress) {
359 		vm_object_sleep((int)object, object, FALSE);
360 		vm_object_lock(object);
361 	}
362 	/*
363 	 * Loop through the object page list cleaning as necessary.
364 	 */
365 	p = (vm_page_t) queue_first(&object->memq);
366 	while (!queue_end(&object->memq, (queue_entry_t) p)) {
367 		if (start == end ||
368 		    (p->offset >= start && p->offset < end)) {
369 			if (p->clean && pmap_is_modified(VM_PAGE_TO_PHYS(p)))
370 				p->clean = FALSE;
371 			/*
372 			 * Remove the page from any paging queue.
373 			 * This needs to be done if either we have been
374 			 * explicitly asked to do so or it is about to
375 			 * be cleaned (see comment below).
376 			 */
377 			if (de_queue || !p->clean) {
378 				vm_page_lock_queues();
379 				if (p->active) {
380 					queue_remove(&vm_page_queue_active,
381 						     p, vm_page_t, pageq);
382 					p->active = FALSE;
383 					cnt.v_active_count--;
384 					onqueue = 1;
385 				} else if (p->inactive) {
386 					queue_remove(&vm_page_queue_inactive,
387 						     p, vm_page_t, pageq);
388 					p->inactive = FALSE;
389 					cnt.v_inactive_count--;
390 					onqueue = -1;
391 				} else
392 					onqueue = 0;
393 				vm_page_unlock_queues();
394 			}
395 			/*
396 			 * To ensure the state of the page doesn't change
397 			 * during the clean operation we do two things.
398 			 * First we set the busy bit and invalidate all
399 			 * mappings to ensure that thread accesses to the
400 			 * page block (in vm_fault).  Second, we remove
401 			 * the page from any paging queue to foil the
402 			 * pageout daemon (vm_pageout_scan).
403 			 */
404 			pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
405 			if (!p->clean) {
406 				p->busy = TRUE;
407 				object->paging_in_progress++;
408 				vm_object_unlock(object);
409 				(void) vm_pager_put(object->pager, p, TRUE);
410 				vm_object_lock(object);
411 				object->paging_in_progress--;
412 				if (!de_queue && onqueue) {
413 					vm_page_lock_queues();
414 					if (onqueue > 0)
415 						vm_page_activate(p);
416 					else
417 						vm_page_deactivate(p);
418 					vm_page_unlock_queues();
419 				}
420 				p->busy = FALSE;
421 				PAGE_WAKEUP(p);
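				/*
				 * The object was unlocked around the
				 * vm_pager_put() call above, so the page
				 * list may have changed; rescan it from
				 * the beginning.
				 */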
422 				goto again;
423 			}
424 		}
425 		p = (vm_page_t) queue_next(&p->listq);
426 	}
427 }
428 
429 /*
430  *	vm_object_deactivate_pages
431  *
432  *	Deactivate all pages in the specified object.  (Keep its pages
433  *	in memory even though it is no longer referenced.)
434  *
435  *	The object must be locked.
436  */
437 void
438 vm_object_deactivate_pages(object)
439 	register vm_object_t	object;
440 {
441 	register vm_page_t	p, next;
442 
443 	p = (vm_page_t) queue_first(&object->memq);
444 	while (!queue_end(&object->memq, (queue_entry_t) p)) {
445 		next = (vm_page_t) queue_next(&p->listq);
446 		vm_page_lock_queues();
447 		vm_page_deactivate(p);
448 		vm_page_unlock_queues();
449 		p = next;
450 	}
451 }
452 
453 /*
454  *	Trim the object cache to size.
455  */
456 void
457 vm_object_cache_trim()
458 {
459 	register vm_object_t	object;
460 
461 	vm_object_cache_lock();
462 	while (vm_object_cached > vm_cache_max) {
463 		object = (vm_object_t) queue_first(&vm_object_cached_list);
464 		vm_object_cache_unlock();
465 
466 		if (object != vm_object_lookup(object->pager))
467 			panic("vm_object_cache_trim: I'm sooo confused.");
468 
469 		pager_cache(object, FALSE);
470 
471 		vm_object_cache_lock();
472 	}
473 	vm_object_cache_unlock();
474 }
475 
476 
477 /*
478  *	vm_object_shutdown()
479  *
480  *	Shut down the object system.  Unfortunately, while we
481  *	may be trying to do this, init is happily waiting for
482  *	processes to exit, and therefore will be causing some objects
483  *	to be deallocated.  To handle this, we gain a fake reference
484  *	to all objects we release paging areas for.  This will prevent
485  *	a duplicate deallocation.  This routine is probably full of
486  *	race conditions!
487  */
488 
489 void vm_object_shutdown()
490 {
491 	register vm_object_t	object;
492 
493 	/*
494 	 *	Clean up the object cache *before* we screw up the reference
495 	 *	counts on all of the objects.
496 	 */
497 
498 	vm_object_cache_clear();
499 
500 	printf("free paging spaces: ");
501 
502 	/*
503 	 *	First we gain a reference to each object so that
504 	 *	no one else will deallocate them.
505 	 */
506 
507 	simple_lock(&vm_object_list_lock);
508 	object = (vm_object_t) queue_first(&vm_object_list);
509 	while (!queue_end(&vm_object_list, (queue_entry_t) object)) {
510 		vm_object_reference(object);
511 		object = (vm_object_t) queue_next(&object->object_list);
512 	}
513 	simple_unlock(&vm_object_list_lock);
514 
515 	/*
516 	 *	Now we deallocate all the paging areas.  We don't need
517 	 *	to lock anything because we've reduced to a single
518 	 *	processor while shutting down.	This also assumes that
519 	 *	no new objects are being created.
520 	 */
521 
522 	object = (vm_object_t) queue_first(&vm_object_list);
523 	while (!queue_end(&vm_object_list, (queue_entry_t) object)) {
524 		if (object->pager != NULL)
525 			vm_pager_deallocate(object->pager);
526 		object = (vm_object_t) queue_next(&object->object_list);
527 		printf(".");
528 	}
529 	printf("done.\n");
530 }
531 
532 /*
533  *	vm_object_pmap_copy:
534  *
535  *	Makes all physical pages in the specified
536  *	object range copy-on-write.  No writeable
537  *	references to these pages should remain.
538  *
539  *	The object must *not* be locked.
540  */
541 void vm_object_pmap_copy(object, start, end)
542 	register vm_object_t	object;
543 	register vm_offset_t	start;
544 	register vm_offset_t	end;
545 {
546 	register vm_page_t	p;
547 
548 	if (object == NULL)
549 		return;
550 
551 	vm_object_lock(object);
552 	p = (vm_page_t) queue_first(&object->memq);
553 	while (!queue_end(&object->memq, (queue_entry_t) p)) {
554 		if ((start <= p->offset) && (p->offset < end)) {
555 			pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_READ);
556 			p->copy_on_write = TRUE;
557 		}
558 		p = (vm_page_t) queue_next(&p->listq);
559 	}
560 	vm_object_unlock(object);
561 }
562 
563 /*
564  *	vm_object_pmap_remove:
565  *
566  *	Removes all physical pages in the specified
567  *	object range from all physical maps.
568  *
569  *	The object must *not* be locked.
570  */
571 void vm_object_pmap_remove(object, start, end)
572 	register vm_object_t	object;
573 	register vm_offset_t	start;
574 	register vm_offset_t	end;
575 {
576 	register vm_page_t	p;
577 
578 	if (object == NULL)
579 		return;
580 
581 	vm_object_lock(object);
582 	p = (vm_page_t) queue_first(&object->memq);
583 	while (!queue_end(&object->memq, (queue_entry_t) p)) {
584 		if ((start <= p->offset) && (p->offset < end))
585 			pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
586 		p = (vm_page_t) queue_next(&p->listq);
587 	}
588 	vm_object_unlock(object);
589 }
590 
591 /*
592  *	vm_object_copy:
593  *
594  *	Create a new object which is a copy of an existing
595  *	object, and mark all of the pages in the existing
596  *	object 'copy-on-write'.  The new object has one reference.
597  *	The new object and offset are returned in *dst_object and
598  *	*dst_offset.  The copy may be deferred if the object is backed
599  *	only by the default pager (or no pager at all); in that case
600  *	src_needs_copy is set and the caller must shadow before writing.
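 *
 *	In outline:
 *	  1. If the object is internal (no pager, or the default pager
 *	     only), it is simply shared: *src_needs_copy is set and the
 *	     caller must interpose a shadow before writing.
 *	  2. If an existing copy-object is still empty, it is reused.
 *	  3. Otherwise a new copy-object shadowing the entire source
 *	     object is created and the affected pages are marked
 *	     copy-on-write.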
601  */
602 void vm_object_copy(src_object, src_offset, size,
603 		    dst_object, dst_offset, src_needs_copy)
604 	register vm_object_t	src_object;
605 	vm_offset_t		src_offset;
606 	vm_size_t		size;
607 	vm_object_t		*dst_object;	/* OUT */
608 	vm_offset_t		*dst_offset;	/* OUT */
609 	boolean_t		*src_needs_copy;	/* OUT */
610 {
611 	register vm_object_t	new_copy;
612 	register vm_object_t	old_copy;
613 	vm_offset_t		new_start, new_end;
614 
615 	register vm_page_t	p;
616 
617 	if (src_object == NULL) {
618 		/*
619 		 *	Nothing to copy
620 		 */
621 		*dst_object = NULL;
622 		*dst_offset = 0;
623 		*src_needs_copy = FALSE;
624 		return;
625 	}
626 
627 	/*
628 	 *	If the object's pager is null_pager or the
629 	 *	default pager, we don't have to make a copy
630 	 *	of it.  Instead, we set the needs copy flag and
631 	 *	make a shadow later.
632 	 */
633 
634 	vm_object_lock(src_object);
635 	if (src_object->pager == NULL ||
636 	    (src_object->flags & OBJ_INTERNAL)) {
637 
638 		/*
639 		 *	Make another reference to the object
640 		 */
641 		src_object->ref_count++;
642 
643 		/*
644 		 *	Mark all of the pages copy-on-write.
645 		 */
646 		for (p = (vm_page_t) queue_first(&src_object->memq);
647 		     !queue_end(&src_object->memq, (queue_entry_t)p);
648 		     p = (vm_page_t) queue_next(&p->listq)) {
649 			if (src_offset <= p->offset &&
650 			    p->offset < src_offset + size)
651 				p->copy_on_write = TRUE;
652 		}
653 		vm_object_unlock(src_object);
654 
655 		*dst_object = src_object;
656 		*dst_offset = src_offset;
657 
658 		/*
659 		 *	Must make a shadow when write is desired
660 		 */
661 		*src_needs_copy = TRUE;
662 		return;
663 	}
664 
665 	/*
666 	 *	Try to collapse the object before copying it.
667 	 */
668 	vm_object_collapse(src_object);
669 
670 	/*
671 	 *	If the object has a pager, the pager wants to
672 	 *	see all of the changes.  We need a copy-object
673 	 *	for the changed pages.
674 	 *
675 	 *	If there is a copy-object, and it is empty,
676 	 *	no changes have been made to the object since the
677 	 *	copy-object was made.  We can use the same copy-
678 	 *	object.
679 	 */
680 
681     Retry1:
682 	old_copy = src_object->copy;
683 	if (old_copy != NULL) {
684 		/*
685 		 *	Try to get the locks (out of order)
686 		 */
687 		if (!vm_object_lock_try(old_copy)) {
688 			vm_object_unlock(src_object);
689 
690 			/* should spin a bit here... */
691 			vm_object_lock(src_object);
692 			goto Retry1;
693 		}
694 
695 		if (old_copy->resident_page_count == 0 &&
696 		    old_copy->pager == NULL) {
697 			/*
698 			 *	Return another reference to
699 			 *	the existing copy-object.
700 			 */
701 			old_copy->ref_count++;
702 			vm_object_unlock(old_copy);
703 			vm_object_unlock(src_object);
704 			*dst_object = old_copy;
705 			*dst_offset = src_offset;
706 			*src_needs_copy = FALSE;
707 			return;
708 		}
709 		vm_object_unlock(old_copy);
710 	}
711 	vm_object_unlock(src_object);
712 
713 	/*
714 	 *	If the object has a pager, the pager wants
715 	 *	to see all of the changes.  We must make
716 	 *	a copy-object and put the changed pages there.
717 	 *
718 	 *	The copy-object is always made large enough to
719 	 *	completely shadow the original object, since
720 	 *	it may have several users who want to shadow
721 	 *	the original object at different points.
722 	 */
723 
724 	new_copy = vm_object_allocate(src_object->size);
725 
726     Retry2:
727 	vm_object_lock(src_object);
728 	/*
729 	 *	Copy object may have changed while we were unlocked
730 	 */
731 	old_copy = src_object->copy;
732 	if (old_copy != NULL) {
733 		/*
734 		 *	Try to get the locks (out of order)
735 		 */
736 		if (!vm_object_lock_try(old_copy)) {
737 			vm_object_unlock(src_object);
738 			goto Retry2;
739 		}
740 
741 		/*
742 		 *	Consistency check
743 		 */
744 		if (old_copy->shadow != src_object ||
745 		    old_copy->shadow_offset != (vm_offset_t) 0)
746 			panic("vm_object_copy: copy/shadow inconsistency");
747 
748 		/*
749 		 *	Make the old copy-object shadow the new one.
750 		 *	It will receive no more pages from the original
751 		 *	object.
752 		 */
753 
754 		src_object->ref_count--;	/* remove ref. from old_copy */
755 		old_copy->shadow = new_copy;
756 		new_copy->ref_count++;		/* locking not needed - we
757 						   have the only pointer */
758 		vm_object_unlock(old_copy);	/* done with old_copy */
759 	}
760 
761 	new_start = (vm_offset_t) 0;	/* always shadow original at 0 */
762 	new_end   = (vm_offset_t) new_copy->size; /* for the whole object */
763 
764 	/*
765 	 *	Point the new copy at the existing object.
766 	 */
767 
768 	new_copy->shadow = src_object;
769 	new_copy->shadow_offset = new_start;
770 	src_object->ref_count++;
771 	src_object->copy = new_copy;
772 
773 	/*
774 	 *	Mark all the affected pages of the existing object
775 	 *	copy-on-write.
776 	 */
777 	p = (vm_page_t) queue_first(&src_object->memq);
778 	while (!queue_end(&src_object->memq, (queue_entry_t) p)) {
779 		if ((new_start <= p->offset) && (p->offset < new_end))
780 			p->copy_on_write = TRUE;
781 		p = (vm_page_t) queue_next(&p->listq);
782 	}
783 
784 	vm_object_unlock(src_object);
785 
786 	*dst_object = new_copy;
787 	*dst_offset = src_offset - new_start;
788 	*src_needs_copy = FALSE;
789 }
790 
791 /*
792  *	vm_object_shadow:
793  *
794  *	Create a new object which is backed by the
795  *	specified existing object range.  The source
796  *	object reference is deallocated.
797  *
798  *	The new object and offset into that object
799  *	are returned in the source parameters.
800  */
801 
802 void vm_object_shadow(object, offset, length)
803 	vm_object_t	*object;	/* IN/OUT */
804 	vm_offset_t	*offset;	/* IN/OUT */
805 	vm_size_t	length;
806 {
807 	register vm_object_t	source;
808 	register vm_object_t	result;
809 
810 	source = *object;
811 
812 	/*
813 	 *	Allocate a new object with the given length
814 	 */
815 
816 	if ((result = vm_object_allocate(length)) == NULL)
817 		panic("vm_object_shadow: no object for shadowing");
818 
819 	/*
820 	 *	The new object shadows the source object, adding
821 	 *	a reference to it.  Our caller changes his reference
822 	 *	to point to the new object, removing a reference to
823 	 *	the source object.  Net result: no change of reference
824 	 *	count.
825 	 */
826 	result->shadow = source;
827 
828 	/*
829 	 *	Store the offset into the source object,
830 	 *	and fix up the offset into the new object.
831 	 */
832 
833 	result->shadow_offset = *offset;
834 
835 	/*
836 	 *	Return the new things
837 	 */
838 
839 	*offset = 0;
840 	*object = result;
841 }
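
/*
 *	Typical use, roughly as in vm_map_lookup() (field names follow
 *	vm_map.h): a map entry marked needs_copy is given a private
 *	object on the first write fault:
 *
 *		vm_object_shadow(&entry->object.vm_object, &entry->offset,
 *			(vm_size_t)(entry->end - entry->start));
 *		entry->needs_copy = FALSE;
 */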
842 
843 /*
844  *	Set the specified object's pager to the specified pager.
845  */
846 
847 void vm_object_setpager(object, pager, paging_offset,
848 			read_only)
849 	vm_object_t	object;
850 	vm_pager_t	pager;
851 	vm_offset_t	paging_offset;
852 	boolean_t	read_only;
853 {
854 #ifdef	lint
855 	read_only++;	/* No longer used */
856 #endif	/* lint */
857 
858 	vm_object_lock(object);			/* XXX ? */
859 	object->pager = pager;
860 	object->paging_offset = paging_offset;
861 	vm_object_unlock(object);			/* XXX ? */
862 }
863 
864 /*
865  *	vm_object_hash hashes the pager/id pair.
866  */
867 
868 #define vm_object_hash(pager) \
869 	(((unsigned)pager)%VM_OBJECT_HASH_COUNT)
870 
871 /*
872  *	vm_object_lookup looks in the object cache for an object with the
873  *	specified pager.
874  */
875 
876 vm_object_t vm_object_lookup(pager)
877 	vm_pager_t	pager;
878 {
879 	register queue_t		bucket;
880 	register vm_object_hash_entry_t	entry;
881 	vm_object_t			object;
882 
883 	bucket = &vm_object_hashtable[vm_object_hash(pager)];
884 
885 	vm_object_cache_lock();
886 
887 	entry = (vm_object_hash_entry_t) queue_first(bucket);
888 	while (!queue_end(bucket, (queue_entry_t) entry)) {
889 		object = entry->object;
890 		if (object->pager == pager) {
891 			vm_object_lock(object);
892 			if (object->ref_count == 0) {
893 				queue_remove(&vm_object_cached_list, object,
894 						vm_object_t, cached_list);
895 				vm_object_cached--;
896 			}
897 			object->ref_count++;
898 			vm_object_unlock(object);
899 			vm_object_cache_unlock();
900 			return(object);
901 		}
902 		entry = (vm_object_hash_entry_t) queue_next(&entry->hash_links);
903 	}
904 
905 	vm_object_cache_unlock();
906 	return(NULL);
907 }
908 
909 /*
910  *	vm_object_enter enters the specified object/pager pair into
911  *	the hash table.
912  */
913 
914 void vm_object_enter(object, pager)
915 	vm_object_t	object;
916 	vm_pager_t	pager;
917 {
918 	register queue_t		bucket;
919 	register vm_object_hash_entry_t	entry;
920 
921 	/*
922 	 *	We don't cache null objects, and we can't cache
923 	 *	objects with the null pager.
924 	 */
925 
926 	if (object == NULL)
927 		return;
928 	if (pager == NULL)
929 		return;
930 
931 	bucket = &vm_object_hashtable[vm_object_hash(pager)];
932 	entry = (vm_object_hash_entry_t)
933 		malloc((u_long)sizeof *entry, M_VMOBJHASH, M_WAITOK);
934 	entry->object = object;
935 	object->flags |= OBJ_CANPERSIST;
936 
937 	vm_object_cache_lock();
938 	queue_enter(bucket, entry, vm_object_hash_entry_t, hash_links);
939 	vm_object_cache_unlock();
940 }
941 
942 /*
943  *	vm_object_remove:
944  *
945  *	Remove the pager from the hash table.
946  *	Note:  This assumes that the object cache
947  *	is locked.  XXX this should be fixed
948  *	by reorganizing vm_object_deallocate.
949  */
950 void
951 vm_object_remove(pager)
952 	register vm_pager_t	pager;
953 {
954 	register queue_t		bucket;
955 	register vm_object_hash_entry_t	entry;
956 	register vm_object_t		object;
957 
958 	bucket = &vm_object_hashtable[vm_object_hash(pager)];
959 
960 	entry = (vm_object_hash_entry_t) queue_first(bucket);
961 	while (!queue_end(bucket, (queue_entry_t) entry)) {
962 		object = entry->object;
963 		if (object->pager == pager) {
964 			queue_remove(bucket, entry, vm_object_hash_entry_t,
965 					hash_links);
966 			free((caddr_t)entry, M_VMOBJHASH);
967 			break;
968 		}
969 		entry = (vm_object_hash_entry_t) queue_next(&entry->hash_links);
970 	}
971 }
972 
973 /*
974  *	vm_object_cache_clear removes all objects from the cache.
975  *
976  */
977 
978 void vm_object_cache_clear()
979 {
980 	register vm_object_t	object;
981 
982 	/*
983 	 *	Remove each object in the cache by scanning down the
984 	 *	list of cached objects.
985 	 */
986 	vm_object_cache_lock();
987 	while (!queue_empty(&vm_object_cached_list)) {
988 		object = (vm_object_t) queue_first(&vm_object_cached_list);
989 		vm_object_cache_unlock();
990 
991 		/*
992 		 * Note: it is important that we use vm_object_lookup
993 		 * to gain a reference, and not vm_object_reference, because
994 		 * the logic for removing an object from the cache lies in
995 		 * lookup.
996 		 */
997 		if (object != vm_object_lookup(object->pager))
998 			panic("vm_object_cache_clear: I'm sooo confused.");
999 		pager_cache(object, FALSE);
1000 
1001 		vm_object_cache_lock();
1002 	}
1003 	vm_object_cache_unlock();
1004 }
1005 
1006 boolean_t	vm_object_collapse_allowed = TRUE;
1007 /*
1008  *	vm_object_collapse:
1009  *
1010  *	Collapse an object with the object backing it.
1011  *	Pages in the backing object are moved into the
1012  *	parent, and the backing object is deallocated.
1013  *
1014  *	Requires that the object be locked and the page
1015  *	queues be unlocked.
1016  *
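 *	Each pass has two possible outcomes: if the parent holds the only
 *	reference to the backing object, the backing object's pages and
 *	pager are migrated into the parent and the backing object is
 *	freed (a collapse); otherwise, if the parent already holds every
 *	page it could need from the backing object, the parent is
 *	re-pointed at the backing object's own shadow and one reference
 *	is dropped (a bypass).
 *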
1017  */
1018 void vm_object_collapse(object)
1019 	register vm_object_t	object;
1020 
1021 {
1022 	register vm_object_t	backing_object;
1023 	register vm_offset_t	backing_offset;
1024 	register vm_size_t	size;
1025 	register vm_offset_t	new_offset;
1026 	register vm_page_t	p, pp;
1027 
1028 	if (!vm_object_collapse_allowed)
1029 		return;
1030 
1031 	while (TRUE) {
1032 		/*
1033 		 *	Verify that the conditions are right for collapse:
1034 		 *
1035 		 *	The object exists and no pages in it are currently
1036 		 *	being paged out (or have ever been paged out).
1037 		 */
1038 		if (object == NULL ||
1039 		    object->paging_in_progress != 0 ||
1040 		    object->pager != NULL)
1041 			return;
1042 
1043 		/*
1044 		 *		There is a backing object, and
1045 		 */
1046 
1047 		if ((backing_object = object->shadow) == NULL)
1048 			return;
1049 
1050 		vm_object_lock(backing_object);
1051 		/*
1052 		 *	...
1053 		 *		The backing object is not read_only,
1054 		 *		and no pages in the backing object are
1055 		 *		currently being paged out.
1056 		 *		The backing object is internal.
1057 		 */
1058 
1059 		if ((backing_object->flags & OBJ_INTERNAL) == 0 ||
1060 		    backing_object->paging_in_progress != 0) {
1061 			vm_object_unlock(backing_object);
1062 			return;
1063 		}
1064 
1065 		/*
1066 		 *	The backing object can't be a copy-object:
1067 		 *	the shadow_offset for the copy-object must stay
1068 		 *	as 0.  Furthermore (for the 'we have all the
1069 		 *	pages' case), if we bypass backing_object and
1070 		 *	just shadow the next object in the chain, old
1071 		 *	pages from that object would then have to be copied
1072 		 *	BOTH into the (former) backing_object and into the
1073 		 *	parent object.
1074 		 */
1075 		if (backing_object->shadow != NULL &&
1076 		    backing_object->shadow->copy != NULL) {
1077 			vm_object_unlock(backing_object);
1078 			return;
1079 		}
1080 
1081 		/*
1082 		 *	We know that we can either collapse the backing
1083 		 *	object (if the parent is the only reference to
1084 		 *	it) or (perhaps) remove the parent's reference
1085 		 *	to it.
1086 		 */
1087 
1088 		backing_offset = object->shadow_offset;
1089 		size = object->size;
1090 
1091 		/*
1092 		 *	If there is exactly one reference to the backing
1093 		 *	object, we can collapse it into the parent.
1094 		 */
1095 
1096 		if (backing_object->ref_count == 1) {
1097 
1098 			/*
1099 			 *	We can collapse the backing object.
1100 			 *
1101 			 *	Move all in-memory pages from backing_object
1102 			 *	to the parent.  Pages that have been paged out
1103 			 *	will be overwritten by any of the parent's
1104 			 *	pages that shadow them.
1105 			 */
1106 
1107 			while (!queue_empty(&backing_object->memq)) {
1108 
1109 				p = (vm_page_t)
1110 					queue_first(&backing_object->memq);
1111 
1112 				new_offset = (p->offset - backing_offset);
1113 
1114 				/*
1115 				 *	If the parent has a page here, or if
1116 				 *	this page falls outside the parent,
1117 				 *	dispose of it.
1118 				 *
1119 				 *	Otherwise, move it as planned.
1120 				 */
1121 
1122 				if (p->offset < backing_offset ||
1123 				    new_offset >= size) {
1124 					vm_page_lock_queues();
1125 					vm_page_free(p);
1126 					vm_page_unlock_queues();
1127 				} else {
1128 				    pp = vm_page_lookup(object, new_offset);
1129 				    if (pp != NULL && !pp->fake) {
1130 					vm_page_lock_queues();
1131 					vm_page_free(p);
1132 					vm_page_unlock_queues();
1133 				    }
1134 				    else {
1135 					if (pp) {
1136 					    /* may be someone waiting for it */
1137 					    PAGE_WAKEUP(pp);
1138 					    vm_page_lock_queues();
1139 					    vm_page_free(pp);
1140 					    vm_page_unlock_queues();
1141 					}
1142 					vm_page_rename(p, object, new_offset);
1143 				    }
1144 				}
1145 			}
1146 
1147 			/*
1148 			 *	Move the pager from backing_object to object.
1149 			 *
1150 			 *	XXX We're only using part of the paging space
1151 			 *	for keeps now... we ought to discard the
1152 			 *	unused portion.
1153 			 */
1154 
1155 			object->pager = backing_object->pager;
1156 			object->paging_offset += backing_offset;
1157 
1158 			backing_object->pager = NULL;
1159 
1160 			/*
1161 			 *	Object now shadows whatever backing_object did.
1162 			 *	Note that the reference to backing_object->shadow
1163 			 *	moves from within backing_object to within object.
1164 			 */
1165 
1166 			object->shadow = backing_object->shadow;
1167 			object->shadow_offset += backing_object->shadow_offset;
1168 			if (object->shadow != NULL &&
1169 			    object->shadow->copy != NULL) {
1170 				panic("vm_object_collapse: we collapsed a copy-object!");
1171 			}
1172 			/*
1173 			 *	Discard backing_object.
1174 			 *
1175 			 *	Since the backing object has no pages, no
1176 			 *	pager left, and no object references within it,
1177 			 *	all that is necessary is to dispose of it.
1178 			 */
1179 
1180 			vm_object_unlock(backing_object);
1181 
1182 			simple_lock(&vm_object_list_lock);
1183 			queue_remove(&vm_object_list, backing_object,
1184 						vm_object_t, object_list);
1185 			vm_object_count--;
1186 			simple_unlock(&vm_object_list_lock);
1187 
1188 			free((caddr_t)backing_object, M_VMOBJ);
1189 
1190 			object_collapses++;
1191 		}
1192 		else {
1193 			/*
1194 			 *	If all of the pages in the backing object are
1195 			 *	shadowed by the parent object, the parent
1196 			 *	object no longer has to shadow the backing
1197 			 *	object; it can shadow the next one in the
1198 			 *	chain.
1199 			 *
1200 			 *	The backing object must not be paged out - we'd
1201 			 *	have to check all of the paged-out pages, as
1202 			 *	well.
1203 			 */
1204 
1205 			if (backing_object->pager != NULL) {
1206 				vm_object_unlock(backing_object);
1207 				return;
1208 			}
1209 
1210 			/*
1211 			 *	Should have a check for a 'small' number
1212 			 *	of pages here.
1213 			 */
1214 
1215 			p = (vm_page_t) queue_first(&backing_object->memq);
1216 			while (!queue_end(&backing_object->memq,
1217 					  (queue_entry_t) p)) {
1218 
1219 				new_offset = (p->offset - backing_offset);
1220 
1221 				/*
1222 				 *	If the parent has a page here, or if
1223 				 *	this page falls outside the parent,
1224 				 *	keep going.
1225 				 *
1226 				 *	Otherwise, the backing_object must be
1227 				 *	left in the chain.
1228 				 */
1229 
1230 				if (p->offset >= backing_offset &&
1231 				    new_offset <= size &&
1232 				    ((pp = vm_page_lookup(object, new_offset))
1233 				      == NULL ||
1234 				     pp->fake)) {
1235 					/*
1236 					 *	Page still needed.
1237 					 *	Can't go any further.
1238 					 */
1239 					vm_object_unlock(backing_object);
1240 					return;
1241 				}
1242 				p = (vm_page_t) queue_next(&p->listq);
1243 			}
1244 
1245 			/*
1246 			 *	Make the parent shadow the next object
1247 			 *	in the chain.  Deallocating backing_object
1248 			 *	will not remove it, since its reference
1249 			 *	count is at least 2.
1250 			 */
1251 
1252 			vm_object_reference(object->shadow = backing_object->shadow);
1253 			object->shadow_offset += backing_object->shadow_offset;
1254 
1255 			/*	Drop the reference count on backing_object.
1256 			 *	Since its ref_count was at least 2, it
1257 			 *	will not vanish; so we don't need to call
1258 			 *	vm_object_deallocate.
1259 			 */
1260 			backing_object->ref_count--;
1261 			vm_object_unlock(backing_object);
1262 
1263 			object_bypasses ++;
1264 
1265 		}
1266 
1267 		/*
1268 		 *	Try again with this object's new backing object.
1269 		 */
1270 	}
1271 }
1272 
1273 /*
1274  *	vm_object_page_remove: [internal]
1275  *
1276  *	Removes all physical pages in the specified
1277  *	object range from the object's list of pages.
1278  *
1279  *	The object must be locked.
1280  */
1281 void vm_object_page_remove(object, start, end)
1282 	register vm_object_t	object;
1283 	register vm_offset_t	start;
1284 	register vm_offset_t	end;
1285 {
1286 	register vm_page_t	p, next;
1287 
1288 	if (object == NULL)
1289 		return;
1290 
1291 	p = (vm_page_t) queue_first(&object->memq);
1292 	while (!queue_end(&object->memq, (queue_entry_t) p)) {
1293 		next = (vm_page_t) queue_next(&p->listq);
1294 		if ((start <= p->offset) && (p->offset < end)) {
1295 			pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
1296 			vm_page_lock_queues();
1297 			vm_page_free(p);
1298 			vm_page_unlock_queues();
1299 		}
1300 		p = next;
1301 	}
1302 }
1303 
1304 /*
1305  *	Routine:	vm_object_coalesce
1306  *	Function:	Coalesces two objects backing up adjoining
1307  *			regions of memory into a single object.
1308  *
1309  *	returns TRUE if objects were combined.
1310  *
1311  *	NOTE:	Only works at the moment if the second object is NULL -
1312  *		if it's not, which object do we lock first?
1313  *
1314  *	Parameters:
1315  *		prev_object	First object to coalesce
1316  *		prev_offset	Offset into prev_object
1317  *		next_object	Second object to coalesce
1318  *		next_offset	Offset into next_object
1319  *
1320  *		prev_size	Size of reference to prev_object
1321  *		next_size	Size of reference to next_object
1322  *
1323  *	Conditions:
1324  *	The object must *not* be locked.
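 *
 *	Coalescing lets callers such as vm_map_insert() extend the
 *	anonymous object behind the previous map entry when adjacent
 *	memory is allocated, rather than creating a new object for
 *	every allocation.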
1325  */
1326 boolean_t vm_object_coalesce(prev_object, next_object,
1327 			prev_offset, next_offset,
1328 			prev_size, next_size)
1329 
1330 	register vm_object_t	prev_object;
1331 	vm_object_t	next_object;
1332 	vm_offset_t	prev_offset, next_offset;
1333 	vm_size_t	prev_size, next_size;
1334 {
1335 	vm_size_t	newsize;
1336 
1337 #ifdef	lint
1338 	next_offset++;
1339 #endif	/* lint */
1340 
1341 	if (next_object != NULL) {
1342 		return(FALSE);
1343 	}
1344 
1345 	if (prev_object == NULL) {
1346 		return(TRUE);
1347 	}
1348 
1349 	vm_object_lock(prev_object);
1350 
1351 	/*
1352 	 *	Try to collapse the object first
1353 	 */
1354 	vm_object_collapse(prev_object);
1355 
1356 	/*
1357 	 *	Can't coalesce if:
1358 	 *	. more than one reference
1359 	 *	. paged out
1360 	 *	. shadows another object
1361 	 *	. has a copy elsewhere
1362 	 *	(any of which mean that the pages not mapped to
1363 	 *	prev_entry may be in use anyway)
1364 	 */
1365 
1366 	if (prev_object->ref_count > 1 ||
1367 		prev_object->pager != NULL ||
1368 		prev_object->shadow != NULL ||
1369 		prev_object->copy != NULL) {
1370 		vm_object_unlock(prev_object);
1371 		return(FALSE);
1372 	}
1373 
1374 	/*
1375 	 *	Remove any pages that may still be in the object from
1376 	 *	a previous deallocation.
1377 	 */
1378 
1379 	vm_object_page_remove(prev_object,
1380 			prev_offset + prev_size,
1381 			prev_offset + prev_size + next_size);
1382 
1383 	/*
1384 	 *	Extend the object if necessary.
1385 	 */
1386 	newsize = prev_offset + prev_size + next_size;
1387 	if (newsize > prev_object->size)
1388 		prev_object->size = newsize;
1389 
1390 	vm_object_unlock(prev_object);
1391 	return(TRUE);
1392 }
1393 
1394 /*
1395  *	vm_object_print:	[ debug ]
1396  */
1397 void vm_object_print(object, full)
1398 	vm_object_t	object;
1399 	boolean_t	full;
1400 {
1401 	register vm_page_t	p;
1402 	extern int	indent;
1403 
1404 	register int count;
1405 
1406 	if (object == NULL)
1407 		return;
1408 
1409 	iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ",
1410 		(int) object, (int) object->size,
1411 		object->resident_page_count, object->ref_count);
1412 	printf("pager=0x%x+0x%x, shadow=(0x%x)+0x%x\n",
1413 	       (int) object->pager, (int) object->paging_offset,
1414 	       (int) object->shadow, (int) object->shadow_offset);
1415 	printf("cache: next=0x%x, prev=0x%x\n",
1416 	       object->cached_list.next, object->cached_list.prev);
1417 
1418 	if (!full)
1419 		return;
1420 
1421 	indent += 2;
1422 	count = 0;
1423 	p = (vm_page_t) queue_first(&object->memq);
1424 	while (!queue_end(&object->memq, (queue_entry_t) p)) {
1425 		if (count == 0)
1426 			iprintf("memory:=");
1427 		else if (count == 6) {
1428 			printf("\n");
1429 			iprintf(" ...");
1430 			count = 0;
1431 		} else
1432 			printf(",");
1433 		count++;
1434 
1435 		printf("(off=0x%x,page=0x%x)", p->offset, VM_PAGE_TO_PHYS(p));
1436 		p = (vm_page_t) queue_next(&p->listq);
1437 	}
1438 	if (count != 0)
1439 		printf("\n");
1440 	indent -= 2;
1441 }
1442