xref: /original-bsd/sys/vm/vm_object.c (revision b7cc7b86)
/*
 * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young
 * Copyright (c) 1987 Carnegie-Mellon University
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * The CMU software License Agreement specifies the terms and conditions
 * for use and redistribution.
 *
 *	@(#)vm_object.c	7.1 (Berkeley) 12/05/90
 */

/*
 *	Virtual memory object module.
 */

#include "param.h"
#include "malloc.h"
#include "../vm/vm_param.h"
#include "lock.h"
#include "../vm/vm_page.h"
#include "../vm/vm_map.h"
#include "../vm/vm_object.h"

/*
 *	Virtual memory objects maintain the actual data
 *	associated with allocated virtual memory.  A given
 *	page of memory exists within exactly one object.
 *
 *	An object is only deallocated when all "references"
 *	are given up.  Only one "reference" to a given
 *	region of an object should be writeable.
 *
 *	Associated with each object is a list of all resident
 *	memory pages belonging to that object; this list is
 *	maintained by the "vm_page" module, and locked by the object's
 *	lock.
 *
 *	Each object also records a "pager" routine which is
 *	used to retrieve (and store) pages to the proper backing
 *	storage.  In addition, objects may be backed by other
 *	objects from which they were virtual-copied.
 *
 *	The only fields within the object structure which are
 *	modified after creation are:
 *		reference count		locked by object's lock
 *		pager routine		locked by object's lock
 */

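/*
 *	Typical lifecycle, for orientation (a sketch, not code from
 *	this file):
 *
 *		object = vm_object_allocate(size);	ref_count == 1
 *		vm_object_reference(object);		ref_count == 2
 *		...
 *		vm_object_deallocate(object);		ref_count == 1
 *		vm_object_deallocate(object);		cached or terminated
 *
 *	The final deallocate either enters a persistent object into the
 *	object cache or calls vm_object_terminate to destroy it.
 */
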
struct vm_object	kernel_object_store;
struct vm_object	kmem_object_store;

#define	VM_OBJECT_HASH_COUNT	157

int		vm_cache_max = 100;	/* can patch if necessary */
queue_head_t	vm_object_hashtable[VM_OBJECT_HASH_COUNT];

long	object_collapses = 0;
long	object_bypasses  = 0;

/*
 *	vm_object_init:
 *
 *	Initialize the VM objects module.
 */
void vm_object_init()
{
	register int	i;

	queue_init(&vm_object_cached_list);
	queue_init(&vm_object_list);
	vm_object_count = 0;
	simple_lock_init(&vm_cache_lock);
	simple_lock_init(&vm_object_list_lock);

	for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
		queue_init(&vm_object_hashtable[i]);

	kernel_object = &kernel_object_store;
	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
			kernel_object);

	kmem_object = &kmem_object_store;
	_vm_object_allocate(VM_KMEM_SIZE + VM_MBUF_SIZE, kmem_object);
}

/*
 *	vm_object_allocate:
 *
 *	Returns a new object with the given size.
 */

vm_object_t vm_object_allocate(size)
	vm_size_t	size;
{
	register vm_object_t	result;

	result = (vm_object_t)
		malloc((u_long)sizeof *result, M_VMOBJ, M_WAITOK);

	_vm_object_allocate(size, result);

	return(result);
}

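/*
 *	_vm_object_allocate:
 *
 *	Initialize an already-allocated object structure: one reference,
 *	no resident pages, no pager, not yet shadowing anything.  Used
 *	both by vm_object_allocate and, at boot time, on the statically
 *	allocated kernel_object/kmem_object structures.
 */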
void _vm_object_allocate(size, object)
	vm_size_t		size;
	register vm_object_t	object;
{
	queue_init(&object->memq);
	vm_object_lock_init(object);
	object->ref_count = 1;
	object->resident_page_count = 0;
	object->size = size;
	object->can_persist = FALSE;
	object->paging_in_progress = 0;
	object->copy = VM_OBJECT_NULL;

	/*
	 *	Object starts out read-write, with no pager.
	 */

	object->pager = vm_pager_null;
	object->pager_ready = FALSE;
	object->internal = TRUE;	/* vm_allocate_with_pager will reset */
	object->paging_offset = 0;
	object->shadow = VM_OBJECT_NULL;
	object->shadow_offset = (vm_offset_t) 0;

	simple_lock(&vm_object_list_lock);
	queue_enter(&vm_object_list, object, vm_object_t, object_list);
	vm_object_count++;
	simple_unlock(&vm_object_list_lock);
}

/*
 *	vm_object_reference:
 *
 *	Gets another reference to the given object.
 */
void vm_object_reference(object)
	register vm_object_t	object;
{
	if (object == VM_OBJECT_NULL)
		return;

	vm_object_lock(object);
	object->ref_count++;
	vm_object_unlock(object);
}

/*
 *	vm_object_deallocate:
 *
 *	Release a reference to the specified object,
 *	gained either through a vm_object_allocate
 *	or a vm_object_reference call.  When all references
 *	are gone, storage associated with this object
 *	may be relinquished.
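 *
 *	Note that shadow chains are torn down iteratively: each pass
 *	around the loop below terminates one object, then moves on to
 *	release the reference that object held on its shadow, avoiding
 *	deep recursion on long chains.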
 *
 *	No object may be locked.
 */
void vm_object_deallocate(object)
	register vm_object_t	object;
{
	vm_object_t	temp;

	while (object != VM_OBJECT_NULL) {

		/*
		 *	The cache holds a reference (uncounted) to
		 *	the object; we must lock it before removing
		 *	the object.
		 */

		vm_object_cache_lock();

		/*
		 *	Lose the reference
		 */
		vm_object_lock(object);
		if (--(object->ref_count) != 0) {

			/*
			 *	If there are still references, then
			 *	we are done.
			 */
			vm_object_unlock(object);
			vm_object_cache_unlock();
			return;
		}

		/*
		 *	See if this object can persist.  If so, enter
		 *	it in the cache, then deactivate all of its
		 *	pages.
		 */

		if (object->can_persist) {

			queue_enter(&vm_object_cached_list, object,
				vm_object_t, cached_list);
			vm_object_cached++;
			vm_object_cache_unlock();

			vm_object_deactivate_pages(object);
			vm_object_unlock(object);

			vm_object_cache_trim();
			return;
		}

		/*
		 *	Make sure no one can look us up now.
		 */
		vm_object_remove(object->pager);
		vm_object_cache_unlock();

		temp = object->shadow;
		vm_object_terminate(object);
			/* unlocks and deallocates object */
		object = temp;
	}
}

/*
 *	vm_object_terminate actually destroys the specified object, freeing
 *	up all previously used resources.
 *
 *	The object must be locked.
 */
void vm_object_terminate(object)
	register vm_object_t	object;
{
	register vm_page_t	p;
	vm_object_t		shadow_object;

	/*
	 *	Detach the object from its shadow if we are the shadow's
	 *	copy.
	 */
	if ((shadow_object = object->shadow) != VM_OBJECT_NULL) {
		vm_object_lock(shadow_object);
		if (shadow_object->copy == object)
			shadow_object->copy = VM_OBJECT_NULL;
#if 0
		else if (shadow_object->copy != VM_OBJECT_NULL)
			panic("vm_object_terminate: copy/shadow inconsistency");
#endif
		vm_object_unlock(shadow_object);
	}

	/*
	 *	Wait until the pageout daemon is through
	 *	with the object.
	 */

	while (object->paging_in_progress != 0) {
		vm_object_sleep(object, object, FALSE);
		vm_object_lock(object);
	}

	/*
	 *	While the paging system is locked,
	 *	pull the object's pages off the active
	 *	and inactive queues.  This keeps the
	 *	pageout daemon from playing with them
	 *	during vm_pager_deallocate.
	 *
	 *	We can't free the pages yet, because the
	 *	object's pager may have to write them out
	 *	before deallocating the paging space.
	 */

	p = (vm_page_t) queue_first(&object->memq);
	while (!queue_end(&object->memq, (queue_entry_t) p)) {
		VM_PAGE_CHECK(p);

		vm_page_lock_queues();
		if (p->active) {
			queue_remove(&vm_page_queue_active, p, vm_page_t,
						pageq);
			p->active = FALSE;
			vm_page_active_count--;
		}

		if (p->inactive) {
			queue_remove(&vm_page_queue_inactive, p, vm_page_t,
						pageq);
			p->inactive = FALSE;
			vm_page_inactive_count--;
		}
		vm_page_unlock_queues();
		p = (vm_page_t) queue_next(&p->listq);
	}

	vm_object_unlock(object);

	if (object->paging_in_progress != 0)
		panic("vm_object_terminate: pageout in progress");

	/*
	 *	Clean and free the pages, as appropriate.
	 *	All references to the object are gone,
	 *	so we don't need to lock it.
	 */

	if (!object->internal) {
		vm_object_lock(object);
		vm_object_page_clean(object, 0, 0);
		vm_object_unlock(object);
	}
	while (!queue_empty(&object->memq)) {
		p = (vm_page_t) queue_first(&object->memq);

		VM_PAGE_CHECK(p);

		vm_page_lock_queues();
		vm_page_free(p);
		vm_page_unlock_queues();
	}

	/*
	 *	Let the pager know object is dead.
	 */

	if (object->pager != vm_pager_null)
		vm_pager_deallocate(object->pager);

	simple_lock(&vm_object_list_lock);
	queue_remove(&vm_object_list, object, vm_object_t, object_list);
	vm_object_count--;
	simple_unlock(&vm_object_list_lock);

	/*
	 *	Free the space for the object.
	 */

	free((caddr_t)object, M_VMOBJ);
}

/*
 *	vm_object_page_clean
 *
 *	Clean all dirty pages in the specified range of the object.
 *	Leaves each page on whatever queue it is currently on.
 *
 *	Odd semantics: if start == end, we clean everything.
 *
 *	The object must be locked.
 */
void vm_object_page_clean(object, start, end)
	register vm_object_t	object;
	register vm_offset_t	start;
	register vm_offset_t	end;
{
	register vm_page_t	p;

	if (object->pager == vm_pager_null)
		return;

again:
	p = (vm_page_t) queue_first(&object->memq);
	while (!queue_end(&object->memq, (queue_entry_t) p)) {
		if (start == end ||
		    (p->offset >= start && p->offset < end)) {
			if (p->clean && pmap_is_modified(VM_PAGE_TO_PHYS(p)))
				p->clean = FALSE;
			pmap_remove_all(VM_PAGE_TO_PHYS(p));
			if (!p->clean) {
				p->busy = TRUE;
				object->paging_in_progress++;
				vm_object_unlock(object);
				(void) vm_pager_put(object->pager, p, TRUE);
				vm_object_lock(object);
				object->paging_in_progress--;
				p->busy = FALSE;
				PAGE_WAKEUP(p);
				goto again;
			}
		}
		p = (vm_page_t) queue_next(&p->listq);
	}
}

/*
 *	vm_object_deactivate_pages
 *
 *	Deactivate all pages in the specified object.  (Keep its pages
 *	in memory even though it is no longer referenced.)
 *
 *	The object must be locked.
 */
void vm_object_deactivate_pages(object)
	register vm_object_t	object;
{
	register vm_page_t	p, next;

	p = (vm_page_t) queue_first(&object->memq);
	while (!queue_end(&object->memq, (queue_entry_t) p)) {
		next = (vm_page_t) queue_next(&p->listq);
		vm_page_lock_queues();
		vm_page_deactivate(p);
		vm_page_unlock_queues();
		p = next;
	}
}

/*
 *	Trim the object cache to size.
 */
void vm_object_cache_trim()
{
	register vm_object_t	object;

	vm_object_cache_lock();
	while (vm_object_cached > vm_cache_max) {
		object = (vm_object_t) queue_first(&vm_object_cached_list);
		vm_object_cache_unlock();

		if (object != vm_object_lookup(object->pager))
			panic("vm_object_cache_trim: I'm sooo confused.");

		pager_cache(object, FALSE);

		vm_object_cache_lock();
	}
	vm_object_cache_unlock();
}

/*
 *	vm_object_shutdown()
 *
 *	Shut down the object system.  Unfortunately, while we
 *	may be trying to do this, init is happily waiting for
 *	processes to exit, and therefore will be causing some objects
 *	to be deallocated.  To handle this, we gain a fake reference
 *	to all objects we release paging areas for.  This will prevent
 *	a duplicate deallocation.  This routine is probably full of
 *	race conditions!
 */

void vm_object_shutdown()
{
	register vm_object_t	object;

	/*
	 *	Clean up the object cache *before* we screw up the reference
	 *	counts on all of the objects.
	 */

	vm_object_cache_clear();

	printf("free paging spaces: ");

	/*
	 *	First we gain a reference to each object so that
	 *	no one else will deallocate them.
	 */

	simple_lock(&vm_object_list_lock);
	object = (vm_object_t) queue_first(&vm_object_list);
	while (!queue_end(&vm_object_list, (queue_entry_t) object)) {
		vm_object_reference(object);
		object = (vm_object_t) queue_next(&object->object_list);
	}
	simple_unlock(&vm_object_list_lock);

	/*
	 *	Now we deallocate all the paging areas.  We don't need
	 *	to lock anything because we've reduced to a single
	 *	processor while shutting down.	This also assumes that
	 *	no new objects are being created.
	 */

	object = (vm_object_t) queue_first(&vm_object_list);
	while (!queue_end(&vm_object_list, (queue_entry_t) object)) {
		if (object->pager != vm_pager_null)
			vm_pager_deallocate(object->pager);
		object = (vm_object_t) queue_next(&object->object_list);
		printf(".");
	}
	printf("done.\n");
}

/*
 *	vm_object_pmap_copy:
 *
 *	Makes all physical pages in the specified
 *	object range copy-on-write.  No writeable
 *	references to these pages should remain.
 *
 *	The object must *not* be locked.
 */
void vm_object_pmap_copy(object, start, end)
	register vm_object_t	object;
	register vm_offset_t	start;
	register vm_offset_t	end;
{
	register vm_page_t	p;

	if (object == VM_OBJECT_NULL)
		return;

	vm_object_lock(object);
	p = (vm_page_t) queue_first(&object->memq);
	while (!queue_end(&object->memq, (queue_entry_t) p)) {
		if ((start <= p->offset) && (p->offset < end)) {
			if (!p->copy_on_write) {
				pmap_copy_on_write(VM_PAGE_TO_PHYS(p));
				p->copy_on_write = TRUE;
			}
		}
		p = (vm_page_t) queue_next(&p->listq);
	}
	vm_object_unlock(object);
}

/*
 *	vm_object_pmap_remove:
 *
 *	Removes all physical pages in the specified
 *	object range from all physical maps.
 *
 *	The object must *not* be locked.
 */
void vm_object_pmap_remove(object, start, end)
	register vm_object_t	object;
	register vm_offset_t	start;
	register vm_offset_t	end;
{
	register vm_page_t	p;

	if (object == VM_OBJECT_NULL)
		return;

	vm_object_lock(object);
	p = (vm_page_t) queue_first(&object->memq);
	while (!queue_end(&object->memq, (queue_entry_t) p)) {
		if ((start <= p->offset) && (p->offset < end)) {
			pmap_remove_all(VM_PAGE_TO_PHYS(p));
		}
		p = (vm_page_t) queue_next(&p->listq);
	}
	vm_object_unlock(object);
}

/*
 *	vm_object_copy:
 *
 *	Create a new object which is a copy of an existing
 *	object, and mark all of the pages in the existing
 *	object 'copy-on-write'.  The new object has one reference.
 *	Returns the new object.
 *
 *	May defer the copy until later if the object is not backed
 *	up by a non-default pager.
 */
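/*
 *	Summary of the cases handled below:
 *
 *	1. src_object is null: return a null copy.
 *	2. src_object is internal or has no pager: just take another
 *	   reference, mark the pages copy-on-write, and set
 *	   *src_needs_copy so the caller makes a shadow on first write.
 *	3. src_object has a real pager: build (or reuse) a copy-object
 *	   that shadows the original at offset 0.
 */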
void vm_object_copy(src_object, src_offset, size,
		    dst_object, dst_offset, src_needs_copy)
	register vm_object_t	src_object;
	vm_offset_t		src_offset;
	vm_size_t		size;
	vm_object_t		*dst_object;	/* OUT */
	vm_offset_t		*dst_offset;	/* OUT */
	boolean_t		*src_needs_copy;	/* OUT */
{
	register vm_object_t	new_copy;
	register vm_object_t	old_copy;
	vm_offset_t		new_start, new_end;

	register vm_page_t	p;

	if (src_object == VM_OBJECT_NULL) {
		/*
		 *	Nothing to copy
		 */
		*dst_object = VM_OBJECT_NULL;
		*dst_offset = 0;
		*src_needs_copy = FALSE;
		return;
	}

	/*
	 *	If the object's pager is null_pager or the
	 *	default pager, we don't have to make a copy
	 *	of it.  Instead, we set the needs copy flag and
	 *	make a shadow later.
	 */

	vm_object_lock(src_object);
	if (src_object->pager == vm_pager_null ||
	    src_object->internal) {

		/*
		 *	Make another reference to the object
		 */
		src_object->ref_count++;

		/*
		 *	Mark all of the pages copy-on-write.
		 */
		for (p = (vm_page_t) queue_first(&src_object->memq);
		     !queue_end(&src_object->memq, (queue_entry_t)p);
		     p = (vm_page_t) queue_next(&p->listq)) {
			if (src_offset <= p->offset &&
			    p->offset < src_offset + size)
				p->copy_on_write = TRUE;
		}
		vm_object_unlock(src_object);

		*dst_object = src_object;
		*dst_offset = src_offset;

		/*
		 *	Must make a shadow when write is desired
		 */
		*src_needs_copy = TRUE;
		return;
	}

	/*
	 *	Try to collapse the object before copying it.
	 */
	vm_object_collapse(src_object);

	/*
	 *	If the object has a pager, the pager wants to
	 *	see all of the changes.  We need a copy-object
	 *	for the changed pages.
	 *
	 *	If there is a copy-object, and it is empty,
	 *	no changes have been made to the object since the
	 *	copy-object was made.  We can use the same copy-
	 *	object.
	 */

    Retry1:
	old_copy = src_object->copy;
	if (old_copy != VM_OBJECT_NULL) {
		/*
		 *	Try to get the locks (out of order)
		 */
		if (!vm_object_lock_try(old_copy)) {
			vm_object_unlock(src_object);

			/* should spin a bit here... */
			vm_object_lock(src_object);
			goto Retry1;
		}

		if (old_copy->resident_page_count == 0 &&
		    old_copy->pager == vm_pager_null) {
			/*
			 *	Return another reference to
			 *	the existing copy-object.
			 */
			old_copy->ref_count++;
			vm_object_unlock(old_copy);
			vm_object_unlock(src_object);
			*dst_object = old_copy;
			*dst_offset = src_offset;
			*src_needs_copy = FALSE;
			return;
		}
		vm_object_unlock(old_copy);
	}
	vm_object_unlock(src_object);

	/*
	 *	If the object has a pager, the pager wants
	 *	to see all of the changes.  We must make
	 *	a copy-object and put the changed pages there.
	 *
	 *	The copy-object is always made large enough to
	 *	completely shadow the original object, since
	 *	it may have several users who want to shadow
	 *	the original object at different points.
	 */

	new_copy = vm_object_allocate(src_object->size);

    Retry2:
	vm_object_lock(src_object);
	/*
	 *	Copy object may have changed while we were unlocked
	 */
	old_copy = src_object->copy;
	if (old_copy != VM_OBJECT_NULL) {
		/*
		 *	Try to get the locks (out of order)
		 */
		if (!vm_object_lock_try(old_copy)) {
			vm_object_unlock(src_object);
			goto Retry2;
		}

		/*
		 *	Consistency check
		 */
		if (old_copy->shadow != src_object ||
		    old_copy->shadow_offset != (vm_offset_t) 0)
			panic("vm_object_copy: copy/shadow inconsistency");

		/*
		 *	Make the old copy-object shadow the new one.
		 *	It will receive no more pages from the original
		 *	object.
		 */

		src_object->ref_count--;	/* remove ref. from old_copy */
		old_copy->shadow = new_copy;
		new_copy->ref_count++;		/* locking not needed - we
						   have the only pointer */
		vm_object_unlock(old_copy);	/* done with old_copy */
	}

	new_start = (vm_offset_t) 0;	/* always shadow original at 0 */
	new_end   = (vm_offset_t) new_copy->size; /* for the whole object */

	/*
	 *	Point the new copy at the existing object.
	 */

	new_copy->shadow = src_object;
	new_copy->shadow_offset = new_start;
	src_object->ref_count++;
	src_object->copy = new_copy;

	/*
	 *	Mark all the affected pages of the existing object
	 *	copy-on-write.
	 */
	p = (vm_page_t) queue_first(&src_object->memq);
	while (!queue_end(&src_object->memq, (queue_entry_t) p)) {
		if ((new_start <= p->offset) && (p->offset < new_end)) {
			p->copy_on_write = TRUE;
		}
		p = (vm_page_t) queue_next(&p->listq);
	}

	vm_object_unlock(src_object);

	*dst_object = new_copy;
	*dst_offset = src_offset - new_start;
	*src_needs_copy = FALSE;
}

/*
 *	vm_object_shadow:
 *
 *	Create a new object which is backed by the
 *	specified existing object range.  The source
 *	object reference is deallocated.
 *
 *	The new object and offset into that object
 *	are returned in the source parameters.
 */

void vm_object_shadow(object, offset, length)
	vm_object_t	*object;	/* IN/OUT */
	vm_offset_t	*offset;	/* IN/OUT */
	vm_size_t	length;
{
	register vm_object_t	source;
	register vm_object_t	result;

	source = *object;

	/*
	 *	Allocate a new object with the given length
	 */

	if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
		panic("vm_object_shadow: no object for shadowing");

	/*
	 *	The new object shadows the source object, adding
	 *	a reference to it.  Our caller changes his reference
	 *	to point to the new object, removing a reference to
	 *	the source object.  Net result: no change of reference
	 *	count.
	 */
	result->shadow = source;

	/*
	 *	Store the offset into the source object,
	 *	and fix up the offset into the new object.
	 */

	result->shadow_offset = *offset;

	/*
	 *	Return the new things
	 */

	*offset = 0;
	*object = result;
}

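/*
 *	A typical (hypothetical) caller holds a map entry's
 *	object/offset pair when a copy-on-write fault occurs:
 *
 *		vm_object_shadow(&entry_object, &entry_offset, entry_size);
 *
 *	Afterwards entry_object points at a fresh, empty object whose
 *	shadow is the old object; unmodified pages are still found by
 *	following the shadow chain, while newly written pages land in
 *	the new object.
 */
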
/*
 *	Set the specified object's pager to the specified pager.
 */

void vm_object_setpager(object, pager, paging_offset,
			read_only)
	vm_object_t	object;
	vm_pager_t	pager;
	vm_offset_t	paging_offset;
	boolean_t	read_only;
{
#ifdef	lint
	read_only++;	/* No longer used */
#endif	/* lint */

	vm_object_lock(object);			/* XXX ? */
	object->pager = pager;
	object->paging_offset = paging_offset;
	vm_object_unlock(object);			/* XXX ? */
}

/*
 *	vm_object_hash hashes the pager/id pair.
 */

#define vm_object_hash(pager) \
	(((unsigned)pager)%VM_OBJECT_HASH_COUNT)
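/*
 *	(The pager address is simply truncated to an unsigned and taken
 *	modulo the bucket count; VM_OBJECT_HASH_COUNT is prime, which
 *	presumably helps spread pager addresses evenly over the buckets.)
 */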

/*
 *	vm_object_lookup looks in the object cache for an object with the
 *	specified pager and paging id.
 */

vm_object_t vm_object_lookup(pager)
	vm_pager_t	pager;
{
	register queue_t		bucket;
	register vm_object_hash_entry_t	entry;
	vm_object_t			object;

	bucket = &vm_object_hashtable[vm_object_hash(pager)];

	vm_object_cache_lock();

	entry = (vm_object_hash_entry_t) queue_first(bucket);
	while (!queue_end(bucket, (queue_entry_t) entry)) {
		object = entry->object;
		if (object->pager == pager) {
			vm_object_lock(object);
			if (object->ref_count == 0) {
				queue_remove(&vm_object_cached_list, object,
						vm_object_t, cached_list);
				vm_object_cached--;
			}
			object->ref_count++;
			vm_object_unlock(object);
			vm_object_cache_unlock();
			return(object);
		}
		entry = (vm_object_hash_entry_t) queue_next(&entry->hash_links);
	}

	vm_object_cache_unlock();
	return(VM_OBJECT_NULL);
}

/*
 *	vm_object_enter enters the specified object/pager/id into
 *	the hash table.
 */

void vm_object_enter(object, pager)
	vm_object_t	object;
	vm_pager_t	pager;
{
	register queue_t		bucket;
	register vm_object_hash_entry_t	entry;

	/*
	 *	We don't cache null objects, and we can't cache
	 *	objects with the null pager.
	 */

	if (object == VM_OBJECT_NULL)
		return;
	if (pager == vm_pager_null)
		return;

	bucket = &vm_object_hashtable[vm_object_hash(pager)];
	entry = (vm_object_hash_entry_t)
		malloc((u_long)sizeof *entry, M_VMOBJHASH, M_WAITOK);
	entry->object = object;
	object->can_persist = TRUE;

	vm_object_cache_lock();
	queue_enter(bucket, entry, vm_object_hash_entry_t, hash_links);
	vm_object_cache_unlock();
}

/*
 *	vm_object_remove:
 *
 *	Remove the pager from the hash table.
 *	Note:  This assumes that the object cache
 *	is locked.  XXX this should be fixed
 *	by reorganizing vm_object_deallocate.
 */
void vm_object_remove(pager)
	register vm_pager_t	pager;
{
	register queue_t		bucket;
	register vm_object_hash_entry_t	entry;
	register vm_object_t		object;

	bucket = &vm_object_hashtable[vm_object_hash(pager)];

	entry = (vm_object_hash_entry_t) queue_first(bucket);
	while (!queue_end(bucket, (queue_entry_t) entry)) {
		object = entry->object;
		if (object->pager == pager) {
			queue_remove(bucket, entry, vm_object_hash_entry_t,
					hash_links);
			free((caddr_t)entry, M_VMOBJHASH);
			break;
		}
		entry = (vm_object_hash_entry_t) queue_next(&entry->hash_links);
	}
}

/*
 *	vm_object_cache_clear removes all objects from the cache.
 */

void vm_object_cache_clear()
{
	register vm_object_t	object;

	/*
	 *	Remove each object in the cache by scanning down the
	 *	list of cached objects.
	 */
	vm_object_cache_lock();
	while (!queue_empty(&vm_object_cached_list)) {
		object = (vm_object_t) queue_first(&vm_object_cached_list);
		vm_object_cache_unlock();

		/*
		 * Note: it is important that we use vm_object_lookup
		 * to gain a reference, and not vm_object_reference, because
		 * the logic for removing an object from the cache lies in
		 * lookup.
		 */
		if (object != vm_object_lookup(object->pager))
			panic("vm_object_cache_clear: I'm sooo confused.");
		pager_cache(object, FALSE);

		vm_object_cache_lock();
	}
	vm_object_cache_unlock();
}

boolean_t	vm_object_collapse_allowed = TRUE;
/*
 *	vm_object_collapse:
 *
 *	Collapse an object with the object backing it.
 *	Pages in the backing object are moved into the
 *	parent, and the backing object is deallocated.
 *
 *	Requires that the object be locked and the page
 *	queues be unlocked.
 */
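/*
 *	A sketch of the two outcomes (layout is illustrative only):
 *
 *	Collapse (backing_object->ref_count == 1):
 *		object -> backing_object -> X
 *	becomes
 *		object -> X
 *	with backing_object's pages and pager absorbed into object.
 *
 *	Bypass (backing_object still referenced elsewhere, but every
 *	page in range is already shadowed by the parent):
 *		object -> backing_object -> X
 *	becomes
 *		object -> X
 *	with backing_object left intact for its other users.
 */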
void vm_object_collapse(object)
	register vm_object_t	object;
{
	register vm_object_t	backing_object;
	register vm_offset_t	backing_offset;
	register vm_size_t	size;
	register vm_offset_t	new_offset;
	register vm_page_t	p, pp;

	if (!vm_object_collapse_allowed)
		return;

	while (TRUE) {
		/*
		 *	Verify that the conditions are right for collapse:
		 *
		 *	The object exists and no pages in it are currently
		 *	being paged out (or have ever been paged out).
		 */
		if (object == VM_OBJECT_NULL ||
		    object->paging_in_progress != 0 ||
		    object->pager != vm_pager_null)
			return;

		/*
		 *		There is a backing object, and
		 */

		if ((backing_object = object->shadow) == VM_OBJECT_NULL)
			return;

		vm_object_lock(backing_object);
		/*
		 *	...
		 *		no pages in the backing object are
		 *		currently being paged out.
		 *		The backing object is internal.
		 */

		if (!backing_object->internal ||
		    backing_object->paging_in_progress != 0) {
			vm_object_unlock(backing_object);
			return;
		}

		/*
		 *	The backing object can't be a copy-object:
		 *	the shadow_offset for the copy-object must stay
		 *	as 0.  Furthermore (for the 'we have all the
		 *	pages' case), if we bypass backing_object and
		 *	just shadow the next object in the chain, old
		 *	pages from that object would then have to be copied
		 *	BOTH into the (former) backing_object and into the
		 *	parent object.
		 */
		if (backing_object->shadow != VM_OBJECT_NULL &&
		    backing_object->shadow->copy != VM_OBJECT_NULL) {
			vm_object_unlock(backing_object);
			return;
		}

		/*
		 *	We know that we can either collapse the backing
		 *	object (if the parent is the only reference to
		 *	it) or (perhaps) remove the parent's reference
		 *	to it.
		 */

		backing_offset = object->shadow_offset;
		size = object->size;

		/*
		 *	If there is exactly one reference to the backing
		 *	object, we can collapse it into the parent.
		 */

		if (backing_object->ref_count == 1) {

			/*
			 *	We can collapse the backing object.
			 *
			 *	Move all in-memory pages from backing_object
			 *	to the parent.  Pages that have been paged out
			 *	will be overwritten by any of the parent's
			 *	pages that shadow them.
			 */

			while (!queue_empty(&backing_object->memq)) {

				p = (vm_page_t)
					queue_first(&backing_object->memq);

				new_offset = (p->offset - backing_offset);

				/*
				 *	If the parent has a page here, or if
				 *	this page falls outside the parent,
				 *	dispose of it.
				 *
				 *	Otherwise, move it as planned.
				 */

				if (p->offset < backing_offset ||
				    new_offset >= size) {
					vm_page_lock_queues();
					vm_page_free(p);
					vm_page_unlock_queues();
				} else {
					pp = vm_page_lookup(object, new_offset);
					if (pp != VM_PAGE_NULL && !pp->fake) {
						vm_page_lock_queues();
						vm_page_free(p);
						vm_page_unlock_queues();
					} else {
						if (pp) {
							/* may be someone waiting for it */
							PAGE_WAKEUP(pp);
							vm_page_lock_queues();
							vm_page_free(pp);
							vm_page_unlock_queues();
						}
						vm_page_rename(p, object, new_offset);
					}
				}
			}

			/*
			 *	Move the pager from backing_object to object.
			 *
			 *	XXX We're only using part of the paging space
			 *	for keeps now... we ought to discard the
			 *	unused portion.
			 */

			object->pager = backing_object->pager;
			object->paging_offset += backing_offset;

			backing_object->pager = vm_pager_null;

			/*
			 *	Object now shadows whatever backing_object did.
			 *	Note that the reference to backing_object->shadow
			 *	moves from within backing_object to within object.
			 */

			object->shadow = backing_object->shadow;
			object->shadow_offset += backing_object->shadow_offset;
			if (object->shadow != VM_OBJECT_NULL &&
			    object->shadow->copy != VM_OBJECT_NULL) {
				panic("vm_object_collapse: we collapsed a copy-object!");
			}
			/*
			 *	Discard backing_object.
			 *
			 *	Since the backing object has no pages, no
			 *	pager left, and no object references within it,
			 *	all that is necessary is to dispose of it.
			 */

			vm_object_unlock(backing_object);

			simple_lock(&vm_object_list_lock);
			queue_remove(&vm_object_list, backing_object,
						vm_object_t, object_list);
			vm_object_count--;
			simple_unlock(&vm_object_list_lock);

			free((caddr_t)backing_object, M_VMOBJ);

			object_collapses++;
		}
		else {
			/*
			 *	If all of the pages in the backing object are
			 *	shadowed by the parent object, the parent
			 *	object no longer has to shadow the backing
			 *	object; it can shadow the next one in the
			 *	chain.
			 *
			 *	The backing object must not be paged out - we'd
			 *	have to check all of the paged-out pages, as
			 *	well.
			 */

			if (backing_object->pager != vm_pager_null) {
				vm_object_unlock(backing_object);
				return;
			}

			/*
			 *	Should have a check for a 'small' number
			 *	of pages here.
			 */

			p = (vm_page_t) queue_first(&backing_object->memq);
			while (!queue_end(&backing_object->memq,
					  (queue_entry_t) p)) {

				new_offset = (p->offset - backing_offset);

				/*
				 *	If the parent has a page here, or if
				 *	this page falls outside the parent,
				 *	keep going.
				 *
				 *	Otherwise, the backing_object must be
				 *	left in the chain.
				 */

				if (p->offset >= backing_offset &&
				    new_offset < size &&
				    ((pp = vm_page_lookup(object, new_offset))
				      == VM_PAGE_NULL ||
				     pp->fake)) {
					/*
					 *	Page still needed.
					 *	Can't go any further.
					 */
					vm_object_unlock(backing_object);
					return;
				}
				p = (vm_page_t) queue_next(&p->listq);
			}

			/*
			 *	Make the parent shadow the next object
			 *	in the chain.  Deallocating backing_object
			 *	will not remove it, since its reference
			 *	count is at least 2.
			 */

			vm_object_reference(object->shadow = backing_object->shadow);
			object->shadow_offset += backing_object->shadow_offset;

			/*
			 *	Drop the reference count on backing_object.
			 *	Since its ref_count was at least 2, it
			 *	will not vanish; so we don't need to call
			 *	vm_object_deallocate.
			 */
			backing_object->ref_count--;
			vm_object_unlock(backing_object);

			object_bypasses++;
		}

		/*
		 *	Try again with this object's new backing object.
		 */
	}
}

/*
 *	vm_object_page_remove: [internal]
 *
 *	Removes all physical pages in the specified
 *	object range from the object's list of pages.
 *
 *	The object must be locked.
 */
void vm_object_page_remove(object, start, end)
	register vm_object_t	object;
	register vm_offset_t	start;
	register vm_offset_t	end;
{
	register vm_page_t	p, next;

	if (object == VM_OBJECT_NULL)
		return;

	p = (vm_page_t) queue_first(&object->memq);
	while (!queue_end(&object->memq, (queue_entry_t) p)) {
		next = (vm_page_t) queue_next(&p->listq);
		if ((start <= p->offset) && (p->offset < end)) {
			pmap_remove_all(VM_PAGE_TO_PHYS(p));
			vm_page_lock_queues();
			vm_page_free(p);
			vm_page_unlock_queues();
		}
		p = next;
	}
}

/*
 *	Routine:	vm_object_coalesce
 *	Function:	Coalesces two objects backing up adjoining
 *			regions of memory into a single object.
 *
 *	Returns TRUE if objects were combined.
 *
 *	NOTE:	Only works at the moment if the second object is NULL -
 *		if it's not, which object do we lock first?
 *
 *	Parameters:
 *		prev_object	First object to coalesce
 *		prev_offset	Offset into prev_object
 *		next_object	Second object to coalesce
 *		next_offset	Offset into next_object
 *
 *		prev_size	Size of reference to prev_object
 *		next_size	Size of reference to next_object
 *
 *	Conditions:
 *		The object must *not* be locked.
 */
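/*
 *	For example (a hypothetical caller): when the map code extends
 *	a map entry of prev_size bytes by next_size bytes, it can ask
 *
 *		vm_object_coalesce(prev_object, VM_OBJECT_NULL,
 *			prev_offset, (vm_offset_t) 0,
 *			prev_size, next_size);
 *
 *	and, on TRUE, simply grow the existing entry instead of
 *	allocating a second object for the new range.
 */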
boolean_t vm_object_coalesce(prev_object, next_object,
			prev_offset, next_offset,
			prev_size, next_size)
	register vm_object_t	prev_object;
	vm_object_t	next_object;
	vm_offset_t	prev_offset, next_offset;
	vm_size_t	prev_size, next_size;
{
	vm_size_t	newsize;

#ifdef	lint
	next_offset++;
#endif	/* lint */

	if (next_object != VM_OBJECT_NULL) {
		return(FALSE);
	}

	if (prev_object == VM_OBJECT_NULL) {
		return(TRUE);
	}

	vm_object_lock(prev_object);

	/*
	 *	Try to collapse the object first
	 */
	vm_object_collapse(prev_object);

	/*
	 *	Can't coalesce if:
	 *	. more than one reference
	 *	. paged out
	 *	. shadows another object
	 *	. has a copy elsewhere
	 *	(any of which mean that the pages not mapped to
	 *	prev_entry may be in use anyway)
	 */

	if (prev_object->ref_count > 1 ||
		prev_object->pager != vm_pager_null ||
		prev_object->shadow != VM_OBJECT_NULL ||
		prev_object->copy != VM_OBJECT_NULL) {
		vm_object_unlock(prev_object);
		return(FALSE);
	}

	/*
	 *	Remove any pages that may still be in the object from
	 *	a previous deallocation.
	 */

	vm_object_page_remove(prev_object,
			prev_offset + prev_size,
			prev_offset + prev_size + next_size);

	/*
	 *	Extend the object if necessary.
	 */
	newsize = prev_offset + prev_size + next_size;
	if (newsize > prev_object->size)
		prev_object->size = newsize;

	vm_object_unlock(prev_object);
	return(TRUE);
}

/*
 *	vm_object_print:	[ debug ]
 */
void vm_object_print(object, full)
	vm_object_t	object;
	boolean_t	full;
{
	register vm_page_t	p;
	extern int		indent;

	register int count;

	if (object == VM_OBJECT_NULL)
		return;

	iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ",
		(int) object, (int) object->size,
		object->resident_page_count, object->ref_count);
	printf("pager=0x%x+0x%x, shadow=(0x%x)+0x%x\n",
	       (int) object->pager, (int) object->paging_offset,
	       (int) object->shadow, (int) object->shadow_offset);
	printf("cache: next=0x%x, prev=0x%x\n",
	       object->cached_list.next, object->cached_list.prev);

	if (!full)
		return;

	indent += 2;
	count = 0;
	p = (vm_page_t) queue_first(&object->memq);
	while (!queue_end(&object->memq, (queue_entry_t) p)) {
		if (count == 0)
			iprintf("memory:=");
		else if (count == 6) {
			printf("\n");
			iprintf(" ...");
			count = 0;
		} else
			printf(",");
		count++;

		printf("(off=0x%x,page=0x%x)", p->offset, VM_PAGE_TO_PHYS(p));
		p = (vm_page_t) queue_next(&p->listq);
	}
	if (count != 0)
		printf("\n");
	indent -= 2;
}