1 /*	$OpenBSD: uvm_amap.c,v 1.18 2001/12/19 08:58:07 art Exp $	*/
2 /*	$NetBSD: uvm_amap.c,v 1.30 2001/02/18 21:19:09 chs Exp $	*/
3 
4 /*
5  *
6  * Copyright (c) 1997 Charles D. Cranor and Washington University.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed by Charles D. Cranor and
20  *      Washington University.
21  * 4. The name of the author may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /*
37  * uvm_amap.c: amap operations
38  */
39 
40 /*
41  * this file contains functions that perform operations on amaps.  see
42  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
43  */
44 
45 #undef UVM_AMAP_INLINE		/* enable/disable amap inlines */
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/proc.h>
50 #include <sys/malloc.h>
51 #include <sys/kernel.h>
52 #include <sys/pool.h>
53 
54 #define UVM_AMAP_C		/* ensure disabled inlines are in */
55 #include <uvm/uvm.h>
56 #include <uvm/uvm_swap.h>
57 
58 /*
59  * pool for allocation of vm_amap structures.  note that the pool has
60  * its own simplelock for its protection.  also note that in order to
61  * avoid an endless loop, the amap pool's allocator cannot allocate
62  * memory from an amap (it currently goes through the kernel uobj, so
63  * we are ok).
64  */
65 
66 struct pool uvm_amap_pool;
67 
68 /*
69  * local functions
70  */
71 
72 static struct vm_amap *amap_alloc1 __P((int, int, int));
73 
74 #ifdef UVM_AMAP_PPREF
75 /*
76  * what is ppref?   ppref is an _optional_ amap feature which is used
77  * to keep track of reference counts on a per-page basis.  it is enabled
78  * when UVM_AMAP_PPREF is defined.
79  *
80  * when enabled, an array of ints is allocated for the pprefs.  this
81  * array is allocated only when a partial reference is added to the
82  * amap (either by unmapping part of the amap, or gaining a reference
83  * to only a part of an amap).  if the malloc of the array fails
84  * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
85  * that we tried to do pprefs but couldn't alloc the array, so we just
86  * give up (after all, this is an optional feature!).
87  *
88  * the array is divided into page-sized "chunks."   for chunks of length 1,
89  * the chunk reference count plus one is stored in that chunk's slot.
90  * for chunks of length > 1 the first slot contains (the reference count
91  * plus one) * -1.    [the negative value indicates that the length is
92  * greater than one.]   the second slot of the chunk contains the length
93  * of the chunk.   here is an example:
94  *
95  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
96  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
97  *              <----------><-><----><-------><----><-><------->
98  * (x = don't care)
99  *
100  * this lets one int hold the ref count for the whole chunk.    note that
101  * the "plus one" is needed because a reference count of zero is neither
102  * positive nor negative (we need a way to tell whether we have one zero
103  * or a run of them).
104  *
105  * here are some in-line helpers; a compiled-out example follows them.
106  */
107 
108 static __inline void pp_getreflen __P((int *, int, int *, int *));
109 static __inline void pp_setreflen __P((int *, int, int, int));
110 
111 /*
112  * pp_getreflen: get the reference and length for a specific offset
113  *
114  * => ppref's amap must be locked
115  */
116 static __inline void
117 pp_getreflen(ppref, offset, refp, lenp)
118 	int *ppref, offset, *refp, *lenp;
119 {
120 
121 	if (ppref[offset] > 0) {		/* chunk size must be 1 */
122 		*refp = ppref[offset] - 1;	/* don't forget to adjust */
123 		*lenp = 1;
124 	} else {
125 		*refp = (ppref[offset] * -1) - 1;
126 		*lenp = ppref[offset+1];
127 	}
128 }
129 
130 /*
131  * pp_setreflen: set the reference and length for a specific offset
132  *
133  * => ppref's amap must be locked
134  */
135 static __inline void
136 pp_setreflen(ppref, offset, ref, len)
137 	int *ppref, offset, ref, len;
138 {
139 	if (len == 1) {
140 		ppref[offset] = ref + 1;
141 	} else {
142 		ppref[offset] = (ref + 1) * -1;
143 		ppref[offset+1] = len;
144 	}
145 }
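
#if 0	/* illustrative only, deliberately compiled out */
/*
 * editor's sketch (not part of the original source): a walkthrough of
 * the example table in the comment above.  the array below is the
 * "ppref" row (don't-care slots written as 0); walking it with
 * pp_getreflen() recovers the "actual REFS" row chunk by chunk.
 */
static void
pp_example()
{
	/* encodes refs {2,2,2,2, 3, 1,1, 0,0,0, 4,4, 0, 1,1,1} */
	int ppref[16] = { -3, 4, 0, 0, 4, -2, 2, -1, 3, 0, -5, 2, 1, -2, 3, 0 };
	int slot, ref, len;

	for (slot = 0; slot < 16; slot += len) {
		pp_getreflen(ppref, slot, &ref, &len);
		printf("slots %d-%d: ref count %d\n", slot, slot + len - 1, ref);
	}
}
#endif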
146 #endif
147 
148 /*
149  * amap_init: called at boot time to init global amap data structures
150  */
151 
152 void
153 amap_init()
155 {
156 	/*
157 	 * Initialize the vm_amap pool.
158 	 */
159 	pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
160 	    "amappl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
161 	    M_UVMAMAP);
162 }
163 
164 /*
165  * amap_alloc1: internal function that allocates an amap, but does not
166  *	init the overlay.
167  *
168  * => lock on returned amap is init'd
169  */
170 static inline struct vm_amap *
171 amap_alloc1(slots, padslots, waitf)
172 	int slots, padslots, waitf;
173 {
174 	struct vm_amap *amap;
175 	int totalslots = slots + padslots;
176 
177 	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK : 0);
178 	if (amap == NULL)
179 		return(NULL);
180 
181 	simple_lock_init(&amap->am_l);
182 	amap->am_ref = 1;
183 	amap->am_flags = 0;
184 #ifdef UVM_AMAP_PPREF
185 	amap->am_ppref = NULL;
186 #endif
187 	amap->am_maxslot = totalslots;
188 	amap->am_nslot = slots;
189 	amap->am_nused = 0;
190 
191 	amap->am_slots = malloc(totalslots * sizeof(int), M_UVMAMAP,
192 	    waitf);
193 	if (amap->am_slots == NULL)
194 		goto fail1;
195 
196 	amap->am_bckptr = malloc(totalslots * sizeof(int), M_UVMAMAP, waitf);
197 	if (amap->am_bckptr == NULL)
198 		goto fail2;
199 
200 	amap->am_anon = malloc(totalslots * sizeof(struct vm_anon *),
201 	    M_UVMAMAP, waitf);
202 	if (amap->am_anon == NULL)
203 		goto fail3;
204 
205 	return(amap);
206 
207 fail3:
208 	free(amap->am_bckptr, M_UVMAMAP);
209 fail2:
210 	free(amap->am_slots, M_UVMAMAP);
211 fail1:
212 	pool_put(&uvm_amap_pool, amap);
213 	return (NULL);
214 }
215 
216 /*
217  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
218  *
219  * => caller should ensure sz is a multiple of PAGE_SIZE
220  * => reference count to new amap is set to one
221  * => new amap is returned unlocked
222  */
223 
224 struct vm_amap *
225 amap_alloc(sz, padsz, waitf)
226 	vaddr_t sz, padsz;
227 	int waitf;
228 {
229 	struct vm_amap *amap;
230 	int slots, padslots;
231 	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);
232 
233 	AMAP_B2SLOT(slots, sz);		/* load slots */
234 	AMAP_B2SLOT(padslots, padsz);
235 
236 	amap = amap_alloc1(slots, padslots, waitf);
237 	if (amap)
238 		memset(amap->am_anon, 0, (slots + padslots) * sizeof(struct vm_anon *));
239 
240 	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
241 	return(amap);
242 }
243 
244 
245 /*
246  * amap_free: free an amap
247  *
248  * => the amap must be locked (mainly for simplelock accounting)
249  * => the amap should have a zero reference count and be empty
250  */
251 void
252 amap_free(amap)
253 	struct vm_amap *amap;
254 {
255 	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
256 
257 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
258 	LOCK_ASSERT(simple_lock_held(&amap->am_l));
259 
260 	free(amap->am_slots, M_UVMAMAP);
261 	free(amap->am_bckptr, M_UVMAMAP);
262 	free(amap->am_anon, M_UVMAMAP);
263 #ifdef UVM_AMAP_PPREF
264 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
265 		free(amap->am_ppref, M_UVMAMAP);
266 #endif
267 	amap_unlock(amap);	/* mainly for lock debugging */
268 	pool_put(&uvm_amap_pool, amap);
269 
270 	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
271 }
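
#if 0	/* illustrative only, deliberately compiled out */
/*
 * editor's sketch (not part of the original source): minimal use of
 * amap_alloc() and amap_free().  an amap covering four pages is
 * created (AMAP_B2SLOT turns the byte size into 4 slots) and then
 * thrown away.  amap_free() wants a locked, empty amap with a zero
 * reference count, so the teardown mirrors what amap_wipeout() does.
 */
static void
amap_alloc_example()
{
	struct vm_amap *amap;

	amap = amap_alloc(4 * PAGE_SIZE, 0, M_WAITOK);	/* may sleep */
	if (amap == NULL)
		return;

	/* ... anons would normally be installed here with amap_add() ... */

	amap_lock(amap);
	amap->am_ref = 0;	/* drop our (only) reference */
	amap_free(amap);	/* unlocks and frees the amap */
}
#endif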
272 
273 /*
274  * amap_extend: extend the size of an amap (if needed)
275  *
276  * => called from uvm_map when we want to extend an amap to cover
277  *    a new mapping (rather than allocate a new one)
278  * => amap should be unlocked (we will lock it)
279  * => to safely extend an amap it should have a reference count of
280  *    one (thus it can't be shared)
281  * => XXXCDC: needs a waitflag or failure return value?
282  * => XXXCDC: support padding at this level?
283  */
284 void
285 amap_extend(entry, addsize)
286 	vm_map_entry_t entry;
287 	vsize_t addsize;
288 {
289 	struct vm_amap *amap = entry->aref.ar_amap;
290 	int slotoff = entry->aref.ar_pageoff;
291 	int slotmapped, slotadd, slotneed;
292 #ifdef UVM_AMAP_PPREF
293 	int *newppref, *oldppref;
294 #endif
295 	u_int *newsl, *newbck, *oldsl, *oldbck;
296 	struct vm_anon **newover, **oldover;
297 	int slotadded;
298 	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);
299 
300 	UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x)", entry,addsize,0,0);
301 
302 	/*
303 	 * first, determine how many slots we need in the amap.  don't
304 	 * forget that ar_pageoff could be non-zero: this means that
305 	 * there are some unused slots before us in the amap.
306 	 */
307 
308 	amap_lock(amap);					/* lock! */
309 
310 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
311 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
312 	slotneed = slotoff + slotmapped + slotadd;
313 
314 	/*
315 	 * case 1: we already have enough slots in the amap and thus
316 	 * only need to bump the reference counts on the slots we are
317 	 * adding.
318 	 */
319 
320 	if (amap->am_nslot >= slotneed) {
321 #ifdef UVM_AMAP_PPREF
322 		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
323 			amap_pp_adjref(amap, slotoff + slotmapped, slotadd, 1);
324 		}
325 #endif
326 		amap_unlock(amap);
327 		UVMHIST_LOG(maphist,"<- done (case 1), amap = 0x%x, slotneed=%d",
328 		    amap, slotneed, 0, 0);
329 		return;				/* done! */
330 	}
331 
332 	/*
333 	 * case 2: we pre-allocated extra slots and we just need to
334 	 * bump am_nslot up to account for them.
335 	 */
336 	if (amap->am_maxslot >= slotneed) {
337 #ifdef UVM_AMAP_PPREF
338 		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
339 			if ((slotoff + slotmapped) < amap->am_nslot)
340 				amap_pp_adjref(amap, slotoff + slotmapped,
341 				    (amap->am_nslot - (slotoff + slotmapped)),
342 				    1);
343 			pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
344 			   slotneed - amap->am_nslot);
345 		}
346 #endif
347 		amap->am_nslot = slotneed;
348 		amap_unlock(amap);
349 		/*
350 		 * no need to zero am_anon since that was done at
351 		 * alloc time and we never shrink an allocation.
352 		 */
353 		UVMHIST_LOG(maphist,"<- done (case 2), amap = 0x%x, slotneed=%d",
354 		    amap, slotneed, 0, 0);
355 		return;
356 	}
357 
358 	/*
359 	 * case 3: we need to malloc new arrays and copy all the amap
360 	 * data over from the old arrays to the new ones.
361 	 *
362 	 * XXXCDC: could we take advantage of a kernel realloc()?
363 	 */
364 
365 	amap_unlock(amap);	/* unlock in case we sleep in malloc */
366 #ifdef UVM_AMAP_PPREF
367 	newppref = NULL;
368 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
369 		newppref = malloc(slotneed * sizeof(int), M_UVMAMAP, M_NOWAIT);
370 		if (newppref == NULL) {
371 			/* give up if malloc fails */
372 			free(amap->am_ppref, M_UVMAMAP);
373 			amap->am_ppref = PPREF_NONE;
374 		}
375 	}
376 #endif
377 	newsl = malloc(slotneed * sizeof(int), M_UVMAMAP, M_WAITOK);
378 	newbck = malloc(slotneed * sizeof(int), M_UVMAMAP, M_WAITOK);
379 	newover = malloc(slotneed * sizeof(struct vm_anon *),
380 	    M_UVMAMAP, M_WAITOK);
381 	amap_lock(amap);			/* re-lock! */
382 	KASSERT(amap->am_maxslot < slotneed);
383 
384 	/*
385 	 * now copy everything over to new malloc'd areas...
386 	 */
387 
388 	slotadded = slotneed - amap->am_nslot;
389 
390 	/* do am_slots */
391 	oldsl = amap->am_slots;
392 	memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
393 	amap->am_slots = newsl;
394 
395 	/* do am_anon */
396 	oldover = amap->am_anon;
397 	memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot);
398 	memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) * slotadded);
399 	amap->am_anon = newover;
400 
401 	/* do am_bckptr */
402 	oldbck = amap->am_bckptr;
403 	memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
404 	memset(newbck + amap->am_nslot, 0, sizeof(int) * slotadded); /* XXX: needed? */
405 	amap->am_bckptr = newbck;
406 
407 #ifdef UVM_AMAP_PPREF
408 	/* do ppref */
409 	oldppref = amap->am_ppref;
410 	if (newppref) {
411 		memcpy(newppref, oldppref, sizeof(int) * amap->am_nslot);
412 		memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded);
413 		amap->am_ppref = newppref;
414 		if ((slotoff + slotmapped) < amap->am_nslot)
415 			amap_pp_adjref(amap, slotoff + slotmapped,
416 			    (amap->am_nslot - (slotoff + slotmapped)), 1);
417 		pp_setreflen(newppref, amap->am_nslot, 1, slotadded);
418 	}
419 #endif
420 
421 	/* update master values */
422 	amap->am_nslot = slotneed;
423 	amap->am_maxslot = slotneed;
424 
425 	/* unlock */
426 	amap_unlock(amap);
427 
428 	/* and free */
429 	free(oldsl, M_UVMAMAP);
430 	free(oldbck, M_UVMAMAP);
431 	free(oldover, M_UVMAMAP);
432 #ifdef UVM_AMAP_PPREF
433 	if (oldppref && oldppref != PPREF_NONE)
434 		free(oldppref, M_UVMAMAP);
435 #endif
436 	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
437 	    amap, slotneed, 0, 0);
438 }
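
#if 0	/* illustrative only, deliberately compiled out */
/*
 * editor's sketch (not part of the original source): which of the three
 * amap_extend() cases above applies for some example slot counts, for an
 * amap that currently covers am_nslot = 4 slots out of am_maxslot = 8
 * allocated ones.
 */
static void
amap_extend_cases()
{
	int am_nslot = 4, am_maxslot = 8;
	int slotneed;

	for (slotneed = 3; slotneed <= 10; slotneed++) {
		if (am_nslot >= slotneed)
			printf("%d: case 1 (already covered)\n", slotneed);
		else if (am_maxslot >= slotneed)
			printf("%d: case 2 (use pre-allocated slots)\n",
			    slotneed);
		else
			printf("%d: case 3 (reallocate the arrays)\n",
			    slotneed);
	}
}
#endif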
439 
440 /*
441  * amap_share_protect: change protection of anons in a shared amap
442  *
443  * for shared amaps, given the current data structure layout, it is
444  * not possible for us to directly locate all maps referencing the
445  * shared anon (to change the protection).  in order to protect data
446  * in shared maps we use pmap_page_protect().  [this is useful for IPC
447  * mechanisms like map entry passing that may want to write-protect
448  * all mappings of a shared amap.]  we traverse am_anon or am_slots
449  * depending on the current state of the amap.
450  *
451  * => entry's map and amap must be locked by the caller
452  */
453 void
454 amap_share_protect(entry, prot)
455 	vm_map_entry_t entry;
456 	vm_prot_t prot;
457 {
458 	struct vm_amap *amap = entry->aref.ar_amap;
459 	int slots, lcv, slot, stop;
460 
461 	LOCK_ASSERT(simple_lock_held(&amap->am_l));
462 
463 	AMAP_B2SLOT(slots, (entry->end - entry->start));
464 	stop = entry->aref.ar_pageoff + slots;
465 
466 	if (slots < amap->am_nused) {
467 		/* cheaper to traverse am_anon */
468 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
469 			if (amap->am_anon[lcv] == NULL)
470 				continue;
471 			if (amap->am_anon[lcv]->u.an_page != NULL)
472 				pmap_page_protect(amap->am_anon[lcv]->u.an_page,
473 						  prot);
474 		}
475 		return;
476 	}
477 
478 	/* cheaper to traverse am_slots */
479 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
480 		slot = amap->am_slots[lcv];
481 		if (slot < entry->aref.ar_pageoff || slot >= stop)
482 			continue;
483 		if (amap->am_anon[slot]->u.an_page != NULL)
484 			pmap_page_protect(amap->am_anon[slot]->u.an_page, prot);
485 	}
486 	return;
487 }
488 
489 /*
490  * amap_wipeout: wipe out all anons in an amap; then free the amap!
491  *
492  * => called from amap_unref when the final reference to an amap is
493  *	discarded (i.e. when reference count == 1)
494  * => the amap should be locked (by the caller)
495  */
496 
497 void
498 amap_wipeout(amap)
499 	struct vm_amap *amap;
500 {
501 	int lcv, slot;
502 	struct vm_anon *anon;
503 	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
504 	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);
505 
506 	LOCK_ASSERT(simple_lock_held(&amap->am_l));
507 
508 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
509 		int refs;
510 
511 		slot = amap->am_slots[lcv];
512 		anon = amap->am_anon[slot];
513 
514 		if (anon == NULL || anon->an_ref == 0)
515 			panic("amap_wipeout: corrupt amap");
516 
517 		simple_lock(&anon->an_lock); /* lock anon */
518 
519 		UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
520 		    anon->an_ref, 0, 0);
521 
522 		refs = --anon->an_ref;
523 		simple_unlock(&anon->an_lock);
524 		if (refs == 0) {
525 			/*
526 			 * we had the last reference to a vm_anon. free it.
527 			 */
528 			uvm_anfree(anon);
529 		}
530 	}
531 
532 	/*
533 	 * now we free the amap
534 	 */
535 
536 	amap->am_ref = 0;	/* ... was one */
537 	amap->am_nused = 0;
538 	amap_free(amap);	/* will unlock and free amap */
539 	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
540 }
541 
542 /*
543  * amap_copy: ensure that a map entry's "needs_copy" flag is false
544  *	by copying the amap if necessary.
545  *
546  * => an entry with a null amap pointer will get a new (blank) one.
547  * => the map that the map entry belongs to must be locked by caller.
548  * => the amap currently attached to "entry" (if any) must be unlocked.
549  * => if canchunk is true, then we may clip the entry into a chunk
550  * => "startva" and "endva" are used only if canchunk is true.  they are
551  *     used to limit chunking (e.g. if you have a large space that you
552  *     know you are going to need to allocate amaps for, there is no point
553  *     in allowing that to be chunked)
554  */
555 
556 void
557 amap_copy(map, entry, waitf, canchunk, startva, endva)
558 	vm_map_t map;
559 	vm_map_entry_t entry;
560 	int waitf;
561 	boolean_t canchunk;
562 	vaddr_t startva, endva;
563 {
564 	struct vm_amap *amap, *srcamap;
565 	int slots, lcv;
566 	vaddr_t chunksize;
567 	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
568 	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, waitf=%d)",
569 		    map, entry, waitf, 0);
570 
571 	/*
572 	 * is there an amap to copy?   if not, create one from scratch.
573 	 */
574 
575 	if (entry->aref.ar_amap == NULL) {
576 
577 		/*
578 		 * check to see if we have a large amap that we can
579 		 * chunk.  we align startva/endva to chunk-sized
580 		 * boundaries and then clip to them.
581 		 */
582 
583 		if (canchunk && atop(entry->end - entry->start) >=
584 		    UVM_AMAP_LARGE) {
585 			/* convert slots to bytes */
586 			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
587 			startva = (startva / chunksize) * chunksize;
588 			endva = roundup(endva, chunksize);
589 			UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
590 			    "to 0x%x->0x%x", entry->start, entry->end, startva,
591 			    endva);
592 			UVM_MAP_CLIP_START(map, entry, startva);
593 			/* watch out for endva wrap-around! */
594 			if (endva >= startva)
595 				UVM_MAP_CLIP_END(map, entry, endva);
596 		}
597 
598 		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
599 		    entry->start, entry->end, 0, 0);
600 		entry->aref.ar_pageoff = 0;
601 		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
602 		    waitf);
603 		if (entry->aref.ar_amap != NULL)
604 			entry->etype &= ~UVM_ET_NEEDSCOPY;
605 		return;
606 	}
607 
608 	/*
609 	 * first check and see if we are the only map entry
610 	 * referencing the amap we currently have.  if so, then we can
611 	 * just take it over rather than copying it.  note that we are
612 	 * reading am_ref with the amap unlocked... the value can only
613 	 * be one if we have the only reference to the amap (via our
614  * locked map).  if it is greater than one we fall through to
615 	 * the next case (where we double check the value).
616 	 */
617 
618 	if (entry->aref.ar_amap->am_ref == 1) {
619 		entry->etype &= ~UVM_ET_NEEDSCOPY;
620 		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
621 		    0, 0, 0, 0);
622 		return;
623 	}
624 
625 	/*
626 	 * looks like we need to copy the amap.
627 	 */
628 
629 	UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
630 	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
631 	AMAP_B2SLOT(slots, entry->end - entry->start);
632 	amap = amap_alloc1(slots, 0, waitf);
633 	if (amap == NULL) {
634 		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
635 		return;
636 	}
637 	srcamap = entry->aref.ar_amap;
638 	amap_lock(srcamap);
639 
640 	/*
641 	 * need to double check reference count now that we've got the
642 	 * src amap locked down.  the reference count could have
643 	 * changed while we were in malloc.  if the reference count
644 	 * dropped down to one we take over the old amap rather than
645 	 * copying it.
646 	 */
647 
648 	if (srcamap->am_ref == 1) {		/* take it over? */
649 		entry->etype &= ~UVM_ET_NEEDSCOPY;
650 		amap->am_ref--;		/* drop final reference to map */
651 		amap_free(amap);	/* dispose of new (unused) amap */
652 		amap_unlock(srcamap);
653 		return;
654 	}
655 
656 	/*
657 	 * we must copy it now.
658 	 */
659 
660 	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
661 	for (lcv = 0 ; lcv < slots; lcv++) {
662 		amap->am_anon[lcv] =
663 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
664 		if (amap->am_anon[lcv] == NULL)
665 			continue;
666 		simple_lock(&amap->am_anon[lcv]->an_lock);
667 		amap->am_anon[lcv]->an_ref++;
668 		simple_unlock(&amap->am_anon[lcv]->an_lock);
669 		amap->am_bckptr[lcv] = amap->am_nused;
670 		amap->am_slots[amap->am_nused] = lcv;
671 		amap->am_nused++;
672 	}
673 
674 	/*
675 	 * drop our reference to the old amap (srcamap) and unlock.
676 	 * we know that the reference count on srcamap is greater than
677 	 * one (we checked above), so there is no way we could drop
678 	 * the count to zero.  [and no need to worry about freeing it]
679 	 */
680 
681 	srcamap->am_ref--;
682 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
683 		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
684 #ifdef UVM_AMAP_PPREF
685 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
686 		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
687 		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
688 	}
689 #endif
690 
691 	amap_unlock(srcamap);
692 
693 	/*
694 	 * install new amap.
695 	 */
696 
697 	entry->aref.ar_pageoff = 0;
698 	entry->aref.ar_amap = amap;
699 	entry->etype &= ~UVM_ET_NEEDSCOPY;
700 
701 	/*
702 	 * done!
703 	 */
704 	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
705 }
706 
707 /*
708  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
709  *
710  *	called during fork(2) when the parent process has a wired map
711  *	entry.   in that case we want to avoid write-protecting pages
712  *	in the parent's map (e.g. like what you'd do for a COW page)
713  *	so we resolve the COW here.
714  *
715  * => assume parent's entry was wired, thus all pages are resident.
716  * => assume pages that are loaned out (loan_count) are already mapped
717  *	read-only in all maps, and thus no need for us to worry about them
718  * => assume both parent and child vm_map's are locked
719  * => caller passes child's map/entry in to us
720  * => if we run out of memory we will unlock the amap and sleep _with_ the
721  *	parent and child vm_map's locked(!).    we have to do this since
722  *	we are in the middle of a fork(2) and we can't let the parent
723  *	map change until we are done copying all the map entries.
724  * => XXXCDC: out of memory should cause fork to fail, but there is
725  *	currently no easy way to do this (needs fix)
726  * => page queues must be unlocked (we may lock them)
727  */
728 
729 void
730 amap_cow_now(map, entry)
731 	struct vm_map *map;
732 	struct vm_map_entry *entry;
733 {
734 	struct vm_amap *amap = entry->aref.ar_amap;
735 	int lcv, slot;
736 	struct vm_anon *anon, *nanon;
737 	struct vm_page *pg, *npg;
738 
739 	/*
740 	 * note that if we unlock the amap then we must restart the "lcv"
741 	 * loop at ReStart, because some other process could reorder the
742 	 * anons in the am_anon[] array on us while the lock is dropped.
743 	 */
744 ReStart:
745 	amap_lock(amap);
746 
747 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
748 
749 		/*
750 		 * get the page
751 		 */
752 
753 		slot = amap->am_slots[lcv];
754 		anon = amap->am_anon[slot];
755 		simple_lock(&anon->an_lock);
756 		pg = anon->u.an_page;
757 
758 		/*
759 		 * page must be resident since parent is wired
760 		 */
761 
762 		if (pg == NULL)
763 		    panic("amap_cow_now: non-resident wired page in anon %p",
764 			anon);
765 
766 		/*
767 		 * if the anon ref count is one and the page is not loaned,
768 		 * then we are safe (the child has exclusive access to the
769 		 * page).  if the page is loaned, then it must already be
770 		 * mapped read-only.
771 		 *
772 		 * we only need to get involved when these are not true.
773 		 * [note: if loan_count == 0, then the anon must own the page]
774 		 */
775 
776 		if (anon->an_ref > 1 && pg->loan_count == 0) {
777 
778 			/*
779 			 * if the page is busy then we have to unlock, wait for
780 			 * it and then restart.
781 			 */
782 			if (pg->flags & PG_BUSY) {
783 				pg->flags |= PG_WANTED;
784 				amap_unlock(amap);
785 				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
786 				    "cownow", 0);
787 				goto ReStart;
788 			}
789 
790 			/*
791 			 * ok, time to do a copy-on-write to a new anon
792 			 */
793 			nanon = uvm_analloc();
794 			if (nanon) {
795 				/* nanon is locked! */
796 				npg = uvm_pagealloc(NULL, 0, nanon, 0);
797 			} else
798 				npg = NULL;	/* XXX: quiet gcc warning */
799 
800 			if (nanon == NULL || npg == NULL) {
801 				/* out of memory */
802 				/*
803 				 * XXXCDC: we should cause fork to fail, but
804 				 * we can't ...
805 				 */
806 				if (nanon) {
807 					nanon->an_ref--;
808 					simple_unlock(&nanon->an_lock);
809 					uvm_anfree(nanon);
810 				}
811 				simple_unlock(&anon->an_lock);
812 				amap_unlock(amap);
813 				uvm_wait("cownowpage");
814 				goto ReStart;
815 			}
816 
817 			/*
818 			 * got it... now we can copy the data and replace anon
819 			 * with our new one...
820 			 */
821 			uvm_pagecopy(pg, npg);		/* old -> new */
822 			anon->an_ref--;			/* can't drop to zero */
823 			amap->am_anon[slot] = nanon;	/* replace */
824 
825 			/*
826 			 * drop PG_BUSY on new page ... since we have had its
827 			 * owner locked the whole time it can't be
828 			 * PG_RELEASED | PG_WANTED.
829 			 */
830 			npg->flags &= ~(PG_BUSY|PG_FAKE);
831 			UVM_PAGE_OWN(npg, NULL);
832 			uvm_lock_pageq();
833 			uvm_pageactivate(npg);
834 			uvm_unlock_pageq();
835 			simple_unlock(&nanon->an_lock);
836 		}
837 
838 		simple_unlock(&anon->an_lock);
839 		/*
840 		 * done with this anon, next ...!
841 		 */
842 
843 	}	/* end of 'for' loop */
844 
845 	amap_unlock(amap);
846 }
847 
848 /*
849  * amap_splitref: split a single reference into two separate references
850  *
851  * => called from uvm_map's clip routines
852  * => origref's map should be locked
853  * => origref->ar_amap should be unlocked (we will lock)
854  */
855 void
856 amap_splitref(origref, splitref, offset)
857 	struct vm_aref *origref, *splitref;
858 	vaddr_t offset;
859 {
860 	int leftslots;
861 
862 	AMAP_B2SLOT(leftslots, offset);
863 	if (leftslots == 0)
864 		panic("amap_splitref: split at zero offset");
865 
866 	/*
867 	 * lock the amap
868 	 */
869 	amap_lock(origref->ar_amap);
870 
871 	/*
872 	 * now: the amap is locked and its slot counts can safely be checked.
873 	 */
874 
875 	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
876 		panic("amap_splitref: map size check failed");
877 
878 #ifdef UVM_AMAP_PPREF
879 	/*
880 	 * establish ppref before we add a duplicate reference to the amap
881 	 */
882 	if (origref->ar_amap->am_ppref == NULL)
883 		amap_pp_establish(origref->ar_amap);
884 #endif
885 
886 	splitref->ar_amap = origref->ar_amap;
887 	splitref->ar_amap->am_ref++;		/* not a share reference */
888 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
889 
890 	amap_unlock(origref->ar_amap);
891 }
892 
893 #ifdef UVM_AMAP_PPREF
894 
895 /*
896  * amap_pp_establish: add a ppref array to an amap, if possible
897  *
898  * => amap locked by caller
899  */
900 void
901 amap_pp_establish(amap)
902 	struct vm_amap *amap;
903 {
904 
905 	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
906 	    M_UVMAMAP, M_NOWAIT);
907 
908 	/*
909 	 * if we fail then we just won't use ppref for this amap
910 	 */
911 	if (amap->am_ppref == NULL) {
912 		amap->am_ppref = PPREF_NONE;	/* not using it */
913 		return;
914 	}
915 
916 	/*
917 	 * init ppref
918 	 */
919 	memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot);
920 	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
921 	return;
922 }
923 
924 /*
925  * amap_pp_adjref: adjust reference count to a part of an amap using the
926  * per-page reference count array.
927  *
928  * => map and amap locked by caller
929  * => caller must check that ppref != PPREF_NONE before calling
930  */
931 void
932 amap_pp_adjref(amap, curslot, slotlen, adjval)
933 	struct vm_amap *amap;
934 	int curslot;
935 	vsize_t slotlen;
936 	int adjval;
937 {
938 	int stopslot, *ppref, lcv;
939 	int ref, len;
940 
941 	/*
942 	 * get init values
943 	 */
944 
945 	stopslot = curslot + slotlen;
946 	ppref = amap->am_ppref;
947 
948 	/*
949 	 * first advance to the correct place in the ppref array, fragment
950 	 * if needed.
951 	 */
952 
953 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
954 		pp_getreflen(ppref, lcv, &ref, &len);
955 		if (lcv + len > curslot) {     /* goes past start? */
956 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
957 			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
958 			len = curslot - lcv;   /* new length of entry @ lcv */
959 		}
960 	}
961 
962 	/*
963 	 * now adjust reference counts in range (make sure we don't overshoot)
964 	 */
965 
966 	if (lcv != curslot)
967 		panic("amap_pp_adjref: overshot target");
968 
969 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
970 		pp_getreflen(ppref, lcv, &ref, &len);
971 		if (lcv + len > stopslot) {     /* goes past end? */
972 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
973 			pp_setreflen(ppref, stopslot, ref,
974 			    len - (stopslot - lcv));
975 			len = stopslot - lcv;
976 		}
977 		ref = ref + adjval;    /* ADJUST! */
978 		if (ref < 0)
979 			panic("amap_pp_adjref: negative reference count");
980 		pp_setreflen(ppref, lcv, ref, len);
981 		if (ref == 0)
982 			amap_wiperange(amap, lcv, len);
983 	}
984 
985 }
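
#if 0	/* illustrative only, deliberately compiled out */
/*
 * editor's sketch (not part of the original source): the "advance and
 * fragment" step of amap_pp_adjref() in isolation.  a single chunk
 * covering slots 0-3 with ref count 2 is split at slot 2 and the new
 * right-hand chunk gets one extra reference, which is what
 * amap_pp_adjref(amap, 2, 2, 1) would do to the underlying array.
 */
static void
pp_adjref_example()
{
	int ppref[4];
	int ref, len;

	pp_setreflen(ppref, 0, 2, 4);		/* slots 0-3: ref 2 */

	/* fragment: chunk 0-3 becomes chunks 0-1 and 2-3, both ref 2 */
	pp_getreflen(ppref, 0, &ref, &len);
	pp_setreflen(ppref, 0, ref, 2);
	pp_setreflen(ppref, 2, ref, len - 2);

	/* adjust: bump the ref count of the 2-3 chunk by one */
	pp_getreflen(ppref, 2, &ref, &len);
	pp_setreflen(ppref, 2, ref + 1, len);	/* slots 2-3: ref 3 */
}
#endif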
986 
987 /*
988  * amap_wiperange: wipe out a range of an amap
989  * [different from amap_wipeout because the amap is kept intact]
990  *
991  * => both map and amap must be locked by caller.
992  */
993 void
994 amap_wiperange(amap, slotoff, slots)
995 	struct vm_amap *amap;
996 	int slotoff, slots;
997 {
998 	int byanon, lcv, stop, curslot, ptr;
999 	struct vm_anon *anon;
1000 
1001 	/*
1002 	 * we can either traverse the amap by am_anon or by am_slots depending
1003 	 * on which is cheaper.    decide now.
1004 	 */
1005 
1006 	if (slots < amap->am_nused) {
1007 		byanon = TRUE;
1008 		lcv = slotoff;
1009 		stop = slotoff + slots;
1010 	} else {
1011 		byanon = FALSE;
1012 		lcv = 0;
1013 		stop = amap->am_nused;
1014 	}
1015 
1016 	/*
1017 	 * ok, now do it!
1018 	 */
1019 
1020 	for (; lcv < stop; lcv++) {
1021 		int refs;
1022 
1023 		/*
1024 		 * verify the anon is ok.
1025 		 */
1026 		if (byanon) {
1027 			if (amap->am_anon[lcv] == NULL)
1028 				continue;
1029 			curslot = lcv;
1030 		} else {
1031 			curslot = amap->am_slots[lcv];
1032 			if (curslot < slotoff || curslot >= stop)
1033 				continue;
1034 		}
1035 		anon = amap->am_anon[curslot];
1036 
1037 		/*
1038 		 * remove it from the amap
1039 		 */
1040 		amap->am_anon[curslot] = NULL;
1041 		ptr = amap->am_bckptr[curslot];
1042 		if (ptr != (amap->am_nused - 1)) {
1043 			amap->am_slots[ptr] =
1044 			    amap->am_slots[amap->am_nused - 1];
1045 			amap->am_bckptr[amap->am_slots[ptr]] =
1046 			    ptr;    /* back ptr. */
1047 		}
1048 		amap->am_nused--;
1049 
1050 		/*
1051 		 * drop anon reference count
1052 		 */
1053 		simple_lock(&anon->an_lock);
1054 		refs = --anon->an_ref;
1055 		simple_unlock(&anon->an_lock);
1056 		if (refs == 0) {
1057 			/*
1058 			 * we just eliminated the last reference to an anon.
1059 			 * free it.
1060 			 */
1061 			uvm_anfree(anon);
1062 		}
1063 	}
1064 }
1065 
1066 #endif
1067