1 /*	$NetBSD: uvm_amap.c,v 1.48 2002/11/30 18:28:04 bouyer Exp $	*/
2 
3 /*
4  *
5  * Copyright (c) 1997 Charles D. Cranor and Washington University.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by Charles D. Cranor and
19  *      Washington University.
20  * 4. The name of the author may not be used to endorse or promote products
21  *    derived from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * uvm_amap.c: amap operations
37  */
38 
39 /*
40  * this file contains functions that perform operations on amaps.  see
41  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
42  */
43 
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.48 2002/11/30 18:28:04 bouyer Exp $");
46 
47 #undef UVM_AMAP_INLINE		/* enable/disable amap inlines */
48 
49 #include "opt_uvmhist.h"
50 
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/proc.h>
54 #include <sys/malloc.h>
55 #include <sys/kernel.h>
56 #include <sys/pool.h>
57 
58 #define UVM_AMAP_C		/* ensure disabled inlines are in */
59 #include <uvm/uvm.h>
60 #include <uvm/uvm_swap.h>
61 
62 /*
63  * pool for allocation of vm_amap structures.  note that the pool has
64  * its own simplelock for its protection.  also note that in order to
65  * avoid an endless loop, the amap pool's allocator cannot allocate
66  * memory from an amap (it currently goes through the kernel uobj, so
67  * we are ok).
68  */
69 
70 struct pool uvm_amap_pool;
71 
72 /*
73  * local functions
74  */
75 
76 static struct vm_amap *amap_alloc1 __P((int, int, int));
77 
78 #ifdef UVM_AMAP_PPREF
79 /*
80  * what is ppref?   ppref is an _optional_ amap feature which is used
81  * to keep track of reference counts on a per-page basis.  it is enabled
82  * when UVM_AMAP_PPREF is defined.
83  *
84  * when enabled, an array of ints is allocated for the pprefs.  this
85  * array is allocated only when a partial reference is added to the
86  * map (either by unmapping part of the amap, or gaining a reference
87  * to only a part of an amap).  if the malloc of the array fails
88  * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
89  * that we tried to do ppref's but couldn't alloc the array so just
90  * give up (after all, this is an optional feature!).
91  *
92  * the array is divided into page sized "chunks."   for chunks of length 1,
93  * the chunk reference count plus one is stored in that chunk's slot.
94  * for chunks of length > 1 the first slot contains (the reference count
95  * plus one) * -1.    [the negative value indicates that the length is
96  * greater than one.]   the second slot of the chunk contains the length
97  * of the chunk.   here is an example:
98  *
99  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
100  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
101  *              <----------><-><----><-------><----><-><------->
102  * (x = don't care)
103  *
104  * this allows a single int to contain the ref count for the whole
105  * chunk.    note that the "plus one" part is needed because a reference
106  * count of zero is neither positive nor negative (we need a way to tell
107  * whether we have one zero or a run of them).
108  *
109  * here are some in-line functions to help us.
110  */
111 
112 static __inline void pp_getreflen __P((int *, int, int *, int *));
113 static __inline void pp_setreflen __P((int *, int, int, int));
114 
115 /*
116  * pp_getreflen: get the reference and length for a specific offset
117  *
118  * => ppref's amap must be locked
119  */
120 static __inline void
121 pp_getreflen(ppref, offset, refp, lenp)
122 	int *ppref, offset, *refp, *lenp;
123 {
124 
125 	if (ppref[offset] > 0) {		/* chunk size must be 1 */
126 		*refp = ppref[offset] - 1;	/* don't forget to adjust */
127 		*lenp = 1;
128 	} else {
129 		*refp = (ppref[offset] * -1) - 1;
130 		*lenp = ppref[offset+1];
131 	}
132 }
133 
134 /*
135  * pp_setreflen: set the reference and length for a specific offset
136  *
137  * => ppref's amap must be locked
138  */
139 static __inline void
140 pp_setreflen(ppref, offset, ref, len)
141 	int *ppref, offset, ref, len;
142 {
143 	if (len == 1) {
144 		ppref[offset] = ref + 1;
145 	} else {
146 		ppref[offset] = (ref + 1) * -1;
147 		ppref[offset+1] = len;
148 	}
149 }
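
/*
 * Example (illustrative sketch, kept out of the build with #if 0): how
 * the encoding described above round-trips through pp_setreflen() and
 * pp_getreflen().  The values match the first two chunks of the table
 * in the ppref comment; "pp", "ref" and "len" are scratch variables
 * invented for this sketch.
 */
#if 0
static void
pp_encoding_example(void)
{
	int pp[16], ref, len;

	pp_setreflen(pp, 0, 2, 4);	/* slots 0-3, ref 2: pp[0] = -3, pp[1] = 4 */
	pp_setreflen(pp, 4, 3, 1);	/* slot 4, ref 3:    pp[4] = 4 */

	pp_getreflen(pp, 0, &ref, &len);	/* yields ref == 2, len == 4 */
	pp_getreflen(pp, 4, &ref, &len);	/* yields ref == 3, len == 1 */
}
#endif
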
150 #endif
151 
152 /*
153  * amap_init: called at boot time to init global amap data structures
154  */
155 
156 void
157 amap_init(void)
158 {
159 
160 	/*
161 	 * Initialize the vm_amap pool.
162 	 */
163 
164 	pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
165 	    "amappl", &pool_allocator_nointr);
166 }
167 
168 /*
169  * amap_alloc1: internal function that allocates an amap, but does not
170  *	init the overlay.
171  *
172  * => lock on returned amap is init'd
173  */
174 static inline struct vm_amap *
175 amap_alloc1(slots, padslots, waitf)
176 	int slots, padslots, waitf;
177 {
178 	struct vm_amap *amap;
179 	int totalslots;
180 
181 	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK : 0);
182 	if (amap == NULL)
183 		return(NULL);
184 
185 	totalslots = malloc_roundup((slots + padslots) * sizeof(int)) /
186 	    sizeof(int);
187 	simple_lock_init(&amap->am_l);
188 	amap->am_ref = 1;
189 	amap->am_flags = 0;
190 #ifdef UVM_AMAP_PPREF
191 	amap->am_ppref = NULL;
192 #endif
193 	amap->am_maxslot = totalslots;
194 	amap->am_nslot = slots;
195 	amap->am_nused = 0;
196 
197 	amap->am_slots = malloc(totalslots * sizeof(int), M_UVMAMAP,
198 	    waitf);
199 	if (amap->am_slots == NULL)
200 		goto fail1;
201 
202 	amap->am_bckptr = malloc(totalslots * sizeof(int), M_UVMAMAP, waitf);
203 	if (amap->am_bckptr == NULL)
204 		goto fail2;
205 
206 	amap->am_anon = malloc(totalslots * sizeof(struct vm_anon *),
207 	    M_UVMAMAP, waitf);
208 	if (amap->am_anon == NULL)
209 		goto fail3;
210 
211 	return(amap);
212 
213 fail3:
214 	free(amap->am_bckptr, M_UVMAMAP);
215 fail2:
216 	free(amap->am_slots, M_UVMAMAP);
217 fail1:
218 	pool_put(&uvm_amap_pool, amap);
219 	return (NULL);
220 }
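
/*
 * Sketch (kept out of the build with #if 0) of how the three arrays
 * allocated above relate once slots are in use, mirroring what
 * amap_copy() and amap_wiperange() below maintain: activating "slot"
 * stores the anon, appends the slot to am_slots[] and records its
 * position in am_bckptr[], preserving the invariant
 * amap->am_bckptr[amap->am_slots[i]] == i for 0 <= i < am_nused.
 * "amap", "slot" and "anon" stand for locals of an arbitrary caller.
 */
#if 0
	amap->am_anon[slot] = anon;		/* the anon itself */
	amap->am_bckptr[slot] = amap->am_nused;	/* index of slot in am_slots[] */
	amap->am_slots[amap->am_nused] = slot;	/* list of slots in use */
	amap->am_nused++;
#endif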
221 
222 /*
223  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
224  *
225  * => caller should ensure sz is a multiple of PAGE_SIZE
226  * => reference count to new amap is set to one
227  * => new amap is returned unlocked
228  */
229 
230 struct vm_amap *
231 amap_alloc(sz, padsz, waitf)
232 	vaddr_t sz, padsz;
233 	int waitf;
234 {
235 	struct vm_amap *amap;
236 	int slots, padslots;
237 	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);
238 
239 	AMAP_B2SLOT(slots, sz);
240 	AMAP_B2SLOT(padslots, padsz);
241 
242 	amap = amap_alloc1(slots, padslots, waitf);
243 	if (amap)
244 		memset(amap->am_anon, 0,
245 		    amap->am_maxslot * sizeof(struct vm_anon *));
246 
247 	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
248 	return(amap);
249 }
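
/*
 * Usage sketch (kept out of the build with #if 0): allocate an amap
 * covering four pages with no padding, then release it.  Real callers
 * drop the final reference through amap_unref()/amap_wipeout(); am_ref
 * is cleared by hand here only to satisfy the KASSERT in amap_free().
 */
#if 0
	struct vm_amap *amap;

	amap = amap_alloc(4 * PAGE_SIZE, 0, M_WAITOK);	/* am_ref == 1 */
	if (amap != NULL) {
		/* ... hang it off a map entry's aref and use it ... */
		amap->am_ref = 0;	/* cf. amap_wipeout() below */
		amap_free(amap);	/* amap must be unlocked and empty */
	}
#endif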
250 
251 
252 /*
253  * amap_free: free an amap
254  *
255  * => the amap must be unlocked
256  * => the amap should have a zero reference count and be empty
257  */
258 void
259 amap_free(amap)
260 	struct vm_amap *amap;
261 {
262 	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
263 
264 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
265 	LOCK_ASSERT(!simple_lock_held(&amap->am_l));
266 	free(amap->am_slots, M_UVMAMAP);
267 	free(amap->am_bckptr, M_UVMAMAP);
268 	free(amap->am_anon, M_UVMAMAP);
269 #ifdef UVM_AMAP_PPREF
270 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
271 		free(amap->am_ppref, M_UVMAMAP);
272 #endif
273 	pool_put(&uvm_amap_pool, amap);
274 	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
275 }
276 
277 /*
278  * amap_extend: extend the size of an amap (if needed)
279  *
280  * => called from uvm_map when we want to extend an amap to cover
281  *    a new mapping (rather than allocate a new one)
282  * => amap should be unlocked (we will lock it)
283  * => to safely extend an amap it should have a reference count of
284  *    one (thus it can't be shared)
285  */
286 int
287 amap_extend(entry, addsize, flags)
288 	struct vm_map_entry *entry;
289 	vsize_t addsize;
290 	int flags;
291 {
292 	struct vm_amap *amap = entry->aref.ar_amap;
293 	int slotoff = entry->aref.ar_pageoff;
294 	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
295 	int slotadj, slotspace;
296 #ifdef UVM_AMAP_PPREF
297 	int *newppref, *oldppref;
298 #endif
299 	int i, *newsl, *newbck, *oldsl, *oldbck;
300 	struct vm_anon **newover, **oldover;
301 	int mflag = (flags & AMAP_EXTEND_NOWAIT) ? M_NOWAIT :
302 		        (M_WAITOK | M_CANFAIL);
303 
304 	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);
305 
306 	UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x, flags=0x%x)",
307 	    entry, addsize, flags, 0);
308 
309 	/*
310 	 * first, determine how many slots we need in the amap.  don't
311 	 * forget that ar_pageoff could be non-zero: this means that
312 	 * there are some unused slots before us in the amap.
313 	 */
314 
315 	amap_lock(amap);
316 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
317 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
318 	if (flags & AMAP_EXTEND_FORWARDS) {
319 		slotneed = slotoff + slotmapped + slotadd;
320 		slotadj = 0;
321 		slotspace = 0;
322 	}
323 	else {
324 		slotneed = slotadd + slotmapped;
325 		slotadj = slotadd - slotoff;
326 		slotspace = amap->am_maxslot - slotmapped;
327 	}
328 
329 	/*
330 	 * case 1: we already have enough slots in the map and thus
331 	 * only need to bump the reference counts on the slots we are
332 	 * adding.
333 	 */
334 
335 	if (flags & AMAP_EXTEND_FORWARDS) {
336 		if (amap->am_nslot >= slotneed) {
337 #ifdef UVM_AMAP_PPREF
338 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
339 				amap_pp_adjref(amap, slotoff + slotmapped,
340 				    slotadd, 1);
341 			}
342 #endif
343 			amap_unlock(amap);
344 			UVMHIST_LOG(maphist,
345 			    "<- done (case 1f), amap = 0x%x, slotneed=%d",
346 			    amap, slotneed, 0, 0);
347 			return 0;
348 		}
349 	} else {
350 		if (slotadj <= 0) {
351 			slotoff -= slotadd;
352 			entry->aref.ar_pageoff = slotoff;
353 #ifdef UVM_AMAP_PPREF
354 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
355 				amap_pp_adjref(amap, slotoff, slotadd, 1);
356 			}
357 #endif
358 			amap_unlock(amap);
359 			UVMHIST_LOG(maphist,
360 			    "<- done (case 1b), amap = 0x%x, slotneed=%d",
361 			    amap, slotneed, 0, 0);
362 			return 0;
363 		}
364 	}
365 
366 	/*
367 	 * case 2: we pre-allocated slots for use and we just need to
368  * bump nslot up to account for these slots.
369 	 */
370 
371 	if (amap->am_maxslot >= slotneed) {
372 		if (flags & AMAP_EXTEND_FORWARDS) {
373 #ifdef UVM_AMAP_PPREF
374 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
375 				if ((slotoff + slotmapped) < amap->am_nslot)
376 					amap_pp_adjref(amap,
377 					    slotoff + slotmapped,
378 					    (amap->am_nslot -
379 					    (slotoff + slotmapped)), 1);
380 				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
381 				    slotneed - amap->am_nslot);
382 			}
383 #endif
384 			amap->am_nslot = slotneed;
385 			amap_unlock(amap);
386 
387 			/*
388 			 * no need to zero am_anon since that was done at
389 			 * alloc time and we never shrink an allocation.
390 			 */
391 
392 			UVMHIST_LOG(maphist,"<- done (case 2f), amap = 0x%x, "
393 			    "slotneed=%d", amap, slotneed, 0, 0);
394 			return 0;
395 		} else {
396 #ifdef UVM_AMAP_PPREF
397 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
398 				/*
399 				 * Slide up the ref counts on the pages that
400 				 * are actually in use.
401 				 */
402 				memmove(amap->am_ppref + slotspace,
403 				    amap->am_ppref + slotoff,
404 				    slotmapped * sizeof(int));
405 				/*
406 				 * Mark the (adjusted) gap at the front as
407 				 * referenced/not referenced.
408 				 */
409 				pp_setreflen(amap->am_ppref,
410 				    0, 0, slotspace - slotadd);
411 				pp_setreflen(amap->am_ppref,
412 				    slotspace - slotadd, 1, slotadd);
413 			}
414 #endif
415 
416 			/*
417 			 * Slide the anon pointers up and clear out
418 			 * the space we just made.
419 			 */
420 			memmove(amap->am_anon + slotspace,
421 			    amap->am_anon + slotoff,
422 			    slotmapped * sizeof(struct vm_anon*));
423 			memset(amap->am_anon + slotoff, 0,
424 			    (slotspace - slotoff) * sizeof(struct vm_anon *));
425 
426 			/*
427 			 * Slide the backpointers up, but don't bother
428 			 * wiping out the old slots.
429 			 */
430 			memmove(amap->am_bckptr + slotspace,
431 			    amap->am_bckptr + slotoff,
432 			    slotmapped * sizeof(int));
433 
434 			/*
435 			 * Adjust all the useful active slot numbers.
436 			 */
437 			for (i = 0; i < amap->am_nused; i++)
438 				amap->am_slots[i] += (slotspace - slotoff);
439 
440 			/*
441 			 * We just filled all the empty space in the
442 			 * front of the amap by activating a few new
443 			 * slots.
444 			 */
445 			amap->am_nslot = amap->am_maxslot;
446 			entry->aref.ar_pageoff = slotspace - slotadd;
447 			amap_unlock(amap);
448 
449 			UVMHIST_LOG(maphist,"<- done (case 2b), amap = 0x%x, "
450 			    "slotneed=%d", amap, slotneed, 0, 0);
451 			return 0;
452 		}
453 	}
454 
455 	/*
456 	 * case 3: we need to malloc a new amap and copy all the amap
457 	 * data over from old amap to the new one.
458 	 *
459 	 * XXXCDC: could we take advantage of a kernel realloc()?
460 	 */
461 
462 	amap_unlock(amap);	/* unlock in case we sleep in malloc */
463 	slotalloc = malloc_roundup(slotneed * sizeof(int)) / sizeof(int);
464 #ifdef UVM_AMAP_PPREF
465 	newppref = NULL;
466 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
467 		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
468 #endif
469 	newsl = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
470 	newbck = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
471 	newover = malloc(slotalloc * sizeof(struct vm_anon *), M_UVMAMAP,
472 		    mflag);
473 	if (newsl == NULL || newbck == NULL || newover == NULL) {
474 #ifdef UVM_AMAP_PPREF
475 		if (newppref != NULL) {
476 			free(newppref, M_UVMAMAP);
477 		}
478 #endif
479 		if (newsl != NULL) {
480 			free(newsl, M_UVMAMAP);
481 		}
482 		if (newbck != NULL) {
483 			free(newbck, M_UVMAMAP);
484 		}
485 		if (newover != NULL) {
486 			free(newover, M_UVMAMAP);
487 		}
488 		return ENOMEM;
489 	}
490 	amap_lock(amap);
491 	KASSERT(amap->am_maxslot < slotneed);
492 
493 	/*
494 	 * now copy everything over to new malloc'd areas...
495 	 */
496 
497 	slotadded = slotalloc - amap->am_nslot;
498 	if (!(flags & AMAP_EXTEND_FORWARDS))
499 		slotspace = slotalloc - slotmapped;
500 
501 	/* do am_slots */
502 	oldsl = amap->am_slots;
503 	if (flags & AMAP_EXTEND_FORWARDS)
504 		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
505 	else
506 		for (i = 0; i < amap->am_nused; i++)
507 			newsl[i] = oldsl[i] + slotspace - slotoff;
508 	amap->am_slots = newsl;
509 
510 	/* do am_anon */
511 	oldover = amap->am_anon;
512 	if (flags & AMAP_EXTEND_FORWARDS) {
513 		memcpy(newover, oldover,
514 		    sizeof(struct vm_anon *) * amap->am_nslot);
515 		memset(newover + amap->am_nslot, 0,
516 		    sizeof(struct vm_anon *) * slotadded);
517 	} else {
518 		memcpy(newover + slotspace, oldover + slotoff,
519 		    sizeof(struct vm_anon *) * slotmapped);
520 		memset(newover, 0,
521 		    sizeof(struct vm_anon *) * slotspace);
522 	}
523 	amap->am_anon = newover;
524 
525 	/* do am_bckptr */
526 	oldbck = amap->am_bckptr;
527 	if (flags & AMAP_EXTEND_FORWARDS)
528 		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
529 	else
530 		memcpy(newbck + slotspace, oldbck + slotoff,
531 		    sizeof(int) * slotmapped);
532 	amap->am_bckptr = newbck;
533 
534 #ifdef UVM_AMAP_PPREF
535 	/* do ppref */
536 	oldppref = amap->am_ppref;
537 	if (newppref) {
538 		if (flags & AMAP_EXTEND_FORWARDS) {
539 			memcpy(newppref, oldppref,
540 			    sizeof(int) * amap->am_nslot);
541 			memset(newppref + amap->am_nslot, 0,
542 			    sizeof(int) * slotadded);
543 		} else {
544 			memcpy(newppref + slotspace, oldppref + slotoff,
545 			    sizeof(int) * slotmapped);
546 		}
547 		amap->am_ppref = newppref;
548 		if ((flags & AMAP_EXTEND_FORWARDS) &&
549 		    (slotoff + slotmapped) < amap->am_nslot)
550 			amap_pp_adjref(amap, slotoff + slotmapped,
551 			    (amap->am_nslot - (slotoff + slotmapped)), 1);
552 		if (flags & AMAP_EXTEND_FORWARDS)
553 			pp_setreflen(newppref, amap->am_nslot, 1,
554 			    slotneed - amap->am_nslot);
555 		else {
556 			pp_setreflen(newppref, 0, 0,
557 			    slotalloc - slotneed);
558 			pp_setreflen(newppref, slotalloc - slotneed, 1,
559 			    slotneed - slotmapped);
560 		}
561 	} else {
562 		if (amap->am_ppref)
563 			amap->am_ppref = PPREF_NONE;
564 	}
565 #endif
566 
567 	/* update master values */
568 	if (flags & AMAP_EXTEND_FORWARDS)
569 		amap->am_nslot = slotneed;
570 	else {
571 		entry->aref.ar_pageoff = slotspace - slotadd;
572 		amap->am_nslot = slotalloc;
573 	}
574 	amap->am_maxslot = slotalloc;
575 
576 	amap_unlock(amap);
577 	free(oldsl, M_UVMAMAP);
578 	free(oldbck, M_UVMAMAP);
579 	free(oldover, M_UVMAMAP);
580 #ifdef UVM_AMAP_PPREF
581 	if (oldppref && oldppref != PPREF_NONE)
582 		free(oldppref, M_UVMAMAP);
583 #endif
584 	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
585 	    amap, slotneed, 0, 0);
586 	return 0;
587 }
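
/*
 * Worked example of the slot arithmetic above (forward extension): for
 * an entry that currently maps 2 pages at ar_pageoff 1 and is being
 * grown by 3 pages,
 *
 *	slotoff    = 1
 *	slotmapped = 2			(entry->end - entry->start)
 *	slotadd    = 3			(addsize)
 *	slotneed   = 1 + 2 + 3 = 6
 *
 * so if am_nslot >= 6 only the ppref counts are adjusted (case 1f), if
 * only am_maxslot >= 6 the pre-allocated padding is activated by bumping
 * am_nslot (case 2f), and otherwise the arrays are reallocated (case 3).
 */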
588 
589 /*
590  * amap_share_protect: change protection of anons in a shared amap
591  *
592  * for shared amaps, given the current data structure layout, it is
593  * not possible for us to directly locate all maps referencing the
594  * shared anon (to change the protection).  in order to protect data
595  * in shared maps we use pmap_page_protect().  [this is useful for IPC
596  * mechanisms like map entry passing that may want to write-protect
597  * all mappings of a shared amap.]  we traverse am_anon or am_slots
598  * depending on the current state of the amap.
599  *
600  * => entry's map and amap must be locked by the caller
601  */
602 void
603 amap_share_protect(entry, prot)
604 	struct vm_map_entry *entry;
605 	vm_prot_t prot;
606 {
607 	struct vm_amap *amap = entry->aref.ar_amap;
608 	int slots, lcv, slot, stop;
609 
610 	LOCK_ASSERT(simple_lock_held(&amap->am_l));
611 
612 	AMAP_B2SLOT(slots, (entry->end - entry->start));
613 	stop = entry->aref.ar_pageoff + slots;
614 
615 	if (slots < amap->am_nused) {
616 		/* cheaper to traverse am_anon */
617 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
618 			if (amap->am_anon[lcv] == NULL)
619 				continue;
620 			if (amap->am_anon[lcv]->u.an_page != NULL)
621 				pmap_page_protect(amap->am_anon[lcv]->u.an_page,
622 						  prot);
623 		}
624 		return;
625 	}
626 
627 	/* cheaper to traverse am_slots */
628 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
629 		slot = amap->am_slots[lcv];
630 		if (slot < entry->aref.ar_pageoff || slot >= stop)
631 			continue;
632 		if (amap->am_anon[slot]->u.an_page != NULL)
633 			pmap_page_protect(amap->am_anon[slot]->u.an_page, prot);
634 	}
635 }
636 
637 /*
638  * amap_wipeout: wipeout all anon's in an amap; then free the amap!
639  *
640  * => called from amap_unref when the final reference to an amap is
641  *	discarded (i.e. when reference count == 1)
642  * => the amap should be locked (by the caller)
643  */
644 
645 void
646 amap_wipeout(amap)
647 	struct vm_amap *amap;
648 {
649 	int lcv, slot;
650 	struct vm_anon *anon;
651 	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
652 	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);
653 
654 	amap_unlock(amap);
655 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
656 		int refs;
657 
658 		slot = amap->am_slots[lcv];
659 		anon = amap->am_anon[slot];
660 
661 		if (anon == NULL || anon->an_ref == 0)
662 			panic("amap_wipeout: corrupt amap");
663 
664 		simple_lock(&anon->an_lock);
665 		UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
666 		    anon->an_ref, 0, 0);
667 		refs = --anon->an_ref;
668 		simple_unlock(&anon->an_lock);
669 		if (refs == 0) {
670 
671 			/*
672 			 * we had the last reference to a vm_anon. free it.
673 			 */
674 
675 			uvm_anfree(anon);
676 		}
677 
678 		/*
679 		 * XXX
680 		 * releasing the swap space held by N anons is an O(N^2)
681 		 * operation because of the implementation of extents.
682 		 * if there are many anons, tearing down an exiting process'
683 		 * address space can take many seconds, which causes very
684 		 * annoying pauses.  we yield here to give other processes
685 		 * a chance to run.  this should be removed once the performance
686 		 * of swap space management is improved.
687 		 */
688 
689 		if (curproc->p_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
690 			preempt(NULL);
691 	}
692 
693 	/*
694 	 * now we free the map
695 	 */
696 
697 	amap->am_ref = 0;	/* ... was one */
698 	amap->am_nused = 0;
699 	amap_free(amap);	/* will unlock and free amap */
700 	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
701 }
702 
703 /*
704  * amap_copy: ensure that a map entry's "needs_copy" flag is false
705  *	by copying the amap if necessary.
706  *
707  * => an entry with a null amap pointer will get a new (blank) one.
708  * => the map that the map entry belongs to must be locked by caller.
709  * => the amap currently attached to "entry" (if any) must be unlocked.
710  * => if canchunk is true, then we may clip the entry into a chunk
711  * => "startva" and "endva" are used only if canchunk is true.  they are
712  *     used to limit chunking (e.g. if you have a large space that you
713  *     know you are going to need to allocate amaps for, there is no point
714  *     in allowing that to be chunked)
715  */
716 
717 void
718 amap_copy(map, entry, waitf, canchunk, startva, endva)
719 	struct vm_map *map;
720 	struct vm_map_entry *entry;
721 	int waitf;
722 	boolean_t canchunk;
723 	vaddr_t startva, endva;
724 {
725 	struct vm_amap *amap, *srcamap;
726 	int slots, lcv;
727 	vaddr_t chunksize;
728 	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
729 	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, waitf=%d)",
730 		    map, entry, waitf, 0);
731 
732 	/*
733 	 * is there a map to copy?   if not, create one from scratch.
734 	 */
735 
736 	if (entry->aref.ar_amap == NULL) {
737 
738 		/*
739 		 * check to see if we have a large amap that we can
740 		 * chunk.  we align startva/endva to chunk-sized
741 		 * boundaries and then clip to them.
742 		 */
743 
744 		if (canchunk && atop(entry->end - entry->start) >=
745 		    UVM_AMAP_LARGE) {
746 			/* convert slots to bytes */
747 			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
748 			startva = (startva / chunksize) * chunksize;
749 			endva = roundup(endva, chunksize);
750 			UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
751 			    "to 0x%x->0x%x", entry->start, entry->end, startva,
752 			    endva);
753 			UVM_MAP_CLIP_START(map, entry, startva);
754 			/* watch out for endva wrap-around! */
755 			if (endva >= startva)
756 				UVM_MAP_CLIP_END(map, entry, endva);
757 		}
758 
759 		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
760 		entry->start, entry->end, 0, 0);
761 		entry->aref.ar_pageoff = 0;
762 		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
763 		    waitf);
764 		if (entry->aref.ar_amap != NULL)
765 			entry->etype &= ~UVM_ET_NEEDSCOPY;
766 		return;
767 	}
768 
769 	/*
770 	 * first check and see if we are the only map entry
771 	 * referencing the amap we currently have.  if so, then we can
772 	 * just take it over rather than copying it.  note that we are
773 	 * reading am_ref with the amap unlocked... the value can only
774 	 * be one if we have the only reference to the amap (via our
775  * locked map).  if it is greater than one we fall through to
776 	 * the next case (where we double check the value).
777 	 */
778 
779 	if (entry->aref.ar_amap->am_ref == 1) {
780 		entry->etype &= ~UVM_ET_NEEDSCOPY;
781 		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
782 		    0, 0, 0, 0);
783 		return;
784 	}
785 
786 	/*
787 	 * looks like we need to copy the map.
788 	 */
789 
790 	UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
791 	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
792 	AMAP_B2SLOT(slots, entry->end - entry->start);
793 	amap = amap_alloc1(slots, 0, waitf);
794 	if (amap == NULL) {
795 		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
796 		return;
797 	}
798 	srcamap = entry->aref.ar_amap;
799 	amap_lock(srcamap);
800 
801 	/*
802 	 * need to double check reference count now that we've got the
803 	 * src amap locked down.  the reference count could have
804 	 * changed while we were in malloc.  if the reference count
805 	 * dropped down to one we take over the old map rather than
806 	 * copying the amap.
807 	 */
808 
809 	if (srcamap->am_ref == 1) {		/* take it over? */
810 		entry->etype &= ~UVM_ET_NEEDSCOPY;
811 		amap->am_ref--;		/* drop final reference to map */
812 		amap_unlock(amap);
813 		amap_free(amap);	/* dispose of new (unused) amap */
814 		amap_unlock(srcamap);
815 		return;
816 	}
817 
818 	/*
819 	 * we must copy it now.
820 	 */
821 
822 	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
823 	for (lcv = 0 ; lcv < slots; lcv++) {
824 		amap->am_anon[lcv] =
825 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
826 		if (amap->am_anon[lcv] == NULL)
827 			continue;
828 		simple_lock(&amap->am_anon[lcv]->an_lock);
829 		amap->am_anon[lcv]->an_ref++;
830 		simple_unlock(&amap->am_anon[lcv]->an_lock);
831 		amap->am_bckptr[lcv] = amap->am_nused;
832 		amap->am_slots[amap->am_nused] = lcv;
833 		amap->am_nused++;
834 	}
835 	memset(&amap->am_anon[lcv], 0,
836 	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
837 
838 	/*
839 	 * drop our reference to the old amap (srcamap) and unlock.
840 	 * we know that the reference count on srcamap is greater than
841 	 * one (we checked above), so there is no way we could drop
842 	 * the count to zero.  [and no need to worry about freeing it]
843 	 */
844 
845 	srcamap->am_ref--;
846 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
847 		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
848 #ifdef UVM_AMAP_PPREF
849 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
850 		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
851 		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
852 	}
853 #endif
854 
855 	amap_unlock(srcamap);
856 
857 	/*
858 	 * install new amap.
859 	 */
860 
861 	entry->aref.ar_pageoff = 0;
862 	entry->aref.ar_amap = amap;
863 	entry->etype &= ~UVM_ET_NEEDSCOPY;
864 	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
865 }
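
/*
 * Worked example of the chunking math above, assuming UVM_AMAP_CHUNK is
 * 16 slots and PAGE_SHIFT is 12 (both are set in the headers and may
 * differ): chunksize = 16 << 12 = 0x10000, so startva = 0x12345000 and
 * endva = 0x12356000 are clipped to the chunk boundaries 0x12340000 and
 * 0x12360000 before the new amap is allocated.
 */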
866 
867 /*
868  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
869  *
870  *	called during fork(2) when the parent process has a wired map
871  *	entry.   in that case we want to avoid write-protecting pages
872  *	in the parent's map (e.g. like what you'd do for a COW page)
873  *	so we resolve the COW here.
874  *
875  * => assume parent's entry was wired, thus all pages are resident.
876  * => assume pages that are loaned out (loan_count) are already mapped
877  *	read-only in all maps, and thus no need for us to worry about them
878  * => assume both parent and child vm_map's are locked
879  * => caller passes child's map/entry in to us
880  * => if we run out of memory we will unlock the amap and sleep _with_ the
881  *	parent and child vm_map's locked(!).    we have to do this since
882  *	we are in the middle of a fork(2) and we can't let the parent
883  *	map change until we are done copying all the map entries.
884  * => XXXCDC: out of memory should cause fork to fail, but there is
885  *	currently no easy way to do this (needs fix)
886  * => page queues must be unlocked (we may lock them)
887  */
888 
889 void
890 amap_cow_now(map, entry)
891 	struct vm_map *map;
892 	struct vm_map_entry *entry;
893 {
894 	struct vm_amap *amap = entry->aref.ar_amap;
895 	int lcv, slot;
896 	struct vm_anon *anon, *nanon;
897 	struct vm_page *pg, *npg;
898 
899 	/*
900 	 * note that if we unlock the amap then we must ReStart the "lcv" for
901  * loop because some other process could reorder the anons in the
902 	 * am_anon[] array on us while the lock is dropped.
903 	 */
904 
905 ReStart:
906 	amap_lock(amap);
907 
908 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
909 
910 		/*
911 		 * get the page
912 		 */
913 
914 		slot = amap->am_slots[lcv];
915 		anon = amap->am_anon[slot];
916 		simple_lock(&anon->an_lock);
917 		pg = anon->u.an_page;
918 
919 		/*
920 		 * page must be resident since parent is wired
921 		 */
922 
923 		if (pg == NULL)
924 			panic("amap_cow_now: non-resident wired page in anon %p",
925 			    anon);
926 
927 		/*
928 		 * if the anon ref count is one and the page is not loaned,
929 		 * then we are safe (the child has exclusive access to the
930 		 * page).  if the page is loaned, then it must already be
931 		 * mapped read-only.
932 		 *
933 		 * we only need to get involved when these are not true.
934 		 * [note: if loan_count == 0, then the anon must own the page]
935 		 */
936 
937 		if (anon->an_ref > 1 && pg->loan_count == 0) {
938 
939 			/*
940 			 * if the page is busy then we have to unlock, wait for
941 			 * it and then restart.
942 			 */
943 			if (pg->flags & PG_BUSY) {
944 				pg->flags |= PG_WANTED;
945 				amap_unlock(amap);
946 				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
947 				    "cownow", 0);
948 				goto ReStart;
949 			}
950 
951 			/*
952 			 * ok, time to do a copy-on-write to a new anon
953 			 */
954 			nanon = uvm_analloc();
955 			if (nanon) {
956 				/* nanon is locked! */
957 				npg = uvm_pagealloc(NULL, 0, nanon, 0);
958 			} else
959 				npg = NULL;	/* XXX: quiet gcc warning */
960 
961 			if (nanon == NULL || npg == NULL) {
962 				/* out of memory */
963 				/*
964 				 * XXXCDC: we should cause fork to fail, but
965 				 * we can't ...
966 				 */
967 				if (nanon) {
968 					nanon->an_ref--;
969 					simple_unlock(&nanon->an_lock);
970 					uvm_anfree(nanon);
971 				}
972 				simple_unlock(&anon->an_lock);
973 				amap_unlock(amap);
974 				uvm_wait("cownowpage");
975 				goto ReStart;
976 			}
977 
978 			/*
979 			 * got it... now we can copy the data and replace anon
980 			 * with our new one...
981 			 */
982 
983 			uvm_pagecopy(pg, npg);		/* old -> new */
984 			anon->an_ref--;			/* can't drop to zero */
985 			amap->am_anon[slot] = nanon;	/* replace */
986 
987 			/*
988 			 * drop PG_BUSY on new page ... since we have had its
989 			 * owner locked the whole time, it can't be
990 			 * PG_RELEASED | PG_WANTED.
991 			 */
992 
993 			uvm_lock_pageq();
994 			uvm_pageactivate(npg);
995 			uvm_unlock_pageq();
996 			npg->flags &= ~(PG_BUSY|PG_FAKE);
997 			UVM_PAGE_OWN(npg, NULL);
998 			simple_unlock(&nanon->an_lock);
999 		}
1000 		simple_unlock(&anon->an_lock);
1001 	}
1002 	amap_unlock(amap);
1003 }
1004 
1005 /*
1006  * amap_splitref: split a single reference into two separate references
1007  *
1008  * => called from uvm_map's clip routines
1009  * => origref's map should be locked
1010  * => origref->ar_amap should be unlocked (we will lock)
1011  */
1012 void
1013 amap_splitref(origref, splitref, offset)
1014 	struct vm_aref *origref, *splitref;
1015 	vaddr_t offset;
1016 {
1017 	int leftslots;
1018 
1019 	AMAP_B2SLOT(leftslots, offset);
1020 	if (leftslots == 0)
1021 		panic("amap_splitref: split at zero offset");
1022 
1023 	amap_lock(origref->ar_amap);
1024 
1025 	/*
1026 	 * now: amap is locked and we have a valid am_anon array.
1027 	 */
1028 
1029 	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
1030 		panic("amap_splitref: map size check failed");
1031 
1032 #ifdef UVM_AMAP_PPREF
1033 	/*
1034 	 * establish ppref before we add a duplicate reference to the amap
1035 	 */
1036 	if (origref->ar_amap->am_ppref == NULL)
1037 		amap_pp_establish(origref->ar_amap);
1038 #endif
1039 
1040 	splitref->ar_amap = origref->ar_amap;
1041 	splitref->ar_amap->am_ref++;		/* not a share reference */
1042 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
1043 
1044 	amap_unlock(origref->ar_amap);
1045 }
1046 
1047 #ifdef UVM_AMAP_PPREF
1048 
1049 /*
1050  * amap_pp_establish: add a ppref array to an amap, if possible
1051  *
1052  * => amap locked by caller
1053  */
1054 void
1055 amap_pp_establish(amap)
1056 	struct vm_amap *amap;
1057 {
1058 	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
1059 	    M_UVMAMAP, M_NOWAIT);
1060 
1061 	/*
1062 	 * if we fail then we just won't use ppref for this amap
1063 	 */
1064 
1065 	if (amap->am_ppref == NULL) {
1066 		amap->am_ppref = PPREF_NONE;	/* not using it */
1067 		return;
1068 	}
1069 	memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot);
1070 	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
1071 	return;
1072 }
1073 
1074 /*
1075  * amap_pp_adjref: adjust reference count to a part of an amap using the
1076  * per-page reference count array.
1077  *
1078  * => map and amap locked by caller
1079  * => caller must check that ppref != PPREF_NONE before calling
1080  */
1081 void
1082 amap_pp_adjref(amap, curslot, slotlen, adjval)
1083 	struct vm_amap *amap;
1084 	int curslot;
1085 	vsize_t slotlen;
1086 	int adjval;
1087 {
1088 	int stopslot, *ppref, lcv, prevlcv;
1089 	int ref, len, prevref, prevlen;
1090 
1091 	stopslot = curslot + slotlen;
1092 	ppref = amap->am_ppref;
1093 	prevlcv = 0;
1094 
1095 	/*
1096 	 * first advance to the correct place in the ppref array,
1097 	 * fragment if needed.
1098 	 */
1099 
1100 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
1101 		pp_getreflen(ppref, lcv, &ref, &len);
1102 		if (lcv + len > curslot) {     /* goes past start? */
1103 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
1104 			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
1105 			len = curslot - lcv;   /* new length of entry @ lcv */
1106 		}
1107 		prevlcv = lcv;
1108 	}
1109 	if (lcv != 0)
1110 		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
1111 	else {
1112 		/* Ensure that the "prevref == ref" test below always
1113 		 * fails, since we're starting from the beginning of
1114 		 * the ppref array; that is, there is no previous
1115 		 * chunk.
1116 		 */
1117 		prevref = -1;
1118 		prevlen = 0;
1119 	}
1120 
1121 	/*
1122 	 * now adjust reference counts in range.  merge the first
1123 	 * changed entry with the last unchanged entry if possible.
1124 	 */
1125 
1126 	if (lcv != curslot)
1127 		panic("amap_pp_adjref: overshot target");
1128 
1129 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
1130 		pp_getreflen(ppref, lcv, &ref, &len);
1131 		if (lcv + len > stopslot) {     /* goes past end? */
1132 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
1133 			pp_setreflen(ppref, stopslot, ref,
1134 			    len - (stopslot - lcv));
1135 			len = stopslot - lcv;
1136 		}
1137 		ref += adjval;
1138 		if (ref < 0)
1139 			panic("amap_pp_adjref: negative reference count");
1140 		if (lcv == prevlcv + prevlen && ref == prevref) {
1141 			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
1142 		} else {
1143 			pp_setreflen(ppref, lcv, ref, len);
1144 		}
1145 		if (ref == 0)
1146 			amap_wiperange(amap, lcv, len);
1147 	}
1148 
1149 }
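
/*
 * Worked example of the fragmentation above, starting from the table in
 * the ppref comment near the top of this file:
 *
 *	actual REFS:  2  2  2  2  3  1  1 ...
 *	      ppref: -3  4  x  x  4 -2  2 ...
 *
 * amap_pp_adjref(amap, 1, 2, 1) adds a reference to slots 1-2 only, so
 * the leading four-slot chunk is split into three: slot 0 (ref 2),
 * slots 1-2 (ref 3) and slot 3 (ref 2), giving
 *
 *	actual REFS:  2  3  3  2  3  1  1 ...
 *	      ppref:  3 -4  2  3  4 -2  2 ...
 */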
1150 
1151 /*
1152  * amap_wiperange: wipe out a range of an amap
1153  * [different from amap_wipeout because the amap is kept intact]
1154  *
1155  * => both map and amap must be locked by caller.
1156  */
1157 void
1158 amap_wiperange(amap, slotoff, slots)
1159 	struct vm_amap *amap;
1160 	int slotoff, slots;
1161 {
1162 	int byanon, lcv, stop, curslot, ptr, slotend;
1163 	struct vm_anon *anon;
1164 
1165 	/*
1166 	 * we can either traverse the amap by am_anon or by am_slots depending
1167 	 * on which is cheaper.    decide now.
1168 	 */
1169 
1170 	if (slots < amap->am_nused) {
1171 		byanon = TRUE;
1172 		lcv = slotoff;
1173 		stop = slotoff + slots;
1174 		slotend = 0;
1175 	} else {
1176 		byanon = FALSE;
1177 		lcv = 0;
1178 		stop = amap->am_nused;
1179 		slotend = slotoff + slots;
1180 	}
1181 
1182 	while (lcv < stop) {
1183 		int refs;
1184 
1185 		if (byanon) {
1186 			curslot = lcv++;	/* lcv advances here */
1187 			if (amap->am_anon[curslot] == NULL)
1188 				continue;
1189 		} else {
1190 			curslot = amap->am_slots[lcv];
1191 			if (curslot < slotoff || curslot >= slotend) {
1192 				lcv++;		/* lcv advances here */
1193 				continue;
1194 			}
1195 			stop--;	/* drop stop, since anon will be removed */
1196 		}
1197 		anon = amap->am_anon[curslot];
1198 
1199 		/*
1200 		 * remove it from the amap
1201 		 */
1202 
1203 		amap->am_anon[curslot] = NULL;
1204 		ptr = amap->am_bckptr[curslot];
1205 		if (ptr != (amap->am_nused - 1)) {
1206 			amap->am_slots[ptr] =
1207 			    amap->am_slots[amap->am_nused - 1];
1208 			amap->am_bckptr[amap->am_slots[ptr]] =
1209 			    ptr;    /* back ptr. */
1210 		}
1211 		amap->am_nused--;
1212 
1213 		/*
1214 		 * drop anon reference count
1215 		 */
1216 
1217 		simple_lock(&anon->an_lock);
1218 		refs = --anon->an_ref;
1219 		simple_unlock(&anon->an_lock);
1220 		if (refs == 0) {
1221 
1222 			/*
1223 			 * we just eliminated the last reference to an anon.
1224 			 * free it.
1225 			 */
1226 
1227 			uvm_anfree(anon);
1228 		}
1229 	}
1230 }
1231 
1232 #endif
1233