xref: /openbsd/sys/uvm/uvm_amap.c (revision 91f110e0)
1 /*	$OpenBSD: uvm_amap.c,v 1.50 2013/05/30 16:39:26 tedu Exp $	*/
2 /*	$NetBSD: uvm_amap.c,v 1.27 2000/11/25 06:27:59 chs Exp $	*/
3 
4 /*
5  *
6  * Copyright (c) 1997 Charles D. Cranor and Washington University.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed by Charles D. Cranor and
20  *      Washington University.
21  * 4. The name of the author may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /*
37  * uvm_amap.c: amap operations
38  */
39 
40 /*
41  * this file contains functions that perform operations on amaps.  see
42  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/malloc.h>
49 #include <sys/kernel.h>
50 #include <sys/pool.h>
51 
52 #include <uvm/uvm.h>
53 #include <uvm/uvm_swap.h>
54 
55 /*
56  * pool for allocation of vm_amap structures.  note that in order to
57  * avoid an endless loop, the amap pool's allocator cannot allocate
58  * memory from an amap (it currently goes through the kernel uobj, so
59  * we are ok).
60  */
61 
62 struct pool uvm_amap_pool;
63 
64 LIST_HEAD(, vm_amap) amap_list;
65 
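/*
 * MALLOC_SLOT_UNIT is the per-slot cost of the single allocation that
 * backs an amap: one int for am_slots, one int for am_bckptr and one
 * pointer for am_anon.  amap_alloc1() carves the three parallel arrays
 * out of that one block.
 */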
66 #define MALLOC_SLOT_UNIT (2 * sizeof(int) + sizeof(struct vm_anon *))
67 
68 /*
69  * local functions
70  */
71 
72 static struct vm_amap *amap_alloc1(int, int, int);
73 static __inline void amap_list_insert(struct vm_amap *);
74 static __inline void amap_list_remove(struct vm_amap *);
75 
76 static __inline void
77 amap_list_insert(struct vm_amap *amap)
78 {
79 	LIST_INSERT_HEAD(&amap_list, amap, am_list);
80 }
81 
82 static __inline void
83 amap_list_remove(struct vm_amap *amap)
84 {
85 	LIST_REMOVE(amap, am_list);
86 }
87 
88 #ifdef UVM_AMAP_PPREF
89 /*
90  * what is ppref?   ppref is an _optional_ amap feature which is used
91  * to keep track of reference counts on a per-page basis.  it is enabled
92  * when UVM_AMAP_PPREF is defined.
93  *
94  * when enabled, an array of ints is allocated for the pprefs.  this
95  * array is allocated only when a partial reference is added to the
96  * map (either by unmapping part of the amap, or gaining a reference
97  * to only a part of an amap).  if the malloc of the array fails
98  * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
99  * that we tried to use pprefs but couldn't alloc the array, so we just
100  * give up (after all, this is an optional feature!).
101  *
102  * the array is divided into "chunks": runs of pages sharing a ref count.
103  * for a chunk of length 1, the count plus one is stored in that chunk's slot.
104  * for chunks of length > 1 the first slot contains (the reference count
105  * plus one) * -1.    [the negative value indicates that the length is
106  * greater than one.]   the second slot of the chunk contains the length
107  * of the chunk.   here is an example:
108  *
109  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
110  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
111  *              <----------><-><----><-------><----><-><------->
112  * (x = don't care)
113  *
114  * this encoding allows one int to contain the ref count for the whole
115  * chunk.    note that the "plus one" part is needed because a reference
116  * count of zero is neither positive nor negative (we need a way to tell
117  * if we've got one zero or a bunch of them).
118  *
119  * here are some in-line functions to help us.
120  */
121 
122 static __inline void pp_getreflen(int *, int, int *, int *);
123 static __inline void pp_setreflen(int *, int, int, int);
124 
125 /*
126  * pp_getreflen: get the reference and length for a specific offset
127  */
128 static __inline void
129 pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
130 {
131 
132 	if (ppref[offset] > 0) {		/* chunk size must be 1 */
133 		*refp = ppref[offset] - 1;	/* don't forget to adjust */
134 		*lenp = 1;
135 	} else {
136 		*refp = (ppref[offset] * -1) - 1;
137 		*lenp = ppref[offset+1];
138 	}
139 }
140 
141 /*
142  * pp_setreflen: set the reference and length for a specific offset
143  */
144 static __inline void
145 pp_setreflen(int *ppref, int offset, int ref, int len)
146 {
147 	if (len == 1) {
148 		ppref[offset] = ref + 1;
149 	} else {
150 		ppref[offset] = (ref + 1) * -1;
151 		ppref[offset+1] = len;
152 	}
153 }
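
/*
 * A minimal sketch of the encoding described above (disabled; pp_example()
 * is made up for illustration): encode two chunks with pp_setreflen() and
 * read them back with pp_getreflen().
 */
#if 0
static void
pp_example(void)
{
	int ppref[8];
	int ref, len;

	pp_setreflen(ppref, 0, 2, 4);	/* slots 0-3: ref 2 -> {-3, 4} */
	pp_setreflen(ppref, 4, 0, 4);	/* slots 4-7: ref 0 -> {-1, 4} */

	pp_getreflen(ppref, 0, &ref, &len);	/* ref == 2, len == 4 */
	pp_getreflen(ppref, 4, &ref, &len);	/* ref == 0, len == 4 */
}
#endif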
154 #endif
155 
156 /*
157  * amap_init: called at boot time to init global amap data structures
158  */
159 
160 void
161 amap_init(void)
162 {
163 	/*
164 	 * Initialize the vm_amap pool.
165 	 */
166 	pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
167 	    "amappl", &pool_allocator_nointr);
168 	pool_sethiwat(&uvm_amap_pool, 4096);
169 }
170 
171 /*
172  * amap_alloc1: internal function that allocates an amap, but does not
173  *	init the overlay.
174  */
175 static inline struct vm_amap *
176 amap_alloc1(int slots, int padslots, int waitf)
177 {
178 	struct vm_amap *amap;
179 	int totalslots;
180 
181 	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK
182 	    : PR_NOWAIT);
183 	if (amap == NULL)
184 		return(NULL);
185 
186 	totalslots = malloc_roundup((slots + padslots) * MALLOC_SLOT_UNIT) /
187 	    MALLOC_SLOT_UNIT;
188 	amap->am_ref = 1;
189 	amap->am_flags = 0;
190 #ifdef UVM_AMAP_PPREF
191 	amap->am_ppref = NULL;
192 #endif
193 	amap->am_maxslot = totalslots;
194 	amap->am_nslot = slots;
195 	amap->am_nused = 0;
196 
197 	amap->am_slots = malloc(totalslots * MALLOC_SLOT_UNIT, M_UVMAMAP,
198 	    waitf);
199 	if (amap->am_slots == NULL)
200 		goto fail1;
201 
202 	amap->am_bckptr = (int *)(((char *)amap->am_slots) + totalslots *
203 	    sizeof(int));
204 	amap->am_anon = (struct vm_anon **)(((char *)amap->am_bckptr) +
205 	    totalslots * sizeof(int));
206 
207 	return(amap);
208 
209 fail1:
210 	pool_put(&uvm_amap_pool, amap);
211 	return (NULL);
212 }
213 
214 /*
215  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
216  *
217  * => caller should ensure sz is a multiple of PAGE_SIZE
218  * => reference count to new amap is set to one
219  */
220 
221 struct vm_amap *
222 amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
223 {
224 	struct vm_amap *amap;
225 	int slots, padslots;
226 
227 	AMAP_B2SLOT(slots, sz);		/* load slots */
228 	AMAP_B2SLOT(padslots, padsz);
229 
230 	amap = amap_alloc1(slots, padslots, waitf);
231 	if (amap) {
232 		memset(amap->am_anon, 0,
233 		    amap->am_maxslot * sizeof(struct vm_anon *));
234 		amap_list_insert(amap);
235 	}
236 
237 	return(amap);
238 }
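
/*
 * A minimal life-cycle sketch (disabled; amap_usage_example() is made up
 * for illustration): allocate an amap covering four pages, install an anon
 * at page 2 through a vm_aref, look it up, then drop the final reference,
 * which wipes out and frees the amap.
 */
#if 0
static void
amap_usage_example(void)
{
	struct vm_aref aref;
	struct vm_anon *anon;

	aref.ar_pageoff = 0;
	aref.ar_amap = amap_alloc(4 * PAGE_SIZE, 0, M_WAITOK);
	if (aref.ar_amap == NULL)
		return;

	anon = uvm_analloc();
	if (anon != NULL) {
		amap_add(&aref, 2 * PAGE_SIZE, anon, FALSE);
		KASSERT(amap_lookup(&aref, 2 * PAGE_SIZE) == anon);
	}

	amap_unref(aref.ar_amap, 0, 4, TRUE);	/* last ref: wipeout + free */
}
#endif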
239 
240 
241 /*
242  * amap_free: free an amap
243  *
244  * => the amap should have a zero reference count and be empty
245  */
246 void
247 amap_free(struct vm_amap *amap)
248 {
249 
250 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
251 	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
252 
253 	free(amap->am_slots, M_UVMAMAP);
254 #ifdef UVM_AMAP_PPREF
255 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
256 		free(amap->am_ppref, M_UVMAMAP);
257 #endif
258 	pool_put(&uvm_amap_pool, amap);
259 
260 }
261 
262 /*
263  * amap_extend: extend the size of an amap (if needed)
264  *
265  * => called from uvm_map when we want to extend an amap to cover
266  *    a new mapping (rather than allocate a new one)
267  * => to safely extend an amap it should have a reference count of
268  *    one (thus it can't be shared)
269  * => XXXCDC: support padding at this level?
270  */
271 int
272 amap_extend(struct vm_map_entry *entry, vsize_t addsize)
273 {
274 	struct vm_amap *amap = entry->aref.ar_amap;
275 	int slotoff = entry->aref.ar_pageoff;
276 	int slotmapped, slotadd, slotneed, slotalloc;
277 #ifdef UVM_AMAP_PPREF
278 	int *newppref, *oldppref;
279 #endif
280 	u_int *newsl, *newbck, *oldsl, *oldbck;
281 	struct vm_anon **newover, **oldover;
282 	int slotadded;
283 
284 	/*
285 	 * first, determine how many slots we need in the amap.  don't
286 	 * forget that ar_pageoff could be non-zero: this means that
287 	 * there are some unused slots before us in the amap.
288 	 */
289 
290 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
291 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
292 	slotneed = slotoff + slotmapped + slotadd;
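
	/*
	 * worked example: with ar_pageoff 2, a 4-page mapping and a 2-page
	 * extension, slotneed = 2 + 4 + 2 = 8.  if am_nslot >= 8 we only
	 * adjust ppref (case 1 below); if only am_maxslot >= 8 we grow
	 * am_nslot into the pre-allocated padding (case 2); otherwise we
	 * allocate new arrays and copy (case 3).
	 */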
293 
294 	/*
295 	 * case 1: we already have enough slots in the map and thus
296 	 * only need to bump the reference counts on the slots we are
297 	 * adding.
298 	 */
299 
300 	if (amap->am_nslot >= slotneed) {
301 #ifdef UVM_AMAP_PPREF
302 		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
303 			amap_pp_adjref(amap, slotoff + slotmapped, slotadd, 1);
304 		}
305 #endif
306 		return (0);
307 	}
308 
309 	/*
310  * case 2: we pre-allocated extra slots and we just need to
311  * bump nslot up to account for these slots.
312 	 */
313 
314 	if (amap->am_maxslot >= slotneed) {
315 #ifdef UVM_AMAP_PPREF
316 		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
317 			if ((slotoff + slotmapped) < amap->am_nslot)
318 				amap_pp_adjref(amap, slotoff + slotmapped,
319 				    (amap->am_nslot - (slotoff + slotmapped)),
320 				    1);
321 			pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
322 			   slotneed - amap->am_nslot);
323 		}
324 #endif
325 		amap->am_nslot = slotneed;
326 
327 		/*
328 		 * no need to zero am_anon since that was done at
329 		 * alloc time and we never shrink an allocation.
330 		 */
331 		return (0);
332 	}
333 
334 	/*
335  * case 3: we need to malloc new arrays and copy all the amap
336  * data over from the old arrays to the new ones.
337 	 *
338 	 * XXXCDC: could we take advantage of a kernel realloc()?
339 	 */
340 
341 	if (slotneed >= UVM_AMAP_LARGE)
342 		return E2BIG;
343 
344 	slotalloc = malloc_roundup(slotneed * MALLOC_SLOT_UNIT) /
345 	    MALLOC_SLOT_UNIT;
346 #ifdef UVM_AMAP_PPREF
347 	newppref = NULL;
348 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
349 		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP,
350 		    M_WAITOK | M_CANFAIL);
351 		if (newppref == NULL) {
352 			/* give up if malloc fails */
353 			free(amap->am_ppref, M_UVMAMAP);
354 			amap->am_ppref = PPREF_NONE;
355 		}
356 	}
357 #endif
358 	newsl = malloc(slotalloc * MALLOC_SLOT_UNIT, M_UVMAMAP,
359 	    M_WAITOK | M_CANFAIL);
360 	if (newsl == NULL) {
361 #ifdef UVM_AMAP_PPREF
362 		if (newppref != NULL) {
363 			free(newppref, M_UVMAMAP);
364 		}
365 #endif
366 		return (ENOMEM);
367 	}
368 	newbck = (int *)(((char *)newsl) + slotalloc * sizeof(int));
369 	newover = (struct vm_anon **)(((char *)newbck) + slotalloc *
370 	    sizeof(int));
371 	KASSERT(amap->am_maxslot < slotneed);
372 
373 	/*
374 	 * now copy everything over to new malloc'd areas...
375 	 */
376 
377 	slotadded = slotalloc - amap->am_nslot;
378 
379 	/* do am_slots */
380 	oldsl = amap->am_slots;
381 	memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
382 	amap->am_slots = newsl;
383 
384 	/* do am_anon */
385 	oldover = amap->am_anon;
386 	memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot);
387 	memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) *
388 	    slotadded);
389 	amap->am_anon = newover;
390 
391 	/* do am_bckptr */
392 	oldbck = amap->am_bckptr;
393 	memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
394 	memset(newbck + amap->am_nslot, 0, sizeof(int) * slotadded); /* XXX: needed? */
395 	amap->am_bckptr = newbck;
396 
397 #ifdef UVM_AMAP_PPREF
398 	/* do ppref */
399 	oldppref = amap->am_ppref;
400 	if (newppref) {
401 		memcpy(newppref, oldppref, sizeof(int) * amap->am_nslot);
402 		memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded);
403 		amap->am_ppref = newppref;
404 		if ((slotoff + slotmapped) < amap->am_nslot)
405 			amap_pp_adjref(amap, slotoff + slotmapped,
406 			    (amap->am_nslot - (slotoff + slotmapped)), 1);
407 		pp_setreflen(newppref, amap->am_nslot, 1,
408 		    slotneed - amap->am_nslot);
409 	}
410 #endif
411 
412 	/* update master values */
413 	amap->am_nslot = slotneed;
414 	amap->am_maxslot = slotalloc;
415 
416 	/* and free */
417 	free(oldsl, M_UVMAMAP);
418 #ifdef UVM_AMAP_PPREF
419 	if (oldppref && oldppref != PPREF_NONE)
420 		free(oldppref, M_UVMAMAP);
421 #endif
422 	return (0);
423 }
424 
425 /*
426  * amap_share_protect: change protection of anons in a shared amap
427  *
428  * for shared amaps, given the current data structure layout, it is
429  * not possible for us to directly locate all maps referencing the
430  * shared anon (to change the protection).  in order to protect data
431  * in shared maps we use pmap_page_protect().  [this is useful for IPC
432  * mechanisms like map entry passing that may want to write-protect
433  * all mappings of a shared amap.]  we traverse am_anon or am_slots
434  * depending on the current state of the amap.
435  */
436 void
437 amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
438 {
439 	struct vm_amap *amap = entry->aref.ar_amap;
440 	int slots, lcv, slot, stop;
441 
442 	AMAP_B2SLOT(slots, (entry->end - entry->start));
443 	stop = entry->aref.ar_pageoff + slots;
444 
445 	if (slots < amap->am_nused) {
446 		/* cheaper to traverse am_anon */
447 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
448 			if (amap->am_anon[lcv] == NULL)
449 				continue;
450 			if (amap->am_anon[lcv]->an_page != NULL)
451 				pmap_page_protect(amap->am_anon[lcv]->an_page,
452 						  prot);
453 		}
454 		return;
455 	}
456 
457 	/* cheaper to traverse am_slots */
458 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
459 		slot = amap->am_slots[lcv];
460 		if (slot < entry->aref.ar_pageoff || slot >= stop)
461 			continue;
462 		if (amap->am_anon[slot]->an_page != NULL)
463 			pmap_page_protect(amap->am_anon[slot]->an_page, prot);
464 	}
465 	return;
466 }
467 
468 /*
469  * amap_wipeout: wipe out all anons in an amap; then free the amap!
470  *
471  * => called from amap_unref when the final reference to an amap is
472  *	dropped (i.e. the reference count has already fallen to zero)
473  */
474 
475 void
476 amap_wipeout(struct vm_amap *amap)
477 {
478 	int lcv, slot;
479 	struct vm_anon *anon;
480 
481 	KASSERT(amap->am_ref == 0);
482 
483 	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
484 		/*
485 		 * amap_swap_off will call us again.
486 		 */
487 		return;
488 	}
489 	amap_list_remove(amap);
490 
491 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
492 		int refs;
493 
494 		slot = amap->am_slots[lcv];
495 		anon = amap->am_anon[slot];
496 
497 		if (anon == NULL || anon->an_ref == 0)
498 			panic("amap_wipeout: corrupt amap");
499 
500 		refs = --anon->an_ref;
501 		if (refs == 0) {
502 			/*
503 			 * we had the last reference to a vm_anon. free it.
504 			 */
505 			uvm_anfree(anon);
506 		}
507 	}
508 
509 	/*
510 	 * now we free the map
511 	 * now we free the amap
512 
513 	amap->am_ref = 0;	/* ... was one */
514 	amap->am_nused = 0;
515 	amap_free(amap);	/* will free amap */
516 }
517 
518 /*
519  * amap_copy: ensure that a map entry's "needs_copy" flag is false
520  *	by copying the amap if necessary.
521  *
522  * => an entry with a null amap pointer will get a new (blank) one.
523  * => if canchunk is true, then we may clip the entry into a chunk
524  * => "startva" and "endva" are used only if canchunk is true.  they are
525  *     used to limit chunking (e.g. if you have a large space that you
526  *     know you are going to need to allocate amaps for, there is no point
527  *     in allowing that to be chunked)
528  */
529 
530 void
531 amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
532     boolean_t canchunk, vaddr_t startva, vaddr_t endva)
533 {
534 	struct vm_amap *amap, *srcamap;
535 	int slots, lcv;
536 	vaddr_t chunksize;
537 
538 	/*
539 	 * is there an amap to copy?   if not, create one from scratch.
540 	 */
541 
542 	if (entry->aref.ar_amap == NULL) {
543 
544 		/*
545 		 * check to see if we have a large amap that we can
546 		 * chunk.  we align startva/endva to chunk-sized
547 		 * boundaries and then clip to them.
548 		 */
549 
550 		if (canchunk && atop(entry->end - entry->start) >=
551 		    UVM_AMAP_LARGE) {
552 			/* convert slots to bytes */
553 			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
554 			startva = (startva / chunksize) * chunksize;
555 			endva = roundup(endva, chunksize);
556 			UVM_MAP_CLIP_START(map, entry, startva);
557 			/* watch out for endva wrap-around! */
558 			if (endva >= startva)
559 				UVM_MAP_CLIP_END(map, entry, endva);
560 		}
561 
562 		entry->aref.ar_pageoff = 0;
563 		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
564 		    waitf);
565 		if (entry->aref.ar_amap != NULL)
566 			entry->etype &= ~UVM_ET_NEEDSCOPY;
567 		return;
568 	}
569 
570 	/*
571 	 * first check and see if we are the only map entry
572 	 * referencing the amap we currently have.  if so, then we can
573 	 * just take it over rather than copying it.  am_ref can only be
574 	 * one if we hold the only reference to the amap.
575 	 */
576 
577 	if (entry->aref.ar_amap->am_ref == 1) {
578 		entry->etype &= ~UVM_ET_NEEDSCOPY;
579 		return;
580 	}
581 
582 	/*
583 	 * looks like we need to copy the amap.
584 	 */
585 
586 	AMAP_B2SLOT(slots, entry->end - entry->start);
587 	amap = amap_alloc1(slots, 0, waitf);
588 	if (amap == NULL)
589 		return;
590 	srcamap = entry->aref.ar_amap;
591 
592 	/*
593 	 * need to double check reference count now.  the reference count
594 	 * could have changed while we were in malloc.  if the reference count
595 	 * dropped down to one we take over the old amap rather than
596 	 * copying it.
597 	 */
598 
599 	if (srcamap->am_ref == 1) {		/* take it over? */
600 		entry->etype &= ~UVM_ET_NEEDSCOPY;
601 		amap->am_ref--;		/* drop final reference to map */
602 		amap_free(amap);	/* dispose of new (unused) amap */
603 		return;
604 	}
605 
606 	/*
607 	 * we must copy it now.
608 	 */
609 
610 	for (lcv = 0 ; lcv < slots; lcv++) {
611 		amap->am_anon[lcv] =
612 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
613 		if (amap->am_anon[lcv] == NULL)
614 			continue;
615 		amap->am_anon[lcv]->an_ref++;
616 		amap->am_bckptr[lcv] = amap->am_nused;
617 		amap->am_slots[amap->am_nused] = lcv;
618 		amap->am_nused++;
619 	}
620 	memset(&amap->am_anon[lcv], 0,
621 	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
622 
623 	/*
624 	 * drop our reference to the old amap (srcamap).
625 	 * we know that the reference count on srcamap is greater than
626 	 * one (we checked above), so there is no way we could drop
627 	 * the count to zero.  [and no need to worry about freeing it]
628 	 */
629 
630 	srcamap->am_ref--;
631 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
632 		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
633 #ifdef UVM_AMAP_PPREF
634 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
635 		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
636 		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
637 	}
638 #endif
639 
640 	/*
641 	 * install new amap.
642 	 */
643 
644 	entry->aref.ar_pageoff = 0;
645 	entry->aref.ar_amap = amap;
646 	entry->etype &= ~UVM_ET_NEEDSCOPY;
647 
648 	amap_list_insert(amap);
649 }
650 
651 /*
652  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
653  *
654  *	called during fork(2) when the parent process has a wired map
655  *	entry.   in that case we want to avoid write-protecting pages
656  *	in the parent's map (e.g. like what you'd do for a COW page)
657  *	so we resolve the COW here.
658  *
659  * => assume parent's entry was wired, thus all pages are resident.
660  * => assume pages that are loaned out (loan_count) are already mapped
661  *	read-only in all maps, and thus no need for us to worry about them
662  * => caller passes child's map/entry in to us
663  * => XXXCDC: out of memory should cause fork to fail, but there is
664  *	currently no easy way to do this (needs fix)
665  */
666 
667 void
668 amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
669 {
670 	struct vm_amap *amap = entry->aref.ar_amap;
671 	int lcv, slot;
672 	struct vm_anon *anon, *nanon;
673 	struct vm_page *pg, *npg;
674 
675 	/*
676 	 * note that if we wait, we must ReStart the "lcv" for loop because
677  * some other process could reorder the anons in the
678 	 * am_anon[] array on us.
679 	 */
680 ReStart:
681 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
682 
683 		/*
684 		 * get the page
685 		 */
686 
687 		slot = amap->am_slots[lcv];
688 		anon = amap->am_anon[slot];
689 		pg = anon->an_page;
690 
691 		/*
692 		 * page must be resident since parent is wired
693 		 */
694 
695 		if (pg == NULL)
696 		    panic("amap_cow_now: non-resident wired page in anon %p",
697 			anon);
698 
699 		/*
700 		 * if the anon ref count is one and the page is not loaned,
701 		 * then we are safe (the child has exclusive access to the
702 		 * page).  if the page is loaned, then it must already be
703 		 * mapped read-only.
704 		 *
705 		 * we only need to get involved when these are not true.
706 		 * [note: if loan_count == 0, then the anon must own the page]
707 		 */
708 
709 		if (anon->an_ref > 1 && pg->loan_count == 0) {
710 
711 			/*
712 			 * if the page is busy then we have to wait for
713 			 * it and then restart.
714 			 */
715 			if (pg->pg_flags & PG_BUSY) {
716 				atomic_setbits_int(&pg->pg_flags, PG_WANTED);
717 				UVM_WAIT(pg, FALSE, "cownow", 0);
718 				goto ReStart;
719 			}
720 
721 			/*
722 			 * ok, time to do a copy-on-write to a new anon
723 			 */
724 			nanon = uvm_analloc();
725 			if (nanon) {
726 				npg = uvm_pagealloc(NULL, 0, nanon, 0);
727 			} else
728 				npg = NULL;	/* XXX: quiet gcc warning */
729 
730 			if (nanon == NULL || npg == NULL) {
731 				/* out of memory */
732 				/*
733 				 * XXXCDC: we should cause fork to fail, but
734 				 * we can't ...
735 				 */
736 				if (nanon) {
737 					uvm_anfree(nanon);
738 				}
739 				uvm_wait("cownowpage");
740 				goto ReStart;
741 			}
742 
743 			/*
744 			 * got it... now we can copy the data and replace anon
745 			 * with our new one...
746 			 */
747 			uvm_pagecopy(pg, npg);		/* old -> new */
748 			anon->an_ref--;			/* can't drop to zero */
749 			amap->am_anon[slot] = nanon;	/* replace */
750 
751 			/*
752 			 * drop PG_BUSY on new page ... since we have had its
753 			 * owner locked the whole time it can't be
754 			 * PG_RELEASED | PG_WANTED.
755 			 */
756 			atomic_clearbits_int(&npg->pg_flags, PG_BUSY|PG_FAKE);
757 			UVM_PAGE_OWN(npg, NULL);
758 			uvm_lock_pageq();
759 			uvm_pageactivate(npg);
760 			uvm_unlock_pageq();
761 		}
762 
763 		/*
764 		 * done with this anon, next ...!
765 		 */
766 
767 	}	/* end of 'for' loop */
768 }
769 
770 /*
771  * amap_splitref: split a single reference into two separate references
772  *
773  * => called from uvm_map's clip routines
774  */
775 void
776 amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
777 {
778 	int leftslots;
779 
780 	AMAP_B2SLOT(leftslots, offset);
781 	if (leftslots == 0)
782 		panic("amap_splitref: split at zero offset");
783 
784 	/*
785 	 * now: make sure the split point falls within the original amap.
786 	 */
787 
788 	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
789 		panic("amap_splitref: map size check failed");
790 
791 #ifdef UVM_AMAP_PPREF
792 	/*
793 	 * establish ppref before we add a duplicate reference to the amap
794 	 */
795 	if (origref->ar_amap->am_ppref == NULL)
796 		amap_pp_establish(origref->ar_amap);
797 #endif
798 
799 	splitref->ar_amap = origref->ar_amap;
800 	splitref->ar_amap->am_ref++;		/* not a share reference */
801 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
802 }
803 
804 #ifdef UVM_AMAP_PPREF
805 
806 /*
807  * amap_pp_establish: add a ppref array to an amap, if possible
808  */
809 void
810 amap_pp_establish(struct vm_amap *amap)
811 {
812 
813 	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
814 	    M_UVMAMAP, M_NOWAIT|M_ZERO);
815 
816 	/*
817 	 * if we fail then we just won't use ppref for this amap
818 	 */
819 	if (amap->am_ppref == NULL) {
820 		amap->am_ppref = PPREF_NONE;	/* not using it */
821 		return;
822 	}
823 
824 	/*
825 	 * init ppref
826 	 */
827 	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
828 }
829 
830 /*
831  * amap_pp_adjref: adjust reference count to a part of an amap using the
832  * per-page reference count array.
833  *
834  * => caller must check that ppref != PPREF_NONE before calling
835  */
836 void
837 amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
838 {
839  	int stopslot, *ppref, lcv, prevlcv;
840  	int ref, len, prevref, prevlen;
841 
842 	stopslot = curslot + slotlen;
843 	ppref = amap->am_ppref;
844  	prevlcv = 0;
845 
846 	/*
847  	 * first advance to the correct place in the ppref array,
848  	 * fragment if needed.
849 	 */
850 
851 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
852 		pp_getreflen(ppref, lcv, &ref, &len);
853 		if (lcv + len > curslot) {     /* goes past start? */
854 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
855 		pp_setreflen(ppref, curslot, ref, len - (curslot - lcv));
856 			len = curslot - lcv;   /* new length of entry @ lcv */
857 		}
858 		prevlcv = lcv;
859 	}
860 	if (lcv != 0)
861 		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
862 	else {
863 		/* Ensure that the "prevref == ref" test below always
864 		 * fails, since we're starting from the beginning of
865 		 * the ppref array; that is, there is no previous
866 		 * chunk.
867 		 */
868 		prevref = -1;
869 		prevlen = 0;
870 	}
871 
872 	/*
873 	 * now adjust reference counts in range.  merge the first
874 	 * changed entry with the last unchanged entry if possible.
875 	 */
876 
877 	if (lcv != curslot)
878 		panic("amap_pp_adjref: overshot target");
879 
880 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
881 		pp_getreflen(ppref, lcv, &ref, &len);
882 		if (lcv + len > stopslot) {     /* goes past end? */
883 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
884 			pp_setreflen(ppref, stopslot, ref,
885 			    len - (stopslot - lcv));
886 			len = stopslot - lcv;
887 		}
888 		ref += adjval;
889 		if (ref < 0)
890 			panic("amap_pp_adjref: negative reference count");
891 		if (lcv == prevlcv + prevlen && ref == prevref) {
892 			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
893 		} else {
894 			pp_setreflen(ppref, lcv, ref, len);
895 		}
896 		if (ref == 0)
897 			amap_wiperange(amap, lcv, len);
898 	}
899 
900 }
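
/*
 * worked example for amap_pp_adjref(): starting from a 16-slot amap whose
 * ppref is the single chunk {-3, 16} (ref 2 for all 16 slots), the call
 * amap_pp_adjref(amap, 4, 8, -1) first fragments the chunk at slot 4,
 * then at slot 12, and leaves {-3, 4} {-2, 8} {-3, 4}: ref 2 for slots
 * 0-3, ref 1 for slots 4-11 and ref 2 for slots 12-15.
 */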
901 
902 /*
903  * amap_wiperange: wipe out a range of an amap
904  * [different from amap_wipeout because the amap is kept intact]
905  */
906 void
907 amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
908 {
909 	int byanon, lcv, stop, curslot, ptr, slotend;
910 	struct vm_anon *anon;
911 
912 	/*
913 	 * we can either traverse the amap by am_anon or by am_slots depending
914 	 * on which is cheaper.    decide now.
915 	 */
916 
917 	if (slots < amap->am_nused) {
918 		byanon = TRUE;
919 		lcv = slotoff;
920 		stop = slotoff + slots;
921 	} else {
922 		byanon = FALSE;
923 		lcv = 0;
924 		stop = amap->am_nused;
925 		slotend = slotoff + slots;
926 	}
927 
928 	while (lcv < stop) {
929 		int refs;
930 
931   		if (byanon) {
932 			curslot = lcv++;	/* lcv advances here */
933 			if (amap->am_anon[curslot] == NULL)
934 				continue;
935 		} else {
936 			curslot = amap->am_slots[lcv];
937 			if (curslot < slotoff || curslot >= slotend) {
938 				lcv++;		/* lcv advances here */
939 				continue;
940 			}
941 			stop--;	/* drop stop, since anon will be removed */
942 		}
943 		anon = amap->am_anon[curslot];
944 
945 		/*
946 		 * remove it from the amap
947 		 */
948 		amap->am_anon[curslot] = NULL;
949 		ptr = amap->am_bckptr[curslot];
950 		if (ptr != (amap->am_nused - 1)) {
951 			amap->am_slots[ptr] =
952 			    amap->am_slots[amap->am_nused - 1];
953 			amap->am_bckptr[amap->am_slots[ptr]] =
954 			    ptr;    /* back ptr. */
955 		}
956 		amap->am_nused--;
957 
958 		/*
959 		 * drop anon reference count
960 		 */
961 		refs = --anon->an_ref;
962 		if (refs == 0) {
963 			/*
964 			 * we just eliminated the last reference to an anon.
965 			 * free it.
966 			 */
967 			uvm_anfree(anon);
968 		}
969 	}
970 }
971 
972 #endif
973 
974 /*
975  * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
976  *
977  * => note that we don't always traverse all anons,
978  *    e.g. anons in amaps being wiped out or anons already released.
979  * => return TRUE if failed.
980  */
981 
982 boolean_t
983 amap_swap_off(int startslot, int endslot)
984 {
985 	struct vm_amap *am;
986 	struct vm_amap *am_next;
987 	struct vm_amap marker_prev;
988 	struct vm_amap marker_next;
989 	boolean_t rv = FALSE;
990 
991 #if defined(DIAGNOSTIC)
992 	memset(&marker_prev, 0, sizeof(marker_prev));
993 	memset(&marker_next, 0, sizeof(marker_next));
994 #endif /* defined(DIAGNOSTIC) */
995 
996 	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
997 		int i;
998 
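		/*
		 * the two markers bracket "am" so that we can recover our
		 * place in amap_list even if "am" is wiped out or the list
		 * changes while uvm_anon_pagein() sleeps.
		 */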
999 		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
1000 		LIST_INSERT_AFTER(am, &marker_next, am_list);
1001 
1002 		if (am->am_nused <= 0) {
1003 			goto next;
1004 		}
1005 
1006 		for (i = 0; i < am->am_nused; i++) {
1007 			int slot;
1008 			int swslot;
1009 			struct vm_anon *anon;
1010 
1011 			slot = am->am_slots[i];
1012 			anon = am->am_anon[slot];
1013 
1014 			swslot = anon->an_swslot;
1015 			if (swslot < startslot || endslot <= swslot) {
1016 				continue;
1017 			}
1018 
1019 			am->am_flags |= AMAP_SWAPOFF;
1020 
1021 			rv = uvm_anon_pagein(anon);
1022 
1023 			am->am_flags &= ~AMAP_SWAPOFF;
1024 			if (amap_refs(am) == 0) {
1025 				amap_wipeout(am);
1026 				am = NULL;
1027 				break;
1028 			}
1029 			if (rv) {
1030 				break;
1031 			}
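			/*
			 * uvm_anon_pagein() may have slept, and the am_slots
			 * array may have been reshuffled meanwhile, so
			 * restart the scan from the beginning.
			 */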
1032 			i = 0;
1033 		}
1034 
1035 next:
1036 		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
1037 		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
1038 		    &marker_next);
1039 		am_next = LIST_NEXT(&marker_next, am_list);
1040 		LIST_REMOVE(&marker_prev, am_list);
1041 		LIST_REMOVE(&marker_next, am_list);
1042 	}
1043 
1044 	return rv;
1045 }
1046 
1047 /*
1048  * amap_lookup: look up a page in an amap
1049  */
1050 struct vm_anon *
1051 amap_lookup(struct vm_aref *aref, vaddr_t offset)
1052 {
1053 	int slot;
1054 	struct vm_amap *amap = aref->ar_amap;
1055 
1056 	AMAP_B2SLOT(slot, offset);
1057 	slot += aref->ar_pageoff;
1058 
1059 	if (slot >= amap->am_nslot)
1060 		panic("amap_lookup: offset out of range");
1061 
1062 	return(amap->am_anon[slot]);
1063 }
1064 
1065 /*
1066  * amap_lookups: look up a range of pages in an amap
1067  *
1068  * => XXXCDC: this interface is biased toward array-based amaps.  fix.
1069  */
1070 void
1071 amap_lookups(struct vm_aref *aref, vaddr_t offset,
1072     struct vm_anon **anons, int npages)
1073 {
1074 	int slot;
1075 	struct vm_amap *amap = aref->ar_amap;
1076 
1077 	AMAP_B2SLOT(slot, offset);
1078 	slot += aref->ar_pageoff;
1079 
1080 	if ((slot + (npages - 1)) >= amap->am_nslot)
1081 		panic("amap_lookups: offset out of range");
1082 
1083 	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));
1084 
1085 	return;
1086 }
1087 
1088 /*
1089  * amap_add: add (or replace) a page to an amap
1090  *
1091  * => returns an "offset" which is meaningful to amap_unadd().
1092  */
1093 void
1094 amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
1095     boolean_t replace)
1096 {
1097 	int slot;
1098 	struct vm_amap *amap = aref->ar_amap;
1099 
1100 	AMAP_B2SLOT(slot, offset);
1101 	slot += aref->ar_pageoff;
1102 
1103 	if (slot >= amap->am_nslot)
1104 		panic("amap_add: offset out of range");
1105 
1106 	if (replace) {
1107 
1108 		if (amap->am_anon[slot] == NULL)
1109 			panic("amap_add: replacing null anon");
1110 		if (amap->am_anon[slot]->an_page != NULL &&
1111 		    (amap->am_flags & AMAP_SHARED) != 0) {
1112 			pmap_page_protect(amap->am_anon[slot]->an_page,
1113 			    VM_PROT_NONE);
1114 			/*
1115 			 * XXX: suppose page is supposed to be wired somewhere?
1116 			 */
1117 		}
1118 	} else {   /* !replace */
1119 		if (amap->am_anon[slot] != NULL)
1120 			panic("amap_add: slot in use");
1121 
1122 		amap->am_bckptr[slot] = amap->am_nused;
1123 		amap->am_slots[amap->am_nused] = slot;
1124 		amap->am_nused++;
1125 	}
1126 	amap->am_anon[slot] = anon;
1127 }
1128 
1129 /*
1130  * amap_unadd: remove a page from an amap
1131  */
1132 void
1133 amap_unadd(struct vm_aref *aref, vaddr_t offset)
1134 {
1135 	int ptr, slot;
1136 	struct vm_amap *amap = aref->ar_amap;
1137 
1138 	AMAP_B2SLOT(slot, offset);
1139 	slot += aref->ar_pageoff;
1140 
1141 	if (slot >= amap->am_nslot)
1142 		panic("amap_unadd: offset out of range");
1143 
1144 	if (amap->am_anon[slot] == NULL)
1145 		panic("amap_unadd: nothing there");
1146 
1147 	amap->am_anon[slot] = NULL;
1148 	ptr = amap->am_bckptr[slot];
1149 
1150 	if (ptr != (amap->am_nused - 1)) {	/* swap to keep slots contig? */
1151 		amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
1152 		amap->am_bckptr[amap->am_slots[ptr]] = ptr;	/* back link */
1153 	}
1154 	amap->am_nused--;
1155 }
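
/*
 * A minimal sketch of the invariant amap_add() and amap_unadd() maintain
 * (disabled; amap_check_example() is made up for illustration): am_slots
 * is a dense list of the occupied slots, and am_bckptr maps an occupied
 * slot back to its position in that list.
 */
#if 0
static void
amap_check_example(struct vm_amap *amap)
{
	int i;

	for (i = 0; i < amap->am_nused; i++) {
		KASSERT(amap->am_anon[amap->am_slots[i]] != NULL);
		KASSERT(amap->am_bckptr[amap->am_slots[i]] == i);
	}
}
#endif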
1156 
1157 /*
1158  * amap_ref: gain a reference to an amap
1159  *
1160  * => "offset" and "len" are in units of pages
1161  * => called at fork time to gain the child's reference
1162  */
1163 void
1164 amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
1165 {
1166 
1167 	amap->am_ref++;
1168 	if (flags & AMAP_SHARED)
1169 		amap->am_flags |= AMAP_SHARED;
1170 #ifdef UVM_AMAP_PPREF
1171 	if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 &&
1172 	    len != amap->am_nslot)
1173 		amap_pp_establish(amap);
1174 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1175 		if (flags & AMAP_REFALL)
1176 			amap_pp_adjref(amap, 0, amap->am_nslot, 1);
1177 		else
1178 			amap_pp_adjref(amap, offset, len, 1);
1179 	}
1180 #endif
1181 }
1182 
1183 /*
1184  * amap_unref: remove a reference to an amap
1185  *
1186  * => caller must remove all pmap-level references to this amap before
1187  *	dropping the reference
1188  * => called from uvm_unmap_detach [only]  ... note that entry is no
1189  *	longer part of a map
1190  */
1191 void
1192 amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, boolean_t all)
1193 {
1194 
1195 	/*
1196 	 * if we are the last reference, free the amap and return.
1197 	 */
1198 
1199 	if (amap->am_ref-- == 1) {
1200 		amap_wipeout(amap);	/* drops final ref and frees */
1201 		return;
1202 	}
1203 
1204 	/*
1205 	 * otherwise just drop the reference count(s)
1206 	 */
1207 	if (amap->am_ref == 1 && (amap->am_flags & AMAP_SHARED) != 0)
1208 		amap->am_flags &= ~AMAP_SHARED;	/* clear shared flag */
1209 #ifdef UVM_AMAP_PPREF
1210 	if (amap->am_ppref == NULL && all == 0 && len != amap->am_nslot)
1211 		amap_pp_establish(amap);
1212 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1213 		if (all)
1214 			amap_pp_adjref(amap, 0, amap->am_nslot, -1);
1215 		else
1216 			amap_pp_adjref(amap, offset, len, -1);
1217 	}
1218 #endif
1219 }
1220