1 /* $NetBSD: uvm_amap.c,v 1.128 2023/06/19 08:23:35 msaitoh Exp $ */
2
3 /*
4 * Copyright (c) 1997 Charles D. Cranor and Washington University.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * uvm_amap.c: amap operations
30 */
31
32 /*
33 * this file contains functions that perform operations on amaps. see
34 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.128 2023/06/19 08:23:35 msaitoh Exp $");
39
40 #include "opt_uvmhist.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/kmem.h>
46 #include <sys/pool.h>
47 #include <sys/atomic.h>
48
49 #include <uvm/uvm.h>
50 #include <uvm/uvm_swap.h>
51
52 /*
53 * cache for allocation of vm_amap structures. note that in order to
54 * avoid an endless loop, the amap cache's allocator cannot allocate
55 * memory from an amap (it currently goes through the kernel uobj, so
56 * we are ok).
57 */
58 static struct pool_cache uvm_amap_cache;
59 static kmutex_t amap_list_lock __cacheline_aligned;
60 static LIST_HEAD(, vm_amap) amap_list;
61
62 /*
63 * local functions
64 */
65
66 static int
67 amap_roundup_slots(int slots)
68 {
69
70 return kmem_roundup_size(slots * sizeof(int)) / sizeof(int);
71 }
72
73 #ifdef UVM_AMAP_PPREF
74 /*
75 * what is ppref? ppref is an _optional_ amap feature which is used
76 * to keep track of reference counts on a per-page basis. it is enabled
77 * when UVM_AMAP_PPREF is defined.
78 *
79 * when enabled, an array of ints is allocated for the pprefs. this
80 * array is allocated only when a partial reference is added to the
81 * amap (either by unmapping part of the amap, or gaining a reference
82 * to only a part of an amap). if the allocation of the array fails
83 * (KM_NOSLEEP), then we set the array pointer to PPREF_NONE to indicate
84 * that we tried to do ppref's but couldn't alloc the array so just
85 * give up (after all, this is an optional feature!).
86 *
87 * the array is divided into page sized "chunks." for chunks of length 1,
88 * the chunk reference count plus one is stored in that chunk's slot.
89 * for chunks of length > 1 the first slot contains (the reference count
90 * plus one) * -1. [the negative value indicates that the length is
91 * greater than one.] the second slot of the chunk contains the length
92 * of the chunk. here is an example:
93 *
94 * actual REFS: 2 2 2 2 3 1 1 0 0 0 4 4 0 1 1 1
95 * ppref: -3 4 x x 4 -2 2 -1 3 x -5 2 1 -2 3 x
96 * <----------><-><----><-------><----><-><------->
97 * (x = don't care)
98 *
99 * this allows one int to contain the ref count for the whole
100 * chunk. note that the "plus one" part is needed because a reference
101 * count of zero is neither positive nor negative (we need a way to tell
102 * if we've got one zero or a bunch of them).
103 *
104 * here are some in-line functions to help us.
105 */
106
107 /*
108 * pp_getreflen: get the reference and length for a specific offset
109 *
110 * => ppref's amap must be locked
111 */
112 static inline void
113 pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
114 {
115
116 if (ppref[offset] > 0) { /* chunk size must be 1 */
117 *refp = ppref[offset] - 1; /* don't forget to adjust */
118 *lenp = 1;
119 } else {
120 *refp = (ppref[offset] * -1) - 1;
121 *lenp = ppref[offset+1];
122 }
123 }
124
125 /*
126 * pp_setreflen: set the reference and length for a specific offset
127 *
128 * => ppref's amap must be locked
129 */
130 static inline void
131 pp_setreflen(int *ppref, int offset, int ref, int len)
132 {
133 if (len == 0)
134 return;
135 if (len == 1) {
136 ppref[offset] = ref + 1;
137 } else {
138 ppref[offset] = (ref + 1) * -1;
139 ppref[offset+1] = len;
140 }
141 }
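/*
 * Illustrative example (a sketch in a comment, not compiled): encoding
 * a chunk of 4 slots with a reference count of 2 at offset 0, and then
 * reading it back with the helpers above:
 *
 *	int ppref[8], ref, len;
 *
 *	pp_setreflen(ppref, 0, 2, 4);        ==> ppref[0] == -3, ppref[1] == 4
 *	pp_getreflen(ppref, 0, &ref, &len);  ==> ref == 2, len == 4
 */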
142 #endif /* UVM_AMAP_PPREF */
143
144 /*
145 * amap_alloc1: allocate an amap, but do not initialise the overlay.
146 *
147 * => Note: lock is not set.
148 */
149 static struct vm_amap *
150 amap_alloc1(int slots, int padslots, int flags)
151 {
152 const bool nowait = (flags & UVM_FLAG_NOWAIT) != 0;
153 const km_flag_t kmflags = nowait ? KM_NOSLEEP : KM_SLEEP;
154 struct vm_amap *amap;
155 krwlock_t *newlock, *oldlock;
156 int totalslots;
157
158 amap = pool_cache_get(&uvm_amap_cache, nowait ? PR_NOWAIT : PR_WAITOK);
159 if (amap == NULL) {
160 return NULL;
161 }
162 KASSERT(amap->am_lock != NULL);
163 KASSERT(amap->am_nused == 0);
164
165 /* Try to privatize the lock if currently shared. */
166 if (rw_obj_refcnt(amap->am_lock) > 1) {
167 newlock = rw_obj_tryalloc();
168 if (newlock != NULL) {
169 oldlock = amap->am_lock;
170 mutex_enter(&amap_list_lock);
171 amap->am_lock = newlock;
172 mutex_exit(&amap_list_lock);
173 rw_obj_free(oldlock);
174 }
175 }
176
177 totalslots = amap_roundup_slots(slots + padslots);
178 amap->am_ref = 1;
179 amap->am_flags = 0;
180 #ifdef UVM_AMAP_PPREF
181 amap->am_ppref = NULL;
182 #endif
183 amap->am_maxslot = totalslots;
184 amap->am_nslot = slots;
185
186 /*
187 * Note: since allocations are likely big, we expect to reduce the
188 * memory fragmentation by allocating them in separate blocks.
189 */
190 amap->am_slots = kmem_alloc(totalslots * sizeof(int), kmflags);
191 if (amap->am_slots == NULL)
192 goto fail1;
193
194 amap->am_bckptr = kmem_alloc(totalslots * sizeof(int), kmflags);
195 if (amap->am_bckptr == NULL)
196 goto fail2;
197
198 amap->am_anon = kmem_alloc(totalslots * sizeof(struct vm_anon *),
199 kmflags);
200 if (amap->am_anon == NULL)
201 goto fail3;
202
203 return amap;
204
205 fail3:
206 kmem_free(amap->am_bckptr, totalslots * sizeof(int));
207 fail2:
208 kmem_free(amap->am_slots, totalslots * sizeof(int));
209 fail1:
210 pool_cache_put(&uvm_amap_cache, amap);
211
212 /*
213 * XXX hack to tell the pagedaemon how many pages we need,
214 * since we can need more than it would normally free.
215 */
216 if (nowait) {
217 extern u_int uvm_extrapages;
218 atomic_add_int(&uvm_extrapages,
219 ((sizeof(int) * 2 + sizeof(struct vm_anon *)) *
220 totalslots) >> PAGE_SHIFT);
221 }
222 return NULL;
223 }
224
225 /*
226 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
227 *
228 * => caller should ensure sz is a multiple of PAGE_SIZE
229 * => reference count to new amap is set to one
230 * => new amap is returned unlocked
231 */
232
233 struct vm_amap *
234 amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
235 {
236 struct vm_amap *amap;
237 int slots, padslots;
238 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
239
240 AMAP_B2SLOT(slots, sz);
241 AMAP_B2SLOT(padslots, padsz);
242
243 amap = amap_alloc1(slots, padslots, waitf);
244 if (amap) {
245 memset(amap->am_anon, 0,
246 amap->am_maxslot * sizeof(struct vm_anon *));
247 }
248
249 UVMHIST_LOG(maphist,"<- done, amap = %#jx, sz=%jd", (uintptr_t)amap,
250 sz, 0, 0);
251 return(amap);
252 }
253
254 /*
255 * amap_ctor: pool_cache constructor for new amaps
256 *
257 * => carefully synchronize with amap_swap_off()
258 */
259 static int
260 amap_ctor(void *arg, void *obj, int flags)
261 {
262 struct vm_amap *amap = obj;
263
264 if ((flags & PR_NOWAIT) != 0) {
265 amap->am_lock = rw_obj_tryalloc();
266 if (amap->am_lock == NULL) {
267 return ENOMEM;
268 }
269 } else {
270 amap->am_lock = rw_obj_alloc();
271 }
272 amap->am_nused = 0;
273 amap->am_flags = 0;
274
275 mutex_enter(&amap_list_lock);
276 LIST_INSERT_HEAD(&amap_list, amap, am_list);
277 mutex_exit(&amap_list_lock);
278 return 0;
279 }
280
281 /*
282 * amap_dtor: pool_cache destructor for amaps
283 *
284 * => carefully synchronize with amap_swap_off()
285 */
286 static void
287 amap_dtor(void *arg, void *obj)
288 {
289 struct vm_amap *amap = obj;
290
291 KASSERT(amap->am_nused == 0);
292
293 mutex_enter(&amap_list_lock);
294 LIST_REMOVE(amap, am_list);
295 mutex_exit(&amap_list_lock);
296 rw_obj_free(amap->am_lock);
297 }
298
299 /*
300 * uvm_amap_init: initialize the amap system.
301 */
302 void
303 uvm_amap_init(void)
304 {
305
306 mutex_init(&amap_list_lock, MUTEX_DEFAULT, IPL_NONE);
307
308 pool_cache_bootstrap(&uvm_amap_cache, sizeof(struct vm_amap), 0, 0,
309 PR_LARGECACHE, "amappl", NULL, IPL_NONE, amap_ctor, amap_dtor,
310 NULL);
311 }
312
313 /*
314 * amap_free: free an amap
315 *
316 * => the amap must be unlocked
317 * => the amap should have a zero reference count and be empty
318 */
319 void
320 amap_free(struct vm_amap *amap)
321 {
322 int slots;
323
324 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
325
326 KASSERT(amap->am_ref == 0);
327 KASSERT(amap->am_nused == 0);
328 KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
329 slots = amap->am_maxslot;
330 kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots));
331 kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr));
332 kmem_free(amap->am_anon, slots * sizeof(*amap->am_anon));
333 #ifdef UVM_AMAP_PPREF
334 if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
335 kmem_free(amap->am_ppref, slots * sizeof(*amap->am_ppref));
336 #endif
337 pool_cache_put(&uvm_amap_cache, amap);
338 UVMHIST_LOG(maphist,"<- done, freed amap = %#jx", (uintptr_t)amap,
339 0, 0, 0);
340 }
341
342 /*
343 * amap_extend: extend the size of an amap (if needed)
344 *
345 * => called from uvm_map when we want to extend an amap to cover
346 * a new mapping (rather than allocate a new one)
347 * => amap should be unlocked (we will lock it)
348 * => to safely extend an amap it should have a reference count of
349 * one (thus it can't be shared)
350 */
351 int
352 amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags)
353 {
354 struct vm_amap *amap = entry->aref.ar_amap;
355 int slotoff = entry->aref.ar_pageoff;
356 int slotmapped, slotadd, slotneed, slotadded, slotalloc;
357 int slotadj, slotarea, slotendoff;
358 int oldnslots;
359 #ifdef UVM_AMAP_PPREF
360 int *newppref, *oldppref;
361 #endif
362 int i, *newsl, *newbck, *oldsl, *oldbck;
363 struct vm_anon **newover, **oldover;
364 const km_flag_t kmflags =
365 (flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
366
367 UVMHIST_FUNC(__func__);
368 UVMHIST_CALLARGS(maphist, " (entry=%#jx, addsize=%#jx, flags=%#jx)",
369 (uintptr_t)entry, addsize, flags, 0);
370
371 /*
372 * first, determine how many slots we need in the amap. don't
373 * forget that ar_pageoff could be non-zero: this means that
374 * there are some unused slots before us in the amap.
375 */
376
377 amap_lock(amap, RW_WRITER);
378 KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
379 AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
380 AMAP_B2SLOT(slotadd, addsize); /* slots to add */
381 if (flags & AMAP_EXTEND_FORWARDS) {
382 slotneed = slotoff + slotmapped + slotadd;
383 slotadj = 0;
384 slotarea = 0;
385 } else {
386 slotneed = slotadd + slotmapped;
387 slotadj = slotadd - slotoff;
388 slotarea = amap->am_maxslot - slotmapped;
389 }
390
391 /*
392 * Because this amap only has 1 ref, we know that there is
393 * only one vm_map_entry pointing to it, and the one entry is
394 * using slots between slotoff and slotoff + slotmapped. If
395 * we have been using ppref then we know that only slots in
396 * the one map entry's range can have anons, since ppref
397 * allowed us to free any anons outside that range as other map
398 * entries which used this amap were removed. But without ppref,
399 * we couldn't know which slots were still needed by other map
400 * entries, so we couldn't free any anons as we removed map
401 * entries, and so any slot from 0 to am_nslot can have an
402 * anon. But now that we know there is only one map entry
403 * left and we know its range, we can free up any anons
404 * outside that range. This is necessary because the rest of
405 * this function assumes that there are no anons in the amap
406 * outside of the one map entry's range.
407 */
408
409 slotendoff = slotoff + slotmapped;
410 if (amap->am_ppref == PPREF_NONE) {
411 amap_wiperange(amap, 0, slotoff);
412 amap_wiperange(amap, slotendoff, amap->am_nslot - slotendoff);
413 }
414 for (i = 0; i < slotoff; i++) {
415 KASSERT(amap->am_anon[i] == NULL);
416 }
417 for (i = slotendoff; i < amap->am_nslot - slotendoff; i++) {
418 KASSERT(amap->am_anon[i] == NULL);
419 }
420
421 /*
422 * case 1: we already have enough slots in the map and thus
423 * only need to bump the reference counts on the slots we are
424 * adding.
425 */
426
427 if (flags & AMAP_EXTEND_FORWARDS) {
428 if (amap->am_nslot >= slotneed) {
429 #ifdef UVM_AMAP_PPREF
430 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
431 amap_pp_adjref(amap, slotoff + slotmapped,
432 slotadd, 1);
433 }
434 #endif
435 amap_unlock(amap);
436 UVMHIST_LOG(maphist,
437 "<- done (case 1f), amap = %#jx, sltneed=%jd",
438 (uintptr_t)amap, slotneed, 0, 0);
439 return 0;
440 }
441 } else {
442 if (slotadj <= 0) {
443 slotoff -= slotadd;
444 entry->aref.ar_pageoff = slotoff;
445 #ifdef UVM_AMAP_PPREF
446 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
447 amap_pp_adjref(amap, slotoff, slotadd, 1);
448 }
449 #endif
450 amap_unlock(amap);
451 UVMHIST_LOG(maphist,
452 "<- done (case 1b), amap = %#jx, sltneed=%jd",
453 (uintptr_t)amap, slotneed, 0, 0);
454 return 0;
455 }
456 }
457
458 /*
459 * case 2: we pre-allocated slots for use and we just need to
460 * bump nslot up to account for these slots.
461 */
462
463 if (amap->am_maxslot >= slotneed) {
464 if (flags & AMAP_EXTEND_FORWARDS) {
465 #ifdef UVM_AMAP_PPREF
466 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
467 if ((slotoff + slotmapped) < amap->am_nslot)
468 amap_pp_adjref(amap,
469 slotoff + slotmapped,
470 (amap->am_nslot -
471 (slotoff + slotmapped)), 1);
472 pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
473 slotneed - amap->am_nslot);
474 }
475 #endif
476 amap->am_nslot = slotneed;
477 amap_unlock(amap);
478
479 /*
480 * no need to zero am_anon since that was done at
481 * alloc time and we never shrink an allocation.
482 */
483
484 UVMHIST_LOG(maphist,"<- done (case 2f), amap = %#jx, "
485 "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0);
486 return 0;
487 } else {
488 #ifdef UVM_AMAP_PPREF
489 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
490 /*
491 * Slide up the ref counts on the pages that
492 * are actually in use.
493 */
494 memmove(amap->am_ppref + slotarea,
495 amap->am_ppref + slotoff,
496 slotmapped * sizeof(int));
497 /*
498 * Mark the (adjusted) gap at the front as
499 * referenced/not referenced.
500 */
501 pp_setreflen(amap->am_ppref,
502 0, 0, slotarea - slotadd);
503 pp_setreflen(amap->am_ppref,
504 slotarea - slotadd, 1, slotadd);
505 }
506 #endif
507
508 /*
509 * Slide the anon pointers up and clear out
510 * the space we just made.
511 */
512 memmove(amap->am_anon + slotarea,
513 amap->am_anon + slotoff,
514 slotmapped * sizeof(struct vm_anon*));
515 memset(amap->am_anon + slotoff, 0,
516 (slotarea - slotoff) * sizeof(struct vm_anon *));
517
518 /*
519 * Slide the backpointers up, but don't bother
520 * wiping out the old slots.
521 */
522 memmove(amap->am_bckptr + slotarea,
523 amap->am_bckptr + slotoff,
524 slotmapped * sizeof(int));
525
526 /*
527 * Adjust all the useful active slot numbers.
528 */
529 for (i = 0; i < amap->am_nused; i++)
530 amap->am_slots[i] += (slotarea - slotoff);
531
532 /*
533 * We just filled all the empty space in the
534 * front of the amap by activating a few new
535 * slots.
536 */
537 amap->am_nslot = amap->am_maxslot;
538 entry->aref.ar_pageoff = slotarea - slotadd;
539 amap_unlock(amap);
540
541 UVMHIST_LOG(maphist,"<- done (case 2b), amap = %#jx, "
542 "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0);
543 return 0;
544 }
545 }
546
547 /*
548 * Case 3: we need to allocate a new amap and copy all the amap
549 * data over from old amap to the new one. Drop the lock before
550 * performing allocation.
551 *
552 * Note: since allocations are likely big, we expect to reduce the
553 * memory fragmentation by allocating them in separate blocks.
554 */
555
556 amap_unlock(amap);
557
558 if (slotneed >= UVM_AMAP_LARGE) {
559 return E2BIG;
560 }
561
562 slotalloc = amap_roundup_slots(slotneed);
563 #ifdef UVM_AMAP_PPREF
564 newppref = NULL;
565 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
566 /* Failure will be handled later. */
567 newppref = kmem_alloc(slotalloc * sizeof(*newppref), kmflags);
568 }
569 #endif
570 newsl = kmem_alloc(slotalloc * sizeof(*newsl), kmflags);
571 newbck = kmem_alloc(slotalloc * sizeof(*newbck), kmflags);
572 newover = kmem_alloc(slotalloc * sizeof(*newover), kmflags);
573 if (newsl == NULL || newbck == NULL || newover == NULL) {
574 #ifdef UVM_AMAP_PPREF
575 if (newppref != NULL) {
576 kmem_free(newppref, slotalloc * sizeof(*newppref));
577 }
578 #endif
579 if (newsl != NULL) {
580 kmem_free(newsl, slotalloc * sizeof(*newsl));
581 }
582 if (newbck != NULL) {
583 kmem_free(newbck, slotalloc * sizeof(*newbck));
584 }
585 if (newover != NULL) {
586 kmem_free(newover, slotalloc * sizeof(*newover));
587 }
588 return ENOMEM;
589 }
590 amap_lock(amap, RW_WRITER);
591 KASSERT(amap->am_maxslot < slotneed);
592
593 /*
594 * Copy everything over to new allocated areas.
595 */
596
597 slotadded = slotalloc - amap->am_nslot;
598 if (!(flags & AMAP_EXTEND_FORWARDS))
599 slotarea = slotalloc - slotmapped;
600
601 /* do am_slots */
602 oldsl = amap->am_slots;
603 if (flags & AMAP_EXTEND_FORWARDS)
604 memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
605 else
606 for (i = 0; i < amap->am_nused; i++)
607 newsl[i] = oldsl[i] + slotarea - slotoff;
608 amap->am_slots = newsl;
609
610 /* do am_anon */
611 oldover = amap->am_anon;
612 if (flags & AMAP_EXTEND_FORWARDS) {
613 memcpy(newover, oldover,
614 sizeof(struct vm_anon *) * amap->am_nslot);
615 memset(newover + amap->am_nslot, 0,
616 sizeof(struct vm_anon *) * slotadded);
617 } else {
618 memcpy(newover + slotarea, oldover + slotoff,
619 sizeof(struct vm_anon *) * slotmapped);
620 memset(newover, 0,
621 sizeof(struct vm_anon *) * slotarea);
622 }
623 amap->am_anon = newover;
624
625 /* do am_bckptr */
626 oldbck = amap->am_bckptr;
627 if (flags & AMAP_EXTEND_FORWARDS)
628 memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
629 else
630 memcpy(newbck + slotarea, oldbck + slotoff,
631 sizeof(int) * slotmapped);
632 amap->am_bckptr = newbck;
633
634 #ifdef UVM_AMAP_PPREF
635 /* do ppref */
636 oldppref = amap->am_ppref;
637 if (newppref) {
638 if (flags & AMAP_EXTEND_FORWARDS) {
639 memcpy(newppref, oldppref,
640 sizeof(int) * amap->am_nslot);
641 memset(newppref + amap->am_nslot, 0,
642 sizeof(int) * slotadded);
643 } else {
644 memcpy(newppref + slotarea, oldppref + slotoff,
645 sizeof(int) * slotmapped);
646 }
647 amap->am_ppref = newppref;
648 if ((flags & AMAP_EXTEND_FORWARDS) &&
649 (slotoff + slotmapped) < amap->am_nslot)
650 amap_pp_adjref(amap, slotoff + slotmapped,
651 (amap->am_nslot - (slotoff + slotmapped)), 1);
652 if (flags & AMAP_EXTEND_FORWARDS)
653 pp_setreflen(newppref, amap->am_nslot, 1,
654 slotneed - amap->am_nslot);
655 else {
656 pp_setreflen(newppref, 0, 0,
657 slotalloc - slotneed);
658 pp_setreflen(newppref, slotalloc - slotneed, 1,
659 slotneed - slotmapped);
660 }
661 } else {
662 if (amap->am_ppref)
663 amap->am_ppref = PPREF_NONE;
664 }
665 #endif
666
667 /* update master values */
668 if (flags & AMAP_EXTEND_FORWARDS)
669 amap->am_nslot = slotneed;
670 else {
671 entry->aref.ar_pageoff = slotarea - slotadd;
672 amap->am_nslot = slotalloc;
673 }
674 oldnslots = amap->am_maxslot;
675 amap->am_maxslot = slotalloc;
676 amap_unlock(amap);
677
678 kmem_free(oldsl, oldnslots * sizeof(*oldsl));
679 kmem_free(oldbck, oldnslots * sizeof(*oldbck));
680 kmem_free(oldover, oldnslots * sizeof(*oldover));
681 #ifdef UVM_AMAP_PPREF
682 if (oldppref && oldppref != PPREF_NONE)
683 kmem_free(oldppref, oldnslots * sizeof(*oldppref));
684 #endif
685 UVMHIST_LOG(maphist,"<- done (case 3), amap = %#jx, slotneed=%jd",
686 (uintptr_t)amap, slotneed, 0, 0);
687 return 0;
688 }
689
690 /*
691 * amap_share_protect: change protection of anons in a shared amap
692 *
693 * for shared amaps, given the current data structure layout, it is
694 * not possible for us to directly locate all maps referencing the
695 * shared anon (to change the protection). in order to protect data
696 * in shared maps we use pmap_page_protect(). [this is useful for IPC
697 * mechanisms like map entry passing that may want to write-protect
698 * all mappings of a shared amap.] we traverse am_anon or am_slots
699 * depending on the current state of the amap.
700 *
701 * => entry's map and amap must be locked by the caller
702 */
703 void
704 amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
705 {
706 struct vm_amap *amap = entry->aref.ar_amap;
707 u_int slots, lcv, slot, stop;
708 struct vm_anon *anon;
709
710 KASSERT(rw_write_held(amap->am_lock));
711
712 AMAP_B2SLOT(slots, (entry->end - entry->start));
713 stop = entry->aref.ar_pageoff + slots;
714
715 if (slots < amap->am_nused) {
716 /*
717 * Cheaper to traverse am_anon.
718 */
719 for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
720 anon = amap->am_anon[lcv];
721 if (anon == NULL) {
722 continue;
723 }
724 if (anon->an_page) {
725 pmap_page_protect(anon->an_page, prot);
726 }
727 }
728 return;
729 }
730
731 /*
732 * Cheaper to traverse am_slots.
733 */
734 for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
735 slot = amap->am_slots[lcv];
736 if (slot < entry->aref.ar_pageoff || slot >= stop) {
737 continue;
738 }
739 anon = amap->am_anon[slot];
740 if (anon->an_page) {
741 pmap_page_protect(anon->an_page, prot);
742 }
743 }
744 }
745
746 /*
747 * amap_wipeout: wipeout all anon's in an amap; then free the amap!
748 *
749 * => Called from amap_unref(), when reference count drops to zero.
750 * => amap must be locked.
751 */
752
753 void
754 amap_wipeout(struct vm_amap *amap)
755 {
756 u_int lcv;
757
758 UVMHIST_FUNC(__func__);
759 UVMHIST_CALLARGS(maphist,"(amap=%#jx)", (uintptr_t)amap, 0,0,0);
760
761 KASSERT(rw_write_held(amap->am_lock));
762 KASSERT(amap->am_ref == 0);
763
764 if (__predict_false(amap->am_flags & AMAP_SWAPOFF)) {
765 /*
766 * Note: amap_swap_off() will call us again.
767 */
768 amap_unlock(amap);
769 return;
770 }
771
772 for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
773 struct vm_anon *anon;
774 u_int slot;
775
776 slot = amap->am_slots[lcv];
777 anon = amap->am_anon[slot];
778 KASSERT(anon != NULL);
779 KASSERT(anon->an_ref != 0);
780
781 KASSERT(anon->an_lock == amap->am_lock);
782 UVMHIST_LOG(maphist," processing anon %#jx, ref=%jd",
783 (uintptr_t)anon, anon->an_ref, 0, 0);
784
785 /*
786 * Drop the reference.
787 */
788
789 if (__predict_true(--anon->an_ref == 0)) {
790 uvm_anfree(anon);
791 }
792 if (__predict_false((lcv & 31) == 31)) {
793 preempt_point();
794 }
795 }
796
797 /*
798 * Finally, destroy the amap.
799 */
800
801 amap->am_nused = 0;
802 amap_unlock(amap);
803 amap_free(amap);
804 UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
805 }
806
807 /*
808 * amap_copy: ensure that a map entry's "needs_copy" flag is false
809 * by copying the amap if necessary.
810 *
811 * => an entry with a null amap pointer will get a new (blank) one.
812 * => the map that the map entry belongs to must be locked by caller.
813 * => the amap currently attached to "entry" (if any) must be unlocked.
814 * => if canchunk is true, then we may clip the entry into a chunk
815 * => "startva" and "endva" are used only if canchunk is true. they are
816 * used to limit chunking (e.g. if you have a large space that you
817 * know you are going to need to allocate amaps for, there is no point
818 * in allowing that to be chunked)
819 */
820
821 void
822 amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags,
823 vaddr_t startva, vaddr_t endva)
824 {
825 const int waitf = (flags & AMAP_COPY_NOWAIT) ? UVM_FLAG_NOWAIT : 0;
826 struct vm_amap *amap, *srcamap;
827 u_int slots, lcv;
828 krwlock_t *oldlock;
829 vsize_t len;
830
831 UVMHIST_FUNC(__func__);
832 UVMHIST_CALLARGS(maphist, " (map=%#jx, entry=%#jx, flags=%#jx)",
833 (uintptr_t)map, (uintptr_t)entry, flags, -2);
834
835 KASSERT(map != kernel_map); /* we use nointr pool */
836
837 srcamap = entry->aref.ar_amap;
838 len = entry->end - entry->start;
839
840 /*
841 * Is there an amap to copy? If not, create one.
842 */
843
844 if (srcamap == NULL) {
845 const bool canchunk = (flags & AMAP_COPY_NOCHUNK) == 0;
846
847 /*
848 * Check to see if we have a large amap that we can
849 * chunk. We align startva/endva to chunk-sized
850 * boundaries and then clip to them.
851 */
852
853 if (canchunk && atop(len) >= UVM_AMAP_LARGE) {
854 vsize_t chunksize;
855
856 /* Convert slots to bytes. */
857 chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
858 startva = (startva / chunksize) * chunksize;
859 endva = roundup(endva, chunksize);
860 UVMHIST_LOG(maphist,
861 " chunk amap ==> clip %#jx->%#jx to %#jx->%#jx",
862 entry->start, entry->end, startva, endva);
863 UVM_MAP_CLIP_START(map, entry, startva);
864
865 /* Watch out for endva wrap-around! */
866 if (endva >= startva) {
867 UVM_MAP_CLIP_END(map, entry, endva);
868 }
869 }
870
871 if ((flags & AMAP_COPY_NOMERGE) == 0 &&
872 uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) {
873 return;
874 }
875
876 UVMHIST_LOG(maphist, "<- done [creating new amap %#jx->%#jx]",
877 entry->start, entry->end, 0, 0);
878
879 /*
880 * Allocate an initialised amap and install it.
881 * Note: we must update the length after clipping.
882 */
883 len = entry->end - entry->start;
884 entry->aref.ar_pageoff = 0;
885 entry->aref.ar_amap = amap_alloc(len, 0, waitf);
886 if (entry->aref.ar_amap != NULL) {
887 entry->etype &= ~UVM_ET_NEEDSCOPY;
888 }
889 return;
890 }
891
892 /*
893 * First check and see if we are the only map entry referencing
894 * the amap we currently have. If so, then just take it over instead
895 * of copying it. Note that we read am_ref without the lock held,
896 * as the value can only be one if we have the only reference
897 * to the amap (via our locked map). If the value is greater than
898 * one, then allocate a new amap and re-check the value.
899 */
900
901 if (srcamap->am_ref == 1) {
902 entry->etype &= ~UVM_ET_NEEDSCOPY;
903 UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
904 0, 0, 0, 0);
905 return;
906 }
907
908 UVMHIST_LOG(maphist," amap=%#jx, ref=%jd, must copy it",
909 (uintptr_t)srcamap, srcamap->am_ref, 0, 0);
910
911 /*
912 * Allocate a new amap (note: not initialised, etc).
913 */
914
915 AMAP_B2SLOT(slots, len);
916 amap = amap_alloc1(slots, 0, waitf);
917 if (amap == NULL) {
918 UVMHIST_LOG(maphist, " amap_alloc1 failed", 0,0,0,0);
919 return;
920 }
921
922 /*
923 * Make the new amap share the source amap's lock, and then lock
924 * both. We must do this before we set am_nused != 0, otherwise
925 * amap_swap_off() can become interested in the amap.
926 */
927
928 oldlock = amap->am_lock;
929 mutex_enter(&amap_list_lock);
930 amap->am_lock = srcamap->am_lock;
931 mutex_exit(&amap_list_lock);
932 rw_obj_hold(amap->am_lock);
933 rw_obj_free(oldlock);
934
935 amap_lock(srcamap, RW_WRITER);
936
937 /*
938 * Re-check the reference count with the lock held. If it has
939 * dropped to one - we can take over the existing map.
940 */
941
942 if (srcamap->am_ref == 1) {
943 /* Just take over the existing amap. */
944 entry->etype &= ~UVM_ET_NEEDSCOPY;
945 amap_unlock(srcamap);
946 /* Destroy the new (unused) amap. */
947 amap->am_ref--;
948 amap_free(amap);
949 return;
950 }
951
952 /*
953 * Copy the slots. Zero the padded part.
954 */
955
956 UVMHIST_LOG(maphist, " copying amap now",0, 0, 0, 0);
957 for (lcv = 0 ; lcv < slots; lcv++) {
958 amap->am_anon[lcv] =
959 srcamap->am_anon[entry->aref.ar_pageoff + lcv];
960 if (amap->am_anon[lcv] == NULL)
961 continue;
962 KASSERT(amap->am_anon[lcv]->an_lock == srcamap->am_lock);
963 KASSERT(amap->am_anon[lcv]->an_ref > 0);
964 KASSERT(amap->am_nused < amap->am_maxslot);
965 amap->am_anon[lcv]->an_ref++;
966 amap->am_bckptr[lcv] = amap->am_nused;
967 amap->am_slots[amap->am_nused] = lcv;
968 amap->am_nused++;
969 }
970 memset(&amap->am_anon[lcv], 0,
971 (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
972
973 /*
974 * Drop our reference to the old amap (srcamap) and unlock.
975 * Since the reference count on srcamap is greater than one,
976 * (we checked above), it cannot drop to zero while it is locked.
977 */
978
979 srcamap->am_ref--;
980 KASSERT(srcamap->am_ref > 0);
981
982 if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0) {
983 srcamap->am_flags &= ~AMAP_SHARED;
984 }
985 #ifdef UVM_AMAP_PPREF
986 if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
987 amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
988 len >> PAGE_SHIFT, -1);
989 }
990 #endif
991
992 amap_unlock(srcamap);
993
994 /*
995 * Install new amap.
996 */
997
998 entry->aref.ar_pageoff = 0;
999 entry->aref.ar_amap = amap;
1000 entry->etype &= ~UVM_ET_NEEDSCOPY;
1001 UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
1002 }
1003
1004 /*
1005 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
1006 *
1007 * called during fork(2) when the parent process has a wired map
1008 * entry. in that case we want to avoid write-protecting pages
1009 * in the parent's map (e.g. like what you'd do for a COW page)
1010 * so we resolve the COW here.
1011 *
1012 * => assume parent's entry was wired, thus all pages are resident.
1013 * => assume pages that are loaned out (loan_count) are already mapped
1014 * read-only in all maps, and thus no need for us to worry about them
1015 * => assume both parent and child vm_map's are locked
1016 * => caller passes child's map/entry in to us
1017 * => if we run out of memory we will unlock the amap and sleep _with_ the
1018 * parent and child vm_map's locked(!). we have to do this since
1019 * we are in the middle of a fork(2) and we can't let the parent
1020 * map change until we are done copying all the map entrys.
1021 * => XXXCDC: out of memory should cause fork to fail, but there is
1022 * currently no easy way to do this (needs fix)
1023 */
1024
1025 void
1026 amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
1027 {
1028 struct vm_amap *amap = entry->aref.ar_amap;
1029 struct vm_anon *anon, *nanon;
1030 struct vm_page *pg, *npg;
1031 u_int lcv, slot;
1032
1033 /*
1034 * note that if we unlock the amap then we must ReStart the "lcv" for
1035 * loop because some other process could reorder the anon's in the
1036 * am_anon[] array on us while the lock is dropped.
1037 */
1038
1039 ReStart:
1040 amap_lock(amap, RW_WRITER);
1041 for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
1042 slot = amap->am_slots[lcv];
1043 anon = amap->am_anon[slot];
1044 KASSERT(anon->an_lock == amap->am_lock);
1045
1046 /*
1047 * If anon has only one reference - we must have already
1048 * copied it. This can happen if we needed to sleep waiting
1049 * for memory in a previous run through this loop. The new
1050 * page might even have been paged out, since it is not wired.
1051 */
1052
1053 if (anon->an_ref == 1) {
1054 KASSERT(anon->an_page != NULL || anon->an_swslot != 0);
1055 continue;
1056 }
1057
1058 /*
1059 * The old page must be resident since the parent is wired.
1060 */
1061
1062 pg = anon->an_page;
1063 KASSERT(pg != NULL);
1064 KASSERT(pg->wire_count > 0);
1065
1066 /*
1067 * If the page is loaned then it must already be mapped
1068 * read-only and we don't need to copy it.
1069 */
1070
1071 if (pg->loan_count != 0) {
1072 continue;
1073 }
1074 KASSERT(pg->uanon == anon);
1075 KASSERT(pg->uobject == NULL);
1076
1077 /*
1078 * If the page is busy, then we have to unlock, wait for
1079 * it and then restart.
1080 */
1081
1082 if (pg->flags & PG_BUSY) {
1083 uvm_pagewait(pg, amap->am_lock, "cownow");
1084 goto ReStart;
1085 }
1086
1087 /*
1088 * Perform a copy-on-write.
1089 * First - get a new anon and a page.
1090 */
1091
1092 nanon = uvm_analloc();
1093 if (nanon) {
1094 nanon->an_lock = amap->am_lock;
1095 npg = uvm_pagealloc(NULL, 0, nanon, 0);
1096 } else {
1097 npg = NULL;
1098 }
1099 if (nanon == NULL || npg == NULL) {
1100 amap_unlock(amap);
1101 if (nanon) {
1102 nanon->an_lock = NULL;
1103 nanon->an_ref--;
1104 KASSERT(nanon->an_ref == 0);
1105 uvm_anfree(nanon);
1106 }
1107 uvm_wait("cownowpage");
1108 goto ReStart;
1109 }
1110
1111 /*
1112 * Copy the data and replace anon with the new one.
1113 * Also, set up its lock (shared with the amap's lock).
1114 */
1115
1116 uvm_pagecopy(pg, npg);
1117 anon->an_ref--;
1118 KASSERT(anon->an_ref > 0);
1119 amap->am_anon[slot] = nanon;
1120
1121 /*
1122 * Drop PG_BUSY on new page. Since its owner was write
1123 * locked all this time - it cannot be PG_RELEASED or
1124 * waited on.
1125 */
1126 uvm_pagelock(npg);
1127 uvm_pageactivate(npg);
1128 uvm_pageunlock(npg);
1129 npg->flags &= ~(PG_BUSY|PG_FAKE);
1130 UVM_PAGE_OWN(npg, NULL);
1131 }
1132 amap_unlock(amap);
1133 }
1134
1135 /*
1136 * amap_splitref: split a single reference into two separate references
1137 *
1138 * => called from uvm_map's clip routines
1139 * => origref's map should be locked
1140 * => origref->ar_amap should be unlocked (we will lock)
1141 */
1142 void
1143 amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
1144 {
1145 struct vm_amap *amap = origref->ar_amap;
1146 u_int leftslots;
1147
1148 KASSERT(splitref->ar_amap == origref->ar_amap);
1149 AMAP_B2SLOT(leftslots, offset);
1150 KASSERT(leftslots != 0);
1151
1152 amap_lock(amap, RW_WRITER);
1153 KASSERT(amap->am_nslot - origref->ar_pageoff - leftslots > 0);
1154
1155 #ifdef UVM_AMAP_PPREF
1156 /* Establish ppref before we add a duplicate reference to the amap. */
1157 if (amap->am_ppref == NULL) {
1158 amap_pp_establish(amap, origref->ar_pageoff);
1159 }
1160 #endif
1161 /* Note: not a share reference. */
1162 amap->am_ref++;
1163 splitref->ar_pageoff = origref->ar_pageoff + leftslots;
1164 amap_unlock(amap);
1165 }
1166
1167 #ifdef UVM_AMAP_PPREF
1168
1169 /*
1170 * amap_pp_establish: add a ppref array to an amap, if possible.
1171 *
1172 * => amap should be locked by caller.
1173 */
1174 void
1175 amap_pp_establish(struct vm_amap *amap, vaddr_t offset)
1176 {
1177 const size_t sz = amap->am_maxslot * sizeof(*amap->am_ppref);
1178
1179 KASSERT(rw_write_held(amap->am_lock));
1180
1181 amap->am_ppref = kmem_zalloc(sz, KM_NOSLEEP);
1182 if (amap->am_ppref == NULL) {
1183 /* Failure - just do not use ppref. */
1184 amap->am_ppref = PPREF_NONE;
1185 return;
1186 }
1187 pp_setreflen(amap->am_ppref, 0, 0, offset);
1188 pp_setreflen(amap->am_ppref, offset, amap->am_ref,
1189 amap->am_nslot - offset);
1190 }
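/*
 * Illustrative example (a sketch in a comment, not compiled): after
 * amap_pp_establish(amap, 3) on an amap with am_nslot == 8 and
 * am_ref == 2, the ppref array describes two chunks:
 *
 *	pp_setreflen(ppref, 0, 0, 3);   ==> ppref[0] == -1, ppref[1] == 3
 *	pp_setreflen(ppref, 3, 2, 5);   ==> ppref[3] == -3, ppref[4] == 5
 *
 * i.e. slots 0..2 (before "offset") carry no references yet, while
 * slots 3..7 carry the amap's current reference count.
 */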
1191
1192 /*
1193 * amap_pp_adjref: adjust reference count to a part of an amap using the
1194 * per-page reference count array.
1195 *
1196 * => caller must check that ppref != PPREF_NONE before calling.
1197 * => map and amap must be locked.
1198 */
1199 void
1200 amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
1201 {
1202 int stopslot, *ppref, lcv, prevlcv;
1203 int ref, len, prevref, prevlen;
1204
1205 KASSERT(rw_write_held(amap->am_lock));
1206
1207 stopslot = curslot + slotlen;
1208 ppref = amap->am_ppref;
1209 prevlcv = 0;
1210
1211 /*
1212 * Advance to the correct place in the array, fragment if needed.
1213 */
1214
1215 for (lcv = 0 ; lcv < curslot ; lcv += len) {
1216 pp_getreflen(ppref, lcv, &ref, &len);
1217 if (lcv + len > curslot) { /* goes past start? */
1218 pp_setreflen(ppref, lcv, ref, curslot - lcv);
1219 pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
1220 len = curslot - lcv; /* new length of entry @ lcv */
1221 }
1222 prevlcv = lcv;
1223 }
1224 if (lcv == 0) {
1225 /*
1226 * Ensure that the "prevref == ref" test below always
1227 * fails, since we are starting from the beginning of
1228 * the ppref array; that is, there is no previous chunk.
1229 */
1230 prevref = -1;
1231 prevlen = 0;
1232 } else {
1233 pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
1234 }
1235
1236 /*
1237 * Now adjust reference counts in range. Merge the first
1238 * changed entry with the last unchanged entry if possible.
1239 */
1240 KASSERT(lcv == curslot);
1241 for (/* lcv already set */; lcv < stopslot ; lcv += len) {
1242 pp_getreflen(ppref, lcv, &ref, &len);
1243 if (lcv + len > stopslot) { /* goes past end? */
1244 pp_setreflen(ppref, lcv, ref, stopslot - lcv);
1245 pp_setreflen(ppref, stopslot, ref,
1246 len - (stopslot - lcv));
1247 len = stopslot - lcv;
1248 }
1249 ref += adjval;
1250 KASSERT(ref >= 0);
1251 KASSERT(ref <= amap->am_ref);
1252 if (lcv == prevlcv + prevlen && ref == prevref) {
1253 pp_setreflen(ppref, prevlcv, ref, prevlen + len);
1254 } else {
1255 pp_setreflen(ppref, lcv, ref, len);
1256 }
1257 if (ref == 0) {
1258 amap_wiperange(amap, lcv, len);
1259 }
1260 }
1261 }
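/*
 * Illustrative example (a sketch in a comment, not compiled): using the
 * sample layout from the ppref comment near the top of this file, where
 * slots 5..6 form a chunk with a reference count of 1,
 *
 *	amap_pp_adjref(amap, 5, 2, -1);
 *
 * leaves the neighbouring chunks untouched, rewrites the chunk at slot 5
 * as (ref 0, len 2) and, because the count dropped to zero, calls
 * amap_wiperange(amap, 5, 2) to free the anons in those slots.
 */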
1262
1263 /*
1264 * amap_wiperange: wipe out a range of an amap.
1265 * Note: different from amap_wipeout because the amap is kept intact.
1266 *
1267 * => Both map and amap must be locked by caller.
1268 */
1269 void
1270 amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
1271 {
1272 u_int lcv, stop, slotend;
1273 bool byanon;
1274
1275 KASSERT(rw_write_held(amap->am_lock));
1276
1277 /*
1278 * We can either traverse the amap by am_anon or by am_slots.
1279 * Determine which way is less expensive.
1280 */
1281
1282 if (slots < amap->am_nused) {
1283 byanon = true;
1284 lcv = slotoff;
1285 stop = slotoff + slots;
1286 slotend = 0;
1287 } else {
1288 byanon = false;
1289 lcv = 0;
1290 stop = amap->am_nused;
1291 slotend = slotoff + slots;
1292 }
1293
1294 while (lcv < stop) {
1295 struct vm_anon *anon;
1296 u_int curslot, ptr, last;
1297
1298 if (byanon) {
1299 curslot = lcv++; /* lcv advances here */
1300 if (amap->am_anon[curslot] == NULL)
1301 continue;
1302 } else {
1303 curslot = amap->am_slots[lcv];
1304 if (curslot < slotoff || curslot >= slotend) {
1305 lcv++; /* lcv advances here */
1306 continue;
1307 }
1308 stop--; /* drop stop, since anon will be removed */
1309 }
1310 anon = amap->am_anon[curslot];
1311 KASSERT(anon->an_lock == amap->am_lock);
1312
1313 /*
1314 * Remove anon from the amap.
1315 */
1316
1317 amap->am_anon[curslot] = NULL;
1318 ptr = amap->am_bckptr[curslot];
1319 last = amap->am_nused - 1;
1320 if (ptr != last) {
1321 amap->am_slots[ptr] = amap->am_slots[last];
1322 amap->am_bckptr[amap->am_slots[ptr]] = ptr;
1323 }
1324 amap->am_nused--;
1325
1326 /*
1327 * Drop its reference count.
1328 */
1329
1330 KASSERT(anon->an_lock == amap->am_lock);
1331 if (--anon->an_ref == 0) {
1332 uvm_anfree(anon);
1333 }
1334 }
1335 }
1336
1337 #endif
1338
1339 #if defined(VMSWAP)
1340
1341 /*
1342 * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
1343 *
1344 * => called with swap_syscall_lock held.
1345 * => note that we don't always traverse all anons.
1346 * eg. amaps being wiped out, released anons.
1347 * => return true if failed.
1348 */
1349
1350 bool
1351 amap_swap_off(int startslot, int endslot)
1352 {
1353 struct vm_amap *am;
1354 struct vm_amap *am_next;
1355 struct vm_amap marker_prev;
1356 struct vm_amap marker_next;
1357 bool rv = false;
1358
1359 #if defined(DIAGNOSTIC)
1360 memset(&marker_prev, 0, sizeof(marker_prev));
1361 memset(&marker_next, 0, sizeof(marker_next));
1362 #endif /* defined(DIAGNOSTIC) */
1363
1364 mutex_enter(&amap_list_lock);
1365 for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
1366 int i;
1367
1368 LIST_INSERT_BEFORE(am, &marker_prev, am_list);
1369 LIST_INSERT_AFTER(am, &marker_next, am_list);
1370
1371 /* amap_list_lock prevents the lock pointer from changing. */
1372 if (!amap_lock_try(am, RW_WRITER)) {
1373 (void)kpause("amapswpo", false, 1, &amap_list_lock);
1374 am_next = LIST_NEXT(&marker_prev, am_list);
1375 if (am_next == &marker_next) {
1376 am_next = LIST_NEXT(am_next, am_list);
1377 } else {
1378 KASSERT(LIST_NEXT(am_next, am_list) ==
1379 &marker_next);
1380 }
1381 LIST_REMOVE(&marker_prev, am_list);
1382 LIST_REMOVE(&marker_next, am_list);
1383 continue;
1384 }
1385
1386 mutex_exit(&amap_list_lock);
1387
1388 /* If am_nused == 0, the amap could be free - careful. */
1389 for (i = 0; i < am->am_nused; i++) {
1390 int slot;
1391 int swslot;
1392 struct vm_anon *anon;
1393
1394 slot = am->am_slots[i];
1395 anon = am->am_anon[slot];
1396 KASSERT(anon->an_lock == am->am_lock);
1397
1398 swslot = anon->an_swslot;
1399 if (swslot < startslot || endslot <= swslot) {
1400 continue;
1401 }
1402
1403 am->am_flags |= AMAP_SWAPOFF;
1404
1405 rv = uvm_anon_pagein(am, anon);
1406 amap_lock(am, RW_WRITER);
1407
1408 am->am_flags &= ~AMAP_SWAPOFF;
1409 if (amap_refs(am) == 0) {
1410 amap_wipeout(am);
1411 am = NULL;
1412 break;
1413 }
1414 if (rv) {
1415 break;
1416 }
1417 i = 0;
1418 }
1419
1420 if (am) {
1421 amap_unlock(am);
1422 }
1423
1424 mutex_enter(&amap_list_lock);
1425 KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
1426 LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
1427 &marker_next);
1428 am_next = LIST_NEXT(&marker_next, am_list);
1429 LIST_REMOVE(&marker_prev, am_list);
1430 LIST_REMOVE(&marker_next, am_list);
1431 }
1432 mutex_exit(&amap_list_lock);
1433
1434 return rv;
1435 }
1436
1437 #endif /* defined(VMSWAP) */
1438
1439 /*
1440 * amap_lookup: look up a page in an amap.
1441 *
1442 * => amap should be locked by caller.
1443 */
1444 struct vm_anon *
1445 amap_lookup(struct vm_aref *aref, vaddr_t offset)
1446 {
1447 struct vm_amap *amap = aref->ar_amap;
1448 struct vm_anon *an;
1449 u_int slot;
1450
1451 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1452 KASSERT(rw_lock_held(amap->am_lock));
1453
1454 AMAP_B2SLOT(slot, offset);
1455 slot += aref->ar_pageoff;
1456 an = amap->am_anon[slot];
1457
1458 UVMHIST_LOG(maphist,
1459 "<- done (amap=%#jx, offset=%#jx, result=%#jx)",
1460 (uintptr_t)amap, offset, (uintptr_t)an, 0);
1461
1462 KASSERT(slot < amap->am_nslot);
1463 KASSERT(an == NULL || an->an_ref != 0);
1464 KASSERT(an == NULL || an->an_lock == amap->am_lock);
1465 return an;
1466 }
1467
1468 /*
1469 * amap_lookups: look up a range of pages in an amap.
1470 *
1471 * => amap should be locked by caller.
1472 */
1473 void
1474 amap_lookups(struct vm_aref *aref, vaddr_t offset, struct vm_anon **anons,
1475 int npages)
1476 {
1477 struct vm_amap *amap = aref->ar_amap;
1478 u_int slot;
1479
1480 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1481 KASSERT(rw_lock_held(amap->am_lock));
1482
1483 AMAP_B2SLOT(slot, offset);
1484 slot += aref->ar_pageoff;
1485
1486 UVMHIST_LOG(maphist, " slot=%u, npages=%d, nslot=%d",
1487 slot, npages, amap->am_nslot, 0);
1488
1489 KASSERT((slot + (npages - 1)) < amap->am_nslot);
1490 memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));
1491
1492 #if defined(DIAGNOSTIC)
1493 for (int i = 0; i < npages; i++) {
1494 struct vm_anon * const an = anons[i];
1495 if (an == NULL) {
1496 continue;
1497 }
1498 KASSERT(an->an_ref != 0);
1499 KASSERT(an->an_lock == amap->am_lock);
1500 }
1501 #endif
1502 UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
1503 }
1504
1505 /*
1506 * amap_add: add (or replace) a page to an amap.
1507 *
1508 * => amap should be locked by caller.
1509 * => anon must have the lock associated with this amap.
1510 */
1511 void
1512 amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
1513 bool replace)
1514 {
1515 struct vm_amap *amap = aref->ar_amap;
1516 u_int slot;
1517
1518 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1519 KASSERT(rw_write_held(amap->am_lock));
1520 KASSERT(anon->an_lock == amap->am_lock);
1521
1522 AMAP_B2SLOT(slot, offset);
1523 slot += aref->ar_pageoff;
1524 KASSERT(slot < amap->am_nslot);
1525
1526 if (replace) {
1527 struct vm_anon *oanon = amap->am_anon[slot];
1528
1529 KASSERT(oanon != NULL);
1530 if (oanon->an_page && (amap->am_flags & AMAP_SHARED) != 0) {
1531 pmap_page_protect(oanon->an_page, VM_PROT_NONE);
1532 /*
1533 * XXX: suppose page is supposed to be wired somewhere?
1534 */
1535 }
1536 } else {
1537 KASSERT(amap->am_anon[slot] == NULL);
1538 KASSERT(amap->am_nused < amap->am_maxslot);
1539 amap->am_bckptr[slot] = amap->am_nused;
1540 amap->am_slots[amap->am_nused] = slot;
1541 amap->am_nused++;
1542 }
1543 amap->am_anon[slot] = anon;
1544 UVMHIST_LOG(maphist,
1545 "<- done (amap=%#jx, offset=%#x, anon=%#jx, rep=%d)",
1546 (uintptr_t)amap, offset, (uintptr_t)anon, replace);
1547 }
1548
1549 /*
1550 * amap_unadd: remove a page from an amap.
1551 *
1552 * => amap should be locked by caller.
1553 */
1554 void
1555 amap_unadd(struct vm_aref *aref, vaddr_t offset)
1556 {
1557 struct vm_amap *amap = aref->ar_amap;
1558 u_int slot, ptr, last;
1559
1560 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1561 KASSERT(rw_write_held(amap->am_lock));
1562
1563 AMAP_B2SLOT(slot, offset);
1564 slot += aref->ar_pageoff;
1565 KASSERT(slot < amap->am_nslot);
1566 KASSERT(amap->am_anon[slot] != NULL);
1567 KASSERT(amap->am_anon[slot]->an_lock == amap->am_lock);
1568
1569 amap->am_anon[slot] = NULL;
1570 ptr = amap->am_bckptr[slot];
1571
1572 last = amap->am_nused - 1;
1573 if (ptr != last) {
1574 /* Move the last entry to keep the slots contiguous. */
1575 amap->am_slots[ptr] = amap->am_slots[last];
1576 amap->am_bckptr[amap->am_slots[ptr]] = ptr;
1577 }
1578 amap->am_nused--;
1579 UVMHIST_LOG(maphist, "<- done (amap=%#jx, slot=%#jx)",
1580 (uintptr_t)amap, slot,0, 0);
1581 }
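/*
 * Illustrative example (a sketch in a comment, not compiled) of the
 * swap-with-last trick above: with am_nused == 3 and
 * am_slots == { 7, 2, 9 }, removing the anon at slot 7 (so ptr == 0 and
 * last == 2) moves the last dense entry into the hole:
 *
 *	am_slots == { 9, 2, ... }, am_bckptr[9] == 0, am_nused == 2
 */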
1582
1583 /*
1584 * amap_adjref_anons: adjust the reference count(s) on amap and its anons.
1585 */
1586 static void
1587 amap_adjref_anons(struct vm_amap *amap, vaddr_t offset, vsize_t len,
1588 int refv, bool all)
1589 {
1590
1591 #ifdef UVM_AMAP_PPREF
1592 KASSERT(rw_write_held(amap->am_lock));
1593
1594 /*
1595 * We must establish the ppref array before changing am_ref
1596 * so that the ppref values match the current amap refcount.
1597 */
1598
1599 if (amap->am_ppref == NULL) {
1600 amap_pp_establish(amap, offset);
1601 }
1602 #endif
1603
1604 amap->am_ref += refv;
1605
1606 #ifdef UVM_AMAP_PPREF
1607 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1608 amap_pp_adjref(amap, offset, len, refv);
1609 }
1610 #endif
1611 amap_unlock(amap);
1612 }
1613
1614 /*
1615 * amap_ref: gain a reference to an amap.
1616 *
1617 * => amap must not be locked (we will lock).
1618 * => "offset" and "len" are in units of pages.
1619 * => Called at fork time to gain the child's reference.
1620 */
1621 void
1622 amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
1623 {
1624 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1625
1626 amap_lock(amap, RW_WRITER);
1627 if (flags & AMAP_SHARED) {
1628 amap->am_flags |= AMAP_SHARED;
1629 }
1630 amap_adjref_anons(amap, offset, len, 1, (flags & AMAP_REFALL) != 0);
1631
1632 UVMHIST_LOG(maphist,"<- done! amap=%#jx", (uintptr_t)amap, 0, 0, 0);
1633 }
1634
1635 /*
1636 * amap_unref: remove a reference to an amap.
1637 *
1638 * => All pmap-level references to this amap must be already removed.
1639 * => Called from uvm_unmap_detach(); entry is already removed from the map.
1640 * => We will lock amap, so it must be unlocked.
1641 */
1642 void
1643 amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, bool all)
1644 {
1645 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1646
1647 amap_lock(amap, RW_WRITER);
1648
1649 UVMHIST_LOG(maphist," amap=%#jx refs=%d, nused=%d",
1650 (uintptr_t)amap, amap->am_ref, amap->am_nused, 0);
1651 KASSERT(amap->am_ref > 0);
1652
1653 if (amap->am_ref == 1) {
1654
1655 /*
1656 * If the last reference - wipeout and destroy the amap.
1657 */
1658 amap->am_ref--;
1659 amap_wipeout(amap);
1660 UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
1661 return;
1662 }
1663
1664 /*
1665 * Otherwise, drop the reference count(s) on anons.
1666 */
1667
1668 if (amap->am_ref == 2 && (amap->am_flags & AMAP_SHARED) != 0) {
1669 amap->am_flags &= ~AMAP_SHARED;
1670 }
1671 amap_adjref_anons(amap, offset, len, -1, all);
1672
1673 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1674 }
1675