1 /* $OpenBSD: uvm_aobj.c,v 1.110 2024/04/13 23:44:11 jsg Exp $ */
2 /* $NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $ */
3
4 /*
5 * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
6 * Washington University.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
30 */
31 /*
32 * uvm_aobj.c: anonymous memory uvm_object pager
33 *
34 * author: Chuck Silvers <chuq@chuq.com>
35 * started: Jan-1998
36 *
37 * - design mostly from Chuck Cranor
38 */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/malloc.h>
43 #include <sys/kernel.h>
44 #include <sys/pool.h>
45 #include <sys/stdint.h>
46 #include <sys/atomic.h>
47
48 #include <uvm/uvm.h>
49
50 /*
51 * An anonymous UVM object (aobj) manages anonymous-memory. In addition to
52 * keeping the list of resident pages, it may also keep a list of allocated
53 * swap blocks. Depending on the size of the object, this list is either
54 * stored in an array (small objects) or in a hash table (large objects).
55 */
56
/*
 * Note: for hash tables, we break the address space of the aobj into blocks
 * of UAO_SWHASH_CLUSTER_SIZE pages, which shall be a power of two.
 */
#define UAO_SWHASH_CLUSTER_SHIFT 4
#define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT)

/* Get the "tag" for this page index: the cluster number it belongs to. */
#define UAO_SWHASH_ELT_TAG(idx) ((idx) >> UAO_SWHASH_CLUSTER_SHIFT)
/* Get the slot index within an elt's cluster for this page index. */
#define UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \
    ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1))

/* Given an ELT and a page index, find the swap slot. */
#define UAO_SWHASH_ELT_PAGESLOT(elt, idx) \
    ((elt)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(idx)])

/* Given an ELT, return its pageidx base (first page index in the cluster). */
#define UAO_SWHASH_ELT_PAGEIDX_BASE(elt) \
    ((elt)->tag << UAO_SWHASH_CLUSTER_SHIFT)

/* The hash function: bucket is selected by the cluster tag, masked. */
#define UAO_SWHASH_HASH(aobj, idx) \
    (&(aobj)->u_swhash[(((idx) >> UAO_SWHASH_CLUSTER_SHIFT) \
    & (aobj)->u_swhashmask)])

/*
 * The threshold which determines whether we will use an array or a
 * hash table to store the list of allocated swap blocks.
 */
#define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4)
#define UAO_USES_SWHASH(aobj) \
    ((aobj)->u_pages > UAO_SWHASH_THRESHOLD)

/* The number of buckets in a hash, with an upper bound. */
#define UAO_SWHASH_MAXBUCKETS 256
#define UAO_SWHASH_BUCKETS(pages) \
    (min((pages) >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS))
94
95
/*
 * uao_swhash_elt: when a hash table is being used, this structure defines
 * the format of an entry in the bucket list.  One elt covers a cluster of
 * UAO_SWHASH_CLUSTER_SIZE consecutive page indices.
 */
struct uao_swhash_elt {
	LIST_ENTRY(uao_swhash_elt) list;	/* the hash list */
	voff_t tag;				/* our 'tag' (cluster number) */
	int count;				/* our number of active slots */
	int slots[UAO_SWHASH_CLUSTER_SIZE];	/* the slots (0 == unallocated) */
};

/*
 * uao_swhash: the swap hash table structure (an array of these is a table)
 */
LIST_HEAD(uao_swhash, uao_swhash_elt);

/*
 * uao_swhash_elt_pool: pool of uao_swhash_elt structures
 */
struct pool uao_swhash_elt_pool;

/*
 * uvm_aobj: the actual anon-backed uvm_object
 *
 * => the uvm_object is at the top of the structure, this allows
 *   (struct uvm_aobj *) == (struct uvm_object *)
 * => only one of u_swslots and u_swhash is used in any given aobj
 *    (chosen by UAO_USES_SWHASH(), i.e. by u_pages)
 */
struct uvm_aobj {
	struct uvm_object u_obj; /* has: pgops, memt, #pages, #refs */
	int u_pages;		 /* number of pages in entire object */
	int u_flags;		 /* the flags (see uvm_aobj.h) */
	/*
	 * Either an array or hashtable (array of bucket heads) of
	 * offset -> swapslot mappings for the aobj.
	 */
#define u_swslots	u_swap.slot_array
#define u_swhash	u_swap.slot_hash
	union swslots {
		int			*slot_array;
		struct uao_swhash	*slot_hash;
	} u_swap;
	u_long u_swhashmask;		/* mask for hashtable */
	LIST_ENTRY(uvm_aobj) u_list;	/* global list of aobjs */
};

/* uvm_aobj_pool: pool all non-kernel aobjs are allocated from. */
struct pool uvm_aobj_pool;
143
/*
 * Local prototypes.  The uao_shrink_*()/uao_grow_*() helpers implement the
 * TMPFS-only resize paths; uao_dropswap_range() is shared with swap-off.
 */
static struct uao_swhash_elt *uao_find_swhash_elt(struct uvm_aobj *, int,
    boolean_t);
static boolean_t uao_flush(struct uvm_object *, voff_t,
    voff_t, int);
static void uao_free(struct uvm_aobj *);
static int uao_get(struct uvm_object *, voff_t,
    vm_page_t *, int *, int, vm_prot_t,
    int, int);
static boolean_t uao_pagein(struct uvm_aobj *, int, int);
static boolean_t uao_pagein_page(struct uvm_aobj *, int);

void	uao_dropswap_range(struct uvm_object *, voff_t, voff_t);
void	uao_shrink_flush(struct uvm_object *, int, int);
int	uao_shrink_hash(struct uvm_object *, int);
int	uao_shrink_array(struct uvm_object *, int);
int	uao_shrink_convert(struct uvm_object *, int);

int	uao_grow_hash(struct uvm_object *, int);
int	uao_grow_array(struct uvm_object *, int);
int	uao_grow_convert(struct uvm_object *, int);
164
/*
 * aobj_pager
 *
 * note that some functions (e.g. put) are handled elsewhere
 */
const struct uvm_pagerops aobj_pager = {
	.pgo_reference = uao_reference,
	.pgo_detach = uao_detach,
	.pgo_flush = uao_flush,
	.pgo_get = uao_get,
};

/*
 * uao_list: global list of active aobjs, locked by uao_list_lock
 *
 * Lock ordering: generally the locking order is object lock, then list lock.
 * in the case of swap off we have to iterate over the list, and thus the
 * ordering is reversed. In that case we must use trylocking to prevent
 * deadlock.
 */
static LIST_HEAD(aobjlist, uvm_aobj) uao_list = LIST_HEAD_INITIALIZER(uao_list);
static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
187
188
/*
 * functions
 */

/*
 * hash table/array related functions
 */

/*
 * uao_find_swhash_elt: find (or create) a hash table entry for a page
 * offset.
 *
 * => returns NULL when "create" is FALSE and no entry exists.
 * => NOTE(review): caller is expected to hold the object stable (lock or
 *    sole reference) while the hash is walked/modified — confirm at callers.
 */
static struct uao_swhash_elt *
uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, boolean_t create)
{
	struct uao_swhash *swhash;
	struct uao_swhash_elt *elt;
	voff_t page_tag;

	swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */
	page_tag = UAO_SWHASH_ELT_TAG(pageidx); /* tag to search for */

	/*
	 * now search the bucket for the requested tag
	 */
	LIST_FOREACH(elt, swhash, list) {
		if (elt->tag == page_tag)
			return elt;
	}

	if (!create)
		return NULL;

	/*
	 * allocate a new entry for the bucket and init/insert it in
	 * (PR_ZERO leaves count == 0 and all slots unallocated)
	 */
	elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT | PR_ZERO);
	/*
	 * XXX We cannot sleep here as the hash table might disappear
	 * from under our feet.  And we run the risk of deadlocking
	 * the pagedeamon.  In fact this code will only be called by
	 * the pagedaemon and allocation will only fail if we
	 * exhausted the pagedeamon reserve.  In that case we're
	 * doomed anyway, so panic.
	 */
	if (elt == NULL)
		panic("%s: can't allocate entry", __func__);
	LIST_INSERT_HEAD(swhash, elt, list);
	elt->tag = page_tag;

	return elt;
}
239
240 /*
241 * uao_find_swslot: find the swap slot number for an aobj/pageidx
242 */
243 int
uao_find_swslot(struct uvm_object * uobj,int pageidx)244 uao_find_swslot(struct uvm_object *uobj, int pageidx)
245 {
246 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
247
248 KASSERT(UVM_OBJ_IS_AOBJ(uobj));
249
250 /*
251 * if noswap flag is set, then we never return a slot
252 */
253 if (aobj->u_flags & UAO_FLAG_NOSWAP)
254 return 0;
255
256 /*
257 * if hashing, look in hash table.
258 */
259 if (UAO_USES_SWHASH(aobj)) {
260 struct uao_swhash_elt *elt =
261 uao_find_swhash_elt(aobj, pageidx, FALSE);
262
263 if (elt)
264 return UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
265 else
266 return 0;
267 }
268
269 /*
270 * otherwise, look in the array
271 */
272 return aobj->u_swslots[pageidx];
273 }
274
/*
 * uao_set_swslot: set the swap slot for a page in an aobj.
 *
 * => setting a slot to zero frees the slot
 * => object must be locked by caller
 * => we return the old slot number, or -1 if we failed to allocate
 *    memory to record the new slot number
 *    NOTE(review): with the current uao_find_swhash_elt(), allocation
 *    failure panics instead of returning -1 — confirm the -1 contract
 *    is still live for any caller.
 */
int
uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int oldslot;

	KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);
	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	/*
	 * if noswap flag is set, then we can't set a slot
	 */
	if (aobj->u_flags & UAO_FLAG_NOSWAP) {
		if (slot == 0)
			return 0;		/* a clear is ok */

		/* but a set is not */
		printf("uao_set_swslot: uobj = %p\n", uobj);
		panic("uao_set_swslot: attempt to set a slot on a NOSWAP object");
	}

	/*
	 * are we using a hash table?  if so, add it in the hash.
	 */
	if (UAO_USES_SWHASH(aobj)) {
		/*
		 * Avoid allocating an entry just to free it again if
		 * the page had not swap slot in the first place, and
		 * we are freeing.
		 */
		struct uao_swhash_elt *elt =
		    uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE);
		if (elt == NULL) {
			KASSERT(slot == 0);
			return 0;
		}

		oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
		UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;

		/*
		 * now adjust the elt's reference counter and free it if we've
		 * dropped it to zero.  (elt->count tracks the number of
		 * non-zero slots in this cluster.)
		 */
		if (slot) {
			if (oldslot == 0)
				elt->count++;
		} else {
			if (oldslot)
				elt->count--;

			if (elt->count == 0) {
				LIST_REMOVE(elt, list);
				pool_put(&uao_swhash_elt_pool, elt);
			}
		}
	} else {
		/* we are using an array */
		oldslot = aobj->u_swslots[pageidx];
		aobj->u_swslots[pageidx] = slot;
	}
	return oldslot;
}

/*
 * end of hash/array functions
 */
349
/*
 * uao_free: free all resources held by an aobj, and then free the aobj
 *
 * => the aobj should be dead (no more references, no resident pages)
 * => object lock is held on entry and released here; once the swap
 *    resources are dropped nobody else can find the object, so it is
 *    safe to free the slot structures unlocked.
 */
static void
uao_free(struct uvm_aobj *aobj)
{
	struct uvm_object *uobj = &aobj->u_obj;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));
	uao_dropswap_range(uobj, 0, 0);		/* 0..0 == entire object */
	rw_exit(uobj->vmobjlock);

	if (UAO_USES_SWHASH(aobj)) {
		/*
		 * free the hash table itself.
		 */
		hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
	} else {
		free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
	}

	/*
	 * finally free the aobj itself
	 */
	uvm_obj_destroy(uobj);
	pool_put(&uvm_aobj_pool, aobj);
}
380
381 /*
382 * pager functions
383 */
384
385 #ifdef TMPFS
/*
 * Shrink an aobj to a given number of pages. The procedure is always the same:
 * assess the necessity of data structure conversion (hash to array), secure
 * resources, flush pages and drop swap slots.
 *
 */

/*
 * uao_shrink_flush: free the pages and swap slots in [startpg, endpg).
 *
 * => only valid while the object has a single reference (asserted).
 */
void
uao_shrink_flush(struct uvm_object *uobj, int startpg, int endpg)
{
	KASSERT(startpg < endpg);
	KASSERT(uobj->uo_refs == 1);
	uao_flush(uobj, (voff_t)startpg << PAGE_SHIFT,
	    (voff_t)endpg << PAGE_SHIFT, PGO_FREE);
	uao_dropswap_range(uobj, startpg, endpg);
}
402
/*
 * uao_shrink_hash: shrink an aobj that stays in hash form.
 *
 * => returns 0 on success, ENOMEM if the new (smaller) table cannot be
 *    allocated; on failure the object is left unchanged.
 */
int
uao_shrink_hash(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash *new_swhash;
	struct uao_swhash_elt *elt;
	unsigned long new_hashmask;
	int i;

	KASSERT(UAO_USES_SWHASH(aobj));

	/*
	 * If the size of the hash table doesn't change, all we need to do is
	 * to adjust the page count.
	 */
	if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
		uao_shrink_flush(uobj, pages, aobj->u_pages);
		aobj->u_pages = pages;
		return 0;
	}

	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL, &new_hashmask);
	if (new_swhash == NULL)
		return ENOMEM;

	/* Drop everything past the new end before migrating buckets. */
	uao_shrink_flush(uobj, pages, aobj->u_pages);

	/*
	 * Even though the hash table size is changing, the hash of the buckets
	 * we are interested in copying should not change.  (After the flush,
	 * only elts whose tag fits the smaller table remain.)
	 */
	for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
		while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
			elt = LIST_FIRST(&aobj->u_swhash[i]);
			LIST_REMOVE(elt, list);
			LIST_INSERT_HEAD(&new_swhash[i], elt, list);
		}
	}

	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);

	aobj->u_swhash = new_swhash;
	aobj->u_pages = pages;
	aobj->u_swhashmask = new_hashmask;

	return 0;
}
451
/*
 * uao_shrink_convert: shrink an aobj below the hash threshold, converting
 * its swap bookkeeping from hash table to a plain array.
 *
 * => returns 0 on success, ENOMEM if the array cannot be allocated.
 */
int
uao_shrink_convert(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash_elt *elt;
	int i, *new_swslots;

	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL | M_ZERO);
	if (new_swslots == NULL)
		return ENOMEM;

	/* Drop pages/slots past the new end; only [0, pages) survive. */
	uao_shrink_flush(uobj, pages, aobj->u_pages);

	/* Convert swap slots from hash to array. */
	for (i = 0; i < pages; i++) {
		elt = uao_find_swhash_elt(aobj, i, FALSE);
		if (elt != NULL) {
			new_swslots[i] = UAO_SWHASH_ELT_PAGESLOT(elt, i);
			/* keep the elt's active-slot count in sync */
			if (new_swslots[i] != 0)
				elt->count--;
			if (elt->count == 0) {
				LIST_REMOVE(elt, list);
				pool_put(&uao_swhash_elt_pool, elt);
			}
		}
	}

	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);

	aobj->u_swslots = new_swslots;
	aobj->u_pages = pages;

	return 0;
}
487
488 int
uao_shrink_array(struct uvm_object * uobj,int pages)489 uao_shrink_array(struct uvm_object *uobj, int pages)
490 {
491 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
492 int i, *new_swslots;
493
494 new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
495 M_WAITOK | M_CANFAIL | M_ZERO);
496 if (new_swslots == NULL)
497 return ENOMEM;
498
499 uao_shrink_flush(uobj, pages, aobj->u_pages);
500
501 for (i = 0; i < pages; i++)
502 new_swslots[i] = aobj->u_swslots[i];
503
504 free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
505
506 aobj->u_swslots = new_swslots;
507 aobj->u_pages = pages;
508
509 return 0;
510 }
511
512 int
uao_shrink(struct uvm_object * uobj,int pages)513 uao_shrink(struct uvm_object *uobj, int pages)
514 {
515 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
516
517 KASSERT(pages < aobj->u_pages);
518
519 /*
520 * Distinguish between three possible cases:
521 * 1. aobj uses hash and must be converted to array.
522 * 2. aobj uses array and array size needs to be adjusted.
523 * 3. aobj uses hash and hash size needs to be adjusted.
524 */
525 if (pages > UAO_SWHASH_THRESHOLD)
526 return uao_shrink_hash(uobj, pages); /* case 3 */
527 else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
528 return uao_shrink_convert(uobj, pages); /* case 1 */
529 else
530 return uao_shrink_array(uobj, pages); /* case 2 */
531 }
532
/*
 * Grow an aobj to a given number of pages. Right now we only adjust the swap
 * slots. We could additionally handle page allocation directly, so that they
 * don't happen through uvm_fault(). That would allow us to use another
 * mechanism for the swap slots other than malloc(). It is thus mandatory that
 * the caller of these functions does not allow faults to happen in case of
 * growth error.
 */

/*
 * uao_grow_array: grow an aobj that stays in array form.
 *
 * => returns 0 on success, ENOMEM on allocation failure (object untouched).
 */
int
uao_grow_array(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int i, *new_swslots;

	KASSERT(aobj->u_pages <= UAO_SWHASH_THRESHOLD);

	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL | M_ZERO);
	if (new_swslots == NULL)
		return ENOMEM;

	/* copy existing slots; M_ZERO left the new tail unallocated (0) */
	for (i = 0; i < aobj->u_pages; i++)
		new_swslots[i] = aobj->u_swslots[i];

	free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));

	aobj->u_swslots = new_swslots;
	aobj->u_pages = pages;

	return 0;
}
564
/*
 * uao_grow_hash: grow an aobj that stays in hash form.
 *
 * => returns 0 on success, ENOMEM on allocation failure (object untouched).
 */
int
uao_grow_hash(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash *new_swhash;
	struct uao_swhash_elt *elt;
	unsigned long new_hashmask;
	int i;

	KASSERT(pages > UAO_SWHASH_THRESHOLD);

	/*
	 * If the size of the hash table doesn't change, all we need to do is
	 * to adjust the page count.
	 */
	if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
		aobj->u_pages = pages;
		return 0;
	}

	KASSERT(UAO_SWHASH_BUCKETS(aobj->u_pages) < UAO_SWHASH_BUCKETS(pages));

	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL, &new_hashmask);
	if (new_swhash == NULL)
		return ENOMEM;

	/*
	 * Migrate all elts.  NOTE(review): elts are reinserted at the same
	 * bucket index they occupied in the smaller table — relies on the
	 * power-of-two mask so low bits of the tag are unchanged; confirm
	 * against hashinit() semantics.
	 */
	for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
		while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
			elt = LIST_FIRST(&aobj->u_swhash[i]);
			LIST_REMOVE(elt, list);
			LIST_INSERT_HEAD(&new_swhash[i], elt, list);
		}
	}

	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);

	aobj->u_swhash = new_swhash;
	aobj->u_pages = pages;
	aobj->u_swhashmask = new_hashmask;

	return 0;
}
608
/*
 * uao_grow_convert: grow an aobj past the hash threshold, converting its
 * swap bookkeeping from a plain array to a hash table.
 *
 * => returns 0 on success, ENOMEM on allocation failure (object untouched).
 */
int
uao_grow_convert(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash *new_swhash;
	struct uao_swhash_elt *elt;
	unsigned long new_hashmask;
	int i, *old_swslots;

	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL, &new_hashmask);
	if (new_swhash == NULL)
		return ENOMEM;

	/* Set these now, so we can use uao_find_swhash_elt(). */
	old_swslots = aobj->u_swslots;
	aobj->u_swhash = new_swhash;
	aobj->u_swhashmask = new_hashmask;

	/* re-register every allocated slot under its cluster elt */
	for (i = 0; i < aobj->u_pages; i++) {
		if (old_swslots[i] != 0) {
			elt = uao_find_swhash_elt(aobj, i, TRUE);
			elt->count++;
			UAO_SWHASH_ELT_PAGESLOT(elt, i) = old_swslots[i];
		}
	}

	free(old_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
	aobj->u_pages = pages;

	return 0;
}
641
642 int
uao_grow(struct uvm_object * uobj,int pages)643 uao_grow(struct uvm_object *uobj, int pages)
644 {
645 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
646
647 KASSERT(pages > aobj->u_pages);
648
649 /*
650 * Distinguish between three possible cases:
651 * 1. aobj uses hash and hash size needs to be adjusted.
652 * 2. aobj uses array and array size needs to be adjusted.
653 * 3. aobj uses array and must be converted to hash.
654 */
655 if (pages <= UAO_SWHASH_THRESHOLD)
656 return uao_grow_array(uobj, pages); /* case 2 */
657 else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
658 return uao_grow_hash(uobj, pages); /* case 1 */
659 else
660 return uao_grow_convert(uobj, pages);
661 }
662 #endif /* TMPFS */
663
/*
 * uao_create: create an aobj of the given size and return its uvm_object.
 *
 * => for normal use, flags are zero or UAO_FLAG_CANFAIL.
 * => for the kernel object, the flags are:
 *	UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
 *	UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
 * => the kernel object is created in two steps: KERNOBJ first (no swap
 *    structures, NOSWAP set), then KERNSWAP later once malloc is usable.
 */
struct uvm_object *
uao_create(vsize_t size, int flags)
{
	static struct uvm_aobj kernel_object_store;
	static struct rwlock bootstrap_kernel_object_lock;
	static int kobj_alloced = 0;	/* tracks kernel-object setup stage */
	int pages = round_page(size) >> PAGE_SHIFT;
	struct uvm_aobj *aobj;
	int refs;

	/*
	 * Allocate a new aobj, unless kernel object is requested.
	 */
	if (flags & UAO_FLAG_KERNOBJ) {
		KASSERT(!kobj_alloced);
		aobj = &kernel_object_store;
		aobj->u_pages = pages;
		aobj->u_flags = UAO_FLAG_NOSWAP;
		refs = UVM_OBJ_KERN;
		kobj_alloced = UAO_FLAG_KERNOBJ;
	} else if (flags & UAO_FLAG_KERNSWAP) {
		/* second step: only valid after the KERNOBJ step */
		KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
		aobj = &kernel_object_store;
		kobj_alloced = UAO_FLAG_KERNSWAP;
	} else {
		aobj = pool_get(&uvm_aobj_pool, PR_WAITOK);
		aobj->u_pages = pages;
		aobj->u_flags = 0;
		refs = 1;
	}

	/*
	 * allocate hash/array if necessary
	 * (skipped for the initial KERNOBJ step, which stays NOSWAP)
	 */
	if (flags == 0 || (flags & (UAO_FLAG_KERNSWAP | UAO_FLAG_CANFAIL))) {
		int mflags;

		/* KERNSWAP/CANFAIL callers cannot sleep for memory */
		if (flags)
			mflags = M_NOWAIT;
		else
			mflags = M_WAITOK;

		/* allocate hash table or array depending on object size */
		if (UAO_USES_SWHASH(aobj)) {
			aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(pages),
			    M_UVMAOBJ, mflags, &aobj->u_swhashmask);
			if (aobj->u_swhash == NULL) {
				if (flags & UAO_FLAG_CANFAIL) {
					pool_put(&uvm_aobj_pool, aobj);
					return NULL;
				}
				panic("uao_create: hashinit swhash failed");
			}
		} else {
			aobj->u_swslots = mallocarray(pages, sizeof(int),
			    M_UVMAOBJ, mflags|M_ZERO);
			if (aobj->u_swslots == NULL) {
				if (flags & UAO_FLAG_CANFAIL) {
					pool_put(&uvm_aobj_pool, aobj);
					return NULL;
				}
				panic("uao_create: malloc swslots failed");
			}
		}

		if (flags & UAO_FLAG_KERNSWAP) {
			/* object was already initialized in the KERNOBJ step */
			aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
			return &aobj->u_obj;
			/* done! */
		}
	}

	/*
	 * Initialise UVM object.
	 */
	uvm_obj_init(&aobj->u_obj, &aobj_pager, refs);
	if (flags & UAO_FLAG_KERNOBJ) {
		/* Use a temporary static lock for kernel_object. */
		rw_init(&bootstrap_kernel_object_lock, "kobjlk");
		uvm_obj_setlock(&aobj->u_obj, &bootstrap_kernel_object_lock);
	}

	/*
	 * now that aobj is ready, add it to the global list
	 */
	mtx_enter(&uao_list_lock);
	LIST_INSERT_HEAD(&uao_list, aobj, u_list);
	mtx_leave(&uao_list_lock);

	return &aobj->u_obj;
}
763
764
765
/*
 * uao_init: set up aobj pager subsystem
 *
 * => called at boot time from uvm_pager_init()
 * => initializes the two pools used by this file (elts and aobjs)
 */
void
uao_init(void)
{
	/*
	 * NOTE: Pages for this pool must not come from a pageable
	 * kernel map!
	 */
	pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), 0,
	    IPL_NONE, PR_WAITOK, "uaoeltpl", NULL);
	pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0,
	    IPL_NONE, PR_WAITOK, "aobjpl", NULL);
}
783
784 /*
785 * uao_reference: hold a reference to an anonymous UVM object.
786 */
787 void
uao_reference(struct uvm_object * uobj)788 uao_reference(struct uvm_object *uobj)
789 {
790 /* Kernel object is persistent. */
791 if (UVM_OBJ_IS_KERN_OBJECT(uobj))
792 return;
793
794 atomic_inc_int(&uobj->uo_refs);
795 }
796
797
/*
 * uao_detach: drop a reference to an anonymous UVM object.
 *
 * => on last reference: unlink from the global list, free all resident
 *    pages and their swap slots, then destroy the object via uao_free().
 */
void
uao_detach(struct uvm_object *uobj)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct vm_page *pg;

	/*
	 * Detaching from kernel_object is a NOP.
	 */
	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
		return;

	/*
	 * Drop the reference.  If it was the last one, destroy the object.
	 */
	if (atomic_dec_int_nv(&uobj->uo_refs) > 0) {
		return;
	}

	/*
	 * Remove the aobj from the global list.
	 */
	mtx_enter(&uao_list_lock);
	LIST_REMOVE(aobj, u_list);
	mtx_leave(&uao_list_lock);

	/*
	 * Free all the pages left in the aobj.  For each page, when the
	 * page is no longer busy (and thus after any disk I/O that it is
	 * involved in is complete), release any swap resources and free
	 * the page itself.
	 */
	rw_enter(uobj->vmobjlock, RW_WRITE);
	while ((pg = RBT_ROOT(uvm_objtree, &uobj->memt)) != NULL) {
		pmap_page_protect(pg, PROT_NONE);
		if (pg->pg_flags & PG_BUSY) {
			/* uvm_pagewait() releases the object lock; retake
			 * it and restart from the tree root. */
			uvm_pagewait(pg, uobj->vmobjlock, "uao_det");
			rw_enter(uobj->vmobjlock, RW_WRITE);
			continue;
		}
		uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
		uvm_lock_pageq();
		uvm_pagefree(pg);
		uvm_unlock_pageq();
	}

	/*
	 * Finally, free the anonymous UVM object itself.
	 * (uao_free() consumes and releases the object lock.)
	 */
	uao_free(aobj);
}
852
/*
 * uao_flush: flush pages out of a uvm object
 *
 * => if PGO_CLEANIT is not set, then we will not block.
 * => if PGO_ALLPAGE is set, then all pages in the object are valid targets
 *	for flushing.
 * => NOTE: we are allowed to lock the page queues, so the caller
 *	must not be holding the lock on them [e.g. pagedaemon had
 *	better not call us with the queues locked]
 * => we return TRUE unless we encountered some sort of I/O error
 *	XXXJRT currently never happens, as we never directly initiate
 *	XXXJRT I/O
 */
boolean_t
uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *) uobj;
	struct vm_page *pg;
	voff_t curoff;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));

	/* clamp the requested range to the object */
	if (flags & PGO_ALLPAGES) {
		start = 0;
		stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
	} else {
		start = trunc_page(start);
		stop = round_page(stop);
		if (stop > ((voff_t)aobj->u_pages << PAGE_SHIFT)) {
			printf("uao_flush: strange, got an out of range "
			    "flush (fixed)\n");
			stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
		}
	}

	/*
	 * Don't need to do any work here if we're not freeing
	 * or deactivating pages.
	 */
	if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
		return TRUE;
	}

	curoff = start;
	for (;;) {
		if (curoff < stop) {
			pg = uvm_pagelookup(uobj, curoff);
			curoff += PAGE_SIZE;
			if (pg == NULL)
				continue;
		} else {
			break;
		}

		/* Make sure page is unbusy, else wait for it. */
		if (pg->pg_flags & PG_BUSY) {
			/* uvm_pagewait() dropped the object lock; retake
			 * it and re-examine the same offset. */
			uvm_pagewait(pg, uobj->vmobjlock, "uaoflsh");
			rw_enter(uobj->vmobjlock, RW_WRITE);
			curoff -= PAGE_SIZE;
			continue;
		}

		switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
		/*
		 * XXX In these first 3 cases, we always just
		 * XXX deactivate the page.  We may want to
		 * XXX handle the different cases more specifically
		 * XXX in the future.
		 */
		case PGO_CLEANIT|PGO_FREE:
			/* FALLTHROUGH */
		case PGO_CLEANIT|PGO_DEACTIVATE:
			/* FALLTHROUGH */
		case PGO_DEACTIVATE:
 deactivate_it:
			/* wired pages are never deactivated */
			if (pg->wire_count != 0)
				continue;

			uvm_lock_pageq();
			pmap_page_protect(pg, PROT_NONE);
			uvm_pagedeactivate(pg);
			uvm_unlock_pageq();

			continue;
		case PGO_FREE:
			/*
			 * If there are multiple references to
			 * the object, just deactivate the page.
			 */
			if (uobj->uo_refs > 1)
				goto deactivate_it;

			/* XXX skip the page if it's wired */
			if (pg->wire_count != 0)
				continue;

			/*
			 * free the swap slot and the page.
			 */
			pmap_page_protect(pg, PROT_NONE);

			/*
			 * freeing swapslot here is not strictly necessary.
			 * however, leaving it here doesn't save much
			 * because we need to update swap accounting anyway.
			 */
			uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
			uvm_lock_pageq();
			uvm_pagefree(pg);
			uvm_unlock_pageq();

			continue;
		default:
			panic("uao_flush: weird flags");
		}
	}

	return TRUE;
}
973
974 /*
975 * uao_get: fetch me a page
976 *
977 * we have three cases:
978 * 1: page is resident -> just return the page.
979 * 2: page is zero-fill -> allocate a new page and zero it.
980 * 3: page is swapped out -> fetch the page from swap.
981 *
982 * cases 1 can be handled with PGO_LOCKED, cases 2 and 3 cannot.
983 * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
984 * then we will need to return VM_PAGER_UNLOCK.
985 *
986 * => flags: PGO_ALLPAGES: get all of the pages
987 * PGO_LOCKED: fault data structures are locked
988 * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
989 * => NOTE: caller must check for released pages!!
990 */
991 static int
uao_get(struct uvm_object * uobj,voff_t offset,struct vm_page ** pps,int * npagesp,int centeridx,vm_prot_t access_type,int advice,int flags)992 uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
993 int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
994 {
995 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
996 voff_t current_offset;
997 vm_page_t ptmp;
998 int lcv, gotpages, maxpages, swslot, rv, pageidx;
999 boolean_t done;
1000
1001 KASSERT(UVM_OBJ_IS_AOBJ(uobj));
1002 KASSERT(rw_write_held(uobj->vmobjlock));
1003
1004 /*
1005 * get number of pages
1006 */
1007 maxpages = *npagesp;
1008
1009 if (flags & PGO_LOCKED) {
1010 /*
1011 * step 1a: get pages that are already resident. only do
1012 * this if the data structures are locked (i.e. the first
1013 * time through).
1014 */
1015
1016 done = TRUE; /* be optimistic */
1017 gotpages = 0; /* # of pages we got so far */
1018
1019 for (lcv = 0, current_offset = offset ; lcv < maxpages ;
1020 lcv++, current_offset += PAGE_SIZE) {
1021 /* do we care about this page? if not, skip it */
1022 if (pps[lcv] == PGO_DONTCARE)
1023 continue;
1024
1025 ptmp = uvm_pagelookup(uobj, current_offset);
1026
1027 /*
1028 * if page is new, attempt to allocate the page,
1029 * zero-fill'd.
1030 */
1031 if (ptmp == NULL && uao_find_swslot(uobj,
1032 current_offset >> PAGE_SHIFT) == 0) {
1033 ptmp = uvm_pagealloc(uobj, current_offset,
1034 NULL, UVM_PGA_ZERO);
1035 if (ptmp) {
1036 /* new page */
1037 atomic_clearbits_int(&ptmp->pg_flags,
1038 PG_BUSY|PG_FAKE);
1039 atomic_setbits_int(&ptmp->pg_flags,
1040 PQ_AOBJ);
1041 UVM_PAGE_OWN(ptmp, NULL);
1042 }
1043 }
1044
1045 /*
1046 * to be useful must get a non-busy page
1047 */
1048 if (ptmp == NULL ||
1049 (ptmp->pg_flags & PG_BUSY) != 0) {
1050 if (lcv == centeridx ||
1051 (flags & PGO_ALLPAGES) != 0)
1052 /* need to do a wait or I/O! */
1053 done = FALSE;
1054 continue;
1055 }
1056
1057 /*
1058 * useful page: plug it in our result array
1059 */
1060 atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
1061 UVM_PAGE_OWN(ptmp, "uao_get1");
1062 pps[lcv] = ptmp;
1063 gotpages++;
1064
1065 }
1066
1067 /*
1068 * step 1b: now we've either done everything needed or we
1069 * to unlock and do some waiting or I/O.
1070 */
1071 *npagesp = gotpages;
1072 if (done)
1073 /* bingo! */
1074 return VM_PAGER_OK;
1075 else
1076 /* EEK! Need to unlock and I/O */
1077 return VM_PAGER_UNLOCK;
1078 }
1079
1080 /*
1081 * step 2: get non-resident or busy pages.
1082 * data structures are unlocked.
1083 */
1084 for (lcv = 0, current_offset = offset ; lcv < maxpages ;
1085 lcv++, current_offset += PAGE_SIZE) {
1086 /*
1087 * - skip over pages we've already gotten or don't want
1088 * - skip over pages we don't _have_ to get
1089 */
1090 if (pps[lcv] != NULL ||
1091 (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
1092 continue;
1093
1094 pageidx = current_offset >> PAGE_SHIFT;
1095
1096 /*
1097 * we have yet to locate the current page (pps[lcv]). we
1098 * first look for a page that is already at the current offset.
1099 * if we find a page, we check to see if it is busy or
1100 * released. if that is the case, then we sleep on the page
1101 * until it is no longer busy or released and repeat the lookup.
1102 * if the page we found is neither busy nor released, then we
1103 * busy it (so we own it) and plug it into pps[lcv]. this
1104 * 'break's the following while loop and indicates we are
1105 * ready to move on to the next page in the "lcv" loop above.
1106 *
1107 * if we exit the while loop with pps[lcv] still set to NULL,
1108 * then it means that we allocated a new busy/fake/clean page
1109 * ptmp in the object and we need to do I/O to fill in the data.
1110 */
1111
1112 /* top of "pps" while loop */
1113 while (pps[lcv] == NULL) {
1114 /* look for a resident page */
1115 ptmp = uvm_pagelookup(uobj, current_offset);
1116
1117 /* not resident? allocate one now (if we can) */
1118 if (ptmp == NULL) {
1119
1120 ptmp = uvm_pagealloc(uobj, current_offset,
1121 NULL, 0);
1122
1123 /* out of RAM? */
1124 if (ptmp == NULL) {
1125 rw_exit(uobj->vmobjlock);
1126 uvm_wait("uao_getpage");
1127 rw_enter(uobj->vmobjlock, RW_WRITE);
1128 /* goto top of pps while loop */
1129 continue;
1130 }
1131
1132 /*
1133 * safe with PQ's unlocked: because we just
1134 * alloc'd the page
1135 */
1136 atomic_setbits_int(&ptmp->pg_flags, PQ_AOBJ);
1137
1138 /*
1139 * got new page ready for I/O. break pps while
1140 * loop. pps[lcv] is still NULL.
1141 */
1142 break;
1143 }
1144
1145 /* page is there, see if we need to wait on it */
1146 if ((ptmp->pg_flags & PG_BUSY) != 0) {
1147 uvm_pagewait(ptmp, uobj->vmobjlock, "uao_get");
1148 rw_enter(uobj->vmobjlock, RW_WRITE);
1149 continue; /* goto top of pps while loop */
1150 }
1151
1152 /*
1153 * if we get here then the page is resident and
1154 * unbusy. we busy it now (so we own it).
1155 */
1156 /* we own it, caller must un-busy */
1157 atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
1158 UVM_PAGE_OWN(ptmp, "uao_get2");
1159 pps[lcv] = ptmp;
1160 }
1161
1162 /*
1163 * if we own the valid page at the correct offset, pps[lcv] will
1164 * point to it. nothing more to do except go to the next page.
1165 */
1166 if (pps[lcv])
1167 continue; /* next lcv */
1168
1169 /*
1170 * we have a "fake/busy/clean" page that we just allocated.
1171 * do the needed "i/o", either reading from swap or zeroing.
1172 */
1173 swslot = uao_find_swslot(uobj, pageidx);
1174
1175 /* just zero the page if there's nothing in swap. */
1176 if (swslot == 0) {
1177 /* page hasn't existed before, just zero it. */
1178 uvm_pagezero(ptmp);
1179 } else {
1180 /*
1181 * page in the swapped-out page.
1182 * unlock object for i/o, relock when done.
1183 */
1184
1185 rw_exit(uobj->vmobjlock);
1186 rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
1187 rw_enter(uobj->vmobjlock, RW_WRITE);
1188
1189 /*
1190 * I/O done. check for errors.
1191 */
1192 if (rv != VM_PAGER_OK) {
1193 /*
1194 * remove the swap slot from the aobj
1195 * and mark the aobj as having no real slot.
1196 * don't free the swap slot, thus preventing
1197 * it from being used again.
1198 */
1199 swslot = uao_set_swslot(&aobj->u_obj, pageidx,
1200 SWSLOT_BAD);
1201 uvm_swap_markbad(swslot, 1);
1202
1203 if (ptmp->pg_flags & PG_WANTED)
1204 wakeup(ptmp);
1205 atomic_clearbits_int(&ptmp->pg_flags,
1206 PG_WANTED|PG_BUSY);
1207 UVM_PAGE_OWN(ptmp, NULL);
1208 uvm_lock_pageq();
1209 uvm_pagefree(ptmp);
1210 uvm_unlock_pageq();
1211 rw_exit(uobj->vmobjlock);
1212
1213 return rv;
1214 }
1215 }
1216
1217 /*
1218 * we got the page! clear the fake flag (indicates valid
1219 * data now in page) and plug into our result array. note
1220 * that page is still busy.
1221 *
1222 * it is the callers job to:
1223 * => check if the page is released
1224 * => unbusy the page
1225 * => activate the page
1226 */
1227 atomic_clearbits_int(&ptmp->pg_flags, PG_FAKE);
1228 pmap_clear_modify(ptmp); /* ... and clean */
1229 pps[lcv] = ptmp;
1230
1231 } /* lcv loop */
1232
1233 rw_exit(uobj->vmobjlock);
1234 return VM_PAGER_OK;
1235 }
1236
1237 /*
1238 * uao_dropswap: release any swap resources from this aobj page.
1239 *
1240 * => aobj must be locked or have a reference count of 0.
1241 */
1242 int
uao_dropswap(struct uvm_object * uobj,int pageidx)1243 uao_dropswap(struct uvm_object *uobj, int pageidx)
1244 {
1245 int slot;
1246
1247 KASSERT(UVM_OBJ_IS_AOBJ(uobj));
1248
1249 slot = uao_set_swslot(uobj, pageidx, 0);
1250 if (slot) {
1251 uvm_swap_free(slot, 1);
1252 }
1253 return slot;
1254 }
1255
1256 /*
1257 * page in every page in every aobj that is paged-out to a range of swslots.
1258 *
1259 * => aobj must be locked and is returned locked.
1260 * => returns TRUE if pagein was aborted due to lack of memory.
1261 */
1262 boolean_t
uao_swap_off(int startslot,int endslot)1263 uao_swap_off(int startslot, int endslot)
1264 {
1265 struct uvm_aobj *aobj;
1266
1267 /*
1268 * Walk the list of all anonymous UVM objects. Grab the first.
1269 */
1270 mtx_enter(&uao_list_lock);
1271 if ((aobj = LIST_FIRST(&uao_list)) == NULL) {
1272 mtx_leave(&uao_list_lock);
1273 return FALSE;
1274 }
1275 uao_reference(&aobj->u_obj);
1276
1277 do {
1278 struct uvm_aobj *nextaobj;
1279 boolean_t rv;
1280
1281 /*
1282 * Prefetch the next object and immediately hold a reference
1283 * on it, so neither the current nor the next entry could
1284 * disappear while we are iterating.
1285 */
1286 if ((nextaobj = LIST_NEXT(aobj, u_list)) != NULL) {
1287 uao_reference(&nextaobj->u_obj);
1288 }
1289 mtx_leave(&uao_list_lock);
1290
1291 /*
1292 * Page in all pages in the swap slot range.
1293 */
1294 rw_enter(aobj->u_obj.vmobjlock, RW_WRITE);
1295 rv = uao_pagein(aobj, startslot, endslot);
1296 rw_exit(aobj->u_obj.vmobjlock);
1297
1298 /* Drop the reference of the current object. */
1299 uao_detach(&aobj->u_obj);
1300 if (rv) {
1301 if (nextaobj) {
1302 uao_detach(&nextaobj->u_obj);
1303 }
1304 return rv;
1305 }
1306
1307 aobj = nextaobj;
1308 mtx_enter(&uao_list_lock);
1309 } while (aobj);
1310
1311 /*
1312 * done with traversal, unlock the list
1313 */
1314 mtx_leave(&uao_list_lock);
1315 return FALSE;
1316 }
1317
1318 /*
1319 * page in any pages from aobj in the given range.
1320 *
1321 * => returns TRUE if pagein was aborted due to lack of memory.
1322 */
1323 static boolean_t
uao_pagein(struct uvm_aobj * aobj,int startslot,int endslot)1324 uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
1325 {
1326 boolean_t rv;
1327
1328 if (UAO_USES_SWHASH(aobj)) {
1329 struct uao_swhash_elt *elt;
1330 int bucket;
1331
1332 restart:
1333 for (bucket = aobj->u_swhashmask; bucket >= 0; bucket--) {
1334 for (elt = LIST_FIRST(&aobj->u_swhash[bucket]);
1335 elt != NULL;
1336 elt = LIST_NEXT(elt, list)) {
1337 int i;
1338
1339 for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
1340 int slot = elt->slots[i];
1341
1342 /*
1343 * if the slot isn't in range, skip it.
1344 */
1345 if (slot < startslot ||
1346 slot >= endslot) {
1347 continue;
1348 }
1349
1350 /*
1351 * process the page,
1352 * the start over on this object
1353 * since the swhash elt
1354 * may have been freed.
1355 */
1356 rv = uao_pagein_page(aobj,
1357 UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
1358 if (rv) {
1359 return rv;
1360 }
1361 goto restart;
1362 }
1363 }
1364 }
1365 } else {
1366 int i;
1367
1368 for (i = 0; i < aobj->u_pages; i++) {
1369 int slot = aobj->u_swslots[i];
1370
1371 /*
1372 * if the slot isn't in range, skip it
1373 */
1374 if (slot < startslot || slot >= endslot) {
1375 continue;
1376 }
1377
1378 /*
1379 * process the page.
1380 */
1381 rv = uao_pagein_page(aobj, i);
1382 if (rv) {
1383 return rv;
1384 }
1385 }
1386 }
1387
1388 return FALSE;
1389 }
1390
1391 /*
1392 * uao_pagein_page: page in a single page from an anonymous UVM object.
1393 *
1394 * => Returns TRUE if pagein was aborted due to lack of memory.
1395 */
1396 static boolean_t
uao_pagein_page(struct uvm_aobj * aobj,int pageidx)1397 uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
1398 {
1399 struct uvm_object *uobj = &aobj->u_obj;
1400 struct vm_page *pg;
1401 int rv, npages;
1402
1403 pg = NULL;
1404 npages = 1;
1405
1406 KASSERT(rw_write_held(uobj->vmobjlock));
1407 rv = uao_get(&aobj->u_obj, (voff_t)pageidx << PAGE_SHIFT,
1408 &pg, &npages, 0, PROT_READ | PROT_WRITE, 0, 0);
1409
1410 /*
1411 * relock and finish up.
1412 */
1413 rw_enter(uobj->vmobjlock, RW_WRITE);
1414 switch (rv) {
1415 case VM_PAGER_OK:
1416 break;
1417
1418 case VM_PAGER_ERROR:
1419 case VM_PAGER_REFAULT:
1420 /*
1421 * nothing more to do on errors.
1422 * VM_PAGER_REFAULT can only mean that the anon was freed,
1423 * so again there's nothing to do.
1424 */
1425 return FALSE;
1426 }
1427
1428 /*
1429 * ok, we've got the page now.
1430 * mark it as dirty, clear its swslot and un-busy it.
1431 */
1432 uao_dropswap(&aobj->u_obj, pageidx);
1433 atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_CLEAN|PG_FAKE);
1434 UVM_PAGE_OWN(pg, NULL);
1435
1436 /*
1437 * deactivate the page (to put it on a page queue).
1438 */
1439 pmap_clear_reference(pg);
1440 uvm_lock_pageq();
1441 uvm_pagedeactivate(pg);
1442 uvm_unlock_pageq();
1443
1444 return FALSE;
1445 }
1446
1447 /*
1448 * uao_dropswap_range: drop swapslots in the range.
1449 *
1450 * => aobj must be locked and is returned locked.
1451 * => start is inclusive. end is exclusive.
1452 */
1453 void
uao_dropswap_range(struct uvm_object * uobj,voff_t start,voff_t end)1454 uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
1455 {
1456 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
1457 int swpgonlydelta = 0;
1458
1459 KASSERT(UVM_OBJ_IS_AOBJ(uobj));
1460 KASSERT(rw_write_held(uobj->vmobjlock));
1461
1462 if (end == 0) {
1463 end = INT64_MAX;
1464 }
1465
1466 if (UAO_USES_SWHASH(aobj)) {
1467 int i, hashbuckets = aobj->u_swhashmask + 1;
1468 voff_t taghi;
1469 voff_t taglo;
1470
1471 taglo = UAO_SWHASH_ELT_TAG(start);
1472 taghi = UAO_SWHASH_ELT_TAG(end);
1473
1474 for (i = 0; i < hashbuckets; i++) {
1475 struct uao_swhash_elt *elt, *next;
1476
1477 for (elt = LIST_FIRST(&aobj->u_swhash[i]);
1478 elt != NULL;
1479 elt = next) {
1480 int startidx, endidx;
1481 int j;
1482
1483 next = LIST_NEXT(elt, list);
1484
1485 if (elt->tag < taglo || taghi < elt->tag) {
1486 continue;
1487 }
1488
1489 if (elt->tag == taglo) {
1490 startidx =
1491 UAO_SWHASH_ELT_PAGESLOT_IDX(start);
1492 } else {
1493 startidx = 0;
1494 }
1495
1496 if (elt->tag == taghi) {
1497 endidx =
1498 UAO_SWHASH_ELT_PAGESLOT_IDX(end);
1499 } else {
1500 endidx = UAO_SWHASH_CLUSTER_SIZE;
1501 }
1502
1503 for (j = startidx; j < endidx; j++) {
1504 int slot = elt->slots[j];
1505
1506 KASSERT(uvm_pagelookup(&aobj->u_obj,
1507 (voff_t)(UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
1508 + j) << PAGE_SHIFT) == NULL);
1509
1510 if (slot > 0) {
1511 uvm_swap_free(slot, 1);
1512 swpgonlydelta++;
1513 KASSERT(elt->count > 0);
1514 elt->slots[j] = 0;
1515 elt->count--;
1516 }
1517 }
1518
1519 if (elt->count == 0) {
1520 LIST_REMOVE(elt, list);
1521 pool_put(&uao_swhash_elt_pool, elt);
1522 }
1523 }
1524 }
1525 } else {
1526 int i;
1527
1528 if (aobj->u_pages < end) {
1529 end = aobj->u_pages;
1530 }
1531 for (i = start; i < end; i++) {
1532 int slot = aobj->u_swslots[i];
1533
1534 if (slot > 0) {
1535 uvm_swap_free(slot, 1);
1536 swpgonlydelta++;
1537 }
1538 }
1539 }
1540
1541 /*
1542 * adjust the counter of pages only in swap for all
1543 * the swap slots we've freed.
1544 */
1545 if (swpgonlydelta > 0) {
1546 KASSERT(uvmexp.swpgonly >= swpgonlydelta);
1547 atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta);
1548 }
1549 }
1550