1 /* $OpenBSD: uvm_aobj.c,v 1.115 2024/12/27 12:04:40 mpi Exp $ */
2 /* $NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $ */
3
4 /*
5 * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
6 * Washington University.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
30 */
31 /*
32 * uvm_aobj.c: anonymous memory uvm_object pager
33 *
34 * author: Chuck Silvers <chuq@chuq.com>
35 * started: Jan-1998
36 *
37 * - design mostly from Chuck Cranor
38 */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/malloc.h>
43 #include <sys/kernel.h>
44 #include <sys/pool.h>
45 #include <sys/stdint.h>
46 #include <sys/atomic.h>
47
48 #include <uvm/uvm.h>
49
50 /*
51  * An anonymous UVM object (aobj) manages anonymous memory. In addition to
52 * keeping the list of resident pages, it may also keep a list of allocated
53 * swap blocks. Depending on the size of the object, this list is either
54 * stored in an array (small objects) or in a hash table (large objects).
55 */
56
57 /*
58 * Note: for hash tables, we break the address space of the aobj into blocks
59 * of UAO_SWHASH_CLUSTER_SIZE pages, which shall be a power of two.
60 */
61 #define UAO_SWHASH_CLUSTER_SHIFT 4
62 #define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT)
63
64 /* Get the "tag" for this page index. */
65 #define UAO_SWHASH_ELT_TAG(idx) ((idx) >> UAO_SWHASH_CLUSTER_SHIFT)
66 #define UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \
67 ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1))
68
69 /* Given an ELT and a page index, find the swap slot. */
70 #define UAO_SWHASH_ELT_PAGESLOT(elt, idx) \
71 ((elt)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(idx)])
72
73 /* Given an ELT, return its pageidx base. */
74 #define UAO_SWHASH_ELT_PAGEIDX_BASE(elt) \
75 ((elt)->tag << UAO_SWHASH_CLUSTER_SHIFT)
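
/*
 * As an illustration of the macros above: with UAO_SWHASH_CLUSTER_SHIFT == 4,
 * page index 37 has tag 2 (37 >> 4) and slot index 5 (37 & 15), and the elt
 * with tag 2 covers page indexes 32-47 (its pageidx base is 2 << 4 == 32).
 */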
76
77 /* The hash function. */
78 #define UAO_SWHASH_HASH(aobj, idx) \
79 (&(aobj)->u_swhash[(((idx) >> UAO_SWHASH_CLUSTER_SHIFT) \
80 & (aobj)->u_swhashmask)])
81
82 /*
83 * The threshold which determines whether we will use an array or a
84 * hash table to store the list of allocated swap blocks.
85 */
86 #define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4)
87 #define UAO_USES_SWHASH(aobj) \
88 ((aobj)->u_pages > UAO_SWHASH_THRESHOLD)
89
90 /* The number of buckets in a hash, with an upper bound. */
91 #define UAO_SWHASH_MAXBUCKETS 256
92 #define UAO_SWHASH_BUCKETS(pages) \
93 (min((pages) >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS))
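
/*
 * Worked example of the sizing macros: with UAO_SWHASH_CLUSTER_SHIFT == 4,
 * UAO_SWHASH_THRESHOLD is 64 pages, so only objects larger than 64 pages use
 * the hash.  A 1024-page aobj asks hashinit(9) for min(1024 >> 4, 256) == 64
 * buckets; objects of 4096 pages or more are capped at 256 buckets.
 */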
94
95
96 /*
97 * uao_swhash_elt: when a hash table is being used, this structure defines
98 * the format of an entry in the bucket list.
99 */
100 struct uao_swhash_elt {
101 LIST_ENTRY(uao_swhash_elt) list; /* the hash list */
102 voff_t tag; /* our 'tag' */
103 int count; /* our number of active slots */
104 int slots[UAO_SWHASH_CLUSTER_SIZE]; /* the slots */
105 };
106
107 /*
108 * uao_swhash: the swap hash table structure
109 */
110 LIST_HEAD(uao_swhash, uao_swhash_elt);
111
112 /*
113 * uao_swhash_elt_pool: pool of uao_swhash_elt structures
114 */
115 struct pool uao_swhash_elt_pool;
116
117 /*
118 * uvm_aobj: the actual anon-backed uvm_object
119 *
120  * => the uvm_object is at the top of the structure, which allows
121 * (struct uvm_aobj *) == (struct uvm_object *)
122 * => only one of u_swslots and u_swhash is used in any given aobj
123 */
124 struct uvm_aobj {
125 struct uvm_object u_obj; /* has: pgops, memt, #pages, #refs */
126 int u_pages; /* number of pages in entire object */
127 int u_flags; /* the flags (see uvm_aobj.h) */
128 /*
129 * Either an array or hashtable (array of bucket heads) of
130 * offset -> swapslot mappings for the aobj.
131 */
132 #define u_swslots u_swap.slot_array
133 #define u_swhash u_swap.slot_hash
134 union swslots {
135 int *slot_array;
136 struct uao_swhash *slot_hash;
137 } u_swap;
138 u_long u_swhashmask; /* mask for hashtable */
139 LIST_ENTRY(uvm_aobj) u_list; /* global list of aobjs */
140 };
141
142 struct pool uvm_aobj_pool;
143
144 static struct uao_swhash_elt *uao_find_swhash_elt(struct uvm_aobj *, int,
145 boolean_t);
146 static boolean_t uao_flush(struct uvm_object *, voff_t,
147 voff_t, int);
148 static void uao_free(struct uvm_aobj *);
149 static int uao_get(struct uvm_object *, voff_t,
150 vm_page_t *, int *, int, vm_prot_t,
151 int, int);
152 static boolean_t uao_pagein(struct uvm_aobj *, int, int);
153 static boolean_t uao_pagein_page(struct uvm_aobj *, int);
154
155 void uao_dropswap_range(struct uvm_object *, voff_t, voff_t);
156 void uao_shrink_flush(struct uvm_object *, int, int);
157 int uao_shrink_hash(struct uvm_object *, int);
158 int uao_shrink_array(struct uvm_object *, int);
159 int uao_shrink_convert(struct uvm_object *, int);
160
161 int uao_grow_hash(struct uvm_object *, int);
162 int uao_grow_array(struct uvm_object *, int);
163 int uao_grow_convert(struct uvm_object *, int);
164
165 /*
166 * aobj_pager
167 *
168 * note that some functions (e.g. put) are handled elsewhere
169 */
170 const struct uvm_pagerops aobj_pager = {
171 .pgo_reference = uao_reference,
172 .pgo_detach = uao_detach,
173 .pgo_flush = uao_flush,
174 .pgo_get = uao_get,
175 };
176
177 /*
178 * uao_list: global list of active aobjs, locked by uao_list_lock
179 *
180 * Lock ordering: generally the locking order is object lock, then list lock.
181  * In the case of swap off we have to iterate over the list, and thus the
182 * ordering is reversed. In that case we must use trylocking to prevent
183 * deadlock.
184 */
185 static LIST_HEAD(aobjlist, uvm_aobj) uao_list = LIST_HEAD_INITIALIZER(uao_list);
186 static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
187
188
189 /*
190 * functions
191 */
192 /*
193 * hash table/array related functions
194 */
195 /*
196 * uao_find_swhash_elt: find (or create) a hash table entry for a page
197 * offset.
198 */
199 static struct uao_swhash_elt *
200 uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, boolean_t create)
201 {
202 struct uao_swhash *swhash;
203 struct uao_swhash_elt *elt;
204 voff_t page_tag;
205
206 swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */
207 page_tag = UAO_SWHASH_ELT_TAG(pageidx); /* tag to search for */
208
209 /*
210 * now search the bucket for the requested tag
211 */
212 LIST_FOREACH(elt, swhash, list) {
213 if (elt->tag == page_tag)
214 return elt;
215 }
216
217 if (!create)
218 return NULL;
219
220 /*
221 * allocate a new entry for the bucket and init/insert it in
222 */
223 elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT | PR_ZERO);
224 /*
225 * XXX We cannot sleep here as the hash table might disappear
226 * from under our feet. And we run the risk of deadlocking
227 	 * the pagedaemon. In fact this code will only be called by
228 	 * the pagedaemon and allocation will only fail if we
229 	 * exhausted the pagedaemon reserve. In that case we're
230 * doomed anyway, so panic.
231 */
232 if (elt == NULL)
233 panic("%s: can't allocate entry", __func__);
234 LIST_INSERT_HEAD(swhash, elt, list);
235 elt->tag = page_tag;
236
237 return elt;
238 }
239
240 /*
241 * uao_find_swslot: find the swap slot number for an aobj/pageidx
242 */
243 int
244 uao_find_swslot(struct uvm_object *uobj, int pageidx)
245 {
246 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
247
248 KASSERT(UVM_OBJ_IS_AOBJ(uobj));
249
250 /*
251 * if noswap flag is set, then we never return a slot
252 */
253 if (aobj->u_flags & UAO_FLAG_NOSWAP)
254 return 0;
255
256 /*
257 * if hashing, look in hash table.
258 */
259 if (UAO_USES_SWHASH(aobj)) {
260 struct uao_swhash_elt *elt =
261 uao_find_swhash_elt(aobj, pageidx, FALSE);
262
263 if (elt)
264 return UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
265 else
266 return 0;
267 }
268
269 /*
270 * otherwise, look in the array
271 */
272 return aobj->u_swslots[pageidx];
273 }
274
275 /*
276 * uao_set_swslot: set the swap slot for a page in an aobj.
277 *
278 * => setting a slot to zero frees the slot
279 * => object must be locked by caller
280 * => we return the old slot number, or -1 if we failed to allocate
281 * memory to record the new slot number
282 */
283 int
284 uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
285 {
286 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
287 int oldslot;
288
289 KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);
290 KASSERT(UVM_OBJ_IS_AOBJ(uobj));
291
292 /*
293 * if noswap flag is set, then we can't set a slot
294 */
295 if (aobj->u_flags & UAO_FLAG_NOSWAP) {
296 if (slot == 0)
297 return 0; /* a clear is ok */
298
299 /* but a set is not */
300 printf("uao_set_swslot: uobj = %p\n", uobj);
301 panic("uao_set_swslot: attempt to set a slot on a NOSWAP object");
302 }
303
304 /*
305 * are we using a hash table? if so, add it in the hash.
306 */
307 if (UAO_USES_SWHASH(aobj)) {
308 /*
309 * Avoid allocating an entry just to free it again if
310 		 * the page had no swap slot in the first place, and
311 * we are freeing.
312 */
313 struct uao_swhash_elt *elt =
314 uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE);
315 if (elt == NULL) {
316 KASSERT(slot == 0);
317 return 0;
318 }
319
320 oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
321 UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
322
323 /*
324 * now adjust the elt's reference counter and free it if we've
325 * dropped it to zero.
326 */
327 if (slot) {
328 if (oldslot == 0)
329 elt->count++;
330 } else {
331 if (oldslot)
332 elt->count--;
333
334 if (elt->count == 0) {
335 LIST_REMOVE(elt, list);
336 pool_put(&uao_swhash_elt_pool, elt);
337 }
338 }
339 } else {
340 /* we are using an array */
341 oldslot = aobj->u_swslots[pageidx];
342 aobj->u_swslots[pageidx] = slot;
343 }
344 return oldslot;
345 }
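
/*
 * Usage note: the usual way to release a slot is uao_dropswap() below, which
 * calls uao_set_swslot(uobj, pageidx, 0) and hands the returned slot to
 * uvm_swap_free().
 */
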
346 /*
347 * end of hash/array functions
348 */
349
350 /*
351 * uao_free: free all resources held by an aobj, and then free the aobj
352 *
353 * => the aobj should be dead
354 */
355 static void
356 uao_free(struct uvm_aobj *aobj)
357 {
358 struct uvm_object *uobj = &aobj->u_obj;
359
360 KASSERT(UVM_OBJ_IS_AOBJ(uobj));
361 KASSERT(rw_write_held(uobj->vmobjlock));
362 uao_dropswap_range(uobj, 0, 0);
363 rw_exit(uobj->vmobjlock);
364
365 if (UAO_USES_SWHASH(aobj)) {
366 /*
367 * free the hash table itself.
368 */
369 hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
370 } else {
371 free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
372 }
373
374 /*
375 * finally free the aobj itself
376 */
377 uvm_obj_destroy(uobj);
378 pool_put(&uvm_aobj_pool, aobj);
379 }
380
381 /*
382 * pager functions
383 */
384
385 #ifdef TMPFS
386 /*
387 * Shrink an aobj to a given number of pages. The procedure is always the same:
388 * assess the necessity of data structure conversion (hash to array), secure
389 * resources, flush pages and drop swap slots.
390 *
391 */
392
393 void
394 uao_shrink_flush(struct uvm_object *uobj, int startpg, int endpg)
395 {
396 KASSERT(startpg < endpg);
397 KASSERT(uobj->uo_refs == 1);
398 uao_flush(uobj, (voff_t)startpg << PAGE_SHIFT,
399 (voff_t)endpg << PAGE_SHIFT, PGO_FREE);
400 uao_dropswap_range(uobj, startpg, endpg);
401 }
402
403 int
404 uao_shrink_hash(struct uvm_object *uobj, int pages)
405 {
406 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
407 struct uao_swhash *new_swhash;
408 struct uao_swhash_elt *elt;
409 unsigned long new_hashmask;
410 int i;
411
412 KASSERT(UAO_USES_SWHASH(aobj));
413
414 /*
415 * If the size of the hash table doesn't change, all we need to do is
416 * to adjust the page count.
417 */
418 if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
419 uao_shrink_flush(uobj, pages, aobj->u_pages);
420 aobj->u_pages = pages;
421 return 0;
422 }
423
424 new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
425 M_WAITOK | M_CANFAIL, &new_hashmask);
426 if (new_swhash == NULL)
427 return ENOMEM;
428
429 uao_shrink_flush(uobj, pages, aobj->u_pages);
430
431 /*
432 * Even though the hash table size is changing, the hash of the buckets
433 * we are interested in copying should not change.
434 */
435 for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
436 while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
437 elt = LIST_FIRST(&aobj->u_swhash[i]);
438 LIST_REMOVE(elt, list);
439 LIST_INSERT_HEAD(&new_swhash[i], elt, list);
440 }
441 }
442
443 hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
444
445 aobj->u_swhash = new_swhash;
446 aobj->u_pages = pages;
447 aobj->u_swhashmask = new_hashmask;
448
449 return 0;
450 }
451
452 int
453 uao_shrink_convert(struct uvm_object *uobj, int pages)
454 {
455 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
456 struct uao_swhash_elt *elt;
457 int i, *new_swslots;
458
459 new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
460 M_WAITOK | M_CANFAIL | M_ZERO);
461 if (new_swslots == NULL)
462 return ENOMEM;
463
464 uao_shrink_flush(uobj, pages, aobj->u_pages);
465
466 /* Convert swap slots from hash to array. */
467 for (i = 0; i < pages; i++) {
468 elt = uao_find_swhash_elt(aobj, i, FALSE);
469 if (elt != NULL) {
470 new_swslots[i] = UAO_SWHASH_ELT_PAGESLOT(elt, i);
471 if (new_swslots[i] != 0)
472 elt->count--;
473 if (elt->count == 0) {
474 LIST_REMOVE(elt, list);
475 pool_put(&uao_swhash_elt_pool, elt);
476 }
477 }
478 }
479
480 hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
481
482 aobj->u_swslots = new_swslots;
483 aobj->u_pages = pages;
484
485 return 0;
486 }
487
488 int
489 uao_shrink_array(struct uvm_object *uobj, int pages)
490 {
491 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
492 int i, *new_swslots;
493
494 new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
495 M_WAITOK | M_CANFAIL | M_ZERO);
496 if (new_swslots == NULL)
497 return ENOMEM;
498
499 uao_shrink_flush(uobj, pages, aobj->u_pages);
500
501 for (i = 0; i < pages; i++)
502 new_swslots[i] = aobj->u_swslots[i];
503
504 free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
505
506 aobj->u_swslots = new_swslots;
507 aobj->u_pages = pages;
508
509 return 0;
510 }
511
512 int
513 uao_shrink(struct uvm_object *uobj, int pages)
514 {
515 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
516
517 KASSERT(pages < aobj->u_pages);
518
519 /*
520 * Distinguish between three possible cases:
521 * 1. aobj uses hash and must be converted to array.
522 * 2. aobj uses array and array size needs to be adjusted.
523 * 3. aobj uses hash and hash size needs to be adjusted.
524 */
525 if (pages > UAO_SWHASH_THRESHOLD)
526 return uao_shrink_hash(uobj, pages); /* case 3 */
527 else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
528 return uao_shrink_convert(uobj, pages); /* case 1 */
529 else
530 return uao_shrink_array(uobj, pages); /* case 2 */
531 }
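
/*
 * Worked example of the case selection above, with UAO_SWHASH_THRESHOLD == 64
 * pages: shrinking a 100-page aobj to 80 pages resizes the hash (case 3),
 * shrinking it to 32 pages converts the hash to an array (case 1), and
 * shrinking a 50-page aobj to 20 pages only shrinks the array (case 2).
 */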
532
533 /*
534 * Grow an aobj to a given number of pages. Right now we only adjust the swap
535  * slots. We could additionally handle page allocation directly, so that pages
536  * are not allocated through uvm_fault(). That would allow us to use another
537  * mechanism for the swap slots other than malloc(). It is thus mandatory that
538  * the caller of these functions does not allow faults to happen if growing
539  * fails.
540 */
541 int
542 uao_grow_array(struct uvm_object *uobj, int pages)
543 {
544 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
545 int i, *new_swslots;
546
547 KASSERT(aobj->u_pages <= UAO_SWHASH_THRESHOLD);
548
549 new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
550 M_WAITOK | M_CANFAIL | M_ZERO);
551 if (new_swslots == NULL)
552 return ENOMEM;
553
554 for (i = 0; i < aobj->u_pages; i++)
555 new_swslots[i] = aobj->u_swslots[i];
556
557 free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
558
559 aobj->u_swslots = new_swslots;
560 aobj->u_pages = pages;
561
562 return 0;
563 }
564
565 int
566 uao_grow_hash(struct uvm_object *uobj, int pages)
567 {
568 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
569 struct uao_swhash *new_swhash;
570 struct uao_swhash_elt *elt;
571 unsigned long new_hashmask;
572 int i;
573
574 KASSERT(pages > UAO_SWHASH_THRESHOLD);
575
576 /*
577 * If the size of the hash table doesn't change, all we need to do is
578 * to adjust the page count.
579 */
580 if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
581 aobj->u_pages = pages;
582 return 0;
583 }
584
585 KASSERT(UAO_SWHASH_BUCKETS(aobj->u_pages) < UAO_SWHASH_BUCKETS(pages));
586
587 new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
588 M_WAITOK | M_CANFAIL, &new_hashmask);
589 if (new_swhash == NULL)
590 return ENOMEM;
591
592 for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
593 while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
594 elt = LIST_FIRST(&aobj->u_swhash[i]);
595 LIST_REMOVE(elt, list);
596 LIST_INSERT_HEAD(&new_swhash[i], elt, list);
597 }
598 }
599
600 hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
601
602 aobj->u_swhash = new_swhash;
603 aobj->u_pages = pages;
604 aobj->u_swhashmask = new_hashmask;
605
606 return 0;
607 }
608
609 int
610 uao_grow_convert(struct uvm_object *uobj, int pages)
611 {
612 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
613 struct uao_swhash *new_swhash;
614 struct uao_swhash_elt *elt;
615 unsigned long new_hashmask;
616 int i, *old_swslots;
617
618 new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
619 M_WAITOK | M_CANFAIL, &new_hashmask);
620 if (new_swhash == NULL)
621 return ENOMEM;
622
623 /* Set these now, so we can use uao_find_swhash_elt(). */
624 old_swslots = aobj->u_swslots;
625 aobj->u_swhash = new_swhash;
626 aobj->u_swhashmask = new_hashmask;
627
628 for (i = 0; i < aobj->u_pages; i++) {
629 if (old_swslots[i] != 0) {
630 elt = uao_find_swhash_elt(aobj, i, TRUE);
631 elt->count++;
632 UAO_SWHASH_ELT_PAGESLOT(elt, i) = old_swslots[i];
633 }
634 }
635
636 free(old_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
637 aobj->u_pages = pages;
638
639 return 0;
640 }
641
642 int
643 uao_grow(struct uvm_object *uobj, int pages)
644 {
645 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
646
647 KASSERT(pages > aobj->u_pages);
648
649 /*
650 * Distinguish between three possible cases:
651 * 1. aobj uses hash and hash size needs to be adjusted.
652 * 2. aobj uses array and array size needs to be adjusted.
653 * 3. aobj uses array and must be converted to hash.
654 */
655 if (pages <= UAO_SWHASH_THRESHOLD)
656 return uao_grow_array(uobj, pages); /* case 2 */
657 else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
658 return uao_grow_hash(uobj, pages); /* case 1 */
659 else
660 return uao_grow_convert(uobj, pages);
661 }
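
/*
 * Worked example of the case selection above, with UAO_SWHASH_THRESHOLD == 64
 * pages: growing a 20-page aobj to 50 pages only grows the array (case 2),
 * growing a 100-page aobj to 200 pages resizes the hash (case 1), and growing
 * a 50-page aobj to 100 pages converts the array to a hash (case 3).
 */
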
662 #endif /* TMPFS */
663
664 /*
665 * uao_create: create an aobj of the given size and return its uvm_object.
666 *
667 * => for normal use, flags are zero or UAO_FLAG_CANFAIL.
668 * => for the kernel object, the flags are:
669 * UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
670 * UAO_FLAG_KERNSWAP - enable swapping of kernel object (" ")
671 */
672 struct uvm_object *
673 uao_create(vsize_t size, int flags)
674 {
675 static struct uvm_aobj kernel_object_store;
676 static struct rwlock bootstrap_kernel_object_lock;
677 static int kobj_alloced = 0;
678 int pages = round_page(size) >> PAGE_SHIFT;
679 struct uvm_aobj *aobj;
680 int refs;
681
682 /*
683 * Allocate a new aobj, unless kernel object is requested.
684 */
685 if (flags & UAO_FLAG_KERNOBJ) {
686 KASSERT(!kobj_alloced);
687 aobj = &kernel_object_store;
688 aobj->u_pages = pages;
689 aobj->u_flags = UAO_FLAG_NOSWAP;
690 refs = UVM_OBJ_KERN;
691 kobj_alloced = UAO_FLAG_KERNOBJ;
692 } else if (flags & UAO_FLAG_KERNSWAP) {
693 KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
694 aobj = &kernel_object_store;
695 kobj_alloced = UAO_FLAG_KERNSWAP;
696 } else {
697 aobj = pool_get(&uvm_aobj_pool, PR_WAITOK);
698 aobj->u_pages = pages;
699 aobj->u_flags = 0;
700 refs = 1;
701 }
702
703 /*
704 * allocate hash/array if necessary
705 */
706 if (flags == 0 || (flags & (UAO_FLAG_KERNSWAP | UAO_FLAG_CANFAIL))) {
707 int mflags;
708
709 if (flags)
710 mflags = M_NOWAIT;
711 else
712 mflags = M_WAITOK;
713
714 /* allocate hash table or array depending on object size */
715 if (UAO_USES_SWHASH(aobj)) {
716 aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(pages),
717 M_UVMAOBJ, mflags, &aobj->u_swhashmask);
718 if (aobj->u_swhash == NULL) {
719 if (flags & UAO_FLAG_CANFAIL) {
720 pool_put(&uvm_aobj_pool, aobj);
721 return NULL;
722 }
723 panic("uao_create: hashinit swhash failed");
724 }
725 } else {
726 aobj->u_swslots = mallocarray(pages, sizeof(int),
727 M_UVMAOBJ, mflags|M_ZERO);
728 if (aobj->u_swslots == NULL) {
729 if (flags & UAO_FLAG_CANFAIL) {
730 pool_put(&uvm_aobj_pool, aobj);
731 return NULL;
732 }
733 panic("uao_create: malloc swslots failed");
734 }
735 }
736
737 if (flags & UAO_FLAG_KERNSWAP) {
738 aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
739 return &aobj->u_obj;
740 /* done! */
741 }
742 }
743
744 /*
745 * Initialise UVM object.
746 */
747 uvm_obj_init(&aobj->u_obj, &aobj_pager, refs);
748 if (flags & UAO_FLAG_KERNOBJ) {
749 /* Use a temporary static lock for kernel_object. */
750 rw_init(&bootstrap_kernel_object_lock, "kobjlk");
751 uvm_obj_setlock(&aobj->u_obj, &bootstrap_kernel_object_lock);
752 }
753
754 /*
755 * now that aobj is ready, add it to the global list
756 */
757 mtx_enter(&uao_list_lock);
758 LIST_INSERT_HEAD(&uao_list, aobj, u_list);
759 mtx_leave(&uao_list_lock);
760
761 return &aobj->u_obj;
762 }
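
/*
 * Summary of the flag combinations handled above: flags == 0 allocates a
 * regular aobj and may sleep for memory, UAO_FLAG_CANFAIL lets the swap-slot
 * allocation fail (uao_create() then returns NULL), UAO_FLAG_KERNOBJ creates
 * the single kernel object with swap disabled, and UAO_FLAG_KERNSWAP later
 * allocates the swap structures for it and clears UAO_FLAG_NOSWAP.
 */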
763
764
765
766 /*
767 * uao_init: set up aobj pager subsystem
768 *
769 * => called at boot time from uvm_pager_init()
770 */
771 void
772 uao_init(void)
773 {
774 /*
775 * NOTE: Pages for this pool must not come from a pageable
776 * kernel map!
777 */
778 pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), 0,
779 IPL_NONE, PR_WAITOK, "uaoeltpl", NULL);
780 pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0,
781 IPL_NONE, PR_WAITOK, "aobjpl", NULL);
782 }
783
784 /*
785 * uao_reference: hold a reference to an anonymous UVM object.
786 */
787 void
788 uao_reference(struct uvm_object *uobj)
789 {
790 /* Kernel object is persistent. */
791 if (UVM_OBJ_IS_KERN_OBJECT(uobj))
792 return;
793
794 atomic_inc_int(&uobj->uo_refs);
795 }
796
797
798 /*
799 * uao_detach: drop a reference to an anonymous UVM object.
800 */
801 void
802 uao_detach(struct uvm_object *uobj)
803 {
804 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
805 struct vm_page *pg;
806
807 /*
808 * Detaching from kernel_object is a NOP.
809 */
810 if (UVM_OBJ_IS_KERN_OBJECT(uobj))
811 return;
812
813 /*
814 * Drop the reference. If it was the last one, destroy the object.
815 */
816 if (atomic_dec_int_nv(&uobj->uo_refs) > 0) {
817 return;
818 }
819
820 /*
821 * Remove the aobj from the global list.
822 */
823 mtx_enter(&uao_list_lock);
824 LIST_REMOVE(aobj, u_list);
825 mtx_leave(&uao_list_lock);
826
827 /*
828 * Free all the pages left in the aobj. For each page, when the
829 * page is no longer busy (and thus after any disk I/O that it is
830 * involved in is complete), release any swap resources and free
831 * the page itself.
832 */
833 rw_enter(uobj->vmobjlock, RW_WRITE);
834 while ((pg = RBT_ROOT(uvm_objtree, &uobj->memt)) != NULL) {
835 pmap_page_protect(pg, PROT_NONE);
836 if (pg->pg_flags & PG_BUSY) {
837 uvm_pagewait(pg, uobj->vmobjlock, "uao_det");
838 rw_enter(uobj->vmobjlock, RW_WRITE);
839 continue;
840 }
841 uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
842 uvm_lock_pageq();
843 uvm_pagefree(pg);
844 uvm_unlock_pageq();
845 }
846
847 /*
848 * Finally, free the anonymous UVM object itself.
849 */
850 uao_free(aobj);
851 }
852
853 /*
854 * uao_flush: flush pages out of a uvm object
855 *
856 * => if PGO_CLEANIT is not set, then we will not block.
857  * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
858 * for flushing.
859 * => NOTE: we are allowed to lock the page queues, so the caller
860 * must not be holding the lock on them [e.g. pagedaemon had
861 * better not call us with the queues locked]
862 * => we return TRUE unless we encountered some sort of I/O error
863 * XXXJRT currently never happens, as we never directly initiate
864 * XXXJRT I/O
865 */
866 boolean_t
867 uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
868 {
869 struct uvm_aobj *aobj = (struct uvm_aobj *) uobj;
870 struct vm_page *pg;
871 voff_t curoff;
872
873 KASSERT(UVM_OBJ_IS_AOBJ(uobj));
874 KASSERT(rw_write_held(uobj->vmobjlock));
875
876 if (flags & PGO_ALLPAGES) {
877 start = 0;
878 stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
879 } else {
880 start = trunc_page(start);
881 stop = round_page(stop);
882 if (stop > ((voff_t)aobj->u_pages << PAGE_SHIFT)) {
883 printf("uao_flush: strange, got an out of range "
884 "flush (fixed)\n");
885 stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
886 }
887 }
888
889 /*
890 * Don't need to do any work here if we're not freeing
891 * or deactivating pages.
892 */
893 if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
894 return TRUE;
895 }
896
897 curoff = start;
898 for (;;) {
899 if (curoff < stop) {
900 pg = uvm_pagelookup(uobj, curoff);
901 curoff += PAGE_SIZE;
902 if (pg == NULL)
903 continue;
904 } else {
905 break;
906 }
907
908 /* Make sure page is unbusy, else wait for it. */
909 if (pg->pg_flags & PG_BUSY) {
910 uvm_pagewait(pg, uobj->vmobjlock, "uaoflsh");
911 rw_enter(uobj->vmobjlock, RW_WRITE);
912 curoff -= PAGE_SIZE;
913 continue;
914 }
915
916 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
917 /*
918 * XXX In these first 3 cases, we always just
919 * XXX deactivate the page. We may want to
920 * XXX handle the different cases more specifically
921 * XXX in the future.
922 */
923 case PGO_CLEANIT|PGO_FREE:
924 /* FALLTHROUGH */
925 case PGO_CLEANIT|PGO_DEACTIVATE:
926 /* FALLTHROUGH */
927 case PGO_DEACTIVATE:
928 deactivate_it:
929 if (pg->wire_count != 0)
930 continue;
931
932 uvm_lock_pageq();
933 uvm_pagedeactivate(pg);
934 uvm_unlock_pageq();
935
936 continue;
937 case PGO_FREE:
938 /*
939 * If there are multiple references to
940 * the object, just deactivate the page.
941 */
942 if (uobj->uo_refs > 1)
943 goto deactivate_it;
944
945 /* XXX skip the page if it's wired */
946 if (pg->wire_count != 0)
947 continue;
948
949 /*
950 * free the swap slot and the page.
951 */
952 pmap_page_protect(pg, PROT_NONE);
953
954 /*
955 * freeing swapslot here is not strictly necessary.
956 * however, leaving it here doesn't save much
957 * because we need to update swap accounting anyway.
958 */
959 uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
960 uvm_lock_pageq();
961 uvm_pagefree(pg);
962 uvm_unlock_pageq();
963
964 continue;
965 default:
966 panic("uao_flush: weird flags");
967 }
968 }
969
970 return TRUE;
971 }
972
973 /*
974 * uao_get: fetch me a page
975 *
976 * we have three cases:
977 * 1: page is resident -> just return the page.
978 * 2: page is zero-fill -> allocate a new page and zero it.
979 * 3: page is swapped out -> fetch the page from swap.
980 *
981  * case 1 can be handled with PGO_LOCKED, cases 2 and 3 cannot.
982  * So, if the "center" page hits case 2 or 3 (or any page, with PGO_ALLPAGES),
983 * then we will need to return VM_PAGER_UNLOCK.
984 *
985 * => flags: PGO_ALLPAGES: get all of the pages
986 * PGO_LOCKED: fault data structures are locked
987 * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
988 * => NOTE: caller must check for released pages!!
989 */
990 static int
991 uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
992 int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
993 {
994 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
995 voff_t current_offset;
996 vm_page_t ptmp;
997 int lcv, gotpages, maxpages, swslot, rv, pageidx;
998 boolean_t done;
999
1000 KASSERT(UVM_OBJ_IS_AOBJ(uobj));
1001 KASSERT(rw_lock_held(uobj->vmobjlock));
1002 KASSERT(rw_write_held(uobj->vmobjlock) ||
1003 ((flags & PGO_LOCKED) != 0 && (access_type & PROT_WRITE) == 0));
1004
1005 /*
1006 * get number of pages
1007 */
1008 maxpages = *npagesp;
1009
1010 if (flags & PGO_LOCKED) {
1011 /*
1012 * step 1a: get pages that are already resident. only do
1013 * this if the data structures are locked (i.e. the first
1014 * time through).
1015 */
1016 done = TRUE; /* be optimistic */
1017 gotpages = 0; /* # of pages we got so far */
1018
1019 for (lcv = 0, current_offset = offset ; lcv < maxpages ;
1020 lcv++, current_offset += PAGE_SIZE) {
1021 /* do we care about this page? if not, skip it */
1022 if (pps[lcv] == PGO_DONTCARE)
1023 continue;
1024
1025 /* lookup page */
1026 ptmp = uvm_pagelookup(uobj, current_offset);
1027
1028 /*
1029 * to be useful must get a non-busy page
1030 */
1031 if (ptmp == NULL || (ptmp->pg_flags & PG_BUSY) != 0) {
1032 if (lcv == centeridx ||
1033 (flags & PGO_ALLPAGES) != 0)
1034 /* need to do a wait or I/O! */
1035 done = FALSE;
1036 continue;
1037 }
1038
1039 /*
1040 * useful page: plug it in our result array
1041 */
1042 pps[lcv] = ptmp;
1043 gotpages++;
1044 }
1045
1046 /*
1047 * step 1b: now we've either done everything needed or we
1048 	 * need to unlock and do some waiting or I/O.
1049 */
1050 *npagesp = gotpages;
1051 return done ? VM_PAGER_OK : VM_PAGER_UNLOCK;
1052 }
1053
1054 /*
1055 * step 2: get non-resident or busy pages.
1056 * data structures are unlocked.
1057 */
1058 for (lcv = 0, current_offset = offset ; lcv < maxpages ;
1059 lcv++, current_offset += PAGE_SIZE) {
1060 /*
1061 * - skip over pages we've already gotten or don't want
1062 * - skip over pages we don't _have_ to get
1063 */
1064 if (pps[lcv] != NULL ||
1065 (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
1066 continue;
1067
1068 pageidx = current_offset >> PAGE_SHIFT;
1069
1070 /*
1071 * we have yet to locate the current page (pps[lcv]). we
1072 * first look for a page that is already at the current offset.
1073 * if we find a page, we check to see if it is busy or
1074 * released. if that is the case, then we sleep on the page
1075 * until it is no longer busy or released and repeat the lookup.
1076 * if the page we found is neither busy nor released, then we
1077 * busy it (so we own it) and plug it into pps[lcv]. this
1078 * 'break's the following while loop and indicates we are
1079 * ready to move on to the next page in the "lcv" loop above.
1080 *
1081 * if we exit the while loop with pps[lcv] still set to NULL,
1082 * then it means that we allocated a new busy/fake/clean page
1083 * ptmp in the object and we need to do I/O to fill in the data.
1084 */
1085
1086 /* top of "pps" while loop */
1087 while (pps[lcv] == NULL) {
1088 /* look for a resident page */
1089 ptmp = uvm_pagelookup(uobj, current_offset);
1090
1091 /* not resident? allocate one now (if we can) */
1092 if (ptmp == NULL) {
1093
1094 ptmp = uvm_pagealloc(uobj, current_offset,
1095 NULL, 0);
1096
1097 /* out of RAM? */
1098 if (ptmp == NULL) {
1099 rw_exit(uobj->vmobjlock);
1100 uvm_wait("uao_getpage");
1101 rw_enter(uobj->vmobjlock, RW_WRITE);
1102 /* goto top of pps while loop */
1103 continue;
1104 }
1105
1106 /*
1107 * safe with PQ's unlocked: because we just
1108 * alloc'd the page
1109 */
1110 atomic_setbits_int(&ptmp->pg_flags, PQ_AOBJ);
1111
1112 /*
1113 * got new page ready for I/O. break pps while
1114 * loop. pps[lcv] is still NULL.
1115 */
1116 break;
1117 }
1118
1119 /* page is there, see if we need to wait on it */
1120 if ((ptmp->pg_flags & PG_BUSY) != 0) {
1121 uvm_pagewait(ptmp, uobj->vmobjlock, "uao_get");
1122 rw_enter(uobj->vmobjlock, RW_WRITE);
1123 continue; /* goto top of pps while loop */
1124 }
1125
1126 /*
1127 * if we get here then the page is resident and
1128 * unbusy. we busy it now (so we own it).
1129 */
1130 /* we own it, caller must un-busy */
1131 atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
1132 UVM_PAGE_OWN(ptmp, "uao_get2");
1133 pps[lcv] = ptmp;
1134 }
1135
1136 /*
1137 * if we own the valid page at the correct offset, pps[lcv] will
1138 * point to it. nothing more to do except go to the next page.
1139 */
1140 if (pps[lcv])
1141 continue; /* next lcv */
1142
1143 /*
1144 * we have a "fake/busy/clean" page that we just allocated.
1145 * do the needed "i/o", either reading from swap or zeroing.
1146 */
1147 swslot = uao_find_swslot(uobj, pageidx);
1148
1149 /* just zero the page if there's nothing in swap. */
1150 if (swslot == 0) {
1151 /* page hasn't existed before, just zero it. */
1152 uvm_pagezero(ptmp);
1153 } else {
1154 /*
1155 * page in the swapped-out page.
1156 * unlock object for i/o, relock when done.
1157 */
1158
1159 rw_exit(uobj->vmobjlock);
1160 rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
1161 rw_enter(uobj->vmobjlock, RW_WRITE);
1162
1163 /*
1164 * I/O done. check for errors.
1165 */
1166 if (rv != VM_PAGER_OK) {
1167 /*
1168 * remove the swap slot from the aobj
1169 * and mark the aobj as having no real slot.
1170 * don't free the swap slot, thus preventing
1171 * it from being used again.
1172 */
1173 swslot = uao_set_swslot(&aobj->u_obj, pageidx,
1174 SWSLOT_BAD);
1175 uvm_swap_markbad(swslot, 1);
1176
1177 if (ptmp->pg_flags & PG_WANTED)
1178 wakeup(ptmp);
1179 atomic_clearbits_int(&ptmp->pg_flags,
1180 PG_WANTED|PG_BUSY);
1181 UVM_PAGE_OWN(ptmp, NULL);
1182 uvm_lock_pageq();
1183 uvm_pagefree(ptmp);
1184 uvm_unlock_pageq();
1185 rw_exit(uobj->vmobjlock);
1186
1187 return rv;
1188 }
1189 }
1190
1191 /*
1192 * we got the page! clear the fake flag (indicates valid
1193 * data now in page) and plug into our result array. note
1194 * that page is still busy.
1195 *
1196 	 * it is the caller's job to:
1197 * => check if the page is released
1198 * => unbusy the page
1199 * => activate the page
1200 */
1201 atomic_clearbits_int(&ptmp->pg_flags, PG_FAKE);
1202 pmap_clear_modify(ptmp); /* ... and clean */
1203 pps[lcv] = ptmp;
1204
1205 } /* lcv loop */
1206
1207 rw_exit(uobj->vmobjlock);
1208 return VM_PAGER_OK;
1209 }
1210
1211 /*
1212 * uao_dropswap: release any swap resources from this aobj page.
1213 *
1214 * => aobj must be locked or have a reference count of 0.
1215 */
1216 int
1217 uao_dropswap(struct uvm_object *uobj, int pageidx)
1218 {
1219 int slot;
1220
1221 KASSERT(UVM_OBJ_IS_AOBJ(uobj));
1222
1223 slot = uao_set_swslot(uobj, pageidx, 0);
1224 if (slot) {
1225 uvm_swap_free(slot, 1);
1226 }
1227 return slot;
1228 }
1229
1230 /*
1231 * page in every page in every aobj that is paged-out to a range of swslots.
1232 *
1233  * => nothing should be locked by the caller; we lock each aobj as we go.
1234 * => returns TRUE if pagein was aborted due to lack of memory.
1235 */
1236 boolean_t
1237 uao_swap_off(int startslot, int endslot)
1238 {
1239 struct uvm_aobj *aobj;
1240
1241 /*
1242 * Walk the list of all anonymous UVM objects. Grab the first.
1243 */
1244 mtx_enter(&uao_list_lock);
1245 if ((aobj = LIST_FIRST(&uao_list)) == NULL) {
1246 mtx_leave(&uao_list_lock);
1247 return FALSE;
1248 }
1249 uao_reference(&aobj->u_obj);
1250
1251 do {
1252 struct uvm_aobj *nextaobj;
1253 boolean_t rv;
1254
1255 /*
1256 * Prefetch the next object and immediately hold a reference
1257 * on it, so neither the current nor the next entry could
1258 * disappear while we are iterating.
1259 */
1260 if ((nextaobj = LIST_NEXT(aobj, u_list)) != NULL) {
1261 uao_reference(&nextaobj->u_obj);
1262 }
1263 mtx_leave(&uao_list_lock);
1264
1265 /*
1266 * Page in all pages in the swap slot range.
1267 */
1268 rw_enter(aobj->u_obj.vmobjlock, RW_WRITE);
1269 rv = uao_pagein(aobj, startslot, endslot);
1270 rw_exit(aobj->u_obj.vmobjlock);
1271
1272 /* Drop the reference of the current object. */
1273 uao_detach(&aobj->u_obj);
1274 if (rv) {
1275 if (nextaobj) {
1276 uao_detach(&nextaobj->u_obj);
1277 }
1278 return rv;
1279 }
1280
1281 aobj = nextaobj;
1282 mtx_enter(&uao_list_lock);
1283 } while (aobj);
1284
1285 /*
1286 * done with traversal, unlock the list
1287 */
1288 mtx_leave(&uao_list_lock);
1289 return FALSE;
1290 }
1291
1292 /*
1293 * page in any pages from aobj in the given range.
1294 *
1295 * => returns TRUE if pagein was aborted due to lack of memory.
1296 */
1297 static boolean_t
1298 uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
1299 {
1300 boolean_t rv;
1301
1302 if (UAO_USES_SWHASH(aobj)) {
1303 struct uao_swhash_elt *elt;
1304 int bucket;
1305
1306 restart:
1307 for (bucket = aobj->u_swhashmask; bucket >= 0; bucket--) {
1308 for (elt = LIST_FIRST(&aobj->u_swhash[bucket]);
1309 elt != NULL;
1310 elt = LIST_NEXT(elt, list)) {
1311 int i;
1312
1313 for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
1314 int slot = elt->slots[i];
1315
1316 /*
1317 * if the slot isn't in range, skip it.
1318 */
1319 if (slot < startslot ||
1320 slot >= endslot) {
1321 continue;
1322 }
1323
1324 /*
1325 * process the page,
1326 					 * then start over on this object
1327 * since the swhash elt
1328 * may have been freed.
1329 */
1330 rv = uao_pagein_page(aobj,
1331 UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
1332 if (rv) {
1333 return rv;
1334 }
1335 goto restart;
1336 }
1337 }
1338 }
1339 } else {
1340 int i;
1341
1342 for (i = 0; i < aobj->u_pages; i++) {
1343 int slot = aobj->u_swslots[i];
1344
1345 /*
1346 * if the slot isn't in range, skip it
1347 */
1348 if (slot < startslot || slot >= endslot) {
1349 continue;
1350 }
1351
1352 /*
1353 * process the page.
1354 */
1355 rv = uao_pagein_page(aobj, i);
1356 if (rv) {
1357 return rv;
1358 }
1359 }
1360 }
1361
1362 return FALSE;
1363 }
1364
1365 /*
1366 * uao_pagein_page: page in a single page from an anonymous UVM object.
1367 *
1368 * => Returns TRUE if pagein was aborted due to lack of memory.
1369 */
1370 static boolean_t
1371 uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
1372 {
1373 struct uvm_object *uobj = &aobj->u_obj;
1374 struct vm_page *pg;
1375 int rv, npages;
1376
1377 pg = NULL;
1378 npages = 1;
1379
1380 KASSERT(rw_write_held(uobj->vmobjlock));
1381 rv = uao_get(&aobj->u_obj, (voff_t)pageidx << PAGE_SHIFT,
1382 &pg, &npages, 0, PROT_READ | PROT_WRITE, 0, 0);
1383
1384 /*
1385 * relock and finish up.
1386 */
1387 rw_enter(uobj->vmobjlock, RW_WRITE);
1388 switch (rv) {
1389 case VM_PAGER_OK:
1390 break;
1391
1392 case VM_PAGER_ERROR:
1393 case VM_PAGER_REFAULT:
1394 /*
1395 * nothing more to do on errors.
1396 * VM_PAGER_REFAULT can only mean that the anon was freed,
1397 * so again there's nothing to do.
1398 */
1399 return FALSE;
1400 }
1401
1402 /*
1403 * ok, we've got the page now.
1404 * mark it as dirty, clear its swslot and un-busy it.
1405 */
1406 uao_dropswap(&aobj->u_obj, pageidx);
1407 atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_CLEAN|PG_FAKE);
1408 UVM_PAGE_OWN(pg, NULL);
1409
1410 /*
1411 * deactivate the page (to put it on a page queue).
1412 */
1413 uvm_lock_pageq();
1414 uvm_pagedeactivate(pg);
1415 uvm_unlock_pageq();
1416
1417 return FALSE;
1418 }
1419
1420 /*
1421 * uao_dropswap_range: drop swapslots in the range.
1422 *
1423 * => aobj must be locked and is returned locked.
1424 * => start is inclusive. end is exclusive.
1425 */
1426 void
1427 uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
1428 {
1429 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
1430 int swpgonlydelta = 0;
1431
1432 KASSERT(UVM_OBJ_IS_AOBJ(uobj));
1433 KASSERT(rw_write_held(uobj->vmobjlock));
1434
1435 if (end == 0) {
1436 end = INT64_MAX;
1437 }
1438
1439 if (UAO_USES_SWHASH(aobj)) {
1440 int i, hashbuckets = aobj->u_swhashmask + 1;
1441 voff_t taghi;
1442 voff_t taglo;
1443
1444 taglo = UAO_SWHASH_ELT_TAG(start);
1445 taghi = UAO_SWHASH_ELT_TAG(end);
1446
1447 for (i = 0; i < hashbuckets; i++) {
1448 struct uao_swhash_elt *elt, *next;
1449
1450 for (elt = LIST_FIRST(&aobj->u_swhash[i]);
1451 elt != NULL;
1452 elt = next) {
1453 int startidx, endidx;
1454 int j;
1455
1456 next = LIST_NEXT(elt, list);
1457
1458 if (elt->tag < taglo || taghi < elt->tag) {
1459 continue;
1460 }
1461
1462 if (elt->tag == taglo) {
1463 startidx =
1464 UAO_SWHASH_ELT_PAGESLOT_IDX(start);
1465 } else {
1466 startidx = 0;
1467 }
1468
1469 if (elt->tag == taghi) {
1470 endidx =
1471 UAO_SWHASH_ELT_PAGESLOT_IDX(end);
1472 } else {
1473 endidx = UAO_SWHASH_CLUSTER_SIZE;
1474 }
1475
1476 for (j = startidx; j < endidx; j++) {
1477 int slot = elt->slots[j];
1478
1479 KASSERT(uvm_pagelookup(&aobj->u_obj,
1480 (voff_t)(UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
1481 + j) << PAGE_SHIFT) == NULL);
1482
1483 if (slot > 0) {
1484 uvm_swap_free(slot, 1);
1485 swpgonlydelta++;
1486 KASSERT(elt->count > 0);
1487 elt->slots[j] = 0;
1488 elt->count--;
1489 }
1490 }
1491
1492 if (elt->count == 0) {
1493 LIST_REMOVE(elt, list);
1494 pool_put(&uao_swhash_elt_pool, elt);
1495 }
1496 }
1497 }
1498 } else {
1499 int i;
1500
1501 if (aobj->u_pages < end) {
1502 end = aobj->u_pages;
1503 }
1504 for (i = start; i < end; i++) {
1505 int slot = aobj->u_swslots[i];
1506
1507 if (slot > 0) {
1508 uvm_swap_free(slot, 1);
1509 swpgonlydelta++;
1510 }
1511 }
1512 }
1513
1514 /*
1515 * adjust the counter of pages only in swap for all
1516 * the swap slots we've freed.
1517 */
1518 if (swpgonlydelta > 0) {
1519 KASSERT(uvmexp.swpgonly >= swpgonlydelta);
1520 atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta);
1521 }
1522 }
1523