1 /* $NetBSD: vm.c,v 1.196 2023/04/22 13:53:53 riastradh Exp $ */
2
3 /*
4 * Copyright (c) 2007-2011 Antti Kantee. All Rights Reserved.
5 *
6 * Development of this software was supported by
7 * The Finnish Cultural Foundation and the Research Foundation of
8 * The Helsinki University of Technology.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Virtual memory emulation routines.
34 */
35
36 /*
37 * XXX: we abuse pg->uanon for the virtual address of the storage
38 * for each page. phys_addr would fit the job description better,
39 * except that it will create unnecessary lossage on some platforms
40 * due to not being a pointer type.
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.196 2023/04/22 13:53:53 riastradh Exp $");
45
46 #include <sys/param.h>
47 #include <sys/atomic.h>
48 #include <sys/buf.h>
49 #include <sys/kernel.h>
50 #include <sys/kmem.h>
51 #include <sys/vmem.h>
52 #include <sys/mman.h>
53 #include <sys/null.h>
54 #include <sys/vnode.h>
55 #include <sys/radixtree.h>
56 #include <sys/module.h>
57
58 #include <machine/pmap.h>
59
60 #if defined(__i386__) || defined(__x86_64__)
61 /*
62 * This file abuses the pmap abstraction to create its own statically
63 * allocated struct pmap object, even though it can't do anything
64 * useful with such a thing from userland. On x86 the struct pmap
65 * definition is private, so we have to go to extra effort to abuse it
66 * there. This should be fixed -- all of the struct pmap definitions
67 * should be private, and then rump can furnish its own fake struct
68 * pmap without clashing with anything.
69 */
70 #include <machine/pmap_private.h>
71 #endif
72
73 #include <uvm/uvm.h>
74 #include <uvm/uvm_ddb.h>
75 #include <uvm/uvm_pdpolicy.h>
76 #include <uvm/uvm_prot.h>
77 #include <uvm/uvm_readahead.h>
78 #include <uvm/uvm_device.h>
79
80 #include <rump-sys/kern.h>
81 #include <rump-sys/vfs.h>
82
83 #include <rump/rumpuser.h>
84
85 kmutex_t vmpage_lruqueue_lock; /* non-free page lock */
86 kmutex_t uvm_swap_data_lock;
87
88 struct uvmexp uvmexp;
89 struct uvm uvm;
90
91 #ifdef __uvmexp_pagesize
92 const int * const uvmexp_pagesize = &uvmexp.pagesize;
93 const int * const uvmexp_pagemask = &uvmexp.pagemask;
94 const int * const uvmexp_pageshift = &uvmexp.pageshift;
95 #endif
96
97 static struct vm_map kernel_map_store;
98 struct vm_map *kernel_map = &kernel_map_store;
99
100 static struct vm_map module_map_store;
101
102 static struct pmap pmap_kernel;
103 struct pmap rump_pmap_local;
104 struct pmap *const kernel_pmap_ptr = &pmap_kernel;
105
106 vmem_t *kmem_arena;
107 vmem_t *kmem_va_arena;
108
109 static unsigned int pdaemon_waiters;
110 static kmutex_t pdaemonmtx;
111 static kcondvar_t pdaemoncv, oomwait;
112
113 /* all local non-proc0 processes share this vmspace */
114 struct vmspace *rump_vmspace_local;
115
116 unsigned long rump_physmemlimit = RUMPMEM_UNLIMITED;
117 static unsigned long pdlimit = RUMPMEM_UNLIMITED; /* page daemon memlimit */
118 static unsigned long curphysmem;
119 static unsigned long dddlim; /* 90% of memory limit used */
120 #define NEED_PAGEDAEMON() \
121 (rump_physmemlimit != RUMPMEM_UNLIMITED && curphysmem > dddlim)
122 #define PDRESERVE (2*MAXPHYS)
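/*
 * Illustrative arithmetic (editor's example, not from the sources):
 * assuming MAXPHYS is 64KiB, PDRESERVE is 128KiB.  With RUMP_MEMLIMIT=32m
 * the pagedaemon limit (pdlimit) stays at 32MiB, rump_physmemlimit drops
 * to 32MiB - 128KiB, and dddlim (the 90% trigger computed in uvm_init())
 * comes out at roughly 28.7MiB, so NEED_PAGEDAEMON() fires once
 * curphysmem exceeds that.
 */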
123
124 /*
125 * Try to free two pages worth of pages from objects.
126 * If this successfully frees a full page cache page, we'll
127 * free the released page plus PAGE_SIZE/sizeof(vm_page).
128 */
129 #define PAGEDAEMON_OBJCHUNK (2*PAGE_SIZE / sizeof(struct vm_page))
130
131 /*
132 * Keep a list of least recently used pages. Since the only way a
133 * rump kernel can "access" a page is via lookup, we put the page
134 * at the back of queue every time a lookup for it is done. If the
135 * page is in front of this global queue and we're short of memory,
136 * it's a candidate for pageout.
137 */
138 static struct pglist vmpage_lruqueue;
139 static unsigned vmpage_onqueue;
140
141 /*
142 * vm pages
143 */
144
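/*
 * Each vm_page is backed by a PAGE_SIZE chunk of hypervisor memory;
 * as noted at the top of this file, the address of that chunk is
 * stashed in pg->uanon.  The pool cache constructor/destructor below
 * allocate and release that backing storage.
 */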
145 static int
146 pgctor(void *arg, void *obj, int flags)
147 {
148 struct vm_page *pg = obj;
149
150 memset(pg, 0, sizeof(*pg));
151 pg->uanon = rump_hypermalloc(PAGE_SIZE, PAGE_SIZE,
152 (flags & PR_WAITOK) == PR_WAITOK, "pgalloc");
153 return pg->uanon == NULL;
154 }
155
156 static void
157 pgdtor(void *arg, void *obj)
158 {
159 struct vm_page *pg = obj;
160
161 rump_hyperfree(pg->uanon, PAGE_SIZE);
162 }
163
164 static struct pool_cache pagecache;
165
166 /* stub for UVM_OBJ_IS_VNODE */
167 struct uvm_pagerops rump_uvm_vnodeops;
168 __weak_alias(uvm_vnodeops,rump_uvm_vnodeops);
169
170 /*
171 * Called with the object locked. We don't support anons.
172 */
173 struct vm_page *
174 uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon,
175 int flags, int strat, int free_list)
176 {
177 struct vm_page *pg;
178
179 KASSERT(uobj && rw_write_held(uobj->vmobjlock));
180 KASSERT(anon == NULL);
181
182 pg = pool_cache_get(&pagecache, PR_NOWAIT);
183 if (__predict_false(pg == NULL)) {
184 return NULL;
185 }
186 mutex_init(&pg->interlock, MUTEX_DEFAULT, IPL_NONE);
187
188 pg->offset = off;
189 pg->uobject = uobj;
190
191 if (radix_tree_insert_node(&uobj->uo_pages, off >> PAGE_SHIFT,
192 pg) != 0) {
193 pool_cache_put(&pagecache, pg);
194 return NULL;
195 }
196
197 if (UVM_OBJ_IS_VNODE(uobj)) {
198 if (uobj->uo_npages == 0) {
199 struct vnode *vp = (struct vnode *)uobj;
200 mutex_enter(vp->v_interlock);
201 vp->v_iflag |= VI_PAGES;
202 mutex_exit(vp->v_interlock);
203 }
204 pg->flags |= PG_FILE;
205 }
206 uobj->uo_npages++;
207
208 pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;
209 if (flags & UVM_PGA_ZERO) {
210 uvm_pagezero(pg);
211 }
212
213 /*
214 * Don't put anons on the LRU page queue. We can't flush them
215 * (there's no concept of swap in a rump kernel), so no reason
216 * to bother with them.
217 */
218 if (!UVM_OBJ_IS_AOBJ(uobj)) {
219 atomic_inc_uint(&vmpage_onqueue);
220 mutex_enter(&vmpage_lruqueue_lock);
221 TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
222 mutex_exit(&vmpage_lruqueue_lock);
223 } else {
224 pg->flags |= PG_AOBJ;
225 }
226
227 return pg;
228 }
229
230 /*
231 * Release a page.
232 *
233 * Called with the vm object locked.
234 */
235 void
236 uvm_pagefree(struct vm_page *pg)
237 {
238 struct uvm_object *uobj = pg->uobject;
239 struct vm_page *pg2 __unused;
240
241 KASSERT(rw_write_held(uobj->vmobjlock));
242
243 mutex_enter(&pg->interlock);
244 uvm_pagewakeup(pg);
245 mutex_exit(&pg->interlock);
246
247 uobj->uo_npages--;
248 pg2 = radix_tree_remove_node(&uobj->uo_pages, pg->offset >> PAGE_SHIFT);
249 KASSERT(pg == pg2);
250
251 if (!UVM_OBJ_IS_AOBJ(uobj)) {
252 mutex_enter(&vmpage_lruqueue_lock);
253 TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
254 mutex_exit(&vmpage_lruqueue_lock);
255 atomic_dec_uint(&vmpage_onqueue);
256 }
257
258 if (UVM_OBJ_IS_VNODE(uobj) && uobj->uo_npages == 0) {
259 struct vnode *vp = (struct vnode *)uobj;
260 mutex_enter(vp->v_interlock);
261 vp->v_iflag &= ~VI_PAGES;
262 mutex_exit(vp->v_interlock);
263 }
264
265 mutex_destroy(&pg->interlock);
266 pool_cache_put(&pagecache, pg);
267 }
268
269 void
270 uvm_pagezero(struct vm_page *pg)
271 {
272
273 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
274 memset((void *)pg->uanon, 0, PAGE_SIZE);
275 }
276
277 /*
278 * uvm_page_owner_locked_p: return true if object associated with page is
279 * locked. this is a weak check for runtime assertions only.
280 */
281
282 bool
283 uvm_page_owner_locked_p(struct vm_page *pg, bool exclusive)
284 {
285
286 if (exclusive)
287 return rw_write_held(pg->uobject->vmobjlock);
288 else
289 return rw_lock_held(pg->uobject->vmobjlock);
290 }
291
292 /*
293 * Misc routines
294 */
295
296 static kmutex_t pagermtx;
297
298 void
299 uvm_init(void)
300 {
301 char buf[64];
302
303 if (rumpuser_getparam("RUMP_MEMLIMIT", buf, sizeof(buf)) == 0) {
304 unsigned long tmp;
305 char *ep;
306 int mult;
307
308 tmp = strtoul(buf, &ep, 10);
309 if (strlen(ep) > 1)
310 panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);
311
312 /* mini-dehumanize-number */
313 mult = 1;
314 switch (*ep) {
315 case 'k':
316 mult = 1024;
317 break;
318 case 'm':
319 mult = 1024*1024;
320 break;
321 case 'g':
322 mult = 1024*1024*1024;
323 break;
324 case 0:
325 break;
326 default:
327 panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);
328 }
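/*
 * Example (illustrative): RUMP_MEMLIMIT=16m parses as tmp=16 with
 * mult=1024*1024, i.e. a 16MiB limit; a bare number such as
 * RUMP_MEMLIMIT=4194304 is taken as bytes.
 */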
329 rump_physmemlimit = tmp * mult;
330
331 if (rump_physmemlimit / mult != tmp)
332 panic("uvm_init: RUMP_MEMLIMIT overflow: %s", buf);
333
334 /* reserve some memory for the pager */
335 if (rump_physmemlimit <= PDRESERVE)
336 panic("uvm_init: system reserves %d bytes of mem, "
337 "only %lu bytes given",
338 PDRESERVE, rump_physmemlimit);
339 pdlimit = rump_physmemlimit;
340 rump_physmemlimit -= PDRESERVE;
341
342 if (pdlimit < 1024*1024)
343 printf("uvm_init: WARNING: <1MB RAM limit, "
344 "hope you know what you're doing\n");
345
346 #define HUMANIZE_BYTES 9
347 CTASSERT(sizeof(buf) >= HUMANIZE_BYTES);
348 format_bytes(buf, HUMANIZE_BYTES, rump_physmemlimit);
349 #undef HUMANIZE_BYTES
350 dddlim = 9 * (rump_physmemlimit / 10);
351 } else {
352 strlcpy(buf, "unlimited (host limit)", sizeof(buf));
353 }
354 aprint_verbose("total memory = %s\n", buf);
355
356 TAILQ_INIT(&vmpage_lruqueue);
357
358 if (rump_physmemlimit == RUMPMEM_UNLIMITED) {
359 uvmexp.npages = physmem;
360 } else {
361 uvmexp.npages = pdlimit >> PAGE_SHIFT;
362 uvmexp.reserve_pagedaemon = PDRESERVE >> PAGE_SHIFT;
363 uvmexp.freetarg = (rump_physmemlimit-dddlim) >> PAGE_SHIFT;
364 }
365 /*
366 * uvmexp.free is not used internally or updated. The reason is
367 * that the memory hypercall allocator is allowed to allocate
368 * non-page sized chunks. We use a byte count in curphysmem
369 * instead.
370 */
371 uvmexp.free = uvmexp.npages;
372
373 #ifndef __uvmexp_pagesize
374 uvmexp.pagesize = PAGE_SIZE;
375 uvmexp.pagemask = PAGE_MASK;
376 uvmexp.pageshift = PAGE_SHIFT;
377 #else
378 #define FAKE_PAGE_SHIFT 12
379 uvmexp.pageshift = FAKE_PAGE_SHIFT;
380 uvmexp.pagesize = 1<<FAKE_PAGE_SHIFT;
381 uvmexp.pagemask = (1<<FAKE_PAGE_SHIFT)-1;
382 #undef FAKE_PAGE_SHIFT
383 #endif
384
385 mutex_init(&pagermtx, MUTEX_DEFAULT, IPL_NONE);
386 mutex_init(&vmpage_lruqueue_lock, MUTEX_DEFAULT, IPL_NONE);
387 mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, IPL_NONE);
388 mutex_init(&pdaemonmtx, MUTEX_DEFAULT, IPL_NONE);
389
390 cv_init(&pdaemoncv, "pdaemon");
391 cv_init(&oomwait, "oomwait");
392
393 module_map = &module_map_store;
394
395 kernel_map->pmap = pmap_kernel();
396
397 pool_subsystem_init();
398
399 kmem_arena = vmem_create("kmem", 0, 1024*1024, PAGE_SIZE,
400 NULL, NULL, NULL,
401 0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);
402
403 vmem_subsystem_init(kmem_arena);
404
405 kmem_va_arena = vmem_create("kva", 0, 0, PAGE_SIZE,
406 vmem_alloc, vmem_free, kmem_arena,
407 8 * PAGE_SIZE, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);
408
409 pool_cache_bootstrap(&pagecache, sizeof(struct vm_page), 0, 0, 0,
410 "page$", NULL, IPL_NONE, pgctor, pgdtor, NULL);
411
412 radix_tree_init();
413
414 /* create vmspace used by local clients */
415 rump_vmspace_local = kmem_zalloc(sizeof(*rump_vmspace_local), KM_SLEEP);
416 uvmspace_init(rump_vmspace_local, &rump_pmap_local, 0, 0, false);
417 }
418
419 void
420 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax,
421 bool topdown)
422 {
423
424 vm->vm_map.pmap = pmap;
425 vm->vm_refcnt = 1;
426 }
427
428 int
429 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
430 bool new_pageable, int lockflags)
431 {
432 return 0;
433 }
434
435 void
436 uvm_pagewire(struct vm_page *pg)
437 {
438
439 /* nada */
440 }
441
442 void
443 uvm_pageunwire(struct vm_page *pg)
444 {
445
446 /* nada */
447 }
448
449 int
450 uvm_availmem(bool cached)
451 {
452
453 return uvmexp.free;
454 }
455
456 void
457 uvm_pagelock(struct vm_page *pg)
458 {
459
460 mutex_enter(&pg->interlock);
461 }
462
463 void
464 uvm_pagelock2(struct vm_page *pg1, struct vm_page *pg2)
465 {
466
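	/*
	 * Take the two interlocks in a fixed (address) order so that
	 * concurrent callers locking the same pair cannot deadlock.
	 */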
467 if (pg1 < pg2) {
468 mutex_enter(&pg1->interlock);
469 mutex_enter(&pg2->interlock);
470 } else {
471 mutex_enter(&pg2->interlock);
472 mutex_enter(&pg1->interlock);
473 }
474 }
475
476 void
477 uvm_pageunlock(struct vm_page *pg)
478 {
479
480 mutex_exit(&pg->interlock);
481 }
482
483 void
484 uvm_pageunlock2(struct vm_page *pg1, struct vm_page *pg2)
485 {
486
487 mutex_exit(&pg1->interlock);
488 mutex_exit(&pg2->interlock);
489 }
490
491 /* where's your schmonz now? */
492 #define PUNLIMIT(a) \
493 p->p_rlimit[a].rlim_cur = p->p_rlimit[a].rlim_max = RLIM_INFINITY;
494 void
495 uvm_init_limits(struct proc *p)
496 {
497
498 #ifndef DFLSSIZ
499 #define DFLSSIZ (16*1024*1024)
500 #endif
501 p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
502 p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
503 PUNLIMIT(RLIMIT_DATA);
504 PUNLIMIT(RLIMIT_RSS);
505 PUNLIMIT(RLIMIT_AS);
506 /* nice, cascade */
507 }
508 #undef PUNLIMIT
509
510 /*
511 * This satisfies the "disgusting mmap hack" used by proplib.
512 */
513 int
514 uvm_mmap_anon(struct proc *p, void **addrp, size_t size)
515 {
516 int error;
517
518 /* no reason in particular, but cf. uvm_default_mapaddr() */
519 if (*addrp != NULL)
520 panic("uvm_mmap() variant unsupported");
521
522 if (RUMP_LOCALPROC_P(curproc)) {
523 error = rumpuser_anonmmap(NULL, size, 0, 0, addrp);
524 } else {
525 error = rump_sysproxy_anonmmap(RUMP_SPVM2CTL(p->p_vmspace),
526 size, addrp);
527 }
528 return error;
529 }
530
531 /*
532 * Stubs for things referenced from vfs_vnode.c but not used.
533 */
534 const dev_t zerodev;
535
536 struct uvm_object *
537 udv_attach(dev_t device, vm_prot_t accessprot, voff_t off, vsize_t size)
538 {
539 return NULL;
540 }
541
542 struct pagerinfo {
543 vaddr_t pgr_kva;
544 int pgr_npages;
545 struct vm_page **pgr_pgs;
546 bool pgr_read;
547
548 LIST_ENTRY(pagerinfo) pgr_entries;
549 };
550 static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist);
551
552 /*
553 * Pager "map" in routine. Instead of mapping, we allocate memory
554 * and copy page contents there. The reason for copying instead of
555 * mapping is simple: we do not assume we are running on virtual
556 * memory. Even if we could emulate virtual memory in some envs
557 * such as userspace, copying is much faster than trying to awkwardly
558 * cope with remapping (see "Design and Implementation" pp.95-98).
559 * The downside of the approach is that the pager requires MAXPHYS
560 * free memory to perform paging, but short of virtual memory or
561 * making the pager do I/O in page-sized chunks we cannot do much
562 * about that.
563 */
564 vaddr_t
565 uvm_pagermapin(struct vm_page **pgs, int npages, int flags)
566 {
567 struct pagerinfo *pgri;
568 vaddr_t curkva;
569 int i;
570
571 /* allocate structures */
572 pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP);
573 pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP);
574 pgri->pgr_npages = npages;
575 pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP);
576 pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0;
577
578 /* copy contents to "mapped" memory */
579 for (i = 0, curkva = pgri->pgr_kva;
580 i < npages;
581 i++, curkva += PAGE_SIZE) {
582 /*
583 * We need to copy the previous contents of the pages to
584 * the window even if we are reading from the
585 * device, since the device might not fill the contents of
586 * the full mapped range and we will end up corrupting
587 * data when we unmap the window.
588 */
589 memcpy((void*)curkva, pgs[i]->uanon, PAGE_SIZE);
590 pgri->pgr_pgs[i] = pgs[i];
591 }
592
593 mutex_enter(&pagermtx);
594 LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries);
595 mutex_exit(&pagermtx);
596
597 return pgri->pgr_kva;
598 }
599
600 /*
601 * map out the pager window. return contents from VA to page storage
602 * and free structures.
603 *
604 * Note: does not currently support partial frees
605 */
606 void
607 uvm_pagermapout(vaddr_t kva, int npages)
608 {
609 struct pagerinfo *pgri;
610 vaddr_t curkva;
611 int i;
612
613 mutex_enter(&pagermtx);
614 LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
615 if (pgri->pgr_kva == kva)
616 break;
617 }
618 KASSERT(pgri);
619 if (pgri->pgr_npages != npages)
620 panic("uvm_pagermapout: partial unmapping not supported");
621 LIST_REMOVE(pgri, pgr_entries);
622 mutex_exit(&pagermtx);
623
624 if (pgri->pgr_read) {
625 for (i = 0, curkva = pgri->pgr_kva;
626 i < pgri->pgr_npages;
627 i++, curkva += PAGE_SIZE) {
628 memcpy(pgri->pgr_pgs[i]->uanon,(void*)curkva,PAGE_SIZE);
629 }
630 }
631
632 kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *));
633 kmem_free((void*)pgri->pgr_kva, npages * PAGE_SIZE);
634 kmem_free(pgri, sizeof(*pgri));
635 }
636
637 /*
638 * convert va in pager window to page structure.
639 * XXX: how expensive is this (global lock, list traversal)?
640 */
641 struct vm_page *
642 uvm_pageratop(vaddr_t va)
643 {
644 struct pagerinfo *pgri;
645 struct vm_page *pg = NULL;
646 int i;
647
648 mutex_enter(&pagermtx);
649 LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
650 if (pgri->pgr_kva <= va
651 && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE)
652 break;
653 }
654 if (pgri) {
655 i = (va - pgri->pgr_kva) >> PAGE_SHIFT;
656 pg = pgri->pgr_pgs[i];
657 }
658 mutex_exit(&pagermtx);
659
660 return pg;
661 }
662
663 /*
664 * Called with the vm object locked.
665 *
666 * Put vnode object pages at the end of the access queue to indicate
667 * they have been recently accessed and should not be immediate
668 * candidates for pageout. Do not do this for lookups done by
669 * the pagedaemon to mimic pmap_kentered mappings which don't track
670 * access information.
671 */
672 struct vm_page *
673 uvm_pagelookup(struct uvm_object *uobj, voff_t off)
674 {
675 struct vm_page *pg;
676 bool ispagedaemon = curlwp == uvm.pagedaemon_lwp;
677
678 pg = radix_tree_lookup_node(&uobj->uo_pages, off >> PAGE_SHIFT);
679 if (pg && !UVM_OBJ_IS_AOBJ(pg->uobject) && !ispagedaemon) {
680 mutex_enter(&vmpage_lruqueue_lock);
681 TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
682 TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
683 mutex_exit(&vmpage_lruqueue_lock);
684 }
685
686 return pg;
687 }
688
689 void
690 uvm_page_unbusy(struct vm_page **pgs, int npgs)
691 {
692 struct vm_page *pg;
693 int i, pageout_done;
694
695 KASSERT(npgs > 0);
696
697 pageout_done = 0;
698 for (i = 0; i < npgs; i++) {
699 pg = pgs[i];
700 if (pg == NULL || pg == PGO_DONTCARE) {
701 continue;
702 }
703
704 #if 0
705 KASSERT(uvm_page_owner_locked_p(pg, true));
706 #else
707 /*
708 * uvm_page_owner_locked_p() is not available in rump,
709 * and rump doesn't support amaps anyway.
710 */
711 KASSERT(rw_write_held(pg->uobject->vmobjlock));
712 #endif
713 KASSERT(pg->flags & PG_BUSY);
714
715 if (pg->flags & PG_PAGEOUT) {
716 pg->flags &= ~PG_PAGEOUT;
717 pg->flags |= PG_RELEASED;
718 pageout_done++;
719 atomic_inc_uint(&uvmexp.pdfreed);
720 }
721 if (pg->flags & PG_RELEASED) {
722 KASSERT(pg->uobject != NULL ||
723 (pg->uanon != NULL && pg->uanon->an_ref > 0));
724 pg->flags &= ~PG_RELEASED;
725 uvm_pagefree(pg);
726 } else {
727 KASSERT((pg->flags & PG_FAKE) == 0);
728 pg->flags &= ~PG_BUSY;
729 uvm_pagelock(pg);
730 uvm_pagewakeup(pg);
731 uvm_pageunlock(pg);
732 UVM_PAGE_OWN(pg, NULL);
733 }
734 }
735 if (pageout_done != 0) {
736 uvm_pageout_done(pageout_done);
737 }
738 }
739
740 void
741 uvm_pagewait(struct vm_page *pg, krwlock_t *lock, const char *wmesg)
742 {
743
744 KASSERT(rw_lock_held(lock));
745 KASSERT((pg->flags & PG_BUSY) != 0);
746
747 mutex_enter(&pg->interlock);
748 pg->pqflags |= PQ_WANTED;
749 rw_exit(lock);
750 UVM_UNLOCK_AND_WAIT(pg, &pg->interlock, false, wmesg, 0);
751 }
752
753 void
754 uvm_pagewakeup(struct vm_page *pg)
755 {
756
757 KASSERT(mutex_owned(&pg->interlock));
758
759 if ((pg->pqflags & PQ_WANTED) != 0) {
760 pg->pqflags &= ~PQ_WANTED;
761 wakeup(pg);
762 }
763 }
764
765 void
766 uvm_estimatepageable(int *active, int *inactive)
767 {
768
769 /* XXX: guessing game */
770 *active = 1024;
771 *inactive = 1024;
772 }
773
774 int
775 uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
776 {
777
778 panic("%s: unimplemented", __func__);
779 }
780
781 void
782 uvm_unloan(void *v, int npages, int flags)
783 {
784
785 panic("%s: unimplemented", __func__);
786 }
787
788 int
789 uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
790 struct vm_page **opp)
791 {
792
793 return EBUSY;
794 }
795
796 struct vm_page *
797 uvm_loanbreak(struct vm_page *pg)
798 {
799
800 panic("%s: unimplemented", __func__);
801 }
802
803 void
804 ubc_purge(struct uvm_object *uobj)
805 {
806
807 }
808
809 vaddr_t
810 uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
811 {
812
813 return 0;
814 }
815
816 int
817 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
818 vm_prot_t prot, bool set_max)
819 {
820
821 return EOPNOTSUPP;
822 }
823
824 int
825 uvm_map(struct vm_map *map, vaddr_t *startp, vsize_t size,
826 struct uvm_object *uobj, voff_t uoffset, vsize_t align,
827 uvm_flag_t flags)
828 {
829
830 *startp = (vaddr_t)rump_hypermalloc(size, align, true, "uvm_map");
831 return *startp != 0 ? 0 : ENOMEM;
832 }
833
834 void
835 uvm_unmap1(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
836 {
837
838 rump_hyperfree((void*)start, end-start);
839 }
840
841
842 /*
843 * UVM km
844 */
845
846 vaddr_t
847 uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
848 {
849 void *rv, *desired = NULL;
850 int alignbit, error;
851
852 #ifdef __x86_64__
853 /*
854 * On amd64, allocate all module memory from the lowest 2GB.
855 * This is because NetBSD kernel modules are compiled
856 * with -mcmodel=kernel and reserve only 4 bytes for
857 * offsets. If we load code compiled with -mcmodel=kernel
858 * anywhere except the lowest or highest 2GB, it will not
859 * work. Since userspace does not have access to the highest
860 * 2GB, use the lowest 2GB.
861 *
862 * Note: this assumes the rump kernel resides in
863 * the lowest 2GB as well.
864 *
865 * Note2: yes, it's a quick hack, but since this is the only
866 * place where we care about the map we're allocating from,
867 * just use a simple "if" instead of coming up with a fancy
868 * generic solution.
869 */
870 if (map == module_map) {
871 desired = (void *)(0x80000000 - size);
872 }
873 #endif
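	/*
	 * Illustrative note (editor's example): for module_map
	 * allocations the requested alignment is handed to the
	 * hypercall as a shift, e.g. align = 4096 gives
	 * alignbit = ffs(4096) - 1 = 12.
	 */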
874
875 if (__predict_false(map == module_map)) {
876 alignbit = 0;
877 if (align) {
878 alignbit = ffs(align)-1;
879 }
880 error = rumpuser_anonmmap(desired, size, alignbit,
881 flags & UVM_KMF_EXEC, &rv);
882 } else {
883 error = rumpuser_malloc(size, align, &rv);
884 }
885
886 if (error) {
887 if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT))
888 return 0;
889 else
890 panic("uvm_km_alloc failed");
891 }
892
893 if (flags & UVM_KMF_ZERO)
894 memset(rv, 0, size);
895
896 return (vaddr_t)rv;
897 }
898
899 void
900 uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
901 {
902
903 if (__predict_false(map == module_map))
904 rumpuser_unmap((void *)vaddr, size);
905 else
906 rumpuser_free((void *)vaddr, size);
907 }
908
909 int
910 uvm_km_protect(struct vm_map *map, vaddr_t vaddr, vsize_t size, vm_prot_t prot)
911 {
912 return 0;
913 }
914
915 struct vm_map *
916 uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
917 vsize_t size, int pageable, bool fixed, struct vm_map *submap)
918 {
919
920 return (struct vm_map *)417416;
921 }
922
923 int
924 uvm_km_kmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags,
925 vmem_addr_t *addr)
926 {
927 vaddr_t va;
928 va = (vaddr_t)rump_hypermalloc(size, PAGE_SIZE,
929 (flags & VM_SLEEP), "kmalloc");
930
931 if (va) {
932 *addr = va;
933 return 0;
934 } else {
935 return ENOMEM;
936 }
937 }
938
939 void
940 uvm_km_kmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
941 {
942
943 rump_hyperfree((void *)addr, size);
944 }
945
946 /*
947 * VM space locking routines. We don't really have to do anything,
948 * since the pages are always "wired" (both local and remote processes).
949 */
950 int
951 uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access)
952 {
953
954 return 0;
955 }
956
957 void
958 uvm_vsunlock(struct vmspace *vs, void *addr, size_t len)
959 {
960
961 }
962
963 /*
964 * For the local case the buffer mappers don't need to do anything.
965 * For the remote case we need to reserve space and copy data in or
966 * out, depending on B_READ/B_WRITE.
967 */
968 int
969 vmapbuf(struct buf *bp, vsize_t len)
970 {
971 int error = 0;
972
973 bp->b_saveaddr = bp->b_data;
974
975 /* remote case */
976 if (!RUMP_LOCALPROC_P(curproc)) {
977 bp->b_data = rump_hypermalloc(len, 0, true, "vmapbuf");
978 if (BUF_ISWRITE(bp)) {
979 error = copyin(bp->b_saveaddr, bp->b_data, len);
980 if (error) {
981 rump_hyperfree(bp->b_data, len);
982 bp->b_data = bp->b_saveaddr;
983 bp->b_saveaddr = 0;
984 }
985 }
986 }
987
988 return error;
989 }
990
991 void
992 vunmapbuf(struct buf *bp, vsize_t len)
993 {
994
995 /* remote case */
996 if (!RUMP_LOCALPROC_P(bp->b_proc)) {
997 if (BUF_ISREAD(bp)) {
998 bp->b_error = copyout_proc(bp->b_proc,
999 bp->b_data, bp->b_saveaddr, len);
1000 }
1001 rump_hyperfree(bp->b_data, len);
1002 }
1003
1004 bp->b_data = bp->b_saveaddr;
1005 bp->b_saveaddr = 0;
1006 }
1007
1008 void
1009 uvmspace_addref(struct vmspace *vm)
1010 {
1011
1012 /*
1013 * No dynamically allocated vmspaces exist.
1014 */
1015 }
1016
1017 void
1018 uvmspace_free(struct vmspace *vm)
1019 {
1020
1021 /* nothing for now */
1022 }
1023
1024 /*
1025 * page life cycle stuff. it really doesn't exist, so just stubs.
1026 */
1027
1028 void
1029 uvm_pageactivate(struct vm_page *pg)
1030 {
1031
1032 /* nada */
1033 }
1034
1035 void
1036 uvm_pagedeactivate(struct vm_page *pg)
1037 {
1038
1039 /* nada */
1040 }
1041
1042 void
1043 uvm_pagedequeue(struct vm_page *pg)
1044 {
1045
1046 /* nada*/
1047 }
1048
1049 void
1050 uvm_pageenqueue(struct vm_page *pg)
1051 {
1052
1053 /* nada */
1054 }
1055
1056 void
1057 uvmpdpol_anfree(struct vm_anon *an)
1058 {
1059
1060 /* nada */
1061 }
1062
1063 /*
1064 * Physical address accessors.
1065 */
1066
1067 struct vm_page *
1068 uvm_phys_to_vm_page(paddr_t pa)
1069 {
1070
1071 return NULL;
1072 }
1073
1074 paddr_t
1075 uvm_vm_page_to_phys(const struct vm_page *pg)
1076 {
1077
1078 return 0;
1079 }
1080
1081 vaddr_t
1082 uvm_uarea_alloc(void)
1083 {
1084
1085 /* non-zero */
1086 return (vaddr_t)11;
1087 }
1088
1089 void
1090 uvm_uarea_free(vaddr_t uarea)
1091 {
1092
1093 /* nata, so creamy */
1094 }
1095
1096 /*
1097 * Routines related to the Page Baroness.
1098 */
1099
1100 void
1101 uvm_wait(const char *msg)
1102 {
1103
1104 if (__predict_false(rump_threads == 0))
1105 panic("pagedaemon missing (RUMP_THREADS = 0)");
1106
1107 if (curlwp == uvm.pagedaemon_lwp) {
1108 /* is it possible for us to later get memory? */
1109 if (!uvmexp.paging)
1110 panic("pagedaemon out of memory");
1111 }
1112
1113 mutex_enter(&pdaemonmtx);
1114 pdaemon_waiters++;
1115 cv_signal(&pdaemoncv);
1116 cv_wait(&oomwait, &pdaemonmtx);
1117 mutex_exit(&pdaemonmtx);
1118 }
1119
1120 void
1121 uvm_pageout_start(int npages)
1122 {
1123
1124 mutex_enter(&pdaemonmtx);
1125 uvmexp.paging += npages;
1126 mutex_exit(&pdaemonmtx);
1127 }
1128
1129 void
1130 uvm_pageout_done(int npages)
1131 {
1132
1133 if (!npages)
1134 return;
1135
1136 mutex_enter(&pdaemonmtx);
1137 KASSERT(uvmexp.paging >= npages);
1138 uvmexp.paging -= npages;
1139
1140 if (pdaemon_waiters) {
1141 pdaemon_waiters = 0;
1142 cv_broadcast(&oomwait);
1143 }
1144 mutex_exit(&pdaemonmtx);
1145 }
1146
1147 static bool
1148 processpage(struct vm_page *pg)
1149 {
1150 struct uvm_object *uobj;
1151
1152 uobj = pg->uobject;
1153 if (rw_tryenter(uobj->vmobjlock, RW_WRITER)) {
1154 if ((pg->flags & PG_BUSY) == 0) {
1155 mutex_exit(&vmpage_lruqueue_lock);
1156 uobj->pgops->pgo_put(uobj, pg->offset,
1157 pg->offset + PAGE_SIZE,
1158 PGO_CLEANIT|PGO_FREE);
1159 KASSERT(!rw_write_held(uobj->vmobjlock));
1160 return true;
1161 } else {
1162 rw_exit(uobj->vmobjlock);
1163 }
1164 }
1165
1166 return false;
1167 }
1168
1169 /*
1170 * The Diabolical pageDaemon Director (DDD).
1171 *
1172 * This routine can always use better heuristics.
1173 */
1174 void
1175 uvm_pageout(void *arg)
1176 {
1177 struct vm_page *pg;
1178 struct pool *pp, *pp_first;
1179 int cleaned, skip, skipped;
1180 bool succ;
1181
1182 mutex_enter(&pdaemonmtx);
1183 for (;;) {
1184 if (pdaemon_waiters) {
1185 pdaemon_waiters = 0;
1186 cv_broadcast(&oomwait);
1187 }
1188 if (!NEED_PAGEDAEMON()) {
1189 kernel_map->flags &= ~VM_MAP_WANTVA;
1190 cv_wait(&pdaemoncv, &pdaemonmtx);
1191 }
1192 uvmexp.pdwoke++;
1193
1194 /* tell the world that we are hungry */
1195 kernel_map->flags |= VM_MAP_WANTVA;
1196 mutex_exit(&pdaemonmtx);
1197
1198 /*
1199 * step one: reclaim the page cache. this should give
1200 * us the biggest earnings since whole pages are released
1201 * into backing memory.
1202 */
1203 pool_cache_reclaim(&pagecache);
1204 if (!NEED_PAGEDAEMON()) {
1205 mutex_enter(&pdaemonmtx);
1206 continue;
1207 }
1208
1209 /*
1210 * Ok, so that didn't help. Next, try to hunt memory
1211 * by pushing out vnode pages. The pages might contain
1212 * useful cached data, but we need the memory.
1213 */
1214 cleaned = 0;
1215 skip = 0;
1216 again:
1217 mutex_enter(&vmpage_lruqueue_lock);
1218 while (cleaned < PAGEDAEMON_OBJCHUNK) {
1219 skipped = 0;
1220 TAILQ_FOREACH(pg, &vmpage_lruqueue, pageq.queue) {
1221
1222 /*
1223 * skip over pages we _might_ have tried
1224 * to handle earlier. they might not be
1225 * exactly the same ones, but I'm not too
1226 * concerned.
1227 */
1228 while (skipped++ < skip)
1229 continue;
1230
1231 if (processpage(pg)) {
1232 cleaned++;
1233 goto again;
1234 }
1235
1236 skip++;
1237 }
1238 break;
1239 }
1240 mutex_exit(&vmpage_lruqueue_lock);
1241
1242 /*
1243 * And of course we need to reclaim the page cache
1244 * again to actually release memory.
1245 */
1246 pool_cache_reclaim(&pagecache);
1247 if (!NEED_PAGEDAEMON()) {
1248 mutex_enter(&pdaemonmtx);
1249 continue;
1250 }
1251
1252 /*
1253 * And then drain the pools. Wipe them out ... all of them.
1254 */
1255 for (pp_first = NULL;;) {
1256 rump_vfs_drainbufs(10 /* XXX: estimate! */);
1257
1258 succ = pool_drain(&pp);
1259 if (succ || pp == pp_first)
1260 break;
1261
1262 if (pp_first == NULL)
1263 pp_first = pp;
1264 }
1265
1266 /*
1267 * Need to use PYEC on our bag of tricks.
1268 * Unfortunately, the wife just borrowed it.
1269 */
1270
1271 mutex_enter(&pdaemonmtx);
1272 if (!succ && cleaned == 0 && pdaemon_waiters &&
1273 uvmexp.paging == 0) {
1274 kpause("pddlk", false, hz, &pdaemonmtx);
1275 }
1276 }
1277
1278 panic("you can swap out any time you like, but you can never leave");
1279 }
1280
1281 void
1282 uvm_kick_pdaemon()
1283 {
1284
1285 /*
1286 * Wake up the diabolical pagedaemon director if we are over
1287 * 90% of the memory limit. This is a complete and utter
1288 * stetson-harrison decision which you are allowed to finetune.
1289 * Don't bother locking. If we have some unflushed caches,
1290 * other waker-uppers will deal with the issue.
1291 */
1292 if (NEED_PAGEDAEMON()) {
1293 cv_signal(&pdaemoncv);
1294 }
1295 }
1296
1297 void *
1298 rump_hypermalloc(size_t howmuch, int alignment, bool waitok, const char *wmsg)
1299 {
1300 const unsigned long thelimit =
1301 curlwp == uvm.pagedaemon_lwp ? pdlimit : rump_physmemlimit;
1302 unsigned long newmem;
1303 void *rv;
1304 int error;
1305
1306 uvm_kick_pdaemon(); /* ouch */
1307
1308 /* first we must be within the limit */
1309 limitagain:
1310 if (thelimit != RUMPMEM_UNLIMITED) {
1311 newmem = atomic_add_long_nv(&curphysmem, howmuch);
1312 if (newmem > thelimit) {
1313 newmem = atomic_add_long_nv(&curphysmem, -howmuch);
1314 if (!waitok) {
1315 return NULL;
1316 }
1317 uvm_wait(wmsg);
1318 goto limitagain;
1319 }
1320 }
1321
1322 /* second, we must get something from the backend */
1323 again:
1324 error = rumpuser_malloc(howmuch, alignment, &rv);
1325 if (__predict_false(error && waitok)) {
1326 uvm_wait(wmsg);
1327 goto again;
1328 }
1329
1330 return rv;
1331 }
1332
1333 void
1334 rump_hyperfree(void *what, size_t size)
1335 {
1336
1337 if (rump_physmemlimit != RUMPMEM_UNLIMITED) {
1338 atomic_add_long(&curphysmem, -size);
1339 }
1340 rumpuser_free(what, size);
1341 }
1342
1343 /*
1344 * UBC
1345 */
1346
1347 #define PAGERFLAGS (PGO_SYNCIO | PGO_NOBLOCKALLOC | PGO_NOTIMESTAMP)
1348
1349 void
1350 ubc_zerorange(struct uvm_object *uobj, off_t off, size_t len, int flags)
1351 {
1352 struct vm_page **pgs;
1353 int maxpages = MIN(32, round_page(len) >> PAGE_SHIFT);
1354 int npages, i;
1355
1356 if (maxpages == 0)
1357 return;
1358
1359 pgs = kmem_alloc(maxpages * sizeof(pgs), KM_SLEEP);
1360 rw_enter(uobj->vmobjlock, RW_WRITER);
1361 while (len) {
1362 npages = MIN(maxpages, round_page(len) >> PAGE_SHIFT);
1363 memset(pgs, 0, npages * sizeof(struct vm_page *));
1364 (void)uobj->pgops->pgo_get(uobj, trunc_page(off),
1365 pgs, &npages, 0, VM_PROT_READ | VM_PROT_WRITE,
1366 0, PAGERFLAGS | PGO_PASTEOF);
1367 KASSERT(npages > 0);
1368
1369 rw_enter(uobj->vmobjlock, RW_WRITER);
1370 for (i = 0; i < npages; i++) {
1371 struct vm_page *pg;
1372 uint8_t *start;
1373 size_t chunkoff, chunklen;
1374
1375 pg = pgs[i];
1376 if (pg == NULL)
1377 break;
1378
1379 KASSERT(pg->uobject != NULL);
1380 KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
1381
1382 chunkoff = off & PAGE_MASK;
1383 chunklen = MIN(PAGE_SIZE - chunkoff, len);
1384 start = (uint8_t *)pg->uanon + chunkoff;
1385
1386 memset(start, 0, chunklen);
1387 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
1388
1389 off += chunklen;
1390 len -= chunklen;
1391 }
1392 uvm_page_unbusy(pgs, npages);
1393 }
1394 rw_exit(uobj->vmobjlock);
1395 kmem_free(pgs, maxpages * sizeof(pgs));
1396 }
1397
1398 #define len2npages(off, len) \
1399 ((round_page(off+len) - trunc_page(off)) >> PAGE_SHIFT)
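/*
 * Example (illustrative, assuming 4KiB pages): off = 0x1800 and
 * len = 0x2000 cover bytes 0x1800..0x37ff, so
 * round_page(0x3800) - trunc_page(0x1800) = 0x4000 - 0x1000,
 * i.e. three pages.
 */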
1400
1401 int
1402 ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo,
1403 int advice, int flags)
1404 {
1405 struct vm_page **pgs;
1406 int npages = len2npages(uio->uio_offset, todo);
1407 size_t pgalloc;
1408 int i, rv, pagerflags;
1409 vm_prot_t prot;
1410
1411 pgalloc = npages * sizeof(pgs);
1412 pgs = kmem_alloc(pgalloc, KM_SLEEP);
1413
1414 pagerflags = PAGERFLAGS;
1415 if (flags & UBC_WRITE)
1416 pagerflags |= PGO_PASTEOF;
1417 if (flags & UBC_FAULTBUSY)
1418 pagerflags |= PGO_OVERWRITE;
1419
1420 prot = VM_PROT_READ;
1421 if (flags & UBC_WRITE)
1422 prot |= VM_PROT_WRITE;
1423
1424 rw_enter(uobj->vmobjlock, RW_WRITER);
1425 do {
1426 npages = len2npages(uio->uio_offset, todo);
1427 memset(pgs, 0, pgalloc);
1428 rv = uobj->pgops->pgo_get(uobj, trunc_page(uio->uio_offset),
1429 pgs, &npages, 0, prot, 0, pagerflags);
1430 if (rv)
1431 goto out;
1432
1433 rw_enter(uobj->vmobjlock, RW_WRITER);
1434 for (i = 0; i < npages; i++) {
1435 struct vm_page *pg;
1436 size_t xfersize;
1437 off_t pageoff;
1438
1439 pg = pgs[i];
1440 if (pg == NULL)
1441 break;
1442
1443 KASSERT(pg->uobject != NULL);
1444 KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
1445 pageoff = uio->uio_offset & PAGE_MASK;
1446
1447 xfersize = MIN(MIN(todo, PAGE_SIZE), PAGE_SIZE-pageoff);
1448 KASSERT(xfersize > 0);
1449 rv = uiomove((uint8_t *)pg->uanon + pageoff,
1450 xfersize, uio);
1451 if (rv) {
1452 uvm_page_unbusy(pgs, npages);
1453 rw_exit(uobj->vmobjlock);
1454 goto out;
1455 }
1456 if (uio->uio_rw == UIO_WRITE) {
1457 pg->flags &= ~PG_FAKE;
1458 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
1459 }
1460 todo -= xfersize;
1461 }
1462 uvm_page_unbusy(pgs, npages);
1463 } while (todo);
1464 rw_exit(uobj->vmobjlock);
1465
1466 out:
1467 kmem_free(pgs, pgalloc);
1468 return rv;
1469 }
1470