/*	$OpenBSD: uvm_glue.c,v 1.87 2024/10/28 08:25:32 mpi Exp $	*/
/*	$NetBSD: uvm_glue.c,v 1.44 2001/02/06 19:54:44 eeh Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_glue.c	8.6 (Berkeley) 1/5/94
 * from: Id: uvm_glue.c,v 1.1.2.8 1998/02/07 01:16:54 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_glue.c: glue functions
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#ifdef SYSVSHM
#include <sys/shm.h>
#endif

#include <uvm/uvm.h>

/*
 * uvm_kernacc: can the kernel access a region of memory
 *
 * - called from malloc [DIAGNOSTIC], and /dev/kmem driver (mem.c)
 */
boolean_t
uvm_kernacc(caddr_t addr, size_t len, int rw)
{
        boolean_t rv;
        vaddr_t saddr, eaddr;
        vm_prot_t prot = rw == B_READ ? PROT_READ : PROT_WRITE;

        saddr = trunc_page((vaddr_t)addr);
        eaddr = round_page((vaddr_t)addr + len);
        vm_map_lock_read(kernel_map);
        rv = uvm_map_checkprot(kernel_map, saddr, eaddr, prot);
        vm_map_unlock_read(kernel_map);

        return rv;
}

/*
 * uvm_vslock: wire user memory for I/O
 *
 * - called from sys_sysctl
 */
int
uvm_vslock(struct proc *p, caddr_t addr, size_t len, vm_prot_t access_type)
{
        struct vm_map *map = &p->p_vmspace->vm_map;
        vaddr_t start, end;

        start = trunc_page((vaddr_t)addr);
        end = round_page((vaddr_t)addr + len);
        if (end <= start)
                return (EINVAL);

        return uvm_fault_wire(map, start, end, access_type);
}
119
120 /*
121 * uvm_vsunlock: unwire user memory wired by uvm_vslock()
122 *
123 * - called from sys_sysctl
124 */
125 void
uvm_vsunlock(struct proc * p,caddr_t addr,size_t len)126 uvm_vsunlock(struct proc *p, caddr_t addr, size_t len)
127 {
128 vaddr_t start, end;
129
130 start = trunc_page((vaddr_t)addr);
131 end = round_page((vaddr_t)addr + len);
132 KASSERT(end > start);
133
134 uvm_fault_unwire(&p->p_vmspace->vm_map, start, end);
135 }

/*
 * uvm_vslock_device: wire user memory, make sure it's device reachable
 *  and bounce if necessary.
 *
 * - called from physio
 */
int
uvm_vslock_device(struct proc *p, void *addr, size_t len,
    vm_prot_t access_type, void **retp)
{
        struct vm_map *map = &p->p_vmspace->vm_map;
        struct vm_page *pg;
        struct pglist pgl;
        int npages;
        vaddr_t start, end, off;
        vaddr_t sva, va;
        vsize_t sz;
        int error, mapv, i;

        start = trunc_page((vaddr_t)addr);
        end = round_page((vaddr_t)addr + len);
        sz = end - start;
        off = (vaddr_t)addr - start;
        if (end <= start)
                return (EINVAL);

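        /*
         * Sample the map's timestamp under the read lock, then drop the
         * lock while wiring the pages, since uvm_fault_wire() may sleep.
         * If the timestamp changed while we were unlocked, the map was
         * modified and the pages we just wired may not be the ones
         * checked below, so retry.
         */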
        vm_map_lock_read(map);
retry:
        mapv = map->timestamp;
        vm_map_unlock_read(map);

        if ((error = uvm_fault_wire(map, start, end, access_type)))
                return (error);

        vm_map_lock_read(map);
        if (mapv != map->timestamp)
                goto retry;

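        /*
         * Check that every wired page is directly reachable by devices.
         * If all of them are, the user's buffer can be used for the
         * transfer as-is; otherwise fall through and set up a bounce
         * buffer below.
         */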
        npages = atop(sz);
        for (i = 0; i < npages; i++) {
                paddr_t pa;

                if (!pmap_extract(map->pmap, start + ptoa(i), &pa)) {
                        error = EFAULT;
                        goto out_unwire;
                }
                if (!PADDR_IS_DMA_REACHABLE(pa))
                        break;
        }
        if (i == npages) {
                *retp = NULL;
                return (0);
        }

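        /*
         * Bounce path: grab kernel va space, then allocate DMA-reachable
         * pages and map them contiguously at sva.
         */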
        va = (vaddr_t)km_alloc(sz, &kv_any, &kp_none, &kd_nowait);
        if (va == 0) {
                error = ENOMEM;
                goto out_unwire;
        }
        sva = va;

        TAILQ_INIT(&pgl);
        error = uvm_pglistalloc(npages * PAGE_SIZE, dma_constraint.ucr_low,
            dma_constraint.ucr_high, 0, 0, &pgl, npages, UVM_PLA_WAITOK);
        if (error)
                goto out_unmap;

        while ((pg = TAILQ_FIRST(&pgl)) != NULL) {
                TAILQ_REMOVE(&pgl, pg, pageq);
                pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), PROT_READ | PROT_WRITE);
                va += PAGE_SIZE;
        }
        pmap_update(pmap_kernel());
        KASSERT(va == sva + sz);
        *retp = (void *)(sva + off);

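        /*
         * Prime the bounce buffer with the current contents of the
         * user's buffer.  A device write needs this data; for a device
         * read the copy is unnecessary but harmless.
         */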
        if ((error = copyin(addr, *retp, len)) == 0)
                return 0;

        uvm_km_pgremove_intrsafe(sva, sva + sz);
        pmap_kremove(sva, sz);
        pmap_update(pmap_kernel());
out_unmap:
        km_free((void *)sva, sz, &kv_any, &kp_none);
out_unwire:
        uvm_fault_unwire_locked(map, start, end);
        vm_map_unlock_read(map);
        return (error);
}

/*
 * uvm_vsunlock_device: unwire user memory wired by uvm_vslock_device()
 *
 * - called from physio
 */
void
uvm_vsunlock_device(struct proc *p, void *addr, size_t len, void *map)
{
        vaddr_t start, end;
        vaddr_t kva;
        vsize_t sz;

        start = trunc_page((vaddr_t)addr);
        end = round_page((vaddr_t)addr + len);
        KASSERT(end > start);
        sz = end - start;

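        /*
         * "map" is the bounce buffer address handed out by
         * uvm_vslock_device(), or NULL if the transfer used the user's
         * buffer directly.  For a bounced transfer, copy the data back
         * out to the user's buffer before unwiring it.
         */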
        if (map)
                copyout(map, addr, len);

        uvm_fault_unwire_locked(&p->p_vmspace->vm_map, start, end);
        vm_map_unlock_read(&p->p_vmspace->vm_map);

        if (!map)
                return;

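        /* Tear down the bounce buffer: unmap and free its pages and va. */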
        kva = trunc_page((vaddr_t)map);
        uvm_km_pgremove_intrsafe(kva, kva + sz);
        pmap_kremove(kva, sz);
        pmap_update(pmap_kernel());
        uvm_km_free(kernel_map, kva, sz);
}
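
/*
 * va allocation mode for u-areas: carved from kernel_map with the
 * machine-dependent USPACE_ALIGN alignment that some architectures
 * require for their u-areas.
 */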
const struct kmem_va_mode kv_uarea = {
        .kv_map = &kernel_map,
        .kv_align = USPACE_ALIGN
};

/*
 * uvm_uarea_alloc: allocate the u-area for a new thread
 */
vaddr_t
uvm_uarea_alloc(void)
{
        return (vaddr_t)km_alloc(USPACE, &kv_uarea, &kp_zero, &kd_waitok);
}

/*
 * uvm_uarea_free: free a dead thread's stack
 *
 * - the thread passed to us is a dead thread; we
 *   are running on a different context now (the reaper).
 */
void
uvm_uarea_free(struct proc *p)
{
        km_free(p->p_addr, USPACE, &kv_uarea, &kp_zero);
        p->p_addr = NULL;
}

/*
 * uvm_exit: exit a virtual address space
 */
void
uvm_exit(struct process *pr)
{
        struct vmspace *vm = pr->ps_vmspace;

        pr->ps_vmspace = NULL;
        uvmspace_free(vm);
}

/*
 * uvm_init_limits: init per-process VM limits
 *
 * - called for process 0 and then inherited by all others.
 */
void
uvm_init_limits(struct plimit *limit0)
{
        /*
         * Set up the initial limits on process VM.  Set the maximum
         * resident set size to be all of (reasonably) available memory.
         * This causes any single, large process to start random page
         * replacement once it fills memory.
         */
        limit0->pl_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
        limit0->pl_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
        limit0->pl_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
        limit0->pl_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
        limit0->pl_rlimit[RLIMIT_RSS].rlim_cur = ptoa(uvmexp.free);
}

#ifdef __HAVE_PMAP_COLLECT

#ifdef DEBUG
int enableswap = 1;
int swapdebug = 0;
#define SDB_FOLLOW	1
#define SDB_SWAPIN	2
#define SDB_SWAPOUT	4
#endif


/*
 * swapout_threads: find threads that can be swapped
 *
 * - called by the pagedaemon
 * - try and swap at least one process
 * - processes that are sleeping or stopped for maxslp or more seconds
 *   are swapped... otherwise the longest-sleeping or stopped process
 *   is swapped, otherwise the longest resident process...
 */
int
uvm_swapout_threads(void)
{
        struct process *pr;
        struct proc *p, *slpp;
        struct process *outpr;
        int free, outpri;
        int didswap = 0;
        extern int maxslp;
        /* XXXCDC: should move off to uvmexp. or uvm., also in uvm_meter */

#ifdef DEBUG
        if (!enableswap)
                return 0;
#endif

        free = uvmexp.free;

        /*
         * outpr/outpri : stop/sleep process whose most active thread has
         *      the largest sleeptime < maxslp
         */
        outpr = NULL;
        outpri = 0;
        LIST_FOREACH(pr, &allprocess, ps_list) {
                if (pr->ps_flags & (PS_SYSTEM | PS_EXITING))
                        continue;

                /*
                 * slpp: the sleeping or stopped thread in pr with
                 * the smallest p_slptime
                 */
                slpp = NULL;
                TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
                        switch (p->p_stat) {
                        case SRUN:
                        case SONPROC:
                                goto next_process;

                        case SSLEEP:
                        case SSTOP:
                                if (slpp == NULL ||
                                    slpp->p_slptime < p->p_slptime)
                                        slpp = p;
                                continue;
                        }
                }

                if (slpp != NULL) {
                        if (slpp->p_slptime >= maxslp) {
                                pmap_collect(pr->ps_vmspace->vm_map.pmap);
                                didswap++;
                        } else if (slpp->p_slptime > outpri) {
                                outpr = pr;
                                outpri = slpp->p_slptime;
                        }
                }
next_process:   ;
        }

        /*
         * If we didn't get rid of any real duds, toss out the next most
         * likely sleeping/stopped or running candidate.  We only do this
         * if we are real low on memory since we don't gain much by doing
         * it.
         */
        if (didswap == 0 && free <= atop(round_page(USPACE)) && outpr != NULL) {
#ifdef DEBUG
                if (swapdebug & SDB_SWAPOUT)
                        printf("swapout_threads: no duds, try procpr %p\n",
                            outpr);
#endif
                pmap_collect(outpr->ps_vmspace->vm_map.pmap);
        }

        /*
         * XXX might return a non-0 value even if pmap_collect() didn't
         * free anything.
         */
        return (uvmexp.free - free);
}

#endif	/* __HAVE_PMAP_COLLECT */

/*
 * uvm_atopg: convert KVAs back to their page structures.
 */
struct vm_page *
uvm_atopg(vaddr_t kva)
{
        struct vm_page *pg;
        paddr_t pa;
        boolean_t rv;

        rv = pmap_extract(pmap_kernel(), kva, &pa);
        KASSERT(rv);
        pg = PHYS_TO_VM_PAGE(pa);
        KASSERT(pg != NULL);
        return (pg);
}

#ifndef SMALL_KERNEL
int
fill_vmmap(struct process *pr, struct kinfo_vmentry *kve,
    size_t *lenp)
{
        struct vm_map *map;

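        /* A NULL process denotes the kernel: report kernel_map itself. */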
        if (pr != NULL)
                map = &pr->ps_vmspace->vm_map;
        else
                map = kernel_map;
        return uvm_map_fill_vmmap(map, kve, lenp);
}
#endif