/*	$OpenBSD: uvm_glue.c,v 1.87 2024/10/28 08:25:32 mpi Exp $	*/
/*	$NetBSD: uvm_glue.c,v 1.44 2001/02/06 19:54:44 eeh Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_glue.c	8.6 (Berkeley) 1/5/94
 * from: Id: uvm_glue.c,v 1.1.2.8 1998/02/07 01:16:54 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_glue.c: glue functions
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#ifdef SYSVSHM
#include <sys/shm.h>
#endif

#include <uvm/uvm.h>

/*
 * uvm_kernacc: can the kernel access a region of memory
 *
 * - called from malloc [DIAGNOSTIC], and /dev/kmem driver (mem.c)
 */
boolean_t
uvm_kernacc(caddr_t addr, size_t len, int rw)
{
	boolean_t rv;
	vaddr_t saddr, eaddr;
	vm_prot_t prot = rw == B_READ ? PROT_READ : PROT_WRITE;

	saddr = trunc_page((vaddr_t)addr);
	eaddr = round_page((vaddr_t)addr + len);
	vm_map_lock_read(kernel_map);
	rv = uvm_map_checkprot(kernel_map, saddr, eaddr, prot);
	vm_map_unlock_read(kernel_map);

	return rv;
}

/*
 * uvm_vslock: wire user memory for I/O
 *
 * - called from sys_sysctl
 */
int
uvm_vslock(struct proc *p, caddr_t addr, size_t len, vm_prot_t access_type)
{
	struct vm_map *map = &p->p_vmspace->vm_map;
	vaddr_t start, end;

	start = trunc_page((vaddr_t)addr);
	end = round_page((vaddr_t)addr + len);
	if (end <= start)
		return (EINVAL);

	return uvm_fault_wire(map, start, end, access_type);
}

/*
 * uvm_vsunlock: unwire user memory wired by uvm_vslock()
 *
 * - called from sys_sysctl
 */
void
uvm_vsunlock(struct proc *p, caddr_t addr, size_t len)
{
	vaddr_t start, end;

	start = trunc_page((vaddr_t)addr);
	end = round_page((vaddr_t)addr + len);
	KASSERT(end > start);

	uvm_fault_unwire(&p->p_vmspace->vm_map, start, end);
}
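
/*
 * Usage sketch for the pair above (illustrative only; "udata", "ulen"
 * and "kbuf" are placeholder names, not taken from sys_sysctl):
 *
 *	if ((error = uvm_vslock(p, udata, ulen, PROT_READ | PROT_WRITE)))
 *		return (error);
 *	error = copyout(kbuf, udata, ulen);
 *	uvm_vsunlock(p, udata, ulen);
 *
 * The user pages stay wired between the two calls, so they cannot be
 * paged out while the copy is in progress.
 */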

/*
 * uvm_vslock_device: wire user memory, make sure it's device reachable
 *  and bounce if necessary.
 *
 * - called from physio
 */
int
uvm_vslock_device(struct proc *p, void *addr, size_t len,
    vm_prot_t access_type, void **retp)
{
	struct vm_map *map = &p->p_vmspace->vm_map;
	struct vm_page *pg;
	struct pglist pgl;
	int npages;
	vaddr_t start, end, off;
	vaddr_t sva, va;
	vsize_t sz;
	int error, mapv, i;

	start = trunc_page((vaddr_t)addr);
	end = round_page((vaddr_t)addr + len);
	sz = end - start;
	off = (vaddr_t)addr - start;
	if (end <= start)
		return (EINVAL);

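	/*
	 * The range is wired with the map unlocked: sample the map's
	 * version (timestamp) under the read lock, drop the lock,
	 * wire the range, then re-check and retry if the map was
	 * modified in the meantime.
	 */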
	vm_map_lock_read(map);
retry:
	mapv = map->timestamp;
	vm_map_unlock_read(map);

	if ((error = uvm_fault_wire(map, start, end, access_type)))
		return (error);

	vm_map_lock_read(map);
	if (mapv != map->timestamp)
		goto retry;

	npages = atop(sz);
	for (i = 0; i < npages; i++) {
		paddr_t pa;

		if (!pmap_extract(map->pmap, start + ptoa(i), &pa)) {
			error = EFAULT;
			goto out_unwire;
		}
		if (!PADDR_IS_DMA_REACHABLE(pa))
			break;
	}
	if (i == npages) {
		*retp = NULL;
		return (0);
	}

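	/*
	 * At least one wired page lies outside the DMA-reachable
	 * range: set up a bounce buffer.  Allocate kernel va and
	 * DMA-reachable pages, map them, and copy the user data in;
	 * *retp will point into this bounce copy.
	 */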
	va = (vaddr_t)km_alloc(sz, &kv_any, &kp_none, &kd_nowait);
	if (va == 0) {
		error = ENOMEM;
		goto out_unwire;
	}
	sva = va;

	TAILQ_INIT(&pgl);
	error = uvm_pglistalloc(npages * PAGE_SIZE, dma_constraint.ucr_low,
	    dma_constraint.ucr_high, 0, 0, &pgl, npages, UVM_PLA_WAITOK);
	if (error)
		goto out_unmap;

	while ((pg = TAILQ_FIRST(&pgl)) != NULL) {
		TAILQ_REMOVE(&pgl, pg, pageq);
		pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), PROT_READ | PROT_WRITE);
		va += PAGE_SIZE;
	}
	pmap_update(pmap_kernel());
	KASSERT(va == sva + sz);
	*retp = (void *)(sva + off);

	if ((error = copyin(addr, *retp, len)) == 0)
		return 0;

	uvm_km_pgremove_intrsafe(sva, sva + sz);
	pmap_kremove(sva, sz);
	pmap_update(pmap_kernel());
out_unmap:
	km_free((void *)sva, sz, &kv_any, &kp_none);
out_unwire:
	uvm_fault_unwire_locked(map, start, end);
	vm_map_unlock_read(map);
	return (error);
}
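
/*
 * Usage sketch (illustrative; "buf", "len" and "bounce" are placeholder
 * names, not taken from physio): on success the map is left read-locked
 * and the pointer returned via *retp, when not NULL, must be used for
 * the transfer and handed back to uvm_vsunlock_device() below.
 *
 *	void *bounce;
 *	if ((error = uvm_vslock_device(p, buf, len, PROT_READ | PROT_WRITE,
 *	    &bounce)))
 *		return (error);
 *	... perform the transfer on (bounce != NULL ? bounce : buf) ...
 *	uvm_vsunlock_device(p, buf, len, bounce);
 */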

/*
 * uvm_vsunlock_device: unwire user memory wired by uvm_vslock_device()
 *
 * - called from physio
 */
void
uvm_vsunlock_device(struct proc *p, void *addr, size_t len, void *map)
{
	vaddr_t start, end;
	vaddr_t kva;
	vsize_t sz;

	start = trunc_page((vaddr_t)addr);
	end = round_page((vaddr_t)addr + len);
	KASSERT(end > start);
	sz = end - start;

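	/*
	 * If uvm_vslock_device() handed out a bounce buffer (passed
	 * back here as "map"), copy its contents back to the user's
	 * buffer before tearing the bounce mapping down.
	 */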
	if (map)
		copyout(map, addr, len);

	uvm_fault_unwire_locked(&p->p_vmspace->vm_map, start, end);
	vm_map_unlock_read(&p->p_vmspace->vm_map);

	if (!map)
		return;

	kva = trunc_page((vaddr_t)map);
	uvm_km_pgremove_intrsafe(kva, kva + sz);
	pmap_kremove(kva, sz);
	pmap_update(pmap_kernel());
	uvm_km_free(kernel_map, kva, sz);
}

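/*
 * Allocation mode for u-areas (per-thread kernel stack and PCB):
 * carved out of kernel_map with the machine-dependent USPACE_ALIGN
 * alignment.
 */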
const struct kmem_va_mode kv_uarea = {
	.kv_map = &kernel_map,
	.kv_align = USPACE_ALIGN
};

/*
 * uvm_uarea_alloc: allocate the u-area for a new thread
 */
vaddr_t
uvm_uarea_alloc(void)
{
	return (vaddr_t)km_alloc(USPACE, &kv_uarea, &kp_zero, &kd_waitok);
}

/*
 * uvm_uarea_free: free a dead thread's stack
 *
 * - the thread passed to us is a dead thread; we
 *   are running on a different context now (the reaper).
 */
void
uvm_uarea_free(struct proc *p)
{
	km_free(p->p_addr, USPACE, &kv_uarea, &kp_zero);
	p->p_addr = NULL;
}

/*
 * uvm_exit: exit a virtual address space
 */
void
uvm_exit(struct process *pr)
{
	struct vmspace *vm = pr->ps_vmspace;

	pr->ps_vmspace = NULL;
	uvmspace_free(vm);
}

/*
 * uvm_init_limits: init per-process VM limits
 *
 * - called for process 0 and then inherited by all others.
 */
void
uvm_init_limits(struct plimit *limit0)
{
	/*
	 * Set up the initial limits on process VM.  Set the maximum
	 * resident set size to be all of (reasonably) available memory.
	 * This causes any single, large process to start random page
	 * replacement once it fills memory.
	 */
	limit0->pl_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
	limit0->pl_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
	limit0->pl_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
	limit0->pl_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
	limit0->pl_rlimit[RLIMIT_RSS].rlim_cur = ptoa(uvmexp.free);
}

#ifdef __HAVE_PMAP_COLLECT

#ifdef DEBUG
int	enableswap = 1;
int	swapdebug = 0;
#define	SDB_FOLLOW	1
#define SDB_SWAPIN	2
#define SDB_SWAPOUT	4
#endif


/*
 * swapout_threads: find threads that can be swapped
 *
 * - called by the pagedaemon
 * - try and swap at least one process
 * - processes that are sleeping or stopped for maxslp or more seconds
 *   are swapped... otherwise the longest-sleeping or stopped process
 *   is swapped, otherwise the longest resident process...
 */
int
uvm_swapout_threads(void)
{
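	/*
	 * Nothing is paged out here; "swapping" a process amounts to
	 * calling pmap_collect(), which gives the pmap layer a chance
	 * to release resources for an address space that is not
	 * expected to run for a while.
	 */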
	struct process *pr;
	struct proc *p, *slpp;
	struct process *outpr;
	int free, outpri;
	int didswap = 0;
	extern int maxslp;
	/* XXXCDC: should move off to uvmexp. or uvm., also in uvm_meter */

#ifdef DEBUG
	if (!enableswap)
		return 0;
#endif

	free = uvmexp.free;

	/*
	 * outpr/outpri  : stop/sleep process whose most active thread has
	 *	the largest sleeptime < maxslp
	 */
	outpr = NULL;
	outpri = 0;
	LIST_FOREACH(pr, &allprocess, ps_list) {
		if (pr->ps_flags & (PS_SYSTEM | PS_EXITING))
			continue;

		/*
		 * slpp: the sleeping or stopped thread in pr with
		 * the smallest p_slptime
		 */
		slpp = NULL;
		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
			switch (p->p_stat) {
			case SRUN:
			case SONPROC:
				goto next_process;

			case SSLEEP:
			case SSTOP:
				if (slpp == NULL ||
				    slpp->p_slptime < p->p_slptime)
					slpp = p;
				continue;
			}
		}

		if (slpp != NULL) {
			if (slpp->p_slptime >= maxslp) {
				pmap_collect(pr->ps_vmspace->vm_map.pmap);
				didswap++;
			} else if (slpp->p_slptime > outpri) {
				outpr = pr;
				outpri = slpp->p_slptime;
			}
		}
next_process:	;
	}

	/*
	 * If we didn't get rid of any real duds, toss out the next most
	 * likely sleeping/stopped or running candidate.  We only do this
	 * if we are real low on memory since we don't gain much by doing
	 * it.
	 */
	if (didswap == 0 && free <= atop(round_page(USPACE)) && outpr != NULL) {
#ifdef DEBUG
		if (swapdebug & SDB_SWAPOUT)
			printf("swapout_threads: no duds, try procpr %p\n",
			    outpr);
#endif
		pmap_collect(outpr->ps_vmspace->vm_map.pmap);
	}

	/*
	 * XXX might return a non-0 value even if pmap_collect() didn't
	 * free anything.
	 */
	return (uvmexp.free - free);
}

#endif	/* __HAVE_PMAP_COLLECT */

/*
 * uvm_atopg: convert KVAs back to their page structures.
 */
struct vm_page *
uvm_atopg(vaddr_t kva)
{
	struct vm_page *pg;
	paddr_t pa;
	boolean_t rv;

	rv = pmap_extract(pmap_kernel(), kva, &pa);
	KASSERT(rv);
	pg = PHYS_TO_VM_PAGE(pa);
	KASSERT(pg != NULL);
	return (pg);
}

#ifndef SMALL_KERNEL
int
fill_vmmap(struct process *pr, struct kinfo_vmentry *kve,
    size_t *lenp)
{
	struct vm_map *map;

	if (pr != NULL)
		map = &pr->ps_vmspace->vm_map;
	else
		map = kernel_map;
	return uvm_map_fill_vmmap(map, kve, lenp);
}
#endif
455