xref: /openbsd/sys/uvm/uvm_glue.c (revision 8529ddd3)
1 /*	$OpenBSD: uvm_glue.c,v 1.69 2014/12/15 20:38:22 tedu Exp $	*/
2 /*	$NetBSD: uvm_glue.c,v 1.44 2001/02/06 19:54:44 eeh Exp $	*/
3 
4 /*
5  * Copyright (c) 1997 Charles D. Cranor and Washington University.
6  * Copyright (c) 1991, 1993, The Regents of the University of California.
7  *
8  * All rights reserved.
9  *
10  * This code is derived from software contributed to Berkeley by
11  * The Mach Operating System project at Carnegie-Mellon University.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vm_glue.c	8.6 (Berkeley) 1/5/94
38  * from: Id: uvm_glue.c,v 1.1.2.8 1998/02/07 01:16:54 chs Exp
39  *
40  *
41  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
42  * All rights reserved.
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  */
64 
65 /*
66  * uvm_glue.c: glue functions
67  */
68 
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/proc.h>
72 #include <sys/resourcevar.h>
73 #include <sys/buf.h>
74 #include <sys/user.h>
75 #ifdef SYSVSHM
76 #include <sys/shm.h>
77 #endif
78 #include <sys/sched.h>
79 
80 #include <uvm/uvm.h>
81 
82 /*
83  * uvm_kernacc: can the kernel access a region of memory
84  *
85  * - called from malloc [DIAGNOSTIC], and /dev/kmem driver (mem.c)
86  */
87 boolean_t
88 uvm_kernacc(caddr_t addr, size_t len, int rw)
89 {
90 	boolean_t rv;
91 	vaddr_t saddr, eaddr;
92 	vm_prot_t prot = rw == B_READ ? PROT_READ : PROT_WRITE;
93 
94 	saddr = trunc_page((vaddr_t)addr);
95 	eaddr = round_page((vaddr_t)addr + len);
96 	vm_map_lock_read(kernel_map);
97 	rv = uvm_map_checkprot(kernel_map, saddr, eaddr, prot);
98 	vm_map_unlock_read(kernel_map);
99 
100 	return(rv);
101 }
102 
103 #ifdef KGDB
104 /*
105  * Change protections on kernel pages from addr to addr+len
106  * (presumably so debugger can plant a breakpoint).
107  *
108  * We force the protection change at the pmap level.  If we were
109  * to use vm_map_protect a change to allow writing would be lazily-
110  * applied meaning we would still take a protection fault, something
111  * we really don't want to do.  It would also fragment the kernel
112  * map unnecessarily.  We cannot use pmap_protect since it also won't
113  * enforce a write-enable request.  Using pmap_enter is the only way
114  * we can ensure the change takes place properly.
115  */
116 void
117 uvm_chgkprot(caddr_t addr, size_t len, int rw)
118 {
119 	vm_prot_t prot;
120 	paddr_t pa;
121 	vaddr_t sva, eva;
122 
123 	prot = rw == B_READ ? PROT_READ : PROT_READ | PROT_WRITE;
124 	eva = round_page((vaddr_t)addr + len);
125 	for (sva = trunc_page((vaddr_t)addr); sva < eva; sva += PAGE_SIZE) {
126 		/*
127 		 * Extract physical address for the page.
128 		 * We use a cheezy hack to differentiate physical
129 		 * page 0 from an invalid mapping, not that it
130 		 * really matters...
131 		 */
132 		if (pmap_extract(pmap_kernel(), sva, &pa) == FALSE)
133 			panic("chgkprot: invalid page");
134 		pmap_enter(pmap_kernel(), sva, pa, prot, PMAP_WIRED);
135 	}
136 	pmap_update(pmap_kernel());
137 }
138 #endif
139 
140 /*
141  * uvm_vslock: wire user memory for I/O
142  *
143  * - called from physio and sys___sysctl
144  */
145 
146 int
147 uvm_vslock(struct proc *p, caddr_t addr, size_t len, vm_prot_t access_type)
148 {
149 	struct vm_map *map;
150 	vaddr_t start, end;
151 	int rv;
152 
153 	map = &p->p_vmspace->vm_map;
154 	start = trunc_page((vaddr_t)addr);
155 	end = round_page((vaddr_t)addr + len);
156 	if (end <= start)
157 		return (EINVAL);
158 
159 	rv = uvm_fault_wire(map, start, end, access_type);
160 
161 	return (rv);
162 }
163 
164 /*
165  * uvm_vsunlock: unwire user memory wired by uvm_vslock()
166  *
167  * - called from physio and sys___sysctl
168  */
169 
170 void
171 uvm_vsunlock(struct proc *p, caddr_t addr, size_t len)
172 {
173 	vaddr_t start, end;
174 
175 	start = trunc_page((vaddr_t)addr);
176 	end = round_page((vaddr_t)addr + len);
177 	if (end <= start)
178 		return;
179 
180 	uvm_fault_unwire(&p->p_vmspace->vm_map, start, end);
181 }
182 
183 /*
184  * uvm_vslock_device: wire user memory, make sure it's device reachable
185  *  and bounce if necessary.
186  * Always bounces for now.
187  */
188 int
189 uvm_vslock_device(struct proc *p, void *addr, size_t len,
190     vm_prot_t access_type, void **retp)
191 {
192 	struct vm_page *pg;
193 	struct pglist pgl;
194 	int npages;
195 	vaddr_t start, end, off;
196 	vaddr_t sva, va;
197 	vsize_t sz;
198 	int error, i;
199 
200 	start = trunc_page((vaddr_t)addr);
201 	end = round_page((vaddr_t)addr + len);
202 	sz = end - start;
203 	off = (vaddr_t)addr - start;
204 	if (end <= start)
205 		return (EINVAL);
206 
207 	if ((error = uvm_fault_wire(&p->p_vmspace->vm_map, start, end,
208 	    access_type))) {
209 		return (error);
210 	}
211 
212 	npages = atop(sz);
213 	for (i = 0; i < npages; i++) {
214 		paddr_t pa;
215 
216 		if (!pmap_extract(p->p_vmspace->vm_map.pmap,
217 		    start + ptoa(i), &pa)) {
218 			error = EFAULT;
219 			goto out_unwire;
220 		}
221 		if (!PADDR_IS_DMA_REACHABLE(pa))
222 			break;
223 	}
224 	if (i == npages) {
225 		*retp = NULL;
226 		return (0);
227 	}
228 
229 	if ((va = uvm_km_valloc(kernel_map, sz)) == 0) {
230 		error = ENOMEM;
231 		goto out_unwire;
232 	}
233 	sva = va;
234 
235 	TAILQ_INIT(&pgl);
236 	error = uvm_pglistalloc(npages * PAGE_SIZE, dma_constraint.ucr_low,
237 	    dma_constraint.ucr_high, 0, 0, &pgl, npages, UVM_PLA_WAITOK);
238 	if (error)
239 		goto out_unmap;
240 
241 	while ((pg = TAILQ_FIRST(&pgl)) != NULL) {
242 		TAILQ_REMOVE(&pgl, pg, pageq);
243 		pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), PROT_READ | PROT_WRITE);
244 		va += PAGE_SIZE;
245 	}
246 	pmap_update(pmap_kernel());
247 	KASSERT(va == sva + sz);
248 	*retp = (void *)(sva + off);
249 
250 	if ((error = copyin(addr, *retp, len)) == 0)
251 		return 0;
252 
253 	uvm_km_pgremove_intrsafe(sva, sva + sz);
254 	pmap_kremove(sva, sz);
255 	pmap_update(pmap_kernel());
256 out_unmap:
257 	uvm_km_free(kernel_map, sva, sz);
258 out_unwire:
259 	uvm_fault_unwire(&p->p_vmspace->vm_map, start, end);
260 	return (error);
261 }
262 
263 void
264 uvm_vsunlock_device(struct proc *p, void *addr, size_t len, void *map)
265 {
266 	vaddr_t start, end;
267 	vaddr_t kva;
268 	vsize_t sz;
269 
270 	start = trunc_page((vaddr_t)addr);
271 	end = round_page((vaddr_t)addr + len);
272 	sz = end - start;
273 	if (end <= start)
274 		return;
275 
276 	if (map)
277 		copyout(map, addr, len);
278 	uvm_fault_unwire(&p->p_vmspace->vm_map, start, end);
279 
280 	if (!map)
281 		return;
282 
283 	kva = trunc_page((vaddr_t)map);
284 	uvm_km_pgremove_intrsafe(kva, kva + sz);
285 	pmap_kremove(kva, sz);
286 	pmap_update(pmap_kernel());
287 	uvm_km_free(kernel_map, kva, sz);
288 }
289 
290 /*
291  * uvm_uarea_alloc: allocate the u-area for a new thread
292  */
293 vaddr_t
294 uvm_uarea_alloc(void)
295 {
296 	vaddr_t uaddr;
297 
298 	uaddr = uvm_km_kmemalloc_pla(kernel_map, uvm.kernel_object, USPACE,
299 	    USPACE_ALIGN, UVM_KMF_ZERO,
300 	    no_constraint.ucr_low, no_constraint.ucr_high,
301 	    0, 0, USPACE/PAGE_SIZE);
302 
303 #ifdef PMAP_UAREA
304 	/* Tell the pmap this is a u-area mapping */
305 	if (uaddr != 0)
306 		PMAP_UAREA(uaddr);
307 #endif
308 
309 	return (uaddr);
310 }
311 
312 /*
313  * uvm_uarea_free: free a dead thread's stack
314  *
315  * - the thread passed to us is a dead thread; we
316  *   are running on a different context now (the reaper).
317  */
318 void
319 uvm_uarea_free(struct proc *p)
320 {
321 	uvm_km_free(kernel_map, (vaddr_t)p->p_addr, USPACE);
322 	p->p_addr = NULL;
323 }
324 
325 /*
326  * uvm_exit: exit a virtual address space
327  */
328 void
329 uvm_exit(struct process *pr)
330 {
331 	uvmspace_free(pr->ps_vmspace);
332 	pr->ps_vmspace = NULL;
333 }
334 
335 /*
336  * uvm_init_limit: init per-process VM limits
337  *
338  * - called for process 0 and then inherited by all others.
339  */
340 void
341 uvm_init_limits(struct proc *p)
342 {
343 
344 	/*
345 	 * Set up the initial limits on process VM.  Set the maximum
346 	 * resident set size to be all of (reasonably) available memory.
347 	 * This causes any single, large process to start random page
348 	 * replacement once it fills memory.
349 	 */
350 	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
351 	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
352 	p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
353 	p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
354 	p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(uvmexp.free);
355 }
356 
#ifdef DEBUG
/* Knobs that exist only in DEBUG kernels. */
int	enableswap = 1;	/* when zero, uvm_swapout_threads() returns at once */
int	swapdebug = 0;	/* bitmask of the SDB_* flags below */
#define	SDB_FOLLOW	0x1
#define	SDB_SWAPIN	0x2
#define	SDB_SWAPOUT	0x4	/* tested by uvm_swapout_threads() */
#endif
364 
365 
366 /*
367  * swapout_threads: find threads that can be swapped
368  *
369  * - called by the pagedaemon
370  * - try and swap at least one processs
371  * - processes that are sleeping or stopped for maxslp or more seconds
372  *   are swapped... otherwise the longest-sleeping or stopped process
373  *   is swapped, otherwise the longest resident process...
374  */
375 void
376 uvm_swapout_threads(void)
377 {
378 	struct process *pr;
379 	struct proc *p, *slpp;
380 	struct process *outpr;
381 	int outpri;
382 	int didswap = 0;
383 	extern int maxslp;
384 	/* XXXCDC: should move off to uvmexp. or uvm., also in uvm_meter */
385 
386 #ifdef DEBUG
387 	if (!enableswap)
388 		return;
389 #endif
390 
391 	/*
392 	 * outpr/outpri  : stop/sleep process whose most active thread has
393 	 *	the largest sleeptime < maxslp
394 	 */
395 	outpr = NULL;
396 	outpri = 0;
397 	LIST_FOREACH(pr, &allprocess, ps_list) {
398 		if (pr->ps_flags & (PS_SYSTEM | PS_EXITING))
399 			continue;
400 
401 		/*
402 		 * slpp: the sleeping or stopped thread in pr with
403 		 * the smallest p_slptime
404 		 */
405 		slpp = NULL;
406 		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
407 			switch (p->p_stat) {
408 			case SRUN:
409 			case SONPROC:
410 				goto next_process;
411 
412 			case SSLEEP:
413 			case SSTOP:
414 				if (slpp == NULL ||
415 				    slpp->p_slptime < p->p_slptime)
416 					slpp = p;
417 				continue;
418 			}
419 		}
420 
421 		if (slpp != NULL) {
422 			if (slpp->p_slptime >= maxslp) {
423 				pmap_collect(pr->ps_vmspace->vm_map.pmap);
424 				didswap++;
425 			} else if (slpp->p_slptime > outpri) {
426 				outpr = pr;
427 				outpri = slpp->p_slptime;
428 			}
429 		}
430 next_process:	;
431 	}
432 
433 	/*
434 	 * If we didn't get rid of any real duds, toss out the next most
435 	 * likely sleeping/stopped or running candidate.  We only do this
436 	 * if we are real low on memory since we don't gain much by doing
437 	 * it.
438 	 */
439 	if (didswap == 0 && uvmexp.free <= atop(round_page(USPACE)) &&
440 	    outpr != NULL) {
441 #ifdef DEBUG
442 		if (swapdebug & SDB_SWAPOUT)
443 			printf("swapout_threads: no duds, try procpr %p\n",
444 			    outpr);
445 #endif
446 		pmap_collect(outpr->ps_vmspace->vm_map.pmap);
447 	}
448 }
449 
450 /*
451  * uvm_atopg: convert KVAs back to their page structures.
452  */
453 struct vm_page *
454 uvm_atopg(vaddr_t kva)
455 {
456 	struct vm_page *pg;
457 	paddr_t pa;
458 	boolean_t rv;
459 
460 	rv = pmap_extract(pmap_kernel(), kva, &pa);
461 	KASSERT(rv);
462 	pg = PHYS_TO_VM_PAGE(pa);
463 	KASSERT(pg != NULL);
464 	return (pg);
465 }
466 
467 void
468 uvm_pause(void)
469 {
470 	static unsigned int toggle;
471 	if (toggle++ > 128) {
472 		toggle = 0;
473 		KERNEL_UNLOCK();
474 		KERNEL_LOCK();
475 	}
476 	if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD)
477 		preempt(NULL);
478 }
479 
480 #ifndef SMALL_KERNEL
481 int
482 fill_vmmap(struct process *pr, struct kinfo_vmentry *kve,
483     size_t *lenp)
484 {
485 	struct vm_map *map;
486 
487 	if (pr != NULL)
488 		map = &pr->ps_vmspace->vm_map;
489 	else
490 		map = kernel_map;
491 	return uvm_map_fill_vmmap(map, kve, lenp);
492 }
493 #endif
494