1 /*	$NetBSD: uvm_loan.c,v 1.104 2020/06/11 22:21:05 ad Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
28  */
29 
30 /*
31  * uvm_loan.c: page loanout handler
32  */
33 
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.104 2020/06/11 22:21:05 ad Exp $");
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/mman.h>
41 
42 #include <uvm/uvm.h>
43 
44 #ifdef UVMHIST
45 UVMHIST_DEFINE(loanhist);
46 #endif
47 
48 /*
49  * "loaned" pages are pages which are (read-only, copy-on-write) loaned
50  * from the VM system to other parts of the kernel.   this allows page
51  * copying to be avoided (e.g. you can loan pages from objs/anons to
52  * the mbuf system).
53  *
54  * there are 3 types of loans possible:
55  *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
56  *  A->K  anon page to wired kernel page (e.g. mbuf data area)
57  *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
58  * note that it is possible to have an O page loaned to both an A and K
59  * at the same time.
60  *
61  * loans are tracked by pg->loan_count.  an O->A page will have both
62  * a uvm_object and a vm_anon, but PG_ANON will not be set.   this sort
63  * of page is considered "owned" by the uvm_object (not the anon).
64  *
65  * each loan of a page to the kernel bumps the pg->wire_count.  the
66  * kernel mappings for these pages will be read-only and wired.  since
67  * the page will also be wired, it will not be a candidate for pageout,
68  * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
69  * write fault in the kernel to one of these pages will not cause
70  * copy-on-write.  instead, the page fault is considered fatal.  this
71  * is because the kernel mapping will have no way to look up the
72  * object/anon which the page is owned by.  this is a good side-effect,
73  * since a kernel write to a loaned page is an error.
74  *
75  * owners that want to free their pages and discover that they are
76  * loaned out simply "disown" them (the page becomes an orphan).  these
77  * pages should be freed when the last loan is dropped.   in some cases
78  * an anon may "adopt" an orphaned page.
79  *
80  * locking: to read pg->loan_count either the owner or pg->interlock
81  * must be locked.   to modify pg->loan_count, both the owner of the page
82  * and pg->interlock must be locked.   pg->flags is (as always) locked by
83  * the owner of the page.
84  *
85  * note that locking from the "loaned" side is tricky since the object
86  * getting the loaned page has no reference to the page's owner and thus
87  * the owner could "die" at any time.   in order to prevent the owner
88  * from dying pg->interlock should be locked.   this forces us to sometimes
89  * use "try" locking.
90  *
91  * loans are typically broken by the following events:
92  *  1. user-level write fault to a loaned page
93  *  2. pageout of clean+inactive O->A loaned page
94  *  3. owner frees page (e.g. pager flush)
95  *
96  * note that loaning a page causes all mappings of the page to become
97  * read-only (via pmap_page_protect).   this could have an unexpected
98  * effect on normal "wired" pages if one is not careful (XXX).
99  */
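
/*
 * as a sketch of the ->K side of this interface (an illustration
 * only: "map" and "va" are assumed for the example and the error
 * handling is elided), a kernel subsystem might loan a few pages
 * out of a map and later drop the loan like this:
 *
 *	struct vm_page *pgs[4];
 *	int error;
 *
 *	error = uvm_loan(map, va, 4 * PAGE_SIZE, pgs, UVM_LOAN_TOPAGE);
 *	if (error == 0) {
 *		... read the wired, read-only page contents via pgs[] ...
 *		uvm_unloan(pgs, 4, UVM_LOAN_TOPAGE);
 *	}
 */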
100 
101 /*
102  * local prototypes
103  */
104 
105 static int	uvm_loananon(struct uvm_faultinfo *, void ***,
106 			     int, struct vm_anon *);
107 static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
108 			     int, vaddr_t);
109 static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
110 static void	uvm_unloananon(struct vm_anon **, int);
111 static void	uvm_unloanpage(struct vm_page **, int);
112 static int	uvm_loanpage(struct vm_page **, int, bool);
113 
114 
115 /*
116  * inlines
117  */
118 
119 /*
120  * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
121  *
122  * => "ufi" is the result of a successful map lookup (meaning that
123  *	on entry the map is locked by the caller)
124  * => we may unlock and then relock the map if needed (for I/O)
125  * => we put our output result in "output"
126  * => we always return with the map unlocked
127  * => possible return values:
128  *	-1 == error, map is unlocked
129  *	 0 == map relock error (try again!), map is unlocked
130  *	>0 == number of pages we loaned, map is unlocked
131  *
132  * NOTE: We can live with this being an inline, because it is only called
133  * from one place.
134  */
135 
136 static inline int
137 uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
138 {
139 	vaddr_t curaddr = ufi->orig_rvaddr;
140 	vsize_t togo = ufi->size;
141 	struct vm_aref *aref = &ufi->entry->aref;
142 	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
143 	struct vm_anon *anon;
144 	int rv, result = 0;
145 
146 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
147 
148 	/*
149 	 * lock us the rest of the way down (we unlock before return)
150 	 */
151 	if (aref->ar_amap) {
152 		amap_lock(aref->ar_amap, RW_WRITER);
153 	}
154 
155 	/*
156 	 * loop until done
157 	 */
158 	while (togo) {
159 
160 		/*
161 		 * find the page we want.   check the anon layer first.
162 		 */
163 
164 		if (aref->ar_amap) {
165 			anon = amap_lookup(aref, curaddr - ufi->entry->start);
166 		} else {
167 			anon = NULL;
168 		}
169 
170 		/* locked: map, amap, uobj */
171 		if (anon) {
172 			rv = uvm_loananon(ufi, output, flags, anon);
173 		} else if (uobj) {
174 			rv = uvm_loanuobj(ufi, output, flags, curaddr);
175 		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
176 			rv = uvm_loanzero(ufi, output, flags);
177 		} else {
178 			uvmfault_unlockall(ufi, aref->ar_amap, uobj);
179 			rv = -1;
180 		}
181 		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
182 		KASSERT(rv > 0 || aref->ar_amap == NULL ||
183 		    !rw_write_held(aref->ar_amap->am_lock));
184 		KASSERT(rv > 0 || uobj == NULL ||
185 		    !rw_write_held(uobj->vmobjlock));
186 
187 		/* total failure */
188 		if (rv < 0) {
189 			UVMHIST_LOG(loanhist, "failure %jd", rv, 0,0,0);
190 			return (-1);
191 		}
192 
193 		/* relock failed, need to do another lookup */
194 		if (rv == 0) {
195 			UVMHIST_LOG(loanhist, "relock failure %jd", result,
196 			    0,0,0);
197 			return (result);
198 		}
199 
200 		/*
201 		 * got it... advance to next page
202 		 */
203 
204 		result++;
205 		togo -= PAGE_SIZE;
206 		curaddr += PAGE_SIZE;
207 	}
208 
209 	/*
210 	 * unlock what we locked, unlock the maps and return
211 	 */
212 
213 	if (aref->ar_amap) {
214 		amap_unlock(aref->ar_amap);
215 	}
216 	uvmfault_unlockmaps(ufi, false);
217 	UVMHIST_LOG(loanhist, "done %jd", result, 0,0,0);
218 	return (result);
219 }
220 
221 /*
222  * normal functions
223  */
224 
225 /*
226  * uvm_loan: loan pages in a map out to anons or to the kernel
227  *
228  * => map should be unlocked
229  * => start and len should be multiples of PAGE_SIZE
230  * => result is either an array of anon's or vm_pages (depending on flags)
231  * => flag values: UVM_LOAN_TOANON - loan to anons
232  *                 UVM_LOAN_TOPAGE - loan to wired kernel page
233  *    one and only one of these flags must be set!
234  * => returns 0 (success), or an appropriate error number
235  */
236 
237 int
238 uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
239 {
240 	struct uvm_faultinfo ufi;
241 	void **result, **output;
242 	int rv, error;
243 
244 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
245 
246 	/*
247 	 * ensure that one and only one of the flags is set
248 	 */
249 
250 	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
251 		((flags & UVM_LOAN_TOPAGE) == 0));
252 
253 	/*
254 	 * "output" is a pointer to the current place to put the loaned page.
255 	 */
256 
257 	result = v;
258 	output = &result[0];	/* start at the beginning ... */
259 
260 	/*
261 	 * while we've got pages to do
262 	 */
263 
264 	while (len > 0) {
265 
266 		/*
267 		 * fill in params for a call to uvmfault_lookup
268 		 */
269 
270 		ufi.orig_map = map;
271 		ufi.orig_rvaddr = start;
272 		ufi.orig_size = len;
273 
274 		/*
275 		 * do the lookup; the only time this will fail is if we hit
276 		 * an unmapped region (an error)
277 		 */
278 
279 		if (!uvmfault_lookup(&ufi, false)) {
280 			error = ENOENT;
281 			goto fail;
282 		}
283 
284 		/*
285 		 * the map is now locked.  do the loanout...
286 		 */
287 
288 		rv = uvm_loanentry(&ufi, &output, flags);
289 		if (rv < 0) {
290 			/* all unlocked due to error */
291 			error = EINVAL;
292 			goto fail;
293 		}
294 
295 		/*
296 		 * done!  the map is unlocked.  advance, if possible.
297 		 *
298 		 * XXXCDC: could be recoded to hold the map lock with
299 		 *	   smarter code (but it only happens on map entry
300 		 *	   boundaries, so it isn't that bad).
301 		 */
302 
303 		if (rv) {
304 			rv <<= PAGE_SHIFT;
305 			len -= rv;
306 			start += rv;
307 		}
308 	}
309 	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
310 	return 0;
311 
312 fail:
313 	/*
314 	 * failed to complete loans.  drop any loans and return failure code.
315 	 * map is already unlocked.
316 	 */
317 
318 	if (output - result) {
319 		if (flags & UVM_LOAN_TOANON) {
320 			uvm_unloananon((struct vm_anon **)result,
321 			    output - result);
322 		} else {
323 			uvm_unloanpage((struct vm_page **)result,
324 			    output - result);
325 		}
326 	}
327 	UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);
328 	return (error);
329 }
330 
331 /*
332  * uvm_loananon: loan a page from an anon out
333  *
334  * => called with map, amap, uobj locked
335  * => return value:
336  *	-1 = fatal error, everything is unlocked, abort.
337  *	 0 = lookup in ufi went stale, everything unlocked, relookup and
338  *		try again
339  *	 1 = got it, everything still locked
340  */
341 
342 int
343 uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
344     struct vm_anon *anon)
345 {
346 	struct vm_page *pg;
347 	int error;
348 
349 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
350 
351 	/*
352 	 * if we are loaning to "another" anon then it is easy, we just
353 	 * bump the reference count on the current anon and return a
354 	 * pointer to it (it becomes copy-on-write shared).
355 	 */
356 
357 	if (flags & UVM_LOAN_TOANON) {
358 		KASSERT(rw_write_held(anon->an_lock));
359 		pg = anon->an_page;
360 		if (pg && (pg->flags & PG_ANON) != 0 && anon->an_ref == 1) {
361 			if (pg->wire_count > 0) {
362 				UVMHIST_LOG(loanhist, "->A wired %#jx",
363 				    (uintptr_t)pg, 0, 0, 0);
364 				uvmfault_unlockall(ufi,
365 				    ufi->entry->aref.ar_amap,
366 				    ufi->entry->object.uvm_obj);
367 				return (-1);
368 			}
369 			pmap_page_protect(pg, VM_PROT_READ);
370 		}
371 		anon->an_ref++;
372 		**output = anon;
373 		(*output)++;
374 		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
375 		return (1);
376 	}
377 
378 	/*
379 	 * we are loaning to a kernel-page.   we need to get the page
380 	 * resident so we can wire it.   uvmfault_anonget will handle
381 	 * this for us.
382 	 */
383 
384 	KASSERT(rw_write_held(anon->an_lock));
385 	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);
386 
387 	/*
388 	 * if we were unable to get the anon, then uvmfault_anonget has
389 	 * unlocked everything and returned an error code.
390 	 */
391 
392 	if (error) {
393 		UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);
394 		KASSERT(error != ENOLCK);
395 
396 		/* need to refault (i.e. refresh our lookup) ? */
397 		if (error == ERESTART) {
398 			return (0);
399 		}
400 
401 		/* "try again"?   sleep a bit and retry ... */
402 		if (error == EAGAIN) {
403 			kpause("loanagain", false, hz/2, NULL);
404 			return (0);
405 		}
406 
407 		/* otherwise flag it as an error */
408 		return (-1);
409 	}
410 
411 	/*
412 	 * we have the page and its owner locked: do the loan now.
413 	 */
414 
415 	pg = anon->an_page;
416 	if (pg->wire_count > 0) {
417 		UVMHIST_LOG(loanhist, "->K wired %#jx", (uintptr_t)pg, 0, 0, 0);
418 		KASSERT(pg->uobject == NULL);
419 		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL);
420 		return (-1);
421 	}
422 	if (pg->loan_count == 0) {
423 		pmap_page_protect(pg, VM_PROT_READ);
424 	}
425 	uvm_pagelock(pg);
426 	pg->loan_count++;
427 	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
428 	uvm_pageactivate(pg);
429 	uvm_pageunlock(pg);
430 	**output = pg;
431 	(*output)++;
432 
433 	/* unlock and return success */
434 	if (pg->uobject)
435 		rw_exit(pg->uobject->vmobjlock);
436 	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
437 	return (1);
438 }
439 
440 /*
441  * uvm_loanpage: loan out pages to kernel (->K)
442  *
443  * => pages should be object-owned and the object should be locked.
444  * => in the case of error, the object might be unlocked and relocked.
445  * => pages will be unbusied (if busied is true).
446  * => fail with EBUSY if we meet a wired page.
447  */
448 static int
449 uvm_loanpage(struct vm_page **pgpp, int npages, bool busied)
450 {
451 	int i;
452 	int error = 0;
453 
454 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
455 
456 	for (i = 0; i < npages; i++) {
457 		struct vm_page *pg = pgpp[i];
458 
459 		KASSERT(pg->uobject != NULL);
460 		KASSERT(pg->uobject == pgpp[0]->uobject);
461 		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
462 		KASSERT(rw_write_held(pg->uobject->vmobjlock));
463 		KASSERT(busied == ((pg->flags & PG_BUSY) != 0));
464 
465 		if (pg->wire_count > 0) {
466 			UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg,
467 			    0, 0, 0);
468 			error = EBUSY;
469 			break;
470 		}
471 		if (pg->loan_count == 0) {
472 			pmap_page_protect(pg, VM_PROT_READ);
473 		}
474 		uvm_pagelock(pg);
475 		pg->loan_count++;
476 		KASSERT(pg->loan_count > 0);	/* detect wrap-around */
477 		uvm_pageactivate(pg);
478 		uvm_pageunlock(pg);
479 	}
480 
481 	if (busied) {
482 		uvm_page_unbusy(pgpp, npages);
483 	}
484 
485 	if (error) {
486 		/*
487 		 * back out what we've done
488 		 */
489 		krwlock_t *slock = pgpp[0]->uobject->vmobjlock;
490 
491 		rw_exit(slock);
492 		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
493 		rw_enter(slock, RW_WRITER);
494 	}
495 
496 	UVMHIST_LOG(loanhist, "done %jd", error, 0, 0, 0);
497 	return error;
498 }
499 
500 /*
501  * XXX UBC temporary limit on the
502  * number of pages to get at once.
503  * should be <= MAX_READ_AHEAD in genfs_vnops.c
504  */
505 #define	UVM_LOAN_GET_CHUNK	16
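
/*
 * e.g. with UVM_LOAN_GET_CHUNK at 16, a request for 35 pages is
 * satisfied by uvm_loanuobjpages() below in chunks of 16, 16 and 3.
 */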
506 
507 /*
508  * uvm_loanuobjchunk: helper for uvm_loanuobjpages()
509  */
510 static int
511 uvm_loanuobjchunk(struct uvm_object *uobj, voff_t pgoff, int orignpages,
512     struct vm_page **pgpp)
513 {
514 	int error, npages;
515 
516 	rw_enter(uobj->vmobjlock, RW_WRITER);
517  reget:
518  	npages = orignpages;
519 	error = (*uobj->pgops->pgo_get)(uobj, pgoff, pgpp, &npages, 0,
520 	    VM_PROT_READ, 0, PGO_SYNCIO);
521 	switch (error) {
522 	case 0:
523 		KASSERT(npages == orignpages);
524 
525 		/* check for released pages */
526 		rw_enter(uobj->vmobjlock, RW_WRITER);
527 		for (int i = 0; i < npages; i++) {
528 			KASSERT(pgpp[i]->uobject->vmobjlock == uobj->vmobjlock);
529 			if ((pgpp[i]->flags & PG_RELEASED) != 0) {
530 				/*
531 				 * release pages and try again.
532 				 */
533 				uvm_page_unbusy(pgpp, npages);
534 				goto reget;
535 			}
536 		}
537 
538 		/* loan out pages.  they will be unbusied whatever happens. */
539 		error = uvm_loanpage(pgpp, npages, true);
540 		rw_exit(uobj->vmobjlock);
541 		if (error != 0) {
542 			memset(pgpp, 0, sizeof(pgpp[0]) * npages);
543 		}
544 		return error;
545 
546 	case EAGAIN:
547 		kpause("loanuopg", false, hz/2, NULL);
548 		rw_enter(uobj->vmobjlock, RW_WRITER);
549 		goto reget;
550 
551 	default:
552 		return error;
553 	}
554 }
555 
556 /*
557  * uvm_loanuobjpages: loan pages from a uobj out (O->K)
558  *
559  * => uobj shouldn't be locked.  (we'll lock it)
560  * => fail with EBUSY if we meet a wired page.
561  */
562 int
563 uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int npages,
564     struct vm_page **pgpp)
565 {
566 	int ndone, error, chunk;
567 
568 	KASSERT(npages > 0);
569 
570 	memset(pgpp, 0, sizeof(pgpp[0]) * npages);
571 	for (ndone = 0; ndone < npages; ndone += chunk) {
572 		chunk = MIN(UVM_LOAN_GET_CHUNK, npages - ndone);
573 		error = uvm_loanuobjchunk(uobj, pgoff + (ndone << PAGE_SHIFT),
574 		    chunk, pgpp + ndone);
575 		if (error != 0) {
576 			if (ndone != 0) {
577 				uvm_unloan(pgpp, ndone, UVM_LOAN_TOPAGE);
578 			}
579 			break;
580 		}
581 	}
582 
583 	return error;
584 }
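
/*
 * a usage sketch (illustration only: "uobj", "off" and "npages" are
 * assumed, and the caller must hold a reference to the object):
 *
 *	struct vm_page **pgpp;
 *
 *	pgpp = kmem_alloc(npages * sizeof(*pgpp), KM_SLEEP);
 *	error = uvm_loanuobjpages(uobj, off, npages, pgpp);
 *	if (error == 0) {
 *		... use the wired, read-only pages ...
 *		uvm_unloan(pgpp, npages, UVM_LOAN_TOPAGE);
 *	}
 *	kmem_free(pgpp, npages * sizeof(*pgpp));
 */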
585 
586 /*
587  * uvm_loanuobj: loan a page from a uobj out
588  *
589  * => called with map, amap, uobj locked
590  * => return value:
591  *	-1 = fatal error, everything is unlocked, abort.
592  *	 0 = lookup in ufi went stale, everything unlocked, relookup and
593  *		try again
594  *	 1 = got it, everything still locked
595  */
596 
597 static int
598 uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
599 {
600 	struct vm_amap *amap = ufi->entry->aref.ar_amap;
601 	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
602 	struct vm_page *pg;
603 	int error, npages;
604 	bool locked;
605 
606 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
607 
608 	/*
609 	 * first we must make sure the page is resident.
610 	 *
611 	 * XXXCDC: duplicate code with uvm_fault().
612 	 */
613 
614 	/* locked: maps(read), amap(if there) */
615 	rw_enter(uobj->vmobjlock, RW_WRITER);
616 	/* locked: maps(read), amap(if there), uobj */
617 
618 	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
619 		npages = 1;
620 		pg = NULL;
621 		error = (*uobj->pgops->pgo_get)(uobj,
622 		    va - ufi->entry->start + ufi->entry->offset,
623 		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
624 	} else {
625 		error = EIO;		/* must have pgo_get op */
626 	}
627 
628 	/*
629 	 * check the result of the locked pgo_get.  if there is a problem,
630 	 * then we fail the loan.
631 	 */
632 
633 	if (error && error != EBUSY) {
634 		uvmfault_unlockall(ufi, amap, uobj);
635 		return (-1);
636 	}
637 
638 	/*
639 	 * if we need to unlock for I/O, do so now.
640 	 */
641 
642 	if (error == EBUSY) {
643 		uvmfault_unlockall(ufi, amap, NULL);
644 
645 		/* locked: uobj */
646 		npages = 1;
647 		error = (*uobj->pgops->pgo_get)(uobj,
648 		    va - ufi->entry->start + ufi->entry->offset,
649 		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
650 		/* locked: <nothing> */
651 
652 		if (error) {
653 			if (error == EAGAIN) {
654 				kpause("fltagain2", false, hz/2, NULL);
655 				return (0);
656 			}
657 			return (-1);
658 		}
659 
660 		/*
661 		 * pgo_get was a success.   attempt to relock everything.
662 		 */
663 
664 		locked = uvmfault_relock(ufi);
665 		if (locked && amap)
666 			amap_lock(amap, RW_WRITER);
667 		uobj = pg->uobject;
668 		rw_enter(uobj->vmobjlock, RW_WRITER);
669 
670 		/*
671 		 * verify that the page has not been released and re-verify
672 		 * that the amap slot is still free.   if there is a problem we
673 		 * drop our lock (thus forcing a lookup refresh/retry).
674 		 */
675 
676 		if ((pg->flags & PG_RELEASED) != 0 ||
677 		    (locked && amap && amap_lookup(&ufi->entry->aref,
678 		    ufi->orig_rvaddr - ufi->entry->start))) {
679 			if (locked)
680 				uvmfault_unlockall(ufi, amap, NULL);
681 			locked = false;
682 		}
683 
684 		/*
685 		 * unbusy the page.
686 		 */
687 
688 		if ((pg->flags & PG_RELEASED) == 0) {
689 			uvm_pagelock(pg);
690 			uvm_pagewakeup(pg);
691 			uvm_pageunlock(pg);
692 			pg->flags &= ~PG_BUSY;
693 			UVM_PAGE_OWN(pg, NULL);
694 		}
695 
696 		/*
697 		 * didn't get the lock?   release the page and retry.
698 		 */
699 
700  		if (locked == false) {
701 			if (pg->flags & PG_RELEASED) {
702 				uvm_pagefree(pg);
703 			}
704 			rw_exit(uobj->vmobjlock);
705 			return (0);
706 		}
707 	}
708 
709 	/*
710 	 * for tmpfs vnodes, the page will be from a UAO rather than
711 	 * the vnode.  just check that the locks match.
712 	 */
713 
714 	KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
715 
716 	/*
717 	 * at this point we have the page we want ("pg") and we have
718 	 * all data structures locked.  do the loanout.  the page cannot
719 	 * be PG_RELEASED (we caught this above).
720 	 */
721 
722 	if ((flags & UVM_LOAN_TOANON) == 0) {
723 		if (uvm_loanpage(&pg, 1, false)) {
724 			uvmfault_unlockall(ufi, amap, uobj);
725 			return (-1);
726 		}
727 		rw_exit(uobj->vmobjlock);
728 		**output = pg;
729 		(*output)++;
730 		return (1);
731 	}
732 
733 #ifdef notdef
734 	/*
735 	 * must be a loan to an anon.   check to see if there is already
736 	 * an anon associated with this page.  if so, then just return
737 	 * a reference to this object.   the page should already be
738 	 * mapped read-only because it is already on loan.
739 	 */
740 
741 	if (pg->uanon) {
742 		/* XXX: locking */
743 		anon = pg->uanon;
744 		anon->an_ref++;
745 		uvm_pagelock(pg);
746 		uvm_pagewakeup(pg);
747 		uvm_pageunlock(pg);
748 		pg->flags &= ~PG_BUSY;
749 		UVM_PAGE_OWN(pg, NULL);
750 		rw_exit(uobj->vmobjlock);
751 		**output = anon;
752 		(*output)++;
753 		return (1);
754 	}
755 
756 	/*
757 	 * need to allocate a new anon
758 	 */
759 
760 	anon = uvm_analloc();
761 	if (anon == NULL) {
762 		goto fail;
763 	}
764 	if (pg->wire_count > 0) {
765 		UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg, 0, 0, 0);
766 		goto fail;
767 	}
768 	if (pg->loan_count == 0) {
769 		pmap_page_protect(pg, VM_PROT_READ);
770 	}
771 	uvm_pagelock(pg);
772 	pg->loan_count++;
773 	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
774 	pg->uanon = anon;
775 	anon->an_page = pg;
776 	anon->an_lock = /* TODO: share amap lock */
777 	uvm_pageactivate(pg);
778 	uvm_pagewakeup(pg);
779 	uvm_pageunlock(pg);
780 	pg->flags &= ~PG_BUSY;
781 	UVM_PAGE_OWN(pg, NULL);
782 	rw_exit(uobj->vmobjlock);
783 	rw_exit(&anon->an_lock);
784 	**output = anon;
785 	(*output)++;
786 	return (1);
787 
788 fail:
789 	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
790 	/*
791 	 * unlock everything and bail out.
792 	 */
793 	uvm_pagelock(pg);
794 	uvm_pagewakeup(pg);
795 	uvm_pageunlock(pg);
796 	pg->flags &= ~PG_BUSY;
797 	UVM_PAGE_OWN(pg, NULL);
798 	uvmfault_unlockall(ufi, amap, uobj, NULL);
799 	if (anon) {
800 		anon->an_ref--;
801 		uvm_anfree(anon);
802 	}
803 #endif	/* notdef */
804 	return (-1);
805 }
806 
807 /*
808  * uvm_loanzero: loan a zero-fill page out
809  *
810  * => called with map, amap, uobj locked
811  * => return value:
812  *	-1 = fatal error, everything is unlocked, abort.
813  *	 0 = lookup in ufi went stale, everything unlocked, relookup and
814  *		try again
815  *	 1 = got it, everything still locked
816  */
817 
818 static struct uvm_object uvm_loanzero_object;
819 static krwlock_t uvm_loanzero_lock __cacheline_aligned;
820 
821 static int
822 uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
823 {
824 	struct vm_page *pg;
825 	struct vm_amap *amap = ufi->entry->aref.ar_amap;
826 
827 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
828 again:
829 	rw_enter(uvm_loanzero_object.vmobjlock, RW_WRITER);
830 
831 	/*
832 	 * first, get ahold of our single zero page.
833 	 */
834 
835 	pg = uvm_pagelookup(&uvm_loanzero_object, 0);
836 	if (__predict_false(pg == NULL)) {
837 		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
838 					   UVM_PGA_ZERO)) == NULL) {
839 			rw_exit(uvm_loanzero_object.vmobjlock);
840 			uvmfault_unlockall(ufi, amap, NULL);
841 			uvm_wait("loanzero");
842 			if (!uvmfault_relock(ufi)) {
843 				return (0);
844 			}
845 			if (amap) {
846 				amap_lock(amap, RW_WRITER);
847 			}
848 			goto again;
849 		}
850 
851 		/* got a zero'd page. */
852 		pg->flags &= ~(PG_BUSY|PG_FAKE);
853 		pg->flags |= PG_RDONLY;
854 		uvm_pagelock(pg);
855 		uvm_pageactivate(pg);
856 		uvm_pagewakeup(pg);
857 		uvm_pageunlock(pg);
858 		UVM_PAGE_OWN(pg, NULL);
859 	}
860 
861 	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
862 		mutex_enter(&pg->interlock);
863 		pg->loan_count++;
864 		KASSERT(pg->loan_count > 0);	/* detect wrap-around */
865 		mutex_exit(&pg->interlock);
866 		rw_exit(uvm_loanzero_object.vmobjlock);
867 		**output = pg;
868 		(*output)++;
869 		return (1);
870 	}
871 
872 #ifdef notdef
873 	/*
874 	 * loaning to an anon.  check to see if there is already an anon
875 	 * associated with this page.  if so, then just return a reference
876 	 * to this object.
877 	 */
878 
879 	if (pg->uanon) {
880 		anon = pg->uanon;
881 		rw_enter(&anon->an_lock, RW_WRITER);
882 		anon->an_ref++;
883 		rw_exit(&anon->an_lock);
884 		rw_exit(uvm_loanzero_object.vmobjlock);
885 		**output = anon;
886 		(*output)++;
887 		return (1);
888 	}
889 
890 	/*
891 	 * need to allocate a new anon
892 	 */
893 
894 	anon = uvm_analloc();
895 	if (anon == NULL) {
896 		/* out of swap causes us to fail */
897 		rw_exit(uvm_loanzero_object.vmobjlock);
898 		uvmfault_unlockall(ufi, amap, NULL, NULL);
899 		return (-1);
900 	}
901 	anon->an_page = pg;
902 	pg->uanon = anon;
903 	uvm_pagelock(pg);
904 	pg->loan_count++;
905 	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
906 	uvm_pageactivate(pg);
907 	uvm_pageunlock(pg);
908 	rw_exit(&anon->an_lock);
909 	rw_exit(uvm_loanzero_object.vmobjlock);
910 	**output = anon;
911 	(*output)++;
912 	return (1);
913 #else
914 	return (-1);
915 #endif
916 }
917 
918 
919 /*
920  * uvm_unloananon: kill loans on anons (basically a normal ref drop)
921  *
922  * => we expect all our resources to be unlocked
923  */
924 
925 static void
926 uvm_unloananon(struct vm_anon **aloans, int nanons)
927 {
928 #ifdef notdef
929 	struct vm_anon *anon, *to_free = NULL;
930 
931 	/* TODO: locking */
932 	amap_lock(amap, RW_WRITER);
933 	while (nanons-- > 0) {
934 		anon = *aloans++;
935 		if (--anon->an_ref == 0) {
936 			uvm_anfree(anon);
937 		}
938 	}
939 	amap_unlock(amap);
940 #endif	/* notdef */
941 }
942 
943 /*
944  * uvm_unloanpage: kill loans on pages loaned out to the kernel
945  *
946  * => we expect all our resources to be unlocked
947  */
948 
949 static void
950 uvm_unloanpage(struct vm_page **ploans, int npages)
951 {
952 	struct vm_page *pg;
953 	krwlock_t *slock;
954 
955 	while (npages-- > 0) {
956 		pg = *ploans++;
957 
958 		/*
959 		 * do a little dance to acquire the object or anon lock
960 		 * as appropriate.  we are locking in the wrong order,
961 		 * so we have to do a try-lock here.
962 		 */
963 
964 		mutex_enter(&pg->interlock);
965 		slock = NULL;
966 		while (pg->uobject != NULL || pg->uanon != NULL) {
967 			if (pg->uobject != NULL) {
968 				slock = pg->uobject->vmobjlock;
969 			} else {
970 				slock = pg->uanon->an_lock;
971 			}
972 			if (rw_tryenter(slock, RW_WRITER)) {
973 				break;
974 			}
975 			/* XXX Better than yielding but inadequate. */
976 			kpause("livelock", false, 1, &pg->interlock);
977 			slock = NULL;
978 		}
979 
980 		/*
981 		 * drop our loan.  if page is owned by an anon but
982 		 * PG_ANON is not set, the page was loaned to the anon
983 		 * from an object which dropped ownership, so resolve
984 		 * this by turning the anon's loan into real ownership
985 		 * (ie. decrement loan_count again and set PG_ANON).
986 		 * after all this, if there are no loans left, put the
987 		 * page back on a paging queue (if the page is owned by
988 		 * an anon) or free it (if the page is now unowned).
989 		 */
990 
991 		KASSERT(pg->loan_count > 0);
992 		pg->loan_count--;
993 		if (pg->uobject == NULL && pg->uanon != NULL &&
994 		    (pg->flags & PG_ANON) == 0) {
995 			KASSERT(pg->loan_count > 0);
996 			pg->loan_count--;
997 			pg->flags |= PG_ANON;
998 		}
999 		mutex_exit(&pg->interlock);
1000 		if (pg->loan_count == 0 && pg->uobject == NULL &&
1001 		    pg->uanon == NULL) {
1002 			KASSERT((pg->flags & PG_BUSY) == 0);
1003 			uvm_pagefree(pg);
1004 		}
1005 		if (slock != NULL) {
1006 			rw_exit(slock);
1007 		}
1008 	}
1009 }
1010 
1011 /*
1012  * uvm_unloan: kill loans on pages or anons.
1013  */
1014 
1015 void
1016 uvm_unloan(void *v, int npages, int flags)
1017 {
1018 	if (flags & UVM_LOAN_TOANON) {
1019 		uvm_unloananon(v, npages);
1020 	} else {
1021 		uvm_unloanpage(v, npages);
1022 	}
1023 }
1024 
1025 /*
1026  * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
1027  * method, because the page can end up on a paging queue, and the
1028  * page daemon will want to call pgo_put when it encounters the page
1029  * on the inactive list.
1030  */
1031 
1032 static int
1033 ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
1034 {
1035 	struct vm_page *pg;
1036 
1037 	KDASSERT(uobj == &uvm_loanzero_object);
1038 
1039 	/*
1040 	 * Don't need to do any work here if we're not freeing pages.
1041 	 */
1042 
1043 	if ((flags & PGO_FREE) == 0) {
1044 		rw_exit(uobj->vmobjlock);
1045 		return 0;
1046 	}
1047 
1048 	/*
1049 	 * we don't actually want to ever free the uvm_loanzero_page, so
1050 	 * just reactivate or dequeue it.
1051 	 */
1052 
1053 	pg = uvm_pagelookup(uobj, 0);
1054 	KASSERT(pg != NULL);
1055 
1056 	uvm_pagelock(pg);
1057 	if (pg->uanon) {
1058 		uvm_pageactivate(pg);
1059 	} else {
1060 		uvm_pagedequeue(pg);
1061 	}
1062 	uvm_pageunlock(pg);
1063 
1064 	rw_exit(uobj->vmobjlock);
1065 	return 0;
1066 }
1067 
1068 static const struct uvm_pagerops ulz_pager = {
1069 	.pgo_put = ulz_put,
1070 };
1071 
1072 /*
1073  * uvm_loan_init(): initialize the uvm_loan() facility.
1074  */
1075 
1076 void
1077 uvm_loan_init(void)
1078 {
1079 
1080 	rw_init(&uvm_loanzero_lock);
1081 	uvm_obj_init(&uvm_loanzero_object, &ulz_pager, false, 0);
1082 	uvm_obj_setlock(&uvm_loanzero_object, &uvm_loanzero_lock);
1083 
1084 	UVMHIST_INIT(loanhist, 300);
1085 }
1086 
1087 /*
1088  * uvm_loanbreak: break loan on a uobj page
1089  *
1090  * => called with uobj locked
1091  * => the page may be busy; if it's busy, it will be unbusied
1092  * => return value:
1093  *	newly allocated page on success, NULL if allocation failed
1094  */
1095 struct vm_page *
1096 uvm_loanbreak(struct vm_page *uobjpage)
1097 {
1098 	struct vm_page *pg;
1099 	struct uvm_object *uobj __diagused = uobjpage->uobject;
1100 
1101 	KASSERT(uobj != NULL);
1102 	KASSERT(rw_write_held(uobj->vmobjlock));
1103 
1104 	/* alloc new un-owned page */
1105 	pg = uvm_pagealloc(NULL, 0, NULL, 0);
1106 	if (pg == NULL)
1107 		return NULL;
1108 
1109 	/*
1110 	 * copy the data from the old page to the new
1111 	 * one and clear the fake flags on the new page (keep it busy).
1112 	 * force a reload of the old page by clearing it from all
1113 	 * pmaps.
1114 	 * then rename the pages.
1115 	 */
1116 
1117 	uvm_pagecopy(uobjpage, pg);	/* old -> new */
1118 	pg->flags &= ~PG_FAKE;
1119 	KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY);
1120 	pmap_page_protect(uobjpage, VM_PROT_NONE);
1121 	/* uobj still locked */
1122 	if ((uobjpage->flags & PG_BUSY) != 0) {
1123 		uobjpage->flags &= ~PG_BUSY;
1124 		UVM_PAGE_OWN(uobjpage, NULL);
1125 	}
1126 
1127 	/*
1128 	 * if the page is no longer referenced by
1129 	 * an anon (i.e. we are breaking an O->K
1130 	 * loan), then remove it from any pageq's.
1131 	 */
1132 
1133 	uvm_pagelock2(uobjpage, pg);
1134 	uvm_pagewakeup(uobjpage);
1135 	if (uobjpage->uanon == NULL)
1136 		uvm_pagedequeue(uobjpage);
1137 
1138 	/*
1139 	 * replace uobjpage with new page.
1140 	 */
1141 
1142 	uvm_pagereplace(uobjpage, pg);
1143 
1144 	/*
1145 	 * at this point we have absolutely no
1146 	 * control over uobjpage
1147 	 */
1148 
1149 	uvm_pageactivate(pg);
1150 	uvm_pageunlock2(uobjpage, pg);
1151 
1152 	/*
1153 	 * done!  loan is broken and "pg" is
1154 	 * PG_BUSY.   it can now replace uobjpage.
1155 	 */
1156 
1157 	return pg;
1158 }
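
/*
 * typical caller pattern, sketched (cf. the write-fault path in
 * uvm_fault(), the main consumer; locking and retry details are
 * elided for the example):
 *
 *	if (uobjpage->loan_count != 0) {
 *		pg = uvm_loanbreak(uobjpage);
 *		if (pg == NULL) {
 *			... unlock everything, uvm_wait(), refault ...
 *		}
 *		uobjpage = pg;
 *	}
 */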
1159 
1160 int
1161 uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj)
1162 {
1163 	struct vm_page *newpg, *oldpg;
1164 	unsigned oldstatus;
1165 
1166 	KASSERT(rw_write_held(anon->an_lock));
1167 	KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
1168 	KASSERT(anon->an_page->loan_count > 0);
1169 
1170 	/* get new un-owned replacement page */
1171 	newpg = uvm_pagealloc(NULL, 0, NULL, 0);
1172 	if (newpg == NULL) {
1173 		return ENOMEM;
1174 	}
1175 
1176 	oldpg = anon->an_page;
1177 	/* copy old -> new */
1178 	uvm_pagecopy(oldpg, newpg);
1179 	KASSERT(uvm_pagegetdirty(newpg) == UVM_PAGE_STATUS_DIRTY);
1180 
1181 	/* force reload */
1182 	pmap_page_protect(oldpg, VM_PROT_NONE);
1183 	oldstatus = uvm_pagegetdirty(anon->an_page);
1184 
1185 	uvm_pagelock2(oldpg, newpg);
1186 	if (uobj == NULL) {
1187 		/*
1188 		 * we were the lender (A->K); need to remove the page from
1189 		 * pageq's.
1190 		 *
1191 		 * PG_ANON is cleared here and set on the new page below.
1192 		 */
1193 		KASSERT((oldpg->flags & PG_ANON) != 0);
1194 		oldpg->flags &= ~PG_ANON;
1195 		uvm_pagedequeue(oldpg);
1196 	}
1197 	oldpg->uanon = NULL;
1198 
1199 	if (uobj) {
1200 		/* if we were receiver of loan */
1201 		KASSERT((oldpg->flags & PG_ANON) == 0);
1202 		oldpg->loan_count--;
1203 	}
1204 
1205 	/* install new page in anon */
1206 	anon->an_page = newpg;
1207 	newpg->uanon = anon;
1208 	newpg->flags |= PG_ANON;
1209 
1210 	uvm_pageactivate(newpg);
1211 	uvm_pageunlock2(oldpg, newpg);
1212 
1213 	newpg->flags &= ~(PG_BUSY|PG_FAKE);
1214 	UVM_PAGE_OWN(newpg, NULL);
1215 
1216 	if (uobj) {
1217 		rw_exit(uobj->vmobjlock);
1218 	}
1219 
1220 	/* done! */
1221 	kpreempt_disable();
1222 	if (uobj == NULL) {
1223 		CPU_COUNT(CPU_COUNT_ANONUNKNOWN + oldstatus, -1);
1224 	}
1225 	CPU_COUNT(CPU_COUNT_ANONDIRTY, 1);
1226 	kpreempt_enable();
1227 	return 0;
1228 }
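
/*
 * and a matching sketch for the anon side (cf. the upper-layer
 * write-fault path in uvm_fault(), its main consumer; locking and
 * retry details are elided):
 *
 *	if (anon->an_page->loan_count != 0 &&
 *	    uvm_loanbreak_anon(anon, uobj) != 0) {
 *		... unlock everything, uvm_wait(), and refault ...
 *	}
 */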
1229