/*	$OpenBSD: vfs_biomem.c,v 1.52 2024/11/05 17:28:31 mpi Exp $ */

/*
 * Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2012-2016,2019 Bob Beck <beck@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/pool.h>
#include <sys/proc.h>		/* XXX for atomic */
#include <sys/mount.h>

#include <uvm/uvm_extern.h>

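/*
 * Kernel virtual address range reserved for buffer mappings
 * (buf_kva_start advances as pre-allocated slots are handed out),
 * a counter of threads sleeping for a free mapping, and the list of
 * unbusy buffers whose mappings may be reused or stolen.
 */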
vaddr_t buf_kva_start, buf_kva_end;
int buf_needva;
TAILQ_HEAD(,buf) buf_valist;

extern struct bcachestats bcstats;

vaddr_t buf_unmap(struct buf *);

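/*
 * Reserve a contiguous range of kernel virtual address space for
 * buffer mappings and size the kva slot accounting to match.
 */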
void
buf_mem_init(vsize_t size)
{
	TAILQ_INIT(&buf_valist);

	buf_kva_start = vm_map_min(kernel_map);
	if (uvm_map(kernel_map, &buf_kva_start, size, NULL,
	    UVM_UNKNOWN_OFFSET, PAGE_SIZE, UVM_MAPFLAG(PROT_NONE,
	    PROT_NONE, MAP_INHERIT_NONE, MADV_NORMAL, 0)))
		panic("%s: can't reserve VM for buffers", __func__);
	buf_kva_end = buf_kva_start + size;

	/* Contiguous mapping */
	bcstats.kvaslots = bcstats.kvaslots_avail = size / MAXPHYS;
}

/*
 * buf_acquire and buf_release manage the kvm mappings of buffers.
 */
void
buf_acquire(struct buf *bp)
{
	KASSERT((bp->b_flags & B_BUSY) == 0);
	splassert(IPL_BIO);
	/*
	 * Busy before waiting for kvm.
	 */
	SET(bp->b_flags, B_BUSY);
	buf_map(bp);
}

/*
 * Acquire a buf but do not map it. Preserve any mapping it did have.
 */
void
buf_acquire_nomap(struct buf *bp)
{
	splassert(IPL_BIO);
	SET(bp->b_flags, B_BUSY);
	if (bp->b_data != NULL) {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
		bcstats.busymapped++;
	}
}

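/*
 * Map the pages of a buffer into the reserved buffer kva.  Use the
 * pre-allocated range while it lasts; after that, steal the mapping of
 * an unbusy mapped buffer, sleeping while the available slots are down
 * to the reserve (except for the syncer and cleaner) or no victim exists.
 */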
void
buf_map(struct buf *bp)
{
	vaddr_t va;

	splassert(IPL_BIO);

	if (bp->b_data == NULL) {
		unsigned long i;

		/*
		 * First, just use the pre-allocated space until we run out.
		 */
		if (buf_kva_start < buf_kva_end) {
			va = buf_kva_start;
			buf_kva_start += MAXPHYS;
			bcstats.kvaslots_avail--;
		} else {
			struct buf *vbp;

			/*
			 * Find some buffer we can steal the space from.
			 */
			vbp = TAILQ_FIRST(&buf_valist);
			while ((curproc != syncerproc &&
			    curproc != cleanerproc &&
			    bcstats.kvaslots_avail <= RESERVE_SLOTS) ||
			    vbp == NULL) {
				buf_needva++;
				tsleep_nsec(&buf_needva, PRIBIO, "buf_needva",
				    INFSLP);
				vbp = TAILQ_FIRST(&buf_valist);
			}
			va = buf_unmap(vbp);
		}

		for (i = 0; i < atop(bp->b_bufsize); i++) {
			struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
			    bp->b_poffs + ptoa(i));

			KASSERT(pg != NULL);

			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    PROT_READ | PROT_WRITE);
		}
		pmap_update(pmap_kernel());
		bp->b_data = (caddr_t)va;
	} else {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	}

	bcstats.busymapped++;
}

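/*
 * Release a busy buffer: put its mapping (if any) back on the tail of
 * the reuse list, wake any threads waiting for kva, and clear B_BUSY.
 */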
void
buf_release(struct buf *bp)
{
	KASSERT(bp->b_flags & B_BUSY);
	splassert(IPL_BIO);

	if (bp->b_data) {
		bcstats.busymapped--;
		TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail++;
		if (buf_needva) {
			buf_needva = 0;
			wakeup(&buf_needva);
		}
	}
	CLR(bp->b_flags, B_BUSY);
}

/*
 * Deallocate all memory resources for this buffer. We need to be careful
 * to not drop kvm since we have no way to reclaim it. So, if the buffer
 * has kvm, we need to free it later. We put it on the front of the
 * freelist just so it gets picked up faster.
 *
 * Also, lots of assertions count on bp->b_data being NULL, so we
 * set it temporarily to NULL.
 *
 * Return non-zero if we take care of the freeing later.
 */
int
buf_dealloc_mem(struct buf *bp)
{
	caddr_t data;

	splassert(IPL_BIO);

	data = bp->b_data;
	bp->b_data = NULL;

	if (data) {
		if (bp->b_flags & B_BUSY)
			bcstats.busymapped--;
		pmap_kremove((vaddr_t)data, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	if (bp->b_pobj)
		buf_free_pages(bp);

	if (data == NULL)
		return (0);

	bp->b_data = data;
	if (!(bp->b_flags & B_BUSY)) {		/* XXX - need better test */
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	} else {
		CLR(bp->b_flags, B_BUSY);
		if (buf_needva) {
			buf_needva = 0;
			wakeup(&buf_needva);
		}
	}
	SET(bp->b_flags, B_RELEASED);
	TAILQ_INSERT_HEAD(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail++;

	return (1);
}

/*
 * Only used by bread_cluster.
 */
void
buf_fix_mapping(struct buf *bp, vsize_t newsize)
{
	vaddr_t va = (vaddr_t)bp->b_data;

	if (newsize < bp->b_bufsize) {
		pmap_kremove(va + newsize, bp->b_bufsize - newsize);
		pmap_update(pmap_kernel());
		/*
		 * Note: the size we lost is actually with the other
		 * buffers read in by bread_cluster
		 */
		bp->b_bufsize = newsize;
	}
}

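/*
 * Strip the kvm mapping from an unbusy mapped buffer and hand its va
 * back to the caller for reuse.  Frees the buf itself if it was already
 * marked B_RELEASED by buf_dealloc_mem().
 */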
vaddr_t
buf_unmap(struct buf *bp)
{
	vaddr_t va;

	KASSERT((bp->b_flags & B_BUSY) == 0);
	KASSERT(bp->b_data != NULL);
	splassert(IPL_BIO);

	TAILQ_REMOVE(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail--;
	va = (vaddr_t)bp->b_data;
	bp->b_data = NULL;
	pmap_kremove(va, bp->b_bufsize);
	pmap_update(pmap_kernel());

	if (bp->b_flags & B_RELEASED)
		pool_put(&bufpool, bp);

	return (va);
}

/* Always allocates in dma-reachable memory */
void
buf_alloc_pages(struct buf *bp, vsize_t size)
{
	int i;

	KASSERT(size == round_page(size));
	KASSERT(bp->b_pobj == NULL);
	KASSERT(bp->b_data == NULL);
	splassert(IPL_BIO);

	uvm_obj_init(&bp->b_uobj, &bufcache_pager, 1);

	/*
	 * Attempt to allocate with NOWAIT. If we can't, then throw
	 * away some clean pages and try again. Finally, if that
	 * fails, do a WAITOK allocation so the page daemon can find
	 * memory for us.
	 */
	do {
		i = uvm_pagealloc_multi(&bp->b_uobj, 0, size,
		    UVM_PLA_NOWAIT | UVM_PLA_NOWAKE);
		if (i == 0)
			break;
	} while (bufbackoff(&dma_constraint, size) >= size);
	if (i != 0)
		i = uvm_pagealloc_multi(&bp->b_uobj, 0, size,
		    UVM_PLA_WAITOK);
	/* should not happen */
	if (i != 0)
		panic("uvm_pagealloc_multi unable to allocate a buf_object "
		    "of size %lu", size);

	bcstats.numbufpages += atop(size);
	bcstats.dmapages += atop(size);
	SET(bp->b_flags, B_DMA);
	bp->b_pobj = &bp->b_uobj;
	bp->b_poffs = 0;
	bp->b_bufsize = size;
}

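/*
 * Unwire and free every page backing the buffer, fix up the buffer
 * cache page counters, and tear down the buffer's uvm object.
 */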
void
buf_free_pages(struct buf *bp)
{
	struct uvm_object *uobj = bp->b_pobj;
	struct vm_page *pg;
	voff_t off, i;

	KASSERT(bp->b_data == NULL);
	KASSERT(uobj != NULL);
	splassert(IPL_BIO);

	off = bp->b_poffs;
	bp->b_pobj = NULL;
	bp->b_poffs = 0;

	for (i = 0; i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(uobj, off + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		bcstats.numbufpages--;
		if (ISSET(bp->b_flags, B_DMA))
			bcstats.dmapages--;
	}
	CLR(bp->b_flags, B_DMA);

	/* XXX refactor to do this without splbio later */
	uvm_obj_free(uobj);
}

/* Reallocate a buf into a particular pmem range specified by "where". */
int
buf_realloc_pages(struct buf *bp, struct uvm_constraint_range *where,
    int flags)
{
	vsize_t size;
	vaddr_t va;
	int dma;
	int i, r;

	KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT));

	splassert(IPL_BIO);
	KASSERT(ISSET(bp->b_flags, B_BUSY));
	dma = ISSET(bp->b_flags, B_DMA);

	/* if the original buf is mapped, unmap it */
	if (bp->b_data != NULL) {
		va = (vaddr_t)bp->b_data;
		pmap_kremove(va, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

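	/*
	 * As in buf_alloc_pages(), first try to move the pages without
	 * waiting, backing off the buffer cache between attempts.
	 */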
	do {
		r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
		    bp->b_bufsize, UVM_PLA_NOWAIT | UVM_PLA_NOWAKE, where);
		if (r == 0)
			break;
		size = atop(bp->b_bufsize);
	} while (bufbackoff(where, size) >= size);

	/*
	 * bufbackoff() failed, so there's no more we can do without
	 * waiting.  If allowed to, make that attempt.
	 */
	if (r != 0 && (flags & UVM_PLA_WAITOK))
		r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
		    bp->b_bufsize, flags, where);

	/*
	 * If the allocation has succeeded, we may be somewhere different.
	 * If the allocation has failed, we are in the same place.
	 *
	 * We still have to re-map the buffer before returning.
	 */

	/* take it out of dma stats until we know where we are */
	if (dma)
		bcstats.dmapages -= atop(bp->b_bufsize);

	dma = 1;
	/* recheck DMA reachability and, if the original buf was mapped, re-map it */
	for (i = 0; i < atop(bp->b_bufsize); i++) {
		struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
		    bp->b_poffs + ptoa(i));
		KASSERT(pg != NULL);
		if (!PADDR_IS_DMA_REACHABLE(VM_PAGE_TO_PHYS(pg)))
			dma = 0;
		if (bp->b_data != NULL) {
			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    PROT_READ|PROT_WRITE);
			pmap_update(pmap_kernel());
		}
	}
	if (dma) {
		SET(bp->b_flags, B_DMA);
		bcstats.dmapages += atop(bp->b_bufsize);
	} else
		CLR(bp->b_flags, B_DMA);
	return (r);
}