/* $OpenBSD: vfs_biomem.c,v 1.52 2024/11/05 17:28:31 mpi Exp $ */

/*
 * Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2012-2016,2019 Bob Beck <beck@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/pool.h>
#include <sys/proc.h>		/* XXX for atomic */
#include <sys/mount.h>

#include <uvm/uvm_extern.h>

vaddr_t buf_kva_start, buf_kva_end;
int buf_needva;
TAILQ_HEAD(,buf) buf_valist;

extern struct bcachestats bcstats;

vaddr_t buf_unmap(struct buf *);
void
buf_mem_init(vsize_t size)
{
        TAILQ_INIT(&buf_valist);

        buf_kva_start = vm_map_min(kernel_map);
        if (uvm_map(kernel_map, &buf_kva_start, size, NULL,
            UVM_UNKNOWN_OFFSET, PAGE_SIZE, UVM_MAPFLAG(PROT_NONE,
            PROT_NONE, MAP_INHERIT_NONE, MADV_NORMAL, 0)))
                panic("%s: can't reserve VM for buffers", __func__);
        buf_kva_end = buf_kva_start + size;

        /* Contiguous mapping */
        bcstats.kvaslots = bcstats.kvaslots_avail = size / MAXPHYS;
}

/*
 * buf_acquire and buf_release manage the kvm mappings of buffers.
 */
void
buf_acquire(struct buf *bp)
{
        KASSERT((bp->b_flags & B_BUSY) == 0);
        splassert(IPL_BIO);
        /*
         * Busy before waiting for kvm.
         */
        SET(bp->b_flags, B_BUSY);
        buf_map(bp);
}

/*
 * Acquire a buf but do not map it.  Preserve any mapping it did have.
 */
void
buf_acquire_nomap(struct buf *bp)
{
        splassert(IPL_BIO);
        SET(bp->b_flags, B_BUSY);
        if (bp->b_data != NULL) {
                TAILQ_REMOVE(&buf_valist, bp, b_valist);
                bcstats.kvaslots_avail--;
                bcstats.busymapped++;
        }
}

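/*
 * Map a buffer's pages into kernel virtual address space.  Hand out
 * kva from the pre-reserved region first; once that is exhausted,
 * steal the mapping of another unbusy buffer.  Processes other than
 * the syncer and the cleaner sleep rather than dip into the reserve
 * slots, and everyone sleeps when no mapping is available to steal.
 */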
void
buf_map(struct buf *bp)
{
        vaddr_t va;

        splassert(IPL_BIO);

        if (bp->b_data == NULL) {
                unsigned long i;

                /*
                 * First, just use the pre-allocated space until we run out.
                 */
                if (buf_kva_start < buf_kva_end) {
                        va = buf_kva_start;
                        buf_kva_start += MAXPHYS;
                        bcstats.kvaslots_avail--;
                } else {
                        struct buf *vbp;

                        /*
                         * Find some buffer we can steal the space from.
                         */
                        vbp = TAILQ_FIRST(&buf_valist);
                        while ((curproc != syncerproc &&
                            curproc != cleanerproc &&
                            bcstats.kvaslots_avail <= RESERVE_SLOTS) ||
                            vbp == NULL) {
                                buf_needva++;
                                tsleep_nsec(&buf_needva, PRIBIO, "buf_needva",
                                    INFSLP);
                                vbp = TAILQ_FIRST(&buf_valist);
                        }
                        va = buf_unmap(vbp);
                }

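                /*
                 * Enter the buffer's pages into the newly acquired kva.
                 */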
                for (i = 0; i < atop(bp->b_bufsize); i++) {
                        struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
                            bp->b_poffs + ptoa(i));

                        KASSERT(pg != NULL);

                        pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
                            PROT_READ | PROT_WRITE);
                }
                pmap_update(pmap_kernel());
                bp->b_data = (caddr_t)va;
        } else {
                TAILQ_REMOVE(&buf_valist, bp, b_valist);
                bcstats.kvaslots_avail--;
        }

        bcstats.busymapped++;
}

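/*
 * Release a busy buffer: if it is mapped, return its kva slot to the
 * tail of the free list and wake up anyone waiting for a mapping,
 * then clear B_BUSY.
 */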
void
buf_release(struct buf *bp)
{
        KASSERT(bp->b_flags & B_BUSY);
        splassert(IPL_BIO);

        if (bp->b_data) {
                bcstats.busymapped--;
                TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist);
                bcstats.kvaslots_avail++;
                if (buf_needva) {
                        buf_needva = 0;
                        wakeup(&buf_needva);
                }
        }
        CLR(bp->b_flags, B_BUSY);
}

/*
 * Deallocate all memory resources for this buffer.  We need to be careful
 * not to drop kvm since we have no way to reclaim it.  So, if the buffer
 * has kvm, we need to free it later.  We put it on the front of the
 * freelist just so it gets picked up faster.
 *
 * Also, lots of assertions count on bp->b_data being NULL, so we
 * set it temporarily to NULL.
 *
 * Return non-zero if we take care of the freeing later.
 */
int
buf_dealloc_mem(struct buf *bp)
{
        caddr_t data;

        splassert(IPL_BIO);

        data = bp->b_data;
        bp->b_data = NULL;

        if (data) {
                if (bp->b_flags & B_BUSY)
                        bcstats.busymapped--;
                pmap_kremove((vaddr_t)data, bp->b_bufsize);
                pmap_update(pmap_kernel());
        }

        if (bp->b_pobj)
                buf_free_pages(bp);

        if (data == NULL)
                return (0);

        bp->b_data = data;
        if (!(bp->b_flags & B_BUSY)) {  /* XXX - need better test */
                TAILQ_REMOVE(&buf_valist, bp, b_valist);
                bcstats.kvaslots_avail--;
        } else {
                CLR(bp->b_flags, B_BUSY);
                if (buf_needva) {
                        buf_needva = 0;
                        wakeup(&buf_needva);
                }
        }
        SET(bp->b_flags, B_RELEASED);
        TAILQ_INSERT_HEAD(&buf_valist, bp, b_valist);
        bcstats.kvaslots_avail++;

        return (1);
}

/*
 * Only used by bread_cluster.
 */
void
buf_fix_mapping(struct buf *bp, vsize_t newsize)
{
        vaddr_t va = (vaddr_t)bp->b_data;

        if (newsize < bp->b_bufsize) {
                pmap_kremove(va + newsize, bp->b_bufsize - newsize);
                pmap_update(pmap_kernel());
                /*
                 * Note: the size we lose here actually belongs to the
                 * other buffers read in by bread_cluster.
                 */
                bp->b_bufsize = newsize;
        }
}

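/*
 * Strip the kva mapping from an unbusy buffer and return the virtual
 * address it occupied.  A buffer already marked B_RELEASED is freed
 * back to the buf pool here.
 */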
vaddr_t
buf_unmap(struct buf *bp)
{
        vaddr_t va;

        KASSERT((bp->b_flags & B_BUSY) == 0);
        KASSERT(bp->b_data != NULL);
        splassert(IPL_BIO);

        TAILQ_REMOVE(&buf_valist, bp, b_valist);
        bcstats.kvaslots_avail--;
        va = (vaddr_t)bp->b_data;
        bp->b_data = NULL;
        pmap_kremove(va, bp->b_bufsize);
        pmap_update(pmap_kernel());

        if (bp->b_flags & B_RELEASED)
                pool_put(&bufpool, bp);

        return (va);
}

/* Always allocates in dma-reachable memory */
void
buf_alloc_pages(struct buf *bp, vsize_t size)
{
        int i;

        KASSERT(size == round_page(size));
        KASSERT(bp->b_pobj == NULL);
        KASSERT(bp->b_data == NULL);
        splassert(IPL_BIO);

        uvm_obj_init(&bp->b_uobj, &bufcache_pager, 1);

        /*
         * Attempt to allocate with NOWAIT.  If we can't, then throw
         * away some clean pages and try again.  Finally, if that
         * fails, do a WAITOK allocation so the page daemon can find
         * memory for us.
         */
        do {
                i = uvm_pagealloc_multi(&bp->b_uobj, 0, size,
                    UVM_PLA_NOWAIT | UVM_PLA_NOWAKE);
                if (i == 0)
                        break;
        } while (bufbackoff(&dma_constraint, size) >= size);
        if (i != 0)
                i = uvm_pagealloc_multi(&bp->b_uobj, 0, size,
                    UVM_PLA_WAITOK);
        /* should not happen */
        if (i != 0)
                panic("uvm_pagealloc_multi unable to allocate a buf_object "
                    "of size %lu", size);

        bcstats.numbufpages += atop(size);
        bcstats.dmapages += atop(size);
        SET(bp->b_flags, B_DMA);
        bp->b_pobj = &bp->b_uobj;
        bp->b_poffs = 0;
        bp->b_bufsize = size;
}

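/*
 * Free the pages backing a buffer.  The buffer must already be
 * unmapped; the pages are unwired and released together with their
 * uvm object.
 */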
void
buf_free_pages(struct buf *bp)
{
        struct uvm_object *uobj = bp->b_pobj;
        struct vm_page *pg;
        voff_t off, i;

        KASSERT(bp->b_data == NULL);
        KASSERT(uobj != NULL);
        splassert(IPL_BIO);

        off = bp->b_poffs;
        bp->b_pobj = NULL;
        bp->b_poffs = 0;

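        /*
         * Unwire each page and adjust the statistics; the pages are
         * freed together with the object below.
         */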
        for (i = 0; i < atop(bp->b_bufsize); i++) {
                pg = uvm_pagelookup(uobj, off + ptoa(i));
                KASSERT(pg != NULL);
                KASSERT(pg->wire_count == 1);
                pg->wire_count = 0;
                bcstats.numbufpages--;
                if (ISSET(bp->b_flags, B_DMA))
                        bcstats.dmapages--;
        }
        CLR(bp->b_flags, B_DMA);

        /* XXX refactor to do this without splbio later */
        uvm_obj_free(uobj);
}

/* Reallocate a buf into a particular pmem range specified by "where". */
int
buf_realloc_pages(struct buf *bp, struct uvm_constraint_range *where,
    int flags)
{
        vsize_t size;
        vaddr_t va;
        int dma;
        int i, r;

        KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT));

        splassert(IPL_BIO);
        KASSERT(ISSET(bp->b_flags, B_BUSY));
        dma = ISSET(bp->b_flags, B_DMA);

        /* if the original buf is mapped, unmap it */
        if (bp->b_data != NULL) {
                va = (vaddr_t)bp->b_data;
                pmap_kremove(va, bp->b_bufsize);
                pmap_update(pmap_kernel());
        }

        do {
                r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
                    bp->b_bufsize, UVM_PLA_NOWAIT | UVM_PLA_NOWAKE, where);
                if (r == 0)
                        break;
                size = atop(bp->b_bufsize);
        } while (bufbackoff(where, size) >= size);

        /*
         * bufbackoff() failed, so there's no more we can do without
         * waiting.  If allowed to, make that attempt.
         */
        if (r != 0 && (flags & UVM_PLA_WAITOK))
                r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
                    bp->b_bufsize, flags, where);

        /*
         * If the allocation has succeeded, we may be somewhere different.
         * If the allocation has failed, we are in the same place.
         *
         * We still have to re-map the buffer before returning.
         */

        /* take it out of dma stats until we know where we are */
        if (dma)
                bcstats.dmapages -= atop(bp->b_bufsize);

        dma = 1;
        /* if the original buf was mapped, re-map it */
        for (i = 0; i < atop(bp->b_bufsize); i++) {
                struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
                    bp->b_poffs + ptoa(i));
                KASSERT(pg != NULL);
                if (!PADDR_IS_DMA_REACHABLE(VM_PAGE_TO_PHYS(pg)))
                        dma = 0;
                if (bp->b_data != NULL) {
                        pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
                            PROT_READ|PROT_WRITE);
                        pmap_update(pmap_kernel());
                }
        }
        if (dma) {
                SET(bp->b_flags, B_DMA);
                bcstats.dmapages += atop(bp->b_bufsize);
        } else
                CLR(bp->b_flags, B_DMA);
        return (r);
}