/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/kern_xio.c,v 1.16 2008/05/09 07:24:45 dillon Exp $
 */
/*
 * Kernel XIO interface.  An initialized XIO is basically a collection of
 * appropriately held vm_page_t's.  XIO buffers are vmspace agnostic and
 * can represent userspace or kernelspace buffers, and can be passed to
 * foreign threads outside of the originating vmspace.  XIO buffers are
 * not mapped into KVM and thus can be manipulated and passed around with
 * very low overheads.
 *
 * The intent is for XIO to be used in the I/O path, VFS, CAPS, and other
 * places that need to pass (possibly userspace) data between threads.
 *
 * TODO: check for busy page when modifying, check writeable.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/xio.h>

#include <cpu/lwbuf.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_page2.h>

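/*
 * Illustrative usage sketch, not part of the original file: the basic
 * XIO life cycle.  A kernel buffer is captured into an XIO, the XIO can
 * then be handed to a foreign thread without any KVM mapping, and the
 * page holds are finally dropped with xio_release().  The function name
 * is hypothetical; the block is disabled so it cannot be mistaken for
 * live code.
 */
#if 0
static int
xio_lifecycle_example(void *kbuf, size_t len)
{
    struct xio xio;
    int error;

    if ((error = xio_init_kbuf(&xio, kbuf, len)) != 0)
        return(error);
    /* ... pass &xio to another thread, e.g. embedded in a message ... */
    xio_release(&xio);          /* drops the holds, sets ENOBUFS */
    return(0);
}
#endif
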
/*
 * Just do basic initialization of an empty XIO
 */
void
xio_init(xio_t xio)
{
    xio->xio_flags = 0;
    xio->xio_bytes = 0;
    xio->xio_error = 0;
    xio->xio_offset = 0;
    xio->xio_npages = 0;
    xio->xio_pages = xio->xio_internal_pages;
}

/*
 * Initialize an XIO given a userspace buffer.  0 is returned on success,
 * an error code on failure.  The actual number of bytes that could be
 * accommodated in the XIO will be stored in xio_bytes and the page offset
 * will be stored in xio_offset.
 */
int
xio_init_ubuf(xio_t xio, void *ubase, size_t ubytes, int flags)
{
    vm_offset_t addr;
    vm_page_t m;
    vm_page_t m0;
    int error;
    int i;
    int n;
    int vmprot;

    addr = trunc_page((vm_offset_t)ubase);
    xio->xio_flags = flags;
    xio->xio_bytes = 0;
    xio->xio_error = 0;
    if (ubytes == 0) {
        xio->xio_offset = 0;
        xio->xio_npages = 0;
    } else {
        vmprot = (flags & XIOF_WRITE) ? VM_PROT_WRITE : VM_PROT_READ;
        xio->xio_offset = (vm_offset_t)ubase & PAGE_MASK;
        xio->xio_pages = xio->xio_internal_pages;
        if ((n = PAGE_SIZE - xio->xio_offset) > ubytes)
            n = ubytes;
        m0 = NULL;
        for (i = 0; n && i < XIO_INTERNAL_PAGES; ++i) {
            m = vm_fault_page_quick(addr, vmprot, &error);
            if (m == NULL)
                break;
            xio->xio_pages[i] = m;
            ubytes -= n;
            xio->xio_bytes += n;
            if ((n = ubytes) > PAGE_SIZE)
                n = PAGE_SIZE;
            addr += PAGE_SIZE;

            /*
             * Check linearity, used by syslink to memory map DMA buffers.
             */
            if (flags & XIOF_VMLINEAR) {
                if (i == 0) {
                    m0 = m;
                } else
                if (m->object != m0->object || m->pindex != m0->pindex + i) {
                    error = EINVAL;
                    break;
                }
            }
        }
        xio->xio_npages = i;

        /*
         * If a failure occurred, clean out what we loaded and return EFAULT.
         * Return 0 on success.  Do not dirty the pages.
         */
        if (i < XIO_INTERNAL_PAGES && n) {
            xio->xio_flags &= ~XIOF_WRITE;
            xio_release(xio);
            xio->xio_error = EFAULT;
        }
    }
    return(xio->xio_error);
}
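
/*
 * Illustrative usage sketch, not part of the original file: capture a
 * user buffer for write access and verify that the whole request fit.
 * xio_init_ubuf() can return 0 yet capture fewer than the requested
 * bytes when the buffer spans more than XIO_INTERNAL_PAGES pages, so a
 * caller that needs an all-or-nothing capture must check xio_bytes.
 * The function name is hypothetical.
 */
#if 0
static int
xio_ubuf_example(xio_t xio, void *ubase, size_t ubytes)
{
    int error;

    error = xio_init_ubuf(xio, ubase, ubytes, XIOF_WRITE);
    if (error == 0 && xio->xio_bytes != ubytes) {
        xio->xio_flags &= ~XIOF_WRITE;  /* nothing written, don't dirty */
        xio_release(xio);
        error = EFAULT;
    }
    return(error);
}
#endif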

/*
 * Initialize an XIO given a kernelspace buffer.  0 is returned on success,
 * an error code on failure.  The actual number of bytes that could be
 * accommodated in the XIO will be stored in xio_bytes and the page offset
 * will be stored in xio_offset.
 */
int
xio_init_kbuf(xio_t xio, void *kbase, size_t kbytes)
{
    vm_offset_t addr;
    vm_paddr_t paddr;
    vm_page_t m;
    int i;
    int n;

    addr = trunc_page((vm_offset_t)kbase);
    xio->xio_flags = 0;
    xio->xio_offset = (vm_offset_t)kbase & PAGE_MASK;
    xio->xio_bytes = 0;
    xio->xio_pages = xio->xio_internal_pages;
    xio->xio_error = 0;
    if ((n = PAGE_SIZE - xio->xio_offset) > kbytes)
        n = kbytes;
    lwkt_gettoken(&vm_token);
    crit_enter();
    for (i = 0; n && i < XIO_INTERNAL_PAGES; ++i) {
        if ((paddr = pmap_kextract(addr)) == 0)
            break;
        m = PHYS_TO_VM_PAGE(paddr);
        vm_page_hold(m);
        xio->xio_pages[i] = m;
        kbytes -= n;
        xio->xio_bytes += n;
        if ((n = kbytes) > PAGE_SIZE)
            n = PAGE_SIZE;
        addr += PAGE_SIZE;
    }
    crit_exit();
    lwkt_reltoken(&vm_token);
    xio->xio_npages = i;

    /*
     * If a failure occurred, clean out what we loaded and return EFAULT.
     * Return 0 on success.
     */
    if (i < XIO_INTERNAL_PAGES && n) {
        xio_release(xio);
        xio->xio_error = EFAULT;
    }
    return(xio->xio_error);
}
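
/*
 * Illustrative usage sketch, not part of the original file: wrap a
 * kernel allocation in an XIO.  No data is copied; the underlying pages
 * are only held, so the buffer must stay allocated until xio_release()
 * has been called.  The function name is hypothetical and M_TEMP is
 * assumed here as the malloc type.
 */
#if 0
static int
xio_kbuf_example(xio_t xio)
{
    char *buf;
    int error;

    buf = kmalloc(PAGE_SIZE * 2, M_TEMP, M_WAITOK);
    if ((error = xio_init_kbuf(xio, buf, PAGE_SIZE * 2)) != 0) {
        kfree(buf, M_TEMP);
        return(error);
    }
    /* ... use the xio; free buf only after xio_release(xio) ... */
    return(0);
}
#endif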

/*
 * Initialize an XIO given an array of vm_page pointers.  The caller is
 * responsible for any modified state changes for the pages.
 */
int
xio_init_pages(xio_t xio, struct vm_page **mbase, int npages, int xflags)
{
    int i;

    KKASSERT(npages <= XIO_INTERNAL_PAGES);

    xio->xio_flags = xflags;
    xio->xio_offset = 0;
    xio->xio_bytes = npages * PAGE_SIZE;
    xio->xio_pages = xio->xio_internal_pages;
    xio->xio_npages = npages;
    xio->xio_error = 0;
    lwkt_gettoken(&vm_token);
    crit_enter();
    for (i = 0; i < npages; ++i) {
        vm_page_hold(mbase[i]);
        xio->xio_pages[i] = mbase[i];
    }
    crit_exit();
    lwkt_reltoken(&vm_token);
    return(0);
}
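
/*
 * Illustrative usage sketch, not part of the original file: alias the
 * page list of an existing XIO in a second XIO.  Each XIO holds the
 * pages independently, so the two may be released in any order.  Note
 * that xio_init_pages() resets xio_offset to 0 and rounds xio_bytes up
 * to whole pages, regardless of the source XIO's offset.
 */
#if 0
static int
xio_alias_example(xio_t src, xio_t dst)
{
    return(xio_init_pages(dst, src->xio_pages, src->xio_npages,
                          src->xio_flags & XIOF_WRITE));
}
#endif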

/*
 * Clean up an XIO so it can be destroyed.  The pages associated with the
 * XIO are released.
 */
void
xio_release(xio_t xio)
{
    int i;
    vm_page_t m;

    lwkt_gettoken(&vm_token);
    crit_enter();
    for (i = 0; i < xio->xio_npages; ++i) {
        m = xio->xio_pages[i];
        if (xio->xio_flags & XIOF_WRITE)
            vm_page_dirty(m);
        vm_page_unhold(m);
    }
    crit_exit();
    lwkt_reltoken(&vm_token);
    xio->xio_offset = 0;
    xio->xio_npages = 0;
    xio->xio_bytes = 0;
    xio->xio_error = ENOBUFS;
}

/*
 * Copy data between an XIO and a UIO.  If the UIO represents userspace it
 * must be relative to the current context.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * The XIO is not modified.  The UIO is updated to reflect the copy.
 *
 * UIO_READ	xio -> uio
 * UIO_WRITE	uio -> xio
 */
int
xio_uio_copy(xio_t xio, int uoffset, struct uio *uio, size_t *sizep)
{
    size_t bytes;
    int error;

    bytes = xio->xio_bytes - uoffset;
    if (bytes > uio->uio_resid)
        bytes = uio->uio_resid;
    KKASSERT(bytes >= 0);
    error = uiomove_fromphys(xio->xio_pages, xio->xio_offset + uoffset,
                             bytes, uio);
    if (error == 0)
        *sizep = bytes;
    else
        *sizep = 0;
    return(error);
}
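
/*
 * Illustrative usage sketch, not part of the original file: a read(2)
 * style path that returns an XIO's contents through a struct uio.  With
 * uio->uio_rw == UIO_READ the data flows xio -> uio, and uio_resid is
 * updated by the copy.  The function name is hypothetical.
 */
#if 0
static int
xio_read_example(xio_t xio, struct uio *uio)
{
    size_t copied;
    int error;

    /* uoffset 0 = start of the XIO's data, not of the first page */
    error = xio_uio_copy(xio, 0, uio, &copied);
    /* on success 'copied' holds the number of bytes transferred */
    return(error);
}
#endif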

/*
 * Copy the specified number of bytes from the xio to a userland
 * buffer.  Return an error code or 0 on success.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * The XIO is not modified.
 */
int
xio_copy_xtou(xio_t xio, int uoffset, void *uptr, int bytes)
{
    int i;
    int n;
    int error;
    int offset;
    vm_page_t m;
    struct lwbuf *lwb;
    struct lwbuf lwb_cache;

    if (uoffset + bytes > xio->xio_bytes)
        return(EFAULT);

    offset = (xio->xio_offset + uoffset) & PAGE_MASK;
    if ((n = PAGE_SIZE - offset) > bytes)
        n = bytes;

    error = 0;
    for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT;
         i < xio->xio_npages;
         ++i
    ) {
        m = xio->xio_pages[i];
        lwb = lwbuf_alloc(m, &lwb_cache);
        error = copyout((char *)lwbuf_kva(lwb) + offset, uptr, n);
        lwbuf_free(lwb);
        if (error)
            break;
        bytes -= n;
        uptr = (char *)uptr + n;
        if (bytes == 0)
            break;
        if ((n = bytes) > PAGE_SIZE)
            n = PAGE_SIZE;
        offset = 0;
    }
    return(error);
}

/*
 * Copy the specified number of bytes from the xio to a kernel
 * buffer.  Return an error code or 0 on success.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * The XIO is not modified.
 */
int
xio_copy_xtok(xio_t xio, int uoffset, void *kptr, int bytes)
{
    int i;
    int n;
    int error;
    int offset;
    vm_page_t m;
    struct lwbuf *lwb;
    struct lwbuf lwb_cache;

    if (bytes + uoffset > xio->xio_bytes)
        return(EFAULT);

    offset = (xio->xio_offset + uoffset) & PAGE_MASK;
    if ((n = PAGE_SIZE - offset) > bytes)
        n = bytes;

    error = 0;
    for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT;
         i < xio->xio_npages;
         ++i
    ) {
        m = xio->xio_pages[i];
        lwb = lwbuf_alloc(m, &lwb_cache);
        bcopy((char *)lwbuf_kva(lwb) + offset, kptr, n);
        lwbuf_free(lwb);
        bytes -= n;
        kptr = (char *)kptr + n;
        if (bytes == 0)
            break;
        if ((n = bytes) > PAGE_SIZE)
            n = PAGE_SIZE;
        offset = 0;
    }
    return(error);
}

/*
 * Copy the specified number of bytes from userland to the xio.
 * Return an error code or 0 on success.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * Data in pages backing the XIO will be modified.
 */
int
xio_copy_utox(xio_t xio, int uoffset, const void *uptr, int bytes)
{
    int i;
    int n;
    int error;
    int offset;
    vm_page_t m;
    struct lwbuf *lwb;
    struct lwbuf lwb_cache;

    if (uoffset + bytes > xio->xio_bytes)
        return(EFAULT);

    offset = (xio->xio_offset + uoffset) & PAGE_MASK;
    if ((n = PAGE_SIZE - offset) > bytes)
        n = bytes;

    error = 0;
    for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT;
         i < xio->xio_npages;
         ++i
    ) {
        m = xio->xio_pages[i];
        lwb = lwbuf_alloc(m, &lwb_cache);
        error = copyin(uptr, (char *)lwbuf_kva(lwb) + offset, n);
        lwbuf_free(lwb);
        if (error)
            break;
        bytes -= n;
        uptr = (const char *)uptr + n;
        if (bytes == 0)
            break;
        if ((n = bytes) > PAGE_SIZE)
            n = PAGE_SIZE;
        offset = 0;
    }
    return(error);
}
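
/*
 * Illustrative usage sketch, not part of the original file: a user-to-
 * user echo through an XIO that was initialized for write access (e.g.
 * via xio_init_ubuf() with XIOF_WRITE or xio_init_pages()).
 * xio_copy_utox() fills the XIO's pages from a user buffer and
 * xio_copy_xtou() copies them back out; both must run in the context
 * that owns the user addresses.  The function name is hypothetical.
 */
#if 0
static int
xio_echo_example(xio_t xio, const void *uin, void *uout, int bytes)
{
    int error;

    if ((error = xio_copy_utox(xio, 0, uin, bytes)) == 0)
        error = xio_copy_xtou(xio, 0, uout, bytes);
    return(error);
}
#endif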

/*
 * Copy the specified number of bytes from the kernel to the xio.
 * Return an error code or 0 on success.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * Data in pages backing the XIO will be modified.
 */
int
xio_copy_ktox(xio_t xio, int uoffset, const void *kptr, int bytes)
{
    int i;
    int n;
    int error;
    int offset;
    vm_page_t m;
    struct lwbuf *lwb;
    struct lwbuf lwb_cache;

    if (uoffset + bytes > xio->xio_bytes)
        return(EFAULT);

    offset = (xio->xio_offset + uoffset) & PAGE_MASK;
    if ((n = PAGE_SIZE - offset) > bytes)
        n = bytes;

    error = 0;
    for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT;
         i < xio->xio_npages;
         ++i
    ) {
        m = xio->xio_pages[i];
        lwb = lwbuf_alloc(m, &lwb_cache);
        bcopy(kptr, (char *)lwbuf_kva(lwb) + offset, n);
        lwbuf_free(lwb);
        bytes -= n;
        kptr = (const char *)kptr + n;
        if (bytes == 0)
            break;
        if ((n = bytes) > PAGE_SIZE)
            n = PAGE_SIZE;
        offset = 0;
    }
    return(error);
}