/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/kern_xio.c,v 1.16 2008/05/09 07:24:45 dillon Exp $
 */
/*
 * Kernel XIO interface.  An initialized XIO is basically a collection of
 * appropriately held vm_page_t's.  XIO buffers are vmspace agnostic and
 * can represent userspace or kernelspace buffers, and can be passed to
 * foreign threads outside of the originating vmspace.  XIO buffers are
 * not mapped into KVM and thus can be manipulated and passed around with
 * very low overheads.
 *
 * The intent is for XIO to be used in the I/O path, VFS, CAPS, and other
 * places that need to pass (possibly userspace) data between threads.
 *
 * TODO: check for busy page when modifying, check writeable.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/xio.h>
#include <sys/sfbuf.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_page2.h>

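/*
 * Example: a minimal sketch of the XIO lifecycle described at the top of
 * this file, assuming a source buffer that fits in XIO_INTERNAL_PAGES
 * pages.  The XIO holds the pages backing 'src' while the data is copied
 * into 'dst', then releases them.  This helper is illustrative only and
 * is not part of the XIO API.
 */
static int
xio_example_roundtrip(void *src, void *dst, int len)
{
    struct xio xio;
    int error;

    error = xio_init_kbuf(&xio, src, len);      /* hold pages backing src */
    if (error == 0) {
        error = xio_copy_xtok(&xio, 0, dst, len); /* xio -> kernel buffer */
        xio_release(&xio);                      /* drop the page holds */
    }
    return (error);
}
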
/*
 * Just do basic initialization of an empty XIO
 */
void
xio_init(xio_t xio)
{
    xio->xio_flags = 0;
    xio->xio_bytes = 0;
    xio->xio_error = 0;
    xio->xio_offset = 0;
    xio->xio_npages = 0;
    xio->xio_pages = xio->xio_internal_pages;
}

/*
 * Initialize an XIO given a userspace buffer.  0 is returned on success,
 * an error code on failure.  The actual number of bytes that could be
 * accommodated in the XIO will be stored in xio_bytes and the page offset
 * will be stored in xio_offset.
 */
int
xio_init_ubuf(xio_t xio, void *ubase, size_t ubytes, int flags)
{
    vm_offset_t addr;
    vm_page_t m;
    vm_page_t m0;
    int error;
    int i;
    int n;
    int vmprot;

    addr = trunc_page((vm_offset_t)ubase);
    xio->xio_flags = flags;
    xio->xio_bytes = 0;
    xio->xio_error = 0;
    if (ubytes == 0) {
        xio->xio_offset = 0;
        xio->xio_npages = 0;
    } else {
        vmprot = (flags & XIOF_WRITE) ? VM_PROT_WRITE : VM_PROT_READ;
        xio->xio_offset = (vm_offset_t)ubase & PAGE_MASK;
        xio->xio_pages = xio->xio_internal_pages;
        if ((n = PAGE_SIZE - xio->xio_offset) > ubytes)
            n = ubytes;
        m0 = NULL;
        for (i = 0; n && i < XIO_INTERNAL_PAGES; ++i) {
            m = vm_fault_page_quick(addr, vmprot, &error);
            if (m == NULL)
                break;
            xio->xio_pages[i] = m;
            ubytes -= n;
            xio->xio_bytes += n;
            if ((n = ubytes) > PAGE_SIZE)
                n = PAGE_SIZE;
            addr += PAGE_SIZE;

            /*
             * Check linearity, used by syslink to memory map DMA buffers.
             */
            if (flags & XIOF_VMLINEAR) {
                if (i == 0) {
                    m0 = m;
                } else
                if (m->object != m0->object || m->pindex != m0->pindex + i) {
                    error = EINVAL;
                    break;
                }
            }
        }
        xio->xio_npages = i;

        /*
         * If a failure occurred, clean out what we loaded and return EFAULT.
         * Return 0 on success.  Do not dirty the pages.
         */
        if (i < XIO_INTERNAL_PAGES && n) {
            xio->xio_flags &= ~XIOF_WRITE;
            xio_release(xio);
            xio->xio_error = EFAULT;
        }
    }
    return(xio->xio_error);
}

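/*
 * Example: a minimal sketch wrapping a user buffer with xio_init_ubuf().
 * Passing 0 for flags faults the pages for read access; XIOF_WRITE would
 * fault them writable and cause xio_release() to dirty them.  Illustrative
 * only.
 */
static int
xio_example_wrap_user(struct xio *xio, void *ubase, size_t ubytes)
{
    int error;

    error = xio_init_ubuf(xio, ubase, ubytes, 0);       /* 0 = read access */
    if (error)
        return (error);
    /*
     * If the buffer spans more than XIO_INTERNAL_PAGES pages, xio_bytes
     * will be less than ubytes and the caller is expected to loop on the
     * remainder (not shown).
     */
    return (0);
}
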
/*
 * Initialize an XIO given a kernelspace buffer.  0 is returned on success,
 * an error code on failure.  The actual number of bytes that could be
 * accommodated in the XIO will be stored in xio_bytes and the page offset
 * will be stored in xio_offset.
 */
int
xio_init_kbuf(xio_t xio, void *kbase, size_t kbytes)
{
    vm_offset_t addr;
    vm_paddr_t paddr;
    vm_page_t m;
    int i;
    int n;

    addr = trunc_page((vm_offset_t)kbase);
    xio->xio_flags = 0;
    xio->xio_offset = (vm_offset_t)kbase & PAGE_MASK;
    xio->xio_bytes = 0;
    xio->xio_pages = xio->xio_internal_pages;
    xio->xio_error = 0;
    if ((n = PAGE_SIZE - xio->xio_offset) > kbytes)
        n = kbytes;
    for (i = 0; n && i < XIO_INTERNAL_PAGES; ++i) {
        if ((paddr = pmap_kextract(addr)) == 0)
            break;
        crit_enter();
        m = PHYS_TO_VM_PAGE(paddr);
        vm_page_hold(m);
        crit_exit();
        xio->xio_pages[i] = m;
        kbytes -= n;
        xio->xio_bytes += n;
        if ((n = kbytes) > PAGE_SIZE)
            n = PAGE_SIZE;
        addr += PAGE_SIZE;
    }
    xio->xio_npages = i;

    /*
     * If a failure occurred, clean out what we loaded and return EFAULT.
     * Return 0 on success.
     */
    if (i < XIO_INTERNAL_PAGES && n) {
        xio_release(xio);
        xio->xio_error = EFAULT;
    }
    return(xio->xio_error);
}

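/*
 * Example: a minimal sketch pairing xio_init_kbuf() with kmalloc()ed
 * memory.  The buffer must be mapped by the kernel pmap so that
 * pmap_kextract() can resolve it, which holds for ordinary kmalloc
 * storage.  M_TEMP is used purely for illustration; the buffer must stay
 * allocated until the XIO is released, after which the caller kfree()s it.
 */
static int
xio_example_wrap_kmalloc(struct xio *xio, void **kbufp, size_t bytes)
{
    void *kbuf;
    int error;

    kbuf = kmalloc(bytes, M_TEMP, M_WAITOK);
    error = xio_init_kbuf(xio, kbuf, bytes);
    if (error) {
        kfree(kbuf, M_TEMP);
        kbuf = NULL;
    }
    *kbufp = kbuf;      /* caller frees this after xio_release() */
    return (error);
}
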
/*
 * Initialize an XIO given an array of vm_page pointers.  The caller is
 * responsible for any modified state changes for the pages.
 */
int
xio_init_pages(xio_t xio, struct vm_page **mbase, int npages, int xflags)
{
    int i;

    KKASSERT(npages <= XIO_INTERNAL_PAGES);

    xio->xio_flags = xflags;
    xio->xio_offset = 0;
    xio->xio_bytes = 0;
    xio->xio_pages = xio->xio_internal_pages;
    xio->xio_npages = npages;
    xio->xio_error = 0;
    crit_enter();
    for (i = 0; i < npages; ++i) {
        vm_page_hold(mbase[i]);
        xio->xio_pages[i] = mbase[i];
    }
    crit_exit();
    return(0);
}

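/*
 * Example: a minimal sketch wrapping pages the caller already owns, e.g.
 * pages backing a struct buf.  xio_init_pages() takes its own hold on
 * each page, so the caller's existing references remain the caller's
 * responsibility.  Note that this init variant leaves xio_bytes at 0,
 * so the byte-limited copy routines in this file will see an empty XIO
 * unless the caller sets xio_bytes itself.  Illustrative only.
 */
static int
xio_example_wrap_pages(struct xio *xio, vm_page_t *pages, int npages)
{
    /* npages larger than XIO_INTERNAL_PAGES trips the KKASSERT */
    return (xio_init_pages(xio, pages, npages, 0));
}
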
/*
 * Cleanup an XIO so it can be destroyed.  The pages associated with the
 * XIO are released.
 */
void
xio_release(xio_t xio)
{
    int i;
    vm_page_t m;

    crit_enter();
    for (i = 0; i < xio->xio_npages; ++i) {
        m = xio->xio_pages[i];
        if (xio->xio_flags & XIOF_WRITE)
            vm_page_dirty(m);
        vm_page_unhold(m);
    }
    crit_exit();
    xio->xio_offset = 0;
    xio->xio_npages = 0;
    xio->xio_bytes = 0;
    xio->xio_error = ENOBUFS;
}

/*
 * Copy data between an XIO and a UIO.  If the UIO represents userspace it
 * must be relative to the current context.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * The XIO is not modified.  The UIO is updated to reflect the copy.
 *
 * UIO_READ     xio -> uio
 * UIO_WRITE    uio -> xio
 */
int
xio_uio_copy(xio_t xio, int uoffset, struct uio *uio, size_t *sizep)
{
    size_t bytes;
    int error;

    bytes = xio->xio_bytes - uoffset;
    if (bytes > uio->uio_resid)
        bytes = uio->uio_resid;
    KKASSERT(bytes >= 0);
    error = uiomove_fromphys(xio->xio_pages, xio->xio_offset + uoffset,
                             bytes, uio);
    if (error == 0)
        *sizep = bytes;
    else
        *sizep = 0;
    return(error);
}

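/*
 * Example: a minimal sketch servicing a read(2)-style request from an
 * initialized XIO via xio_uio_copy().  With uio->uio_rw == UIO_READ the
 * data flows xio -> uio; the uio's resid and offset advance while the
 * XIO itself is left untouched, so it can be copied from repeatedly.
 */
static int
xio_example_read(struct xio *xio, struct uio *uio)
{
    size_t copied;
    int error;

    /* uoffset 0: start at the logical beginning of the XIO's data */
    error = xio_uio_copy(xio, 0, uio, &copied);
    /* copied may be short if uio_resid was smaller than xio_bytes */
    return (error);
}
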
/*
 * Copy the specified number of bytes from the xio to a userland
 * buffer.  Return an error code or 0 on success.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * The XIO is not modified.
 */
int
xio_copy_xtou(xio_t xio, int uoffset, void *uptr, int bytes)
{
    int i;
    int n;
    int error;
    int offset;
    vm_page_t m;
    struct sf_buf *sf;

    if (uoffset + bytes > xio->xio_bytes)
        return(EFAULT);

    offset = (xio->xio_offset + uoffset) & PAGE_MASK;
    if ((n = PAGE_SIZE - offset) > bytes)
        n = bytes;

    error = 0;
    for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT;
         i < xio->xio_npages;
         ++i
    ) {
        m = xio->xio_pages[i];
        sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
        error = copyout((char *)sf_buf_kva(sf) + offset, uptr, n);
        sf_buf_free(sf);
        if (error)
            break;
        bytes -= n;
        uptr = (char *)uptr + n;
        if (bytes == 0)
            break;
        if ((n = bytes) > PAGE_SIZE)
            n = PAGE_SIZE;
        offset = 0;
    }
    return(error);
}

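/*
 * Example: a minimal sketch copying an XIO out to userspace in two
 * halves, showing that uoffset addresses the XIO's logical data stream
 * independently of the page-relative xio_offset.  Illustrative only.
 */
static int
xio_example_xtou_split(struct xio *xio, void *uptr)
{
    int half = xio->xio_bytes / 2;
    int error;

    error = xio_copy_xtou(xio, 0, uptr, half);
    if (error == 0) {
        error = xio_copy_xtou(xio, half, (char *)uptr + half,
                              xio->xio_bytes - half);
    }
    return (error);
}
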
/*
 * Copy the specified number of bytes from the xio to a kernel
 * buffer.  Return an error code or 0 on success.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * The XIO is not modified.
 */
int
xio_copy_xtok(xio_t xio, int uoffset, void *kptr, int bytes)
{
    int i;
    int n;
    int error;
    int offset;
    vm_page_t m;
    struct sf_buf *sf;

    if (bytes + uoffset > xio->xio_bytes)
        return(EFAULT);

    offset = (xio->xio_offset + uoffset) & PAGE_MASK;
    if ((n = PAGE_SIZE - offset) > bytes)
        n = bytes;

    error = 0;
    for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT;
         i < xio->xio_npages;
         ++i
    ) {
        m = xio->xio_pages[i];
        sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
        bcopy((char *)sf_buf_kva(sf) + offset, kptr, n);
        sf_buf_free(sf);
        bytes -= n;
        kptr = (char *)kptr + n;
        if (bytes == 0)
            break;
        if ((n = bytes) > PAGE_SIZE)
            n = PAGE_SIZE;
        offset = 0;
    }
    return(error);
}

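/*
 * Example: a minimal sketch staging the front of an XIO into a kernel
 * bounce buffer, e.g. to inspect a header before deciding what to do
 * with the rest of the data.  Illustrative only.
 */
static int
xio_example_peek_header(struct xio *xio, void *hdr, int hdrlen)
{
    if (hdrlen > xio->xio_bytes)
        return (EFAULT);
    return (xio_copy_xtok(xio, 0, hdr, hdrlen));
}
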
/*
 * Copy the specified number of bytes from userland to the xio.
 * Return an error code or 0 on success.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * Data in pages backing the XIO will be modified.
 */
int
xio_copy_utox(xio_t xio, int uoffset, const void *uptr, int bytes)
{
    int i;
    int n;
    int error;
    int offset;
    vm_page_t m;
    struct sf_buf *sf;

    if (uoffset + bytes > xio->xio_bytes)
        return(EFAULT);

    offset = (xio->xio_offset + uoffset) & PAGE_MASK;
    if ((n = PAGE_SIZE - offset) > bytes)
        n = bytes;

    error = 0;
    for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT;
         i < xio->xio_npages;
         ++i
    ) {
        m = xio->xio_pages[i];
        sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
        error = copyin(uptr, (char *)sf_buf_kva(sf) + offset, n);
        sf_buf_free(sf);
        if (error)
            break;
        bytes -= n;
        uptr = (const char *)uptr + n;
        if (bytes == 0)
            break;
        if ((n = bytes) > PAGE_SIZE)
            n = PAGE_SIZE;
        offset = 0;
    }
    return(error);
}

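/*
 * Example: a minimal sketch filling an XIO from a user buffer in the
 * current context.  The XIO's backing pages are written, so an XIO that
 * wraps a user buffer should have been created with XIOF_WRITE; as the
 * TODO at the top of this file notes, writability is not verified here.
 * Illustrative only.
 */
static int
xio_example_fill_from_user(struct xio *xio, const void *uptr, int bytes)
{
    if (bytes > xio->xio_bytes)
        return (EFAULT);
    return (xio_copy_utox(xio, 0, uptr, bytes));
}
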
/*
 * Copy the specified number of bytes from the kernel to the xio.
 * Return an error code or 0 on success.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * Data in pages backing the XIO will be modified.
 */
int
xio_copy_ktox(xio_t xio, int uoffset, const void *kptr, int bytes)
{
    int i;
    int n;
    int error;
    int offset;
    vm_page_t m;
    struct sf_buf *sf;

    if (uoffset + bytes > xio->xio_bytes)
        return(EFAULT);

    offset = (xio->xio_offset + uoffset) & PAGE_MASK;
    if ((n = PAGE_SIZE - offset) > bytes)
        n = bytes;

    error = 0;
    for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT;
         i < xio->xio_npages;
         ++i
    ) {
        m = xio->xio_pages[i];
        sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
        bcopy(kptr, (char *)sf_buf_kva(sf) + offset, n);
        sf_buf_free(sf);
        bytes -= n;
        kptr = (const char *)kptr + n;
        if (bytes == 0)
            break;
        if ((n = bytes) > PAGE_SIZE)
            n = PAGE_SIZE;
        offset = 0;
    }
    return(error);
}
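
/*
 * Example: a minimal sketch stamping a kernel-built header in front of a
 * payload inside an XIO, using uoffset to place the second copy
 * immediately after the first.  Illustrative only.
 */
static int
xio_example_stamp(struct xio *xio, const void *hdr, int hdrlen,
                  const void *payload, int paylen)
{
    int error;

    error = xio_copy_ktox(xio, 0, hdr, hdrlen);
    if (error == 0)
        error = xio_copy_ktox(xio, hdrlen, payload, paylen);
    return (error);
}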