xref: /dragonfly/sys/kern/kern_device.c (revision dcd37f7d)
1 /*
2  * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
3  * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
4  * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elischer,
5  *							All rights reserved.
6  * Copyright (c) 1982, 1986, 1991, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  * $DragonFly: src/sys/kern/kern_device.c,v 1.27 2007/07/23 18:59:50 dillon Exp $
31  */
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/sysctl.h>
36 #include <sys/systm.h>
37 #include <sys/module.h>
38 #include <sys/malloc.h>
39 #include <sys/conf.h>
40 #include <sys/bio.h>
41 #include <sys/buf.h>
42 #include <sys/vnode.h>
43 #include <sys/queue.h>
44 #include <sys/device.h>
45 #include <sys/tree.h>
46 #include <sys/syslink_rpc.h>
47 #include <sys/proc.h>
48 #include <machine/stdarg.h>
49 #include <sys/thread2.h>
50 #include <sys/devfs.h>
51 #include <sys/dsched.h>
52 
53 /*
54  * system link descriptors identify the command in the
55  * arguments structure.
56  */
57 #define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)
58 
59 #define DEVOP_DESC_INIT(name)						\
60 	    struct syslink_desc DDESCNAME(name) = {			\
61 		__offsetof(struct dev_ops, __CONCAT(d_, name)),	\
62 	    #name }
63 
64 DEVOP_DESC_INIT(default);
65 DEVOP_DESC_INIT(open);
66 DEVOP_DESC_INIT(close);
67 DEVOP_DESC_INIT(read);
68 DEVOP_DESC_INIT(write);
69 DEVOP_DESC_INIT(ioctl);
70 DEVOP_DESC_INIT(dump);
71 DEVOP_DESC_INIT(psize);
72 DEVOP_DESC_INIT(mmap);
73 DEVOP_DESC_INIT(strategy);
74 DEVOP_DESC_INIT(kqfilter);
75 DEVOP_DESC_INIT(revoke);
76 DEVOP_DESC_INIT(clone);
77 
78 /*
79  * Misc default ops
80  */
81 struct dev_ops dead_dev_ops;
82 
83 struct dev_ops default_dev_ops = {
84 	{ "null" },
85 	.d_default = NULL,	/* must be NULL */
86 	.d_open = noopen,
87 	.d_close = noclose,
88 	.d_read = noread,
89 	.d_write = nowrite,
90 	.d_ioctl = noioctl,
91 	.d_mmap = nommap,
92 	.d_strategy = nostrategy,
93 	.d_dump = nodump,
94 	.d_psize = nopsize,
95 	.d_kqfilter = nokqfilter,
96 	.d_revoke = norevoke,
97 	.d_clone = noclone
98 };
99 
100 /************************************************************************
101  *			GENERAL DEVICE API FUNCTIONS			*
102  ************************************************************************/
103 
104 int
105 dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred)
106 {
107 	struct dev_open_args ap;
108 
109 	ap.a_head.a_desc = &dev_open_desc;
110 	ap.a_head.a_dev = dev;
111 	ap.a_oflags = oflags;
112 	ap.a_devtype = devtype;
113 	ap.a_cred = cred;
114 	return(dev->si_ops->d_open(&ap));
115 }
116 
117 int
118 dev_dclose(cdev_t dev, int fflag, int devtype)
119 {
120 	struct dev_close_args ap;
121 
122 	ap.a_head.a_desc = &dev_close_desc;
123 	ap.a_head.a_dev = dev;
124 	ap.a_fflag = fflag;
125 	ap.a_devtype = devtype;
126 	return(dev->si_ops->d_close(&ap));
127 }
128 
129 int
130 dev_dread(cdev_t dev, struct uio *uio, int ioflag)
131 {
132 	struct dev_read_args ap;
133 	int error;
134 
135 	ap.a_head.a_desc = &dev_read_desc;
136 	ap.a_head.a_dev = dev;
137 	ap.a_uio = uio;
138 	ap.a_ioflag = ioflag;
139 	error = dev->si_ops->d_read(&ap);
140 	if (error == 0)
141 		dev->si_lastread = time_second;
142 	return (error);
143 }
144 
145 int
146 dev_dwrite(cdev_t dev, struct uio *uio, int ioflag)
147 {
148 	struct dev_write_args ap;
149 	int error;
150 
151 	dev->si_lastwrite = time_second;
152 	ap.a_head.a_desc = &dev_write_desc;
153 	ap.a_head.a_dev = dev;
154 	ap.a_uio = uio;
155 	ap.a_ioflag = ioflag;
156 	error = dev->si_ops->d_write(&ap);
157 	return (error);
158 }
159 
160 int
161 dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred,
162 	   struct sysmsg *msg)
163 {
164 	struct dev_ioctl_args ap;
165 
166 	ap.a_head.a_desc = &dev_ioctl_desc;
167 	ap.a_head.a_dev = dev;
168 	ap.a_cmd = cmd;
169 	ap.a_data = data;
170 	ap.a_fflag = fflag;
171 	ap.a_cred = cred;
172 	ap.a_sysmsg = msg;
173 	return(dev->si_ops->d_ioctl(&ap));
174 }
175 
176 int
177 dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot)
178 {
179 	struct dev_mmap_args ap;
180 	int error;
181 
182 	ap.a_head.a_desc = &dev_mmap_desc;
183 	ap.a_head.a_dev = dev;
184 	ap.a_offset = offset;
185 	ap.a_nprot = nprot;
186 	error = dev->si_ops->d_mmap(&ap);
187 	if (error == 0)
188 		return(ap.a_result);
189 	return(-1);
190 }
191 
192 int
193 dev_dclone(cdev_t dev)
194 {
195 	struct dev_clone_args ap;
196 
197 	ap.a_head.a_desc = &dev_clone_desc;
198 	ap.a_head.a_dev = dev;
199 	return (dev->si_ops->d_clone(&ap));
200 }
201 
202 int
203 dev_drevoke(cdev_t dev)
204 {
205 	struct dev_revoke_args ap;
206 
207 	ap.a_head.a_desc = &dev_revoke_desc;
208 	ap.a_head.a_dev = dev;
209 	return (dev->si_ops->d_revoke(&ap));
210 }
211 
212 /*
213  * Core device strategy call, used to issue I/O on a device.  There are
214  * two versions, a non-chained version and a chained version.  The chained
215  * version reuses a BIO set up by vn_strategy().  The only difference is
216  * that, for now, we do not push a new tracking structure when chaining
217  * from vn_strategy.  XXX this will ultimately have to change.
218  */
219 void
220 dev_dstrategy(cdev_t dev, struct bio *bio)
221 {
222 	struct dev_strategy_args ap;
223 	struct bio_track *track;
224 
225 	ap.a_head.a_desc = &dev_strategy_desc;
226 	ap.a_head.a_dev = dev;
227 	ap.a_bio = bio;
228 
229 	KKASSERT(bio->bio_track == NULL);
230 	KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
231 	if (bio->bio_buf->b_cmd == BUF_CMD_READ)
232 	    track = &dev->si_track_read;
233 	else
234 	    track = &dev->si_track_write;
235 	bio_track_ref(track);
236 	bio->bio_track = track;
237 
238 	if (dsched_is_clear_buf_priv(bio->bio_buf))
239 		dsched_new_buf(bio->bio_buf);
240 
241 	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
242 	(void)dev->si_ops->d_strategy(&ap);
243 }
244 
245 void
246 dev_dstrategy_chain(cdev_t dev, struct bio *bio)
247 {
248 	struct dev_strategy_args ap;
249 
250 	ap.a_head.a_desc = &dev_strategy_desc;
251 	ap.a_head.a_dev = dev;
252 	ap.a_bio = bio;
253 
254 	KKASSERT(bio->bio_track != NULL);
255 	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
256 	(void)dev->si_ops->d_strategy(&ap);
257 }
258 
259 /*
260  * note: the disk layer is expected to set count, blkno, and secsize before
261  * forwarding the message.
262  */
263 int
264 dev_ddump(cdev_t dev, void *virtual, vm_offset_t physical, off_t offset,
265     size_t length)
266 {
267 	struct dev_dump_args ap;
268 
269 	ap.a_head.a_desc = &dev_dump_desc;
270 	ap.a_head.a_dev = dev;
271 	ap.a_count = 0;
272 	ap.a_blkno = 0;
273 	ap.a_secsize = 0;
274 	ap.a_virtual = virtual;
275 	ap.a_physical = physical;
276 	ap.a_offset = offset;
277 	ap.a_length = length;
278 	return(dev->si_ops->d_dump(&ap));
279 }
280 
281 int64_t
282 dev_dpsize(cdev_t dev)
283 {
284 	struct dev_psize_args ap;
285 	int error;
286 
287 	ap.a_head.a_desc = &dev_psize_desc;
288 	ap.a_head.a_dev = dev;
289 	error = dev->si_ops->d_psize(&ap);
290 	if (error == 0)
291 		return (ap.a_result);
292 	return(-1);
293 }
294 
295 /*
296  * Pass-thru to the device kqfilter.
297  *
298  * NOTE: We explicitly preset a_result to 0 so d_kqfilter() functions
299  *	 which return 0 do not have to bother setting a_result.
300  */
301 int
302 dev_dkqfilter(cdev_t dev, struct knote *kn)
303 {
304 	struct dev_kqfilter_args ap;
305 	int error;
306 
307 	ap.a_head.a_desc = &dev_kqfilter_desc;
308 	ap.a_head.a_dev = dev;
309 	ap.a_kn = kn;
310 	ap.a_result = 0;
311 	error = dev->si_ops->d_kqfilter(&ap);
312 	if (error == 0)
313 		return(ap.a_result);
314 	return(ENODEV);
315 }
316 
317 /************************************************************************
318  *			DEVICE HELPER FUNCTIONS				*
319  ************************************************************************/
320 
321 /*
322  * MPSAFE
323  */
324 int
325 dev_drefs(cdev_t dev)
326 {
327     return(dev->si_sysref.refcnt);
328 }
329 
330 /*
331  * MPSAFE
332  */
333 const char *
334 dev_dname(cdev_t dev)
335 {
336     return(dev->si_ops->head.name);
337 }
338 
339 /*
340  * MPSAFE
341  */
342 int
343 dev_dflags(cdev_t dev)
344 {
345     return(dev->si_ops->head.flags);
346 }
347 
348 /*
349  * MPSAFE
350  */
351 int
352 dev_dmaj(cdev_t dev)
353 {
354     return(dev->si_ops->head.maj);
355 }
356 
357 /*
358  * Used when forwarding a request through layers.  The caller adjusts
359  * ap->a_head.a_dev and then calls this function.
360  */
361 int
362 dev_doperate(struct dev_generic_args *ap)
363 {
364     int (*func)(struct dev_generic_args *);
365 
366     func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);
367     return (func(ap));
368 }
369 
370 /*
371  * Used by the console intercept code only.  Issue an operation through
372  * a foreign ops structure allowing the ops structure associated
373  * with the device to remain intact.
374  */
375 int
376 dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
377 {
378     int (*func)(struct dev_generic_args *);
379 
380     func = *(void **)((char *)ops + ap->a_desc->sd_offset);
381     return (func(ap));
382 }
383 
384 /*
385  * Convert a template dev_ops into the real thing by filling in
386  * uninitialized fields.
387  */
388 void
389 compile_dev_ops(struct dev_ops *ops)
390 {
391 	int offset;
392 
393 	for (offset = offsetof(struct dev_ops, dev_ops_first_field);
394 	     offset <= offsetof(struct dev_ops, dev_ops_last_field);
395 	     offset += sizeof(void *)
396 	) {
397 		void **func_p = (void **)((char *)ops + offset);
398 		void **def_p = (void **)((char *)&default_dev_ops + offset);
399 		if (*func_p == NULL) {
400 			if (ops->d_default)
401 				*func_p = ops->d_default;
402 			else
403 				*func_p = *def_p;
404 		}
405 	}
406 }
407 
408 /************************************************************************
409  *			MAJOR/MINOR SPACE FUNCTION 			*
410  ************************************************************************/
411 
412 /*
413  * This makes a dev_ops entry visible to userland (e.g /dev/<blah>).
414  *
415  * Disk devices typically register their major, e.g. 'ad0', and then call
416  * into the disk label management code which overloads its own onto e.g. 'ad0'
417  * to support all the various slice and partition combinations.
418  *
419  * The mask/match supplied in this call are a full 32 bits and the same
420  * mask and match must be specified in a later dev_ops_remove() call to
421  * match this add.  However, the match value for the minor number should never
422  * have any bits set in the major number's bit range (8-15).  The mask value
423  * may be conveniently specified as -1 without creating any major number
424  * interference.
425  */
426 
427 static
428 int
429 rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
430 {
431     if (a->maj < b->maj)
432 	return(-1);
433     else if (a->maj > b->maj)
434 	return(1);
435     return(0);
436 }
437 
/*
 * Instantiate the tree code for dev_ops_rb_tree: nodes are struct
 * dev_ops_maj linked through their rbnode member and ordered by
 * rb_dev_ops_compare().  The trailing (int, maj) arguments presumably
 * request the additional lookup-by-key helpers that RB_GENERATE2
 * provides -- confirm against <sys/tree.h>.
 */
RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj);

/* Tree head, statically initialized via RB_INITIALIZER. */
struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);
441 
/*
 * Thin wrapper around devfs_destroy_dev_by_ops() that passes -1 as the
 * minor argument.  Returns that function's result.
 */
int
dev_ops_remove_all(struct dev_ops *ops)
{
	int rc;

	rc = devfs_destroy_dev_by_ops(ops, -1);
	return (rc);
}
447 
/*
 * Thin wrapper around devfs_destroy_dev_by_ops() that forwards the
 * given minor number.  Returns that function's result.
 */
int
dev_ops_remove_minor(struct dev_ops *ops, int minor)
{
	int rc;

	rc = devfs_destroy_dev_by_ops(ops, minor);
	return (rc);
}
453 
454 struct dev_ops *
455 dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
456 {
457 	struct dev_ops *oops = dev->si_ops;
458 
459 	compile_dev_ops(iops);
460 	iops->head.maj = oops->head.maj;
461 	iops->head.data = oops->head.data;
462 	iops->head.flags = oops->head.flags;
463 	dev->si_ops = iops;
464 	dev->si_flags |= SI_INTERCEPTED;
465 
466 	return (oops);
467 }
468 
469 void
470 dev_ops_restore(cdev_t dev, struct dev_ops *oops)
471 {
472 	struct dev_ops *iops = dev->si_ops;
473 
474 	dev->si_ops = oops;
475 	dev->si_flags &= ~SI_INTERCEPTED;
476 	iops->head.maj = 0;
477 	iops->head.data = NULL;
478 	iops->head.flags = 0;
479 }
480 
481 /************************************************************************
482  *			DEFAULT DEV OPS FUNCTIONS			*
483  ************************************************************************/
484 
485 
/*
 * Unsupported devswitch functions (e.g. for writing to read-only device).
 * XXX may belong elsewhere.
 */

/*
 * Default revoke handler: takes no action and reports success.
 */
int
norevoke(struct dev_revoke_args *ap)
{
	(void)ap;

	return 0;
}
496 
/*
 * Default clone handler: takes no action and returns 0, which allows
 * the clone.
 */
int
noclone(struct dev_clone_args *ap)
{
	(void)ap;

	return 0;
}
503 
/*
 * Default open handler: always fails with ENODEV.
 */
int
noopen(struct dev_open_args *ap)
{
	(void)ap;

	return ENODEV;
}
509 
/*
 * Default close handler: always fails with ENODEV.
 */
int
noclose(struct dev_close_args *ap)
{
	(void)ap;

	return ENODEV;
}
515 
/*
 * Default read handler: always fails with ENODEV.
 */
int
noread(struct dev_read_args *ap)
{
	(void)ap;

	return ENODEV;
}
521 
/*
 * Default write handler: always fails with ENODEV.
 */
int
nowrite(struct dev_write_args *ap)
{
	(void)ap;

	return ENODEV;
}
527 
/*
 * Default ioctl handler: always fails with ENODEV.
 */
int
noioctl(struct dev_ioctl_args *ap)
{
	(void)ap;

	return ENODEV;
}
533 
/*
 * Default kqfilter handler: always fails with ENODEV.
 */
int
nokqfilter(struct dev_kqfilter_args *ap)
{
	(void)ap;

	return ENODEV;
}
539 
/*
 * Default mmap handler: always fails with ENODEV.
 */
int
nommap(struct dev_mmap_args *ap)
{
	(void)ap;

	return ENODEV;
}
545 
546 int
547 nostrategy(struct dev_strategy_args *ap)
548 {
549 	struct bio *bio = ap->a_bio;
550 
551 	bio->bio_buf->b_flags |= B_ERROR;
552 	bio->bio_buf->b_error = EOPNOTSUPP;
553 	biodone(bio);
554 	return(0);
555 }
556 
557 int
558 nopsize(struct dev_psize_args *ap)
559 {
560 	ap->a_result = 0;
561 	return(0);
562 }
563 
/*
 * Default dump handler: always fails with ENODEV.
 */
int
nodump(struct dev_dump_args *ap)
{
	(void)ap;

	return ENODEV;
}
569 
/*
 * Open handler that unconditionally succeeds.
 *
 * XXX this is probably bogus.  Any device that uses it isn't checking the
 * minor number.
 */
int
nullopen(struct dev_open_args *ap)
{
	(void)ap;

	return 0;
}
579 
/*
 * Close handler that unconditionally succeeds.
 */
int
nullclose(struct dev_close_args *ap)
{
	(void)ap;

	return 0;
}
585 
586