xref: /dragonfly/sys/kern/kern_device.c (revision 4362c066)
/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elischer,
 *							All rights reserved.
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/device.h>
#include <sys/tree.h>
#include <sys/syslink_rpc.h>
#include <sys/proc.h>
#include <sys/dsched.h>
#include <sys/devfs.h>

#include <machine/stdarg.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>

/*
 * System link descriptors identify the command in the
 * arguments structure.
 */
#define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)

#define DEVOP_DESC_INIT(name)						\
	    struct syslink_desc DDESCNAME(name) = {			\
		__offsetof(struct dev_ops, __CONCAT(d_, name)),	\
	    #name }
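
/*
 * For illustration only: DEVOP_DESC_INIT(open) expands to roughly the
 * following, binding the method name to the byte offset of its function
 * pointer slot within struct dev_ops:
 *
 *	struct syslink_desc dev_open_desc = {
 *		__offsetof(struct dev_ops, d_open),
 *		"open"
 *	};
 */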

DEVOP_DESC_INIT(default);
DEVOP_DESC_INIT(open);
DEVOP_DESC_INIT(close);
DEVOP_DESC_INIT(read);
DEVOP_DESC_INIT(write);
DEVOP_DESC_INIT(ioctl);
DEVOP_DESC_INIT(dump);
DEVOP_DESC_INIT(psize);
DEVOP_DESC_INIT(mmap);
DEVOP_DESC_INIT(mmap_single);
DEVOP_DESC_INIT(strategy);
DEVOP_DESC_INIT(kqfilter);
DEVOP_DESC_INIT(revoke);
DEVOP_DESC_INIT(clone);

/*
 * Misc default ops
 */
struct dev_ops dead_dev_ops;

static d_open_t		noopen;
static d_close_t	noclose;
static d_read_t		noread;
static d_write_t	nowrite;
static d_ioctl_t	noioctl;
static d_mmap_t		nommap;
static d_mmap_single_t	nommap_single;
static d_strategy_t	nostrategy;
static d_dump_t		nodump;
static d_psize_t	nopsize;
static d_kqfilter_t	nokqfilter;
static d_clone_t	noclone;
static d_revoke_t	norevoke;

struct dev_ops default_dev_ops = {
	{ "null" },
	.d_default = NULL,	/* must be NULL */
	.d_open = noopen,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_mmap = nommap,
	.d_mmap_single = nommap_single,
	.d_strategy = nostrategy,
	.d_dump = nodump,
	.d_psize = nopsize,
	.d_kqfilter = nokqfilter,
	.d_revoke = norevoke,
	.d_clone = noclone
};

static __inline
int
dev_needmplock(cdev_t dev)
{
    return((dev->si_ops->head.flags & D_MPSAFE) == 0);
}
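
/*
 * Illustrative sketch (hypothetical driver, not part of this file): a
 * driver declares its entry points MPSAFE by setting D_MPSAFE in the
 * dev_ops head.  That makes dev_needmplock() return 0, so the dispatch
 * functions below skip the get_mplock()/rel_mplock() bracketing:
 *
 *	static struct dev_ops mydev_ops = {
 *		{ "mydev", 0, D_MPSAFE },
 *		.d_open = mydev_open,
 *		.d_read = mydev_read,
 *		.d_write = mydev_write,
 *	};
 */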

/************************************************************************
 *			GENERAL DEVICE API FUNCTIONS			*
 ************************************************************************
 *
 * The MPSAFEness of these depends on dev->si_ops->head.flags
 */
int
dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred, struct file *fp)
{
	struct dev_open_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_open_desc;
	ap.a_head.a_dev = dev;
	ap.a_oflags = oflags;
	ap.a_devtype = devtype;
	ap.a_cred = cred;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_open(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dclose(cdev_t dev, int fflag, int devtype, struct file *fp)
{
	struct dev_close_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_close_desc;
	ap.a_head.a_dev = dev;
	ap.a_fflag = fflag;
	ap.a_devtype = devtype;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_close(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dread(cdev_t dev, struct uio *uio, int ioflag, struct file *fp)
{
	struct dev_read_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_read_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_read(&ap);
	if (needmplock)
		rel_mplock();
	if (error == 0)
		dev->si_lastread = time_uptime;
	return (error);
}

int
dev_dwrite(cdev_t dev, struct uio *uio, int ioflag, struct file *fp)
{
	struct dev_write_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	dev->si_lastwrite = time_uptime;
	ap.a_head.a_desc = &dev_write_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_write(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred,
	   struct sysmsg *msg, struct file *fp)
{
	struct dev_ioctl_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_ioctl_desc;
	ap.a_head.a_dev = dev;
	ap.a_cmd = cmd;
	ap.a_data = data;
	ap.a_fflag = fflag;
	ap.a_cred = cred;
	ap.a_sysmsg = msg;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_ioctl(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int64_t
dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot, struct file *fp)
{
	struct dev_mmap_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_mmap_desc;
	ap.a_head.a_dev = dev;
	ap.a_offset = offset;
	ap.a_nprot = nprot;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_mmap(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return(ap.a_result);
	return(-1);
}

int
dev_dmmap_single(cdev_t dev, vm_ooffset_t *offset, vm_size_t size,
                 struct vm_object **object, int nprot, struct file *fp)
{
	struct dev_mmap_single_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_mmap_single_desc;
	ap.a_head.a_dev = dev;
	ap.a_offset = offset;
	ap.a_size = size;
	ap.a_object = object;
	ap.a_nprot = nprot;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_mmap_single(&ap);
	if (needmplock)
		rel_mplock();

	return(error);
}

int
dev_dclone(cdev_t dev)
{
	struct dev_clone_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_clone_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_clone(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_drevoke(cdev_t dev)
{
	struct dev_revoke_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_revoke_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_revoke(&ap);
	if (needmplock)
		rel_mplock();

	return (error);
}

/*
 * Core device strategy call, used to issue I/O on a device.  There are
 * two versions, a non-chained version and a chained version.  The chained
 * version reuses a BIO set up by vn_strategy().  The only difference is
 * that, for now, we do not push a new tracking structure when chaining
 * from vn_strategy.  XXX this will ultimately have to change.
 */
void
dev_dstrategy(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	struct bio_track *track;
	int needmplock = dev_needmplock(dev);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	KKASSERT(bio->bio_track == NULL);
	KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
	if (bio->bio_buf->b_cmd == BUF_CMD_READ)
	    track = &dev->si_track_read;
	else
	    track = &dev->si_track_write;
	bio_track_ref(track);
	bio->bio_track = track;
	dsched_buf_enter(bio->bio_buf);	/* might stack */

	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	if (needmplock)
		get_mplock();
	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}

void
dev_dstrategy_chain(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	int needmplock = dev_needmplock(dev);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	KKASSERT(bio->bio_track != NULL);
	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	if (needmplock)
		get_mplock();
	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}
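
/*
 * Illustrative sketch (hypothetical caller; buffer setup is an assumption,
 * not taken from this file): issuing a read through dev_dstrategy() with a
 * freshly initialized buffer.  The chained variant above would instead be
 * used when re-dispatching a BIO that vn_strategy() already set up with a
 * tracking structure.
 *
 *	struct buf *bp = getpbuf(NULL);
 *
 *	bp->b_cmd = BUF_CMD_READ;
 *	bp->b_bio1.bio_offset = offset;		(byte offset on the device)
 *	bp->b_bio1.bio_done = mydev_iodone;	(completion callback)
 *	dev_dstrategy(dev, &bp->b_bio1);
 */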

/*
 * note: the disk layer is expected to set count, blkno, and secsize before
 * forwarding the message.
 */
int
dev_ddump(cdev_t dev, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
	struct dev_dump_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_dump_desc;
	ap.a_head.a_dev = dev;
	ap.a_count = 0;
	ap.a_blkno = 0;
	ap.a_secsize = 0;
	ap.a_virtual = virtual;
	ap.a_physical = physical;
	ap.a_offset = offset;
	ap.a_length = length;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_dump(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int64_t
dev_dpsize(cdev_t dev)
{
	struct dev_psize_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_psize_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_psize(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return (ap.a_result);
	return(-1);
}

/*
 * Pass-thru to the device kqfilter.
 *
 * NOTE: We explicitly preset a_result to 0 so d_kqfilter() functions
 *	 which return 0 do not have to bother setting a_result.
 */
int
dev_dkqfilter(cdev_t dev, struct knote *kn, struct file *fp)
{
	struct dev_kqfilter_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_kqfilter_desc;
	ap.a_head.a_dev = dev;
	ap.a_kn = kn;
	ap.a_result = 0;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_kqfilter(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return(ap.a_result);
	return(ENODEV);
}

/************************************************************************
 *			DEVICE HELPER FUNCTIONS				*
 ************************************************************************/

/*
 * MPSAFE
 */
int
dev_drefs(cdev_t dev)
{
    return(dev->si_sysref.refcnt);
}

/*
 * MPSAFE
 */
const char *
dev_dname(cdev_t dev)
{
    return(dev->si_ops->head.name);
}

/*
 * MPSAFE
 */
int
dev_dflags(cdev_t dev)
{
    return(dev->si_ops->head.flags);
}

/*
 * MPSAFE
 */
int
dev_dmaj(cdev_t dev)
{
    return(dev->si_ops->head.maj);
}

/*
 * Used when forwarding a request through layers.  The caller adjusts
 * ap->a_head.a_dev and then calls this function.
 */
int
dev_doperate(struct dev_generic_args *ap)
{
    int (*func)(struct dev_generic_args *);
    int needmplock = dev_needmplock(ap->a_dev);
    int error;

    func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);

    if (needmplock)
	    get_mplock();
    error = func(ap);
    if (needmplock)
	    rel_mplock();

    return (error);
}
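
/*
 * Illustrative sketch (hypothetical layering driver, assumed names): a
 * layer forwards an operation by pointing the request at the underlying
 * device and re-dispatching through dev_doperate():
 *
 *	static int
 *	mylayer_read(struct dev_read_args *ap)
 *	{
 *		ap->a_head.a_dev = mylayer_lower_dev;
 *		return (dev_doperate(&ap->a_head));
 *	}
 */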

/*
 * Used by the console intercept code only.  Issue an operation through
 * a foreign ops structure allowing the ops structure associated
 * with the device to remain intact.
 */
int
dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
{
    int (*func)(struct dev_generic_args *);
    int needmplock = ((ops->head.flags & D_MPSAFE) == 0);
    int error;

    func = *(void **)((char *)ops + ap->a_desc->sd_offset);

    if (needmplock)
	    get_mplock();
    error = func(ap);
    if (needmplock)
	    rel_mplock();

    return (error);
}

/*
 * Convert a template dev_ops into the real thing by filling in
 * uninitialized fields.
 */
void
compile_dev_ops(struct dev_ops *ops)
{
	int offset;

	for (offset = offsetof(struct dev_ops, dev_ops_first_field);
	     offset <= offsetof(struct dev_ops, dev_ops_last_field);
	     offset += sizeof(void *)
	) {
		void **func_p = (void **)((char *)ops + offset);
		void **def_p = (void **)((char *)&default_dev_ops + offset);
		if (*func_p == NULL) {
			if (ops->d_default)
				*func_p = ops->d_default;
			else
				*func_p = *def_p;
		}
	}
}
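
/*
 * Illustrative sketch (hypothetical template, assumed names): a sparse
 * template that only implements read and write.  After compile_dev_ops()
 * every NULL slot points at the matching default_dev_ops entry (or at
 * d_default, had one been supplied):
 *
 *	static struct dev_ops tmpl_ops = {
 *		{ "tmpl" },
 *		.d_read = tmpl_read,
 *		.d_write = tmpl_write,
 *	};
 *
 *	compile_dev_ops(&tmpl_ops);	(d_open is now noopen, etc.)
 */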

/************************************************************************
 *			MAJOR/MINOR SPACE FUNCTIONS			*
 ************************************************************************/

/*
 * This makes a dev_ops entry visible to userland (e.g. /dev/<blah>).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g. 'ad0'
 * to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later dev_ops_remove() call to
 * match this add.  However, the match value for the minor number should never
 * have any bits set in the major number's bit range (8-15).  The mask value
 * may be conveniently specified as -1 without creating any major number
 * interference.
 */

static
int
rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
{
    if (a->maj < b->maj)
	return(-1);
    else if (a->maj > b->maj)
	return(1);
    return(0);
}

RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj);

struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);

int
dev_ops_remove_all(struct dev_ops *ops)
{
	return devfs_destroy_dev_by_ops(ops, -1);
}

int
dev_ops_remove_minor(struct dev_ops *ops, int minor)
{
	return devfs_destroy_dev_by_ops(ops, minor);
}

struct dev_ops *
dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
{
	struct dev_ops *oops = dev->si_ops;

	compile_dev_ops(iops);
	iops->head.maj = oops->head.maj;
	iops->head.data = oops->head.data;
	iops->head.flags = oops->head.flags;
	dev->si_ops = iops;
	dev->si_flags |= SI_INTERCEPTED;

	return (oops);
}

void
dev_ops_restore(cdev_t dev, struct dev_ops *oops)
{
	struct dev_ops *iops = dev->si_ops;

	dev->si_ops = oops;
	dev->si_flags &= ~SI_INTERCEPTED;
	iops->head.maj = 0;
	iops->head.data = NULL;
	iops->head.flags = 0;
}
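
/*
 * Illustrative sketch (hypothetical console intercept, assumed names):
 * substitute a foreign ops vector for the device's own, then restore it.
 * The intercepted ops inherit the original major, data and flags:
 *
 *	struct dev_ops *saved;
 *
 *	saved = dev_ops_intercept(dev, &cons_intercept_ops);
 *	(... operations on dev now route through cons_intercept_ops ...)
 *	dev_ops_restore(dev, saved);
 */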

/************************************************************************
 *			DEFAULT DEV OPS FUNCTIONS			*
 ************************************************************************/

/*
 * Unsupported devswitch functions (e.g. for writing to read-only device).
 * XXX may belong elsewhere.
 */
static int
norevoke(struct dev_revoke_args *ap)
{
	/* take no action */
	return(0);
}

static int
noclone(struct dev_clone_args *ap)
{
	/* take no action */
	return (0);	/* allow the clone */
}

static int
noopen(struct dev_open_args *ap)
{
	return (ENODEV);
}

static int
noclose(struct dev_close_args *ap)
{
	return (ENODEV);
}

static int
noread(struct dev_read_args *ap)
{
	return (ENODEV);
}

static int
nowrite(struct dev_write_args *ap)
{
	return (ENODEV);
}

static int
noioctl(struct dev_ioctl_args *ap)
{
	return (ENODEV);
}

static int
nokqfilter(struct dev_kqfilter_args *ap)
{
	return (ENODEV);
}

static int
nommap(struct dev_mmap_args *ap)
{
	return (ENODEV);
}

static int
nommap_single(struct dev_mmap_single_args *ap)
{
	return (ENODEV);
}

static int
nostrategy(struct dev_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;

	bio->bio_buf->b_flags |= B_ERROR;
	bio->bio_buf->b_error = EOPNOTSUPP;
	biodone(bio);
	return(0);
}

static int
nopsize(struct dev_psize_args *ap)
{
	ap->a_result = 0;
	return(0);
}

static int
nodump(struct dev_dump_args *ap)
{
	return (ENODEV);
}

/*
 * XXX this is probably bogus.  Any device that uses it isn't checking the
 * minor number.
 */
int
nullopen(struct dev_open_args *ap)
{
	return (0);
}

int
nullclose(struct dev_close_args *ap)
{
	return (0);
}