xref: /dragonfly/sys/kern/kern_device.c (revision f02303f9)
1 /*
2  * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
3  * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
4  * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer,
5  *							All rights reserved.
6  * Copyright (c) 1982, 1986, 1991, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  * $DragonFly: src/sys/kern/kern_device.c,v 1.22 2006/12/23 00:35:04 swildner Exp $
31  */
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/sysctl.h>
36 #include <sys/systm.h>
37 #include <sys/module.h>
38 #include <sys/malloc.h>
39 #include <sys/conf.h>
40 #include <sys/bio.h>
41 #include <sys/buf.h>
42 #include <sys/vnode.h>
43 #include <sys/queue.h>
44 #include <sys/device.h>
45 #include <sys/syslink.h>
46 #include <sys/proc.h>
47 #include <machine/stdarg.h>
48 #include <sys/thread2.h>
49 
50 /*
51  * system link descriptors identify the command in the
52  * arguments structure.
53  */
54 #define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)
55 
56 #define DEVOP_DESC_INIT(name)						\
57 	    struct syslink_desc DDESCNAME(name) = {			\
58 		__offsetof(struct dev_ops, __CONCAT(d_, name)),	\
59 	    #name }
60 
61 DEVOP_DESC_INIT(default);
62 DEVOP_DESC_INIT(open);
63 DEVOP_DESC_INIT(close);
64 DEVOP_DESC_INIT(read);
65 DEVOP_DESC_INIT(write);
66 DEVOP_DESC_INIT(ioctl);
67 DEVOP_DESC_INIT(dump);
68 DEVOP_DESC_INIT(psize);
69 DEVOP_DESC_INIT(poll);
70 DEVOP_DESC_INIT(mmap);
71 DEVOP_DESC_INIT(strategy);
72 DEVOP_DESC_INIT(kqfilter);
73 DEVOP_DESC_INIT(clone);
74 
75 /*
76  * Misc default ops
77  */
78 struct dev_ops dead_dev_ops;
79 
80 struct dev_ops default_dev_ops = {
81 	{ "null" },
82 	.d_default = NULL,	/* must be NULL */
83 	.d_open = noopen,
84 	.d_close = noclose,
85 	.d_read = noread,
86 	.d_write = nowrite,
87 	.d_ioctl = noioctl,
88 	.d_poll = nopoll,
89 	.d_mmap = nommap,
90 	.d_strategy = nostrategy,
91 	.d_dump = nodump,
92 	.d_psize = nopsize,
93 	.d_kqfilter = nokqfilter,
94 	.d_clone = noclone
95 };
96 
97 /*
98  * This is used to look-up devices
99  */
100 static struct dev_ops_link *dev_ops_array[NUMCDEVSW];
101 
102 /************************************************************************
103  *			GENERAL DEVICE API FUNCTIONS			*
104  ************************************************************************/
105 
106 int
107 dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred)
108 {
109 	struct dev_open_args ap;
110 
111 	ap.a_head.a_desc = &dev_open_desc;
112 	ap.a_head.a_dev = dev;
113 	ap.a_oflags = oflags;
114 	ap.a_devtype = devtype;
115 	ap.a_cred = cred;
116 	return(dev->si_ops->d_open(&ap));
117 }
118 
119 int
120 dev_dclose(cdev_t dev, int fflag, int devtype)
121 {
122 	struct dev_close_args ap;
123 
124 	ap.a_head.a_desc = &dev_close_desc;
125 	ap.a_head.a_dev = dev;
126 	ap.a_fflag = fflag;
127 	ap.a_devtype = devtype;
128 	return(dev->si_ops->d_close(&ap));
129 }
130 
131 int
132 dev_dread(cdev_t dev, struct uio *uio, int ioflag)
133 {
134 	struct dev_read_args ap;
135 	int error;
136 
137 	ap.a_head.a_desc = &dev_read_desc;
138 	ap.a_head.a_dev = dev;
139 	ap.a_uio = uio;
140 	ap.a_ioflag = ioflag;
141 	error = dev->si_ops->d_read(&ap);
142 	if (error == 0)
143 		dev->si_lastread = time_second;
144 	return (error);
145 }
146 
147 int
148 dev_dwrite(cdev_t dev, struct uio *uio, int ioflag)
149 {
150 	struct dev_write_args ap;
151 	int error;
152 
153 	dev->si_lastwrite = time_second;
154 	ap.a_head.a_desc = &dev_write_desc;
155 	ap.a_head.a_dev = dev;
156 	ap.a_uio = uio;
157 	ap.a_ioflag = ioflag;
158 	error = dev->si_ops->d_write(&ap);
159 	return (error);
160 }
161 
162 int
163 dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred)
164 {
165 	struct dev_ioctl_args ap;
166 
167 	ap.a_head.a_desc = &dev_ioctl_desc;
168 	ap.a_head.a_dev = dev;
169 	ap.a_cmd = cmd;
170 	ap.a_data = data;
171 	ap.a_fflag = fflag;
172 	ap.a_cred = cred;
173 	return(dev->si_ops->d_ioctl(&ap));
174 }
175 
176 int
177 dev_dpoll(cdev_t dev, int events)
178 {
179 	struct dev_poll_args ap;
180 	int error;
181 
182 	ap.a_head.a_desc = &dev_poll_desc;
183 	ap.a_head.a_dev = dev;
184 	ap.a_events = events;
185 	error = dev->si_ops->d_poll(&ap);
186 	if (error == 0)
187 		return(ap.a_events);
188 	return (seltrue(dev, events));
189 }
190 
191 int
192 dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot)
193 {
194 	struct dev_mmap_args ap;
195 	int error;
196 
197 	ap.a_head.a_desc = &dev_mmap_desc;
198 	ap.a_head.a_dev = dev;
199 	ap.a_offset = offset;
200 	ap.a_nprot = nprot;
201 	error = dev->si_ops->d_mmap(&ap);
202 	if (error == 0)
203 		return(ap.a_result);
204 	return(-1);
205 }
206 
207 int
208 dev_dclone(cdev_t dev)
209 {
210 	struct dev_clone_args ap;
211 
212 	ap.a_head.a_desc = &dev_clone_desc;
213 	ap.a_head.a_dev = dev;
214 	return (dev->si_ops->d_clone(&ap));
215 }
216 
217 /*
218  * Core device strategy call, used to issue I/O on a device.  There are
219  * two versions, a non-chained version and a chained version.  The chained
220  * version reuses a BIO set up by vn_strategy().  The only difference is
221  * that, for now, we do not push a new tracking structure when chaining
222  * from vn_strategy.  XXX this will ultimately have to change.
223  */
224 void
225 dev_dstrategy(cdev_t dev, struct bio *bio)
226 {
227 	struct dev_strategy_args ap;
228 	struct bio_track *track;
229 
230 	ap.a_head.a_desc = &dev_strategy_desc;
231 	ap.a_head.a_dev = dev;
232 	ap.a_bio = bio;
233 
234 	KKASSERT(bio->bio_track == NULL);
235 	KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
236 	if (bio->bio_buf->b_cmd == BUF_CMD_READ)
237 	    track = &dev->si_track_read;
238 	else
239 	    track = &dev->si_track_write;
240 	atomic_add_int(&track->bk_active, 1);
241 	bio->bio_track = track;
242 	(void)dev->si_ops->d_strategy(&ap);
243 }
244 
245 void
246 dev_dstrategy_chain(cdev_t dev, struct bio *bio)
247 {
248 	struct dev_strategy_args ap;
249 
250 	KKASSERT(bio->bio_track != NULL);
251 	ap.a_head.a_desc = &dev_strategy_desc;
252 	ap.a_head.a_dev = dev;
253 	ap.a_bio = bio;
254 	(void)dev->si_ops->d_strategy(&ap);
255 }
256 
257 /*
258  * note: the disk layer is expected to set count, blkno, and secsize before
259  * forwarding the message.
260  */
261 int
262 dev_ddump(cdev_t dev)
263 {
264 	struct dev_dump_args ap;
265 
266 	ap.a_head.a_desc = &dev_dump_desc;
267 	ap.a_head.a_dev = dev;
268 	ap.a_count = 0;
269 	ap.a_blkno = 0;
270 	ap.a_secsize = 0;
271 	return(dev->si_ops->d_dump(&ap));
272 }
273 
274 int
275 dev_dpsize(cdev_t dev)
276 {
277 	struct dev_psize_args ap;
278 	int error;
279 
280 	ap.a_head.a_desc = &dev_psize_desc;
281 	ap.a_head.a_dev = dev;
282 	error = dev->si_ops->d_psize(&ap);
283 	if (error == 0)
284 		return (ap.a_result);
285 	return(-1);
286 }
287 
288 int
289 dev_dkqfilter(cdev_t dev, struct knote *kn)
290 {
291 	struct dev_kqfilter_args ap;
292 	int error;
293 
294 	ap.a_head.a_desc = &dev_kqfilter_desc;
295 	ap.a_head.a_dev = dev;
296 	ap.a_kn = kn;
297 	error = dev->si_ops->d_kqfilter(&ap);
298 	if (error == 0)
299 		return(ap.a_result);
300 	return(ENODEV);
301 }
302 
303 /************************************************************************
304  *			DEVICE HELPER FUNCTIONS				*
305  ************************************************************************/
306 
307 const char *
308 dev_dname(cdev_t dev)
309 {
310     return(dev->si_ops->head.name);
311 }
312 
313 int
314 dev_dflags(cdev_t dev)
315 {
316     return(dev->si_ops->head.flags);
317 }
318 
319 int
320 dev_dmaj(cdev_t dev)
321 {
322     return(dev->si_ops->head.maj);
323 }
324 
325 /*
326  * Used when forwarding a request through layers.  The caller adjusts
327  * ap->a_head.a_dev and then calls this function.
328  */
329 int
330 dev_doperate(struct dev_generic_args *ap)
331 {
332     int (*func)(struct dev_generic_args *);
333 
334     func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);
335     return (func(ap));
336 }
337 
338 /*
339  * Used by the console intercept code only.  Issue an operation through
340  * a foreign ops structure allowing the ops structure associated
341  * with the device to remain intact.
342  */
343 int
344 dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
345 {
346     int (*func)(struct dev_generic_args *);
347 
348     func = *(void **)((char *)ops + ap->a_desc->sd_offset);
349     return (func(ap));
350 }
351 
352 /*
353  * Convert a template dev_ops into the real thing by filling in
354  * uninitialized fields.
355  */
356 void
357 compile_dev_ops(struct dev_ops *ops)
358 {
359 	int offset;
360 
361 	for (offset = offsetof(struct dev_ops, dev_ops_first_field);
362 	     offset <= offsetof(struct dev_ops, dev_ops_last_field);
363 	     offset += sizeof(void *)
364 	) {
365 		void **func_p = (void **)((char *)ops + offset);
366 		void **def_p = (void **)((char *)&default_dev_ops + offset);
367 		if (*func_p == NULL) {
368 			if (ops->d_default)
369 				*func_p = ops->d_default;
370 			else
371 				*func_p = *def_p;
372 		}
373 	}
374 }
375 
376 /************************************************************************
377  *			MAJOR/MINOR SPACE FUNCTION 			*
378  ************************************************************************/
379 
380 /*
381  * This makes a dev_ops entry visible to userland (e.g /dev/<blah>).
382  *
383  * The kernel can overload a major number by making multiple dev_ops_add()
384  * calls, but only the most recent one (the first one in the dev_ops_array[]
385  * list matching the mask/match) will be visible to userland.  make_dev() does
386  * not automatically call dev_ops_add() (nor do we want it to, since
387  * partition-managed disk devices are overloaded on top of the raw device).
388  *
389  * Disk devices typically register their major, e.g. 'ad0', and then call
390  * into the disk label management code which overloads its own onto e.g. 'ad0'
391  * to support all the various slice and partition combinations.
392  *
393  * The mask/match supplied in this call are a full 32 bits and the same
394  * mask and match must be specified in a later dev_ops_remove() call to
395  * match this add.  However, the match value for the minor number should never
396  * have any bits set in the major number's bit range (8-15).  The mask value
397  * may be conveniently specified as -1 without creating any major number
398  * interference.
399  */
400 int
401 dev_ops_add(struct dev_ops *ops, u_int mask, u_int match)
402 {
403     int maj;
404     struct dev_ops_link *link;
405 
406     compile_dev_ops(ops);
407     maj = ops->head.maj;
408     if (maj < 0 || maj >= NUMCDEVSW) {
409 	    kprintf("%s: ERROR: driver has bogus dev_ops->head.maj = %d\n",
410 		   ops->head.name, maj);
411 	    return (EINVAL);
412     }
413     for (link = dev_ops_array[maj]; link; link = link->next) {
414 	    /*
415 	     * If we get an exact match we usurp the target, but we only print
416 	     * a warning message if a different device switch is installed.
417 	     */
418 	    if (link->mask == mask && link->match == match) {
419 		    if (link->ops != ops) {
420 			    kprintf("WARNING: \"%s\" (%p) is usurping \"%s\"'s"
421 				" (%p) dev_ops_array[]\n",
422 				ops->head.name, ops,
423 				link->ops->head.name, link->ops);
424 			    link->ops = ops;
425 			    ++ops->head.refs;
426 		    }
427 		    return(0);
428 	    }
429 	    /*
430 	     * XXX add additional warnings for overlaps
431 	     */
432     }
433 
434     link = kmalloc(sizeof(struct dev_ops_link), M_DEVBUF, M_INTWAIT|M_ZERO);
435     link->mask = mask;
436     link->match = match;
437     link->ops = ops;
438     link->next = dev_ops_array[maj];
439     dev_ops_array[maj] = link;
440     ++ops->head.refs;
441     return(0);
442 }
443 
444 /*
445  * Should only be used by udev2dev().
446  *
447  * If the minor number is -1, we match the first ops we find for this
448  * major.   If the mask is not -1 then multiple minor numbers can match
449  * the same ops.
450  *
451  * Note that this function will return NULL if the minor number is not within
452  * the bounds of the installed mask(s).
453  *
454  * The specified minor number should NOT include any major bits.
455  */
456 struct dev_ops *
457 dev_ops_get(int x, int y)
458 {
459 	struct dev_ops_link *link;
460 
461 	if (x < 0 || x >= NUMCDEVSW)
462 		return(NULL);
463 	for (link = dev_ops_array[x]; link; link = link->next) {
464 		if (y == -1 || (link->mask & y) == link->match)
465 			return(link->ops);
466 	}
467 	return(NULL);
468 }
469 
470 /*
471  * Take a cookie cutter to the major/minor device space for the passed
472  * device and generate a new dev_ops visible to userland which the caller
473  * can then modify.  The original device is not modified but portions of
474  * its major/minor space will no longer be visible to userland.
475  */
476 struct dev_ops *
477 dev_ops_add_override(cdev_t backing_dev, struct dev_ops *template,
478 		     u_int mask, u_int match)
479 {
480 	struct dev_ops *ops;
481 	struct dev_ops *backing_ops = backing_dev->si_ops;
482 
483 	ops = kmalloc(sizeof(struct dev_ops), M_DEVBUF, M_INTWAIT);
484 	*ops = *template;
485 	ops->head.name = backing_ops->head.name;
486 	ops->head.maj = backing_ops->head.maj;
487 	ops->head.flags = backing_ops->head.flags;
488 	compile_dev_ops(ops);
489 	dev_ops_add(ops, mask, match);
490 
491 	return(ops);
492 }
493 
494 /*
495  * Remove all matching dev_ops entries from the dev_ops_array[] major
496  * array so no new user opens can be performed, and destroy all devices
497  * installed in the hash table that are associated with this dev_ops.  (see
498  * destroy_all_devs()).
499  *
500  * The mask and match should match a previous call to dev_ops_add*().
501  */
502 int
503 dev_ops_remove(struct dev_ops *ops, u_int mask, u_int match)
504 {
505 	int maj = ops->head.maj;
506 	struct dev_ops_link *link;
507 	struct dev_ops_link **plink;
508 
509 	if (maj < 0 || maj >= NUMCDEVSW) {
510 		kprintf("%s: ERROR: driver has bogus ops->d_maj = %d\n",
511 			ops->head.name, maj);
512 		return EINVAL;
513 	}
514 	if (ops != &dead_dev_ops)
515 		destroy_all_devs(ops, mask, match);
516 	for (plink = &dev_ops_array[maj]; (link = *plink) != NULL;
517 	     plink = &link->next) {
518 		if (link->mask == mask && link->match == match) {
519 			if (link->ops == ops)
520 				break;
521 			kprintf("%s: ERROR: cannot remove from dev_ops_array[], "
522 			       "its major number %d was stolen by %s\n",
523 				ops->head.name, maj,
524 				link->ops->head.name
525 			);
526 		}
527 	}
528 	if (link == NULL) {
529 		kprintf("%s(%d)[%08x/%08x]: WARNING: ops removed "
530 		       "multiple times!\n",
531 		       ops->head.name, maj, mask, match);
532 	} else {
533 		*plink = link->next;
534 		--ops->head.refs; /* XXX ops_release() / record refs */
535 		kfree(link, M_DEVBUF);
536 	}
537 	if (dev_ops_array[maj] == NULL && ops->head.refs != 0) {
538 		kprintf("%s(%d)[%08x/%08x]: Warning: dev_ops_remove() called "
539 			"while %d device refs still exist!\n",
540 			ops->head.name, maj, mask, match, ops->head.refs);
541 	} else {
542 		kprintf("%s: ops removed\n", ops->head.name);
543 	}
544 	return 0;
545 }
546 
547 /*
548  * Release a ops entry.  When the ref count reaches zero, recurse
549  * through the stack.
550  */
551 void
552 dev_ops_release(struct dev_ops *ops)
553 {
554     --ops->head.refs;
555     if (ops->head.refs == 0) {
556 	/* XXX */
557     }
558 }
559 
560 struct dev_ops *
561 dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
562 {
563 	struct dev_ops *oops = dev->si_ops;
564 
565 	compile_dev_ops(iops);
566 	iops->head.maj = oops->head.maj;
567 	iops->head.data = oops->head.data;
568 	iops->head.flags = oops->head.flags;
569 	dev->si_ops = iops;
570 	dev->si_flags |= SI_INTERCEPTED;
571 
572 	return (oops);
573 }
574 
575 void
576 dev_ops_restore(cdev_t dev, struct dev_ops *oops)
577 {
578 	struct dev_ops *iops = dev->si_ops;
579 
580 	dev->si_ops = oops;
581 	dev->si_flags &= ~SI_INTERCEPTED;
582 	iops->head.maj = 0;
583 	iops->head.data = NULL;
584 	iops->head.flags = 0;
585 }
586 
587 /************************************************************************
588  *			DEFAULT DEV OPS FUNCTIONS			*
589  ************************************************************************/
590 
591 
592 /*
593  * Unsupported devswitch functions (e.g. for writing to read-only device).
594  * XXX may belong elsewhere.
595  */
596 
/*
 * Default d_clone: takes no action and returns 0, which allows the
 * clone.
 */
int
noclone(struct dev_clone_args *ap)
{
	const int allow = 0;

	(void)ap;
	return (allow);
}
603 
/*
 * Default d_open: the operation is not supported, always ENODEV.
 */
int
noopen(struct dev_open_args *ap)
{
	const int error = ENODEV;

	(void)ap;
	return (error);
}
609 
/*
 * Default d_close: the operation is not supported, always ENODEV.
 */
int
noclose(struct dev_close_args *ap)
{
	const int error = ENODEV;

	(void)ap;
	return (error);
}
615 
/*
 * Default d_read: the operation is not supported, always ENODEV.
 */
int
noread(struct dev_read_args *ap)
{
	const int error = ENODEV;

	(void)ap;
	return (error);
}
621 
/*
 * Default d_write: the operation is not supported, always ENODEV.
 */
int
nowrite(struct dev_write_args *ap)
{
	const int error = ENODEV;

	(void)ap;
	return (error);
}
627 
/*
 * Default d_ioctl: the operation is not supported, always ENODEV.
 */
int
noioctl(struct dev_ioctl_args *ap)
{
	const int error = ENODEV;

	(void)ap;
	return (error);
}
633 
/*
 * Default d_kqfilter: the operation is not supported, always ENODEV.
 */
int
nokqfilter(struct dev_kqfilter_args *ap)
{
	const int error = ENODEV;

	(void)ap;
	return (error);
}
639 
/*
 * Default d_mmap: the operation is not supported, always ENODEV.
 */
int
nommap(struct dev_mmap_args *ap)
{
	const int error = ENODEV;

	(void)ap;
	return (error);
}
645 
646 int
647 nopoll(struct dev_poll_args *ap)
648 {
649 	ap->a_events = 0;
650 	return(0);
651 }
652 
653 int
654 nostrategy(struct dev_strategy_args *ap)
655 {
656 	struct bio *bio = ap->a_bio;
657 
658 	bio->bio_buf->b_flags |= B_ERROR;
659 	bio->bio_buf->b_error = EOPNOTSUPP;
660 	biodone(bio);
661 	return(0);
662 }
663 
664 int
665 nopsize(struct dev_psize_args *ap)
666 {
667 	ap->a_result = 0;
668 	return(0);
669 }
670 
/*
 * Default d_dump: the operation is not supported, always ENODEV.
 */
int
nodump(struct dev_dump_args *ap)
{
	const int error = ENODEV;

	(void)ap;
	return (error);
}
676 
/*
 * Open handler that unconditionally succeeds (returns 0).
 *
 * XXX this is probably bogus.  Any device that uses it isn't checking
 * the minor number.
 */
int
nullopen(struct dev_open_args *ap)
{
	const int ok = 0;

	(void)ap;
	return (ok);
}
686 
/*
 * Close handler that unconditionally succeeds (returns 0).
 */
int
nullclose(struct dev_close_args *ap)
{
	const int ok = 0;

	(void)ap;
	return (ok);
}
692 
693