xref: /dragonfly/sys/kern/kern_device.c (revision d600454b)
1 /*
2  * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
3  * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
4  * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer,
5  *							All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $DragonFly: src/sys/kern/kern_device.c,v 1.16 2006/02/17 19:18:06 dillon Exp $
29  */
30 #include <sys/param.h>
31 #include <sys/kernel.h>
32 #include <sys/sysctl.h>
33 #include <sys/systm.h>
34 #include <sys/module.h>
35 #include <sys/malloc.h>
36 #include <sys/conf.h>
37 #include <sys/bio.h>
38 #include <sys/buf.h>
39 #include <sys/vnode.h>
40 #include <sys/queue.h>
41 #include <sys/msgport.h>
42 #include <sys/device.h>
43 #include <machine/stdarg.h>
44 #include <sys/proc.h>
45 #include <sys/thread2.h>
46 #include <sys/msgport2.h>
47 
48 static struct cdevlink 	*cdevbase[NUMCDEVSW];
49 
50 static int cdevsw_putport(lwkt_port_t port, lwkt_msg_t msg);
51 
52 struct cdevsw dead_cdevsw;
53 
/*
 * Initialize a message port to serve as the default message-handling port
 * for device operations.  This message port provides compatibility with
 * traditional cdevsw dispatch functions by running them synchronously.
 *
 * YYY NOTE: ms_cmd can now hold a function pointer, should this code be
 * converted from an integer op to a function pointer with a flag to
 * indicate legacy operation?
 */
static void
init_default_cdevsw_port(lwkt_port_t port)
{
    /* Standard port init, then hook our synchronous dispatcher. */
    lwkt_initport(port, NULL);
    port->mp_putport = cdevsw_putport;
}
69 
70 static
71 int
72 cdevsw_putport(lwkt_port_t port, lwkt_msg_t lmsg)
73 {
74     cdevallmsg_t msg = (cdevallmsg_t)lmsg;
75     struct cdevsw *devsw = msg->am_msg.dev->si_devsw;
76     int error;
77 
78     /*
79      * Run the device switch function synchronously in the context of the
80      * caller and return a synchronous error code (anything not EASYNC).
81      */
82     switch(msg->am_lmsg.ms_cmd.cm_op) {
83     case CDEV_CMD_OPEN:
84 	error = devsw->old_open(
85 		    msg->am_open.msg.dev,
86 		    msg->am_open.oflags,
87 		    msg->am_open.devtype,
88 		    msg->am_open.td);
89 	break;
90     case CDEV_CMD_CLOSE:
91 	error = devsw->old_close(
92 		    msg->am_close.msg.dev,
93 		    msg->am_close.fflag,
94 		    msg->am_close.devtype,
95 		    msg->am_close.td);
96 	break;
97     case CDEV_CMD_STRATEGY:
98 	devsw->old_strategy(msg->am_strategy.msg.dev, msg->am_strategy.bio);
99 	error = 0;
100 	break;
101     case CDEV_CMD_IOCTL:
102 	error = devsw->old_ioctl(
103 		    msg->am_ioctl.msg.dev,
104 		    msg->am_ioctl.cmd,
105 		    msg->am_ioctl.data,
106 		    msg->am_ioctl.fflag,
107 		    msg->am_ioctl.td);
108 	break;
109     case CDEV_CMD_DUMP:
110 	error = devsw->old_dump(
111 		    msg->am_dump.msg.dev,
112 		    msg->am_dump.count,
113 		    msg->am_dump.blkno,
114 		    msg->am_dump.secsize);
115 	break;
116     case CDEV_CMD_PSIZE:
117 	msg->am_psize.result = devsw->old_psize(msg->am_psize.msg.dev);
118 	error = 0;	/* XXX */
119 	break;
120     case CDEV_CMD_READ:
121 	error = devsw->old_read(
122 		    msg->am_read.msg.dev,
123 		    msg->am_read.uio,
124 		    msg->am_read.ioflag);
125 	break;
126     case CDEV_CMD_WRITE:
127 	error = devsw->old_write(
128 		    msg->am_read.msg.dev,
129 		    msg->am_read.uio,
130 		    msg->am_read.ioflag);
131 	break;
132     case CDEV_CMD_POLL:
133 	msg->am_poll.events = devsw->old_poll(
134 				msg->am_poll.msg.dev,
135 				msg->am_poll.events,
136 				msg->am_poll.td);
137 	error = 0;
138 	break;
139     case CDEV_CMD_KQFILTER:
140 	msg->am_kqfilter.result = devsw->old_kqfilter(
141 				msg->am_kqfilter.msg.dev,
142 				msg->am_kqfilter.kn);
143 	error = 0;
144 	break;
145     case CDEV_CMD_MMAP:
146 	msg->am_mmap.result = devsw->old_mmap(
147 		    msg->am_mmap.msg.dev,
148 		    msg->am_mmap.offset,
149 		    msg->am_mmap.nprot);
150 	error = 0;	/* XXX */
151 	break;
152     default:
153 	error = ENOSYS;
154 	break;
155     }
156     KKASSERT(error != EASYNC);
157     return(error);
158 }
159 
/*
 * Initialize the common portion of a device message with the given command
 * and target device, returning the device's message port (may be NULL if
 * the device has no port installed).
 */
static __inline
lwkt_port_t
_init_cdevmsg(dev_t dev, cdevmsg_t msg, int cmd)
{
    lwkt_initmsg_simple(&msg->msg, cmd);
    msg->dev = dev;
    return(dev->si_port);
}
168 
169 int
170 dev_dopen(dev_t dev, int oflags, int devtype, thread_t td)
171 {
172     struct cdevmsg_open	msg;
173     lwkt_port_t port;
174 
175     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_OPEN);
176     if (port == NULL)
177 	return(ENXIO);
178     msg.oflags = oflags;
179     msg.devtype = devtype;
180     msg.td = td;
181     return(lwkt_domsg(port, &msg.msg.msg));
182 }
183 
184 int
185 dev_dclose(dev_t dev, int fflag, int devtype, thread_t td)
186 {
187     struct cdevmsg_close msg;
188     lwkt_port_t port;
189 
190     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_CLOSE);
191     if (port == NULL)
192 	return(ENXIO);
193     msg.fflag = fflag;
194     msg.devtype = devtype;
195     msg.td = td;
196     return(lwkt_domsg(port, &msg.msg.msg));
197 }
198 
/*
 * Core device strategy call, used to issue I/O on a device.  There are
 * two versions, a non-chained version and a chained version.  The chained
 * version reuses a BIO set up by vn_strategy().  The only difference is
 * that, for now, we do not push a new tracking structure when chaining
 * from vn_strategy.  XXX this will ultimately have to change.
 */
void
dev_dstrategy(dev_t dev, struct bio *bio)
{
    struct cdevmsg_strategy msg;
    struct bio_track *track;
    lwkt_port_t port;

    /* Fresh bio: it must not already be attached to a track. */
    KKASSERT(bio->bio_track == NULL);
    if (bio->bio_buf->b_flags & B_READ)
	track = &dev->si_track_read;
    else
	track = &dev->si_track_write;
    /* Account for the in-flight I/O before dispatching it. */
    atomic_add_int(&track->bk_active, 1);
    bio->bio_track = track;

    port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_STRATEGY);
    KKASSERT(port);	/* 'nostrategy' function is NULL YYY */
    msg.bio = bio;
    lwkt_domsg(port, &msg.msg.msg);
}
226 
/*
 * Chained strategy call: the bio was already set up (and tracked) by
 * vn_strategy(), so no new tracking structure is pushed here.
 */
void
dev_dstrategy_chain(dev_t dev, struct bio *bio)
{
    struct cdevmsg_strategy msg;
    lwkt_port_t port;

    /* Chained bio must already carry its tracking structure. */
    KKASSERT(bio->bio_track != NULL);
    port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_STRATEGY);
    KKASSERT(port);	/* 'nostrategy' function is NULL YYY */
    msg.bio = bio;
    lwkt_domsg(port, &msg.msg.msg);
}
239 
240 int
241 dev_dioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, thread_t td)
242 {
243     struct cdevmsg_ioctl msg;
244     lwkt_port_t port;
245 
246     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_IOCTL);
247     if (port == NULL)
248 	return(ENXIO);
249     msg.cmd = cmd;
250     msg.data = data;
251     msg.fflag = fflag;
252     msg.td = td;
253     return(lwkt_domsg(port, &msg.msg.msg));
254 }
255 
256 /*
257  * note: the disk layer is expected to set count, blkno, and secsize before
258  * forwarding the message.
259  */
260 int
261 dev_ddump(dev_t dev)
262 {
263     struct cdevmsg_dump	msg;
264     lwkt_port_t port;
265 
266     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_DUMP);
267     if (port == NULL)
268 	return(ENXIO);
269     msg.count = 0;
270     msg.blkno = 0;
271     msg.secsize = 0;
272     return(lwkt_domsg(port, &msg.msg.msg));
273 }
274 
275 int
276 dev_dpsize(dev_t dev)
277 {
278     struct cdevmsg_psize msg;
279     lwkt_port_t port;
280     int error;
281 
282     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_PSIZE);
283     if (port == NULL)
284 	return(-1);
285     error = lwkt_domsg(port, &msg.msg.msg);
286     if (error == 0)
287 	return(msg.result);
288     return(-1);
289 }
290 
291 int
292 dev_dread(dev_t dev, struct uio *uio, int ioflag)
293 {
294     struct cdevmsg_read msg;
295     lwkt_port_t port;
296     int error;
297 
298     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_READ);
299     if (port == NULL)
300 	return(ENXIO);
301     msg.uio = uio;
302     msg.ioflag = ioflag;
303     error = lwkt_domsg(port, &msg.msg.msg);
304     if (error == 0)
305 	dev->si_lastread = time_second;
306     return (error);
307 }
308 
309 int
310 dev_dwrite(dev_t dev, struct uio *uio, int ioflag)
311 {
312     struct cdevmsg_write msg;
313     lwkt_port_t port;
314 
315     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_WRITE);
316     if (port == NULL)
317 	return(ENXIO);
318     dev->si_lastwrite = time_second;
319     msg.uio = uio;
320     msg.ioflag = ioflag;
321     return(lwkt_domsg(port, &msg.msg.msg));
322 }
323 
324 int
325 dev_dpoll(dev_t dev, int events, thread_t td)
326 {
327     struct cdevmsg_poll msg;
328     lwkt_port_t port;
329     int error;
330 
331     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_POLL);
332     if (port == NULL)
333 	return(ENXIO);
334     msg.events = events;
335     msg.td = td;
336     error = lwkt_domsg(port, &msg.msg.msg);
337     if (error == 0)
338 	return(msg.events);
339     return(seltrue(dev, msg.events, td));
340 }
341 
342 int
343 dev_dkqfilter(dev_t dev, struct knote *kn)
344 {
345     struct cdevmsg_kqfilter msg;
346     lwkt_port_t port;
347     int error;
348 
349     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_KQFILTER);
350     if (port == NULL)
351 	return(ENXIO);
352     msg.kn = kn;
353     error = lwkt_domsg(port, &msg.msg.msg);
354     if (error == 0)
355 	return(msg.result);
356     return(ENODEV);
357 }
358 
359 int
360 dev_dmmap(dev_t dev, vm_offset_t offset, int nprot)
361 {
362     struct cdevmsg_mmap msg;
363     lwkt_port_t port;
364     int error;
365 
366     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_MMAP);
367     if (port == NULL)
368 	return(-1);
369     msg.offset = offset;
370     msg.nprot = nprot;
371     error = lwkt_domsg(port, &msg.msg.msg);
372     if (error == 0)
373 	return(msg.result);
374     return(-1);
375 }
376 
/*
 * Return the device switch name for the device (no NULL check on
 * si_devsw; callers must pass a device with an installed cdevsw).
 */
const char *
dev_dname(dev_t dev)
{
    return(dev->si_devsw->d_name);
}
382 
/*
 * Return the device switch flags for the device.
 */
int
dev_dflags(dev_t dev)
{
    return(dev->si_devsw->d_flags);
}
388 
/*
 * Return the major number registered in the device's cdevsw.
 */
int
dev_dmaj(dev_t dev)
{
    return(dev->si_devsw->d_maj);
}
394 
/*
 * Return the message port currently installed on the device.
 */
lwkt_port_t
dev_dport(dev_t dev)
{
    return(dev->si_port);
}
400 
/*
 * Convert a cdevsw template into the real thing, filling in fields the
 * device left empty with appropriate defaults.  Any old-style entry point
 * left NULL is replaced with the corresponding no-op/error stub, and a
 * missing message port is pointed at the shared compatibility port that
 * dispatches through cdevsw_putport().
 */
void
compile_devsw(struct cdevsw *devsw)
{
    /* Shared compatibility port, lazily initialized on first use. */
    static lwkt_port devsw_compat_port;

    if (devsw_compat_port.mp_putport == NULL)
	init_default_cdevsw_port(&devsw_compat_port);

    if (devsw->old_open == NULL)
	devsw->old_open = noopen;
    if (devsw->old_close == NULL)
	devsw->old_close = noclose;
    if (devsw->old_read == NULL)
	devsw->old_read = noread;
    if (devsw->old_write == NULL)
	devsw->old_write = nowrite;
    if (devsw->old_ioctl == NULL)
	devsw->old_ioctl = noioctl;
    if (devsw->old_poll == NULL)
	devsw->old_poll = nopoll;
    if (devsw->old_mmap == NULL)
	devsw->old_mmap = nommap;
    if (devsw->old_strategy == NULL)
	devsw->old_strategy = nostrategy;
    if (devsw->old_dump == NULL)
	devsw->old_dump = nodump;
    if (devsw->old_psize == NULL)
	devsw->old_psize = nopsize;
    if (devsw->old_kqfilter == NULL)
	devsw->old_kqfilter = nokqfilter;

    if (devsw->d_port == NULL)
	devsw->d_port = &devsw_compat_port;
    if (devsw->d_clone == NULL)
	devsw->d_clone = noclone;
}
441 
/*
 * This makes a cdevsw entry visible to userland (e.g /dev/<blah>).
 *
 * The kernel can overload a major number by making multiple cdevsw_add()
 * calls, but only the most recent one (the first one in the cdevbase[] list
 * matching the mask/match) will be visible to userland.  make_dev() does
 * not automatically call cdevsw_add() (nor do we want it to, since
 * partition-managed disk devices are overloaded on top of the raw device).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g. 'ad0'
 * to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later cdevsw_remove() call to
 * match this add.  However, the match value for the minor number should never
 * have any bits set in the major number's bit range (8-15).  The mask value
 * may be conveniently specified as -1 without creating any major number
 * interference.
 */
int
cdevsw_add(struct cdevsw *devsw, u_int mask, u_int match)
{
    int maj;
    struct cdevlink *link;

    compile_devsw(devsw);
    maj = devsw->d_maj;
    if (maj < 0 || maj >= NUMCDEVSW) {
	printf("%s: ERROR: driver has bogus cdevsw->d_maj = %d\n",
	    devsw->d_name, maj);
	return (EINVAL);
    }
    for (link = cdevbase[maj]; link; link = link->next) {
	/*
	 * If we get an exact match we usurp the target, but we only print
	 * a warning message if a different device switch is installed.
	 */
	if (link->mask == mask && link->match == match) {
	    if (link->devsw != devsw) {
		    printf("WARNING: \"%s\" (%p) is usurping \"%s\"'s (%p)"
			" cdevsw[]\n",
			devsw->d_name, devsw,
			link->devsw->d_name, link->devsw);
		    link->devsw = devsw;
		    ++devsw->d_refs;
	    }
	    return(0);
	}
	/*
	 * XXX add additional warnings for overlaps
	 */
    }

    /* No existing entry: push a new link at the head of the major's list. */
    link = malloc(sizeof(struct cdevlink), M_DEVBUF, M_INTWAIT|M_ZERO);
    link->mask = mask;
    link->match = match;
    link->devsw = devsw;
    link->next = cdevbase[maj];
    cdevbase[maj] = link;
    ++devsw->d_refs;
    return(0);
}
505 
506 /*
507  * Should only be used by udev2dev().
508  *
509  * If the minor number is -1, we match the first cdevsw we find for this
510  * major.   If the mask is not -1 then multiple minor numbers can match
511  * the same devsw.
512  *
513  * Note that this function will return NULL if the minor number is not within
514  * the bounds of the installed mask(s).
515  *
516  * The specified minor number should NOT include any major bits.
517  */
518 struct cdevsw *
519 cdevsw_get(int x, int y)
520 {
521     struct cdevlink *link;
522 
523     if (x < 0 || x >= NUMCDEVSW)
524 	return(NULL);
525     for (link = cdevbase[x]; link; link = link->next) {
526 	if (y == -1 || (link->mask & y) == link->match)
527 	    return(link->devsw);
528     }
529     return(NULL);
530 }
531 
/*
 * Use the passed cdevsw as a template to create our intercept cdevsw,
 * and install and return ours.  Only the name, major, and flags are
 * copied from the backing device's cdevsw; compile_devsw() fills the
 * remaining entry points with defaults before registration.
 */
struct cdevsw *
cdevsw_add_override(dev_t backing_dev, u_int mask, u_int match)
{
    struct cdevsw *devsw;
    struct cdevsw *bsw = backing_dev->si_devsw;

    devsw = malloc(sizeof(struct cdevsw), M_DEVBUF, M_INTWAIT|M_ZERO);
    devsw->d_name = bsw->d_name;
    devsw->d_maj = bsw->d_maj;
    devsw->d_flags = bsw->d_flags;
    compile_devsw(devsw);
    /* NOTE: cdevsw_add() return value is not checked here. */
    cdevsw_add(devsw, mask, match);

    return(devsw);
}
551 
/*
 * Override a device's port, returning the previously installed port.  This
 * is XXX very dangerous.
 */
lwkt_port_t
cdevsw_dev_override(dev_t dev, lwkt_port_t port)
{
    lwkt_port_t oport;

    oport = dev->si_port;
    dev->si_port = port;
    return(oport);
}
565 
/*
 * Remove a cdevsw entry from the cdevbase[] major array so no new user opens
 * can be performed, and destroy all devices installed in the hash table
 * which are associated with this cdevsw.  (see destroy_all_dev()).
 *
 * Returns EINVAL on a bogus major number, 0 otherwise (including the
 * already-removed case, which only emits a warning).
 */
int
cdevsw_remove(struct cdevsw *devsw, u_int mask, u_int match)
{
    int maj = devsw->d_maj;
    struct cdevlink *link;
    struct cdevlink **plink;

    if (maj < 0 || maj >= NUMCDEVSW) {
	printf("%s: ERROR: driver has bogus cdevsw->d_maj = %d\n",
	    devsw->d_name, maj);
	return EINVAL;
    }
    /* The dead_cdevsw placeholder never owns real devices. */
    if (devsw != &dead_cdevsw)
	destroy_all_dev(devsw, mask, match);
    /*
     * Scan for the matching link.  plink tracks the pointer to patch so
     * the link can be unspliced without a separate "previous" variable.
     * A mask/match hit owned by a different devsw is reported but the
     * scan continues.
     */
    for (plink = &cdevbase[maj]; (link = *plink) != NULL; plink = &link->next) {
	if (link->mask == mask && link->match == match) {
	    if (link->devsw == devsw)
		break;
	    printf("%s: ERROR: cannot remove from cdevsw[], its major"
		    " number %d was stolen by %s\n",
		    devsw->d_name, maj,
		    link->devsw->d_name
	    );
	}
    }
    if (link == NULL) {
	printf("%s(%d)[%08x/%08x]: WARNING: cdevsw removed multiple times!\n",
		devsw->d_name, maj, mask, match);
    } else {
	*plink = link->next;
	--devsw->d_refs; /* XXX cdevsw_release() / record refs */
	free(link, M_DEVBUF);
    }
    /* Diagnostic: the major list is empty but references remain. */
    if (cdevbase[maj] == NULL && devsw->d_refs != 0) {
	printf("%s(%d)[%08x/%08x]: Warning: cdevsw_remove() called while "
		"%d device refs still exist!\n",
		devsw->d_name, maj, mask, match, devsw->d_refs);
    } else {
	printf("%s: cdevsw removed\n", devsw->d_name);
    }
    return 0;
}
613 
/*
 * Release a cdevsw entry.  When the ref count reaches zero, recurse
 * through the stack.  (The zero-refs action is not yet implemented.)
 */
void
cdevsw_release(struct cdevsw *devsw)
{
    --devsw->d_refs;
    if (devsw->d_refs == 0) {
	/* XXX */
    }
}
626 
627