xref: /dragonfly/sys/kern/kern_device.c (revision 6bd457ed)
1 /*
2  * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
3  * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
4  * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer,
5  *							All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $DragonFly: src/sys/kern/kern_device.c,v 1.15 2005/03/23 02:50:53 dillon Exp $
29  */
30 #include <sys/param.h>
31 #include <sys/kernel.h>
32 #include <sys/sysctl.h>
33 #include <sys/systm.h>
34 #include <sys/module.h>
35 #include <sys/malloc.h>
36 #include <sys/conf.h>
37 #include <sys/vnode.h>
38 #include <sys/queue.h>
39 #include <sys/msgport.h>
40 #include <sys/device.h>
41 #include <machine/stdarg.h>
42 #include <sys/proc.h>
43 #include <sys/thread2.h>
44 #include <sys/msgport2.h>
45 
/* Per-major lists of registered cdevsw entries (see cdevsw_add/cdevsw_remove). */
static struct cdevlink 	*cdevbase[NUMCDEVSW];

static int cdevsw_putport(lwkt_port_t port, lwkt_msg_t msg);

/* Special-cased in cdevsw_remove(); presumably a stub switch for dead devices
 * — confirm against callers outside this file. */
struct cdevsw dead_cdevsw;
51 
/*
 * Initialize a message port to serve as the default message-handling port
 * for device operations.  This message port provides compatibility with
 * traditional cdevsw dispatch functions by running them synchronously.
 *
 * YYY NOTE: ms_cmd can now hold a function pointer, should this code be
 * converted from an integer op to a function pointer with a flag to
 * indicate legacy operation?
 */
static void
init_default_cdevsw_port(lwkt_port_t port)
{
    lwkt_initport(port, NULL);
    /* every message put on this port is handled inline by cdevsw_putport() */
    port->mp_putport = cdevsw_putport;
}
67 
68 static
69 int
70 cdevsw_putport(lwkt_port_t port, lwkt_msg_t lmsg)
71 {
72     cdevallmsg_t msg = (cdevallmsg_t)lmsg;
73     struct cdevsw *devsw = msg->am_msg.dev->si_devsw;
74     int error;
75 
76     /*
77      * Run the device switch function synchronously in the context of the
78      * caller and return a synchronous error code (anything not EASYNC).
79      */
80     switch(msg->am_lmsg.ms_cmd.cm_op) {
81     case CDEV_CMD_OPEN:
82 	error = devsw->old_open(
83 		    msg->am_open.msg.dev,
84 		    msg->am_open.oflags,
85 		    msg->am_open.devtype,
86 		    msg->am_open.td);
87 	break;
88     case CDEV_CMD_CLOSE:
89 	error = devsw->old_close(
90 		    msg->am_close.msg.dev,
91 		    msg->am_close.fflag,
92 		    msg->am_close.devtype,
93 		    msg->am_close.td);
94 	break;
95     case CDEV_CMD_STRATEGY:
96 	devsw->old_strategy(msg->am_strategy.bp);
97 	error = 0;
98 	break;
99     case CDEV_CMD_IOCTL:
100 	error = devsw->old_ioctl(
101 		    msg->am_ioctl.msg.dev,
102 		    msg->am_ioctl.cmd,
103 		    msg->am_ioctl.data,
104 		    msg->am_ioctl.fflag,
105 		    msg->am_ioctl.td);
106 	break;
107     case CDEV_CMD_DUMP:
108 	error = devsw->old_dump(
109 		    msg->am_dump.msg.dev,
110 		    msg->am_dump.count,
111 		    msg->am_dump.blkno,
112 		    msg->am_dump.secsize);
113 	break;
114     case CDEV_CMD_PSIZE:
115 	msg->am_psize.result = devsw->old_psize(msg->am_psize.msg.dev);
116 	error = 0;	/* XXX */
117 	break;
118     case CDEV_CMD_READ:
119 	error = devsw->old_read(
120 		    msg->am_read.msg.dev,
121 		    msg->am_read.uio,
122 		    msg->am_read.ioflag);
123 	break;
124     case CDEV_CMD_WRITE:
125 	error = devsw->old_write(
126 		    msg->am_read.msg.dev,
127 		    msg->am_read.uio,
128 		    msg->am_read.ioflag);
129 	break;
130     case CDEV_CMD_POLL:
131 	msg->am_poll.events = devsw->old_poll(
132 				msg->am_poll.msg.dev,
133 				msg->am_poll.events,
134 				msg->am_poll.td);
135 	error = 0;
136 	break;
137     case CDEV_CMD_KQFILTER:
138 	msg->am_kqfilter.result = devsw->old_kqfilter(
139 				msg->am_kqfilter.msg.dev,
140 				msg->am_kqfilter.kn);
141 	error = 0;
142 	break;
143     case CDEV_CMD_MMAP:
144 	msg->am_mmap.result = devsw->old_mmap(
145 		    msg->am_mmap.msg.dev,
146 		    msg->am_mmap.offset,
147 		    msg->am_mmap.nprot);
148 	error = 0;	/* XXX */
149 	break;
150     default:
151 	error = ENOSYS;
152 	break;
153     }
154     KKASSERT(error != EASYNC);
155     return(error);
156 }
157 
/*
 * Common setup helper for the dev_d*() wrappers below: initialize the
 * message header with the given command, record the target device, and
 * return the device's message port (may be NULL).
 */
static __inline
lwkt_port_t
_init_cdevmsg(dev_t dev, cdevmsg_t msg, int cmd)
{
    lwkt_initmsg_simple(&msg->msg, cmd);
    msg->dev = dev;
    return(dev->si_port);
}
166 
167 int
168 dev_dopen(dev_t dev, int oflags, int devtype, thread_t td)
169 {
170     struct cdevmsg_open	msg;
171     lwkt_port_t port;
172 
173     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_OPEN);
174     if (port == NULL)
175 	return(ENXIO);
176     msg.oflags = oflags;
177     msg.devtype = devtype;
178     msg.td = td;
179     return(lwkt_domsg(port, &msg.msg.msg));
180 }
181 
182 int
183 dev_dclose(dev_t dev, int fflag, int devtype, thread_t td)
184 {
185     struct cdevmsg_close msg;
186     lwkt_port_t port;
187 
188     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_CLOSE);
189     if (port == NULL)
190 	return(ENXIO);
191     msg.fflag = fflag;
192     msg.devtype = devtype;
193     msg.td = td;
194     return(lwkt_domsg(port, &msg.msg.msg));
195 }
196 
197 void
198 dev_dstrategy(dev_t dev, struct buf *bp)
199 {
200     struct cdevmsg_strategy msg;
201     lwkt_port_t port;
202 
203     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_STRATEGY);
204     KKASSERT(port);	/* 'nostrategy' function is NULL YYY */
205     msg.bp = bp;
206     lwkt_domsg(port, &msg.msg.msg);
207 }
208 
209 int
210 dev_dioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, thread_t td)
211 {
212     struct cdevmsg_ioctl msg;
213     lwkt_port_t port;
214 
215     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_IOCTL);
216     if (port == NULL)
217 	return(ENXIO);
218     msg.cmd = cmd;
219     msg.data = data;
220     msg.fflag = fflag;
221     msg.td = td;
222     return(lwkt_domsg(port, &msg.msg.msg));
223 }
224 
225 /*
226  * note: the disk layer is expected to set count, blkno, and secsize before
227  * forwarding the message.
228  */
229 int
230 dev_ddump(dev_t dev)
231 {
232     struct cdevmsg_dump	msg;
233     lwkt_port_t port;
234 
235     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_DUMP);
236     if (port == NULL)
237 	return(ENXIO);
238     msg.count = 0;
239     msg.blkno = 0;
240     msg.secsize = 0;
241     return(lwkt_domsg(port, &msg.msg.msg));
242 }
243 
244 int
245 dev_dpsize(dev_t dev)
246 {
247     struct cdevmsg_psize msg;
248     lwkt_port_t port;
249     int error;
250 
251     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_PSIZE);
252     if (port == NULL)
253 	return(-1);
254     error = lwkt_domsg(port, &msg.msg.msg);
255     if (error == 0)
256 	return(msg.result);
257     return(-1);
258 }
259 
260 int
261 dev_dread(dev_t dev, struct uio *uio, int ioflag)
262 {
263     struct cdevmsg_read msg;
264     lwkt_port_t port;
265     int error;
266 
267     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_READ);
268     if (port == NULL)
269 	return(ENXIO);
270     msg.uio = uio;
271     msg.ioflag = ioflag;
272     error = lwkt_domsg(port, &msg.msg.msg);
273     if (error == 0)
274 	dev->si_lastread = time_second;
275     return (error);
276 }
277 
278 int
279 dev_dwrite(dev_t dev, struct uio *uio, int ioflag)
280 {
281     struct cdevmsg_write msg;
282     lwkt_port_t port;
283 
284     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_WRITE);
285     if (port == NULL)
286 	return(ENXIO);
287     dev->si_lastwrite = time_second;
288     msg.uio = uio;
289     msg.ioflag = ioflag;
290     return(lwkt_domsg(port, &msg.msg.msg));
291 }
292 
293 int
294 dev_dpoll(dev_t dev, int events, thread_t td)
295 {
296     struct cdevmsg_poll msg;
297     lwkt_port_t port;
298     int error;
299 
300     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_POLL);
301     if (port == NULL)
302 	return(ENXIO);
303     msg.events = events;
304     msg.td = td;
305     error = lwkt_domsg(port, &msg.msg.msg);
306     if (error == 0)
307 	return(msg.events);
308     return(seltrue(dev, msg.events, td));
309 }
310 
311 int
312 dev_dkqfilter(dev_t dev, struct knote *kn)
313 {
314     struct cdevmsg_kqfilter msg;
315     lwkt_port_t port;
316     int error;
317 
318     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_KQFILTER);
319     if (port == NULL)
320 	return(ENXIO);
321     msg.kn = kn;
322     error = lwkt_domsg(port, &msg.msg.msg);
323     if (error == 0)
324 	return(msg.result);
325     return(ENODEV);
326 }
327 
328 int
329 dev_dmmap(dev_t dev, vm_offset_t offset, int nprot)
330 {
331     struct cdevmsg_mmap msg;
332     lwkt_port_t port;
333     int error;
334 
335     port = _init_cdevmsg(dev, &msg.msg, CDEV_CMD_MMAP);
336     if (port == NULL)
337 	return(-1);
338     msg.offset = offset;
339     msg.nprot = nprot;
340     error = lwkt_domsg(port, &msg.msg.msg);
341     if (error == 0)
342 	return(msg.result);
343     return(-1);
344 }
345 
/*
 * Return the driver name from the device's switch table.  Assumes
 * si_devsw is non-NULL.
 */
const char *
dev_dname(dev_t dev)
{
    return(dev->si_devsw->d_name);
}
351 
/*
 * Return the flags field from the device's switch table.
 */
int
dev_dflags(dev_t dev)
{
    return(dev->si_devsw->d_flags);
}
357 
/*
 * Return the major number from the device's switch table.
 */
int
dev_dmaj(dev_t dev)
{
    return(dev->si_devsw->d_maj);
}
363 
/*
 * Return the message port currently installed on the device (it may have
 * been replaced via cdevsw_dev_override()).
 */
lwkt_port_t
dev_dport(dev_t dev)
{
    return(dev->si_port);
}
369 
/*
 * Convert a cdevsw template into the real thing, filling in fields the
 * device left empty with appropriate defaults.
 *
 * Every NULL old-style function pointer is replaced with its no-op stub
 * so dispatch code never has to test for NULL.  A missing message port
 * defaults to a lazily-initialized compatibility port that runs the
 * old-style functions synchronously (see cdevsw_putport()).
 */
void
compile_devsw(struct cdevsw *devsw)
{
    static lwkt_port devsw_compat_port;

    /* one-time initialization of the shared compatibility port */
    if (devsw_compat_port.mp_putport == NULL)
	init_default_cdevsw_port(&devsw_compat_port);

    if (devsw->old_open == NULL)
	devsw->old_open = noopen;
    if (devsw->old_close == NULL)
	devsw->old_close = noclose;
    if (devsw->old_read == NULL)
	devsw->old_read = noread;
    if (devsw->old_write == NULL)
	devsw->old_write = nowrite;
    if (devsw->old_ioctl == NULL)
	devsw->old_ioctl = noioctl;
    if (devsw->old_poll == NULL)
	devsw->old_poll = nopoll;
    if (devsw->old_mmap == NULL)
	devsw->old_mmap = nommap;
    if (devsw->old_strategy == NULL)
	devsw->old_strategy = nostrategy;
    if (devsw->old_dump == NULL)
	devsw->old_dump = nodump;
    if (devsw->old_psize == NULL)
	devsw->old_psize = nopsize;
    if (devsw->old_kqfilter == NULL)
	devsw->old_kqfilter = nokqfilter;

    if (devsw->d_port == NULL)
	devsw->d_port = &devsw_compat_port;
    if (devsw->d_clone == NULL)
	devsw->d_clone = noclone;
}
410 
/*
 * This makes a cdevsw entry visible to userland (e.g /dev/<blah>).
 *
 * The kernel can overload a major number by making multiple cdevsw_add()
 * calls, but only the most recent one (the first one in the cdevbase[] list
 * matching the mask/match) will be visible to userland.  make_dev() does
 * not automatically call cdevsw_add() (nor do we want it to, since
 * partition-managed disk devices are overloaded on top of the raw device).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g. 'ad0'
 * to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later cdevsw_remove() call to
 * match this add.  However, the match value for the minor number should never
 * have any bits set in the major number's bit range (8-15).  The mask value
 * may be conveniently specified as -1 without creating any major number
 * interference.
 *
 * Returns 0 on success or EINVAL for an out-of-range major number.
 */
int
cdevsw_add(struct cdevsw *devsw, u_int mask, u_int match)
{
    int maj;
    struct cdevlink *link;

    compile_devsw(devsw);
    maj = devsw->d_maj;
    if (maj < 0 || maj >= NUMCDEVSW) {
	printf("%s: ERROR: driver has bogus cdevsw->d_maj = %d\n",
	    devsw->d_name, maj);
	return (EINVAL);
    }
    for (link = cdevbase[maj]; link; link = link->next) {
	/*
	 * If we get an exact match we usurp the target, but we only print
	 * a warning message if a different device switch is installed.
	 */
	if (link->mask == mask && link->match == match) {
	    if (link->devsw != devsw) {
		    printf("WARNING: \"%s\" (%p) is usurping \"%s\"'s (%p)"
			" cdevsw[]\n",
			devsw->d_name, devsw,
			link->devsw->d_name, link->devsw);
		    link->devsw = devsw;
		    ++devsw->d_refs;
	    }
	    return(0);
	}
	/*
	 * XXX add additional warnings for overlaps
	 */
    }

    /* no existing entry: prepend a new link so it shadows older overloads */
    link = malloc(sizeof(struct cdevlink), M_DEVBUF, M_INTWAIT|M_ZERO);
    link->mask = mask;
    link->match = match;
    link->devsw = devsw;
    link->next = cdevbase[maj];
    cdevbase[maj] = link;
    ++devsw->d_refs;
    return(0);
}
474 
475 /*
476  * Should only be used by udev2dev().
477  *
478  * If the minor number is -1, we match the first cdevsw we find for this
479  * major.   If the mask is not -1 then multiple minor numbers can match
480  * the same devsw.
481  *
482  * Note that this function will return NULL if the minor number is not within
483  * the bounds of the installed mask(s).
484  *
485  * The specified minor number should NOT include any major bits.
486  */
487 struct cdevsw *
488 cdevsw_get(int x, int y)
489 {
490     struct cdevlink *link;
491 
492     if (x < 0 || x >= NUMCDEVSW)
493 	return(NULL);
494     for (link = cdevbase[x]; link; link = link->next) {
495 	if (y == -1 || (link->mask & y) == link->match)
496 	    return(link->devsw);
497     }
498     return(NULL);
499 }
500 
/*
 * Use the passed cdevsw as a template to create our intercept cdevsw,
 * and install and return ours.
 *
 * Only the name, major number, and flags are copied from the backing
 * device's switch; all function pointers start out NULL (zeroed
 * allocation), so compile_devsw() fills them with the default stubs and
 * the default compatibility port before registration.
 */
struct cdevsw *
cdevsw_add_override(dev_t backing_dev, u_int mask, u_int match)
{
    struct cdevsw *devsw;
    struct cdevsw *bsw = backing_dev->si_devsw;

    devsw = malloc(sizeof(struct cdevsw), M_DEVBUF, M_INTWAIT|M_ZERO);
    devsw->d_name = bsw->d_name;
    devsw->d_maj = bsw->d_maj;
    devsw->d_flags = bsw->d_flags;
    compile_devsw(devsw);
    cdevsw_add(devsw, mask, match);

    return(devsw);
}
520 
/*
 * Override a device's port, returning the previously installed port.  This
 * is XXX very dangerous.
 *
 * All subsequent dev_d*() operations on the device are dispatched to the
 * new port; the old port is returned so it can be reinstalled later.
 */
lwkt_port_t
cdevsw_dev_override(dev_t dev, lwkt_port_t port)
{
    lwkt_port_t oport;

    oport = dev->si_port;
    dev->si_port = port;
    return(oport);
}
534 
/*
 * Remove a cdevsw entry from the cdevbase[] major array so no new user opens
 * can be performed, and destroy all devices installed in the hash table
 * which are associated with this cdevsw.  (see destroy_all_dev()).
 *
 * The mask/match must be identical to the ones passed to the matching
 * cdevsw_add().  Returns 0, or EINVAL for a bogus major number.
 */
int
cdevsw_remove(struct cdevsw *devsw, u_int mask, u_int match)
{
    int maj = devsw->d_maj;
    struct cdevlink *link;
    struct cdevlink **plink;

    if (maj < 0 || maj >= NUMCDEVSW) {
	printf("%s: ERROR: driver has bogus cdevsw->d_maj = %d\n",
	    devsw->d_name, maj);
	return EINVAL;
    }
    /* NOTE(review): dead_cdevsw is special-cased here — presumably it owns
     * no real devices to destroy; confirm against destroy_all_dev() usage. */
    if (devsw != &dead_cdevsw)
	destroy_all_dev(devsw, mask, match);
    /*
     * Scan this major's list for the exact mask/match entry.  If the entry
     * exists but belongs to a different devsw, our registration was usurped
     * (see cdevsw_add()): warn and keep scanning.  On normal loop exit
     * 'link' is NULL, meaning no entry owned by us was found.
     */
    for (plink = &cdevbase[maj]; (link = *plink) != NULL; plink = &link->next) {
	if (link->mask == mask && link->match == match) {
	    if (link->devsw == devsw)
		break;
	    printf("%s: ERROR: cannot remove from cdevsw[], its major"
		    " number %d was stolen by %s\n",
		    devsw->d_name, maj,
		    link->devsw->d_name
	    );
	}
    }
    if (link == NULL) {
	printf("%s(%d)[%08x/%08x]: WARNING: cdevsw removed multiple times!\n",
		devsw->d_name, maj, mask, match);
    } else {
	/* unlink the entry and drop the reference taken by cdevsw_add() */
	*plink = link->next;
	--devsw->d_refs; /* XXX cdevsw_release() / record refs */
	free(link, M_DEVBUF);
    }
    if (cdevbase[maj] == NULL && devsw->d_refs != 0) {
	printf("%s(%d)[%08x/%08x]: Warning: cdevsw_remove() called while "
		"%d device refs still exist!\n",
		devsw->d_name, maj, mask, match, devsw->d_refs);
    } else {
	printf("%s: cdevsw removed\n", devsw->d_name);
    }
    return 0;
}
582 
/*
 * Release a cdevsw entry.  When the ref count reaches zero, recurse
 * through the stack.
 *
 * NOTE: the zero-refs case is currently an empty placeholder (XXX).
 */
void
cdevsw_release(struct cdevsw *devsw)
{
    --devsw->d_refs;
    if (devsw->d_refs == 0) {
	/* XXX */
    }
}
595 
596