1 /*-
2 * Copyright (c) 1997, 1998
3 * Nan Yang Computer Services Limited. All rights reserved.
4 *
5 * Written by Greg Lehey
6 *
7 * This software is distributed under the so-called ``Berkeley
8 * License'':
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by Nan Yang Computer
21 * Services Limited.
22 * 4. Neither the name of the Company nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * This software is provided ``as is'', and any express or implied
27 * warranties, including, but not limited to, the implied warranties of
28 * merchantability and fitness for a particular purpose are disclaimed.
29 * In no event shall the company or contributors be liable for any
30 * direct, indirect, incidental, special, exemplary, or consequential
31 * damages (including, but not limited to, procurement of substitute
32 * goods or services; loss of use, data, or profits; or business
33 * interruption) however caused and on any theory of liability, whether
34 * in contract, strict liability, or tort (including negligence or
35 * otherwise) arising in any way out of the use of this software, even if
36 * advised of the possibility of such damage.
37 *
38 * $Id: vinum.c,v 1.33 2001/01/09 06:19:15 grog Exp grog $
39 * $FreeBSD: src/sys/dev/vinum/vinum.c,v 1.38.2.3 2003/01/07 12:14:16 joerg Exp $
40 */
41
42 #define STATIC static /* nothing while we're testing XXX */
43
44 #include "vinumhdr.h"
45 #include <sys/sysmsg.h> /* for sync(2) */
46 #include <sys/poll.h> /* XXX: poll ops used in kq filters */
47 #include <sys/event.h>
48 #include <sys/udev.h>
49 #ifdef VINUMDEBUG
50 #include <sys/reboot.h>
51 int debug = 0;
52 extern int total_malloced;
53 extern int malloccount;
54 extern struct mc malloced[];
55 #endif
56 #include "request.h"
57
/*
 * Device operations vector for every vinum device node.  It is handed to
 * make_dev() when nodes are created and to dev_ops_remove_all() when the
 * module is unloaded.
 *
 * Reads and writes are routed through the generic physread/physwrite
 * helpers; all other entry points are implemented by the vinum driver.
 */
struct dev_ops vinum_ops =
{
    { "vinum", 0, D_DISK },		/* header: driver name, 0, D_DISK flag */
    .d_open = vinumopen,
    .d_close = vinumclose,
    .d_read = physread,
    .d_write = physwrite,
    .d_ioctl = vinumioctl,
    .d_kqfilter = vinumkqfilter,
    .d_strategy = vinumstrategy,
    .d_dump = vinumdump,		/* not implemented, returns ENXIO */
    .d_psize = vinumsize,
};
71
/* Called from vinum_modevent() on MOD_LOAD to attach the pseudo-device. */
STATIC void vinumattach(void *);

STATIC int vinum_modevent(module_t mod, modeventtype_t type, void *unused);
STATIC void vinum_initconf(void);

struct _vinum_conf vinum_conf; /* configuration information */

/*
 * Control device nodes.  They are created in vinumattach() and destroyed
 * again when the module is unloaded.
 */
cdev_t vinum_super_dev;		/* node with minor VINUM_SUPERDEV */
cdev_t vinum_wsuper_dev;	/* node with minor VINUM_WRONGSUPERDEV */
cdev_t vinum_daemon_dev;	/* node with minor VINUM_DAEMON_DEV */
82
83 /*
84 * Called by main() during pseudo-device attachment. All we need
85 * to do is allocate enough space for devices to be configured later, and
86 * add devsw entries.
87 */
88 static void
vinumattach(void * dummy)89 vinumattach(void *dummy)
90 {
91 char *cp, *cp1, *cp2, **drives;
92 int i, rv;
93 struct volume *vol;
94
95 /* modload should prevent multiple loads, so this is worth a panic */
96 if ((vinum_conf.flags & VF_LOADED) != 0)
97 panic("vinum: already loaded");
98
99 log(LOG_INFO, "vinum: loaded\n");
100 vinum_conf.flags |= VF_LOADED; /* we're loaded now */
101
102 daemonq = NULL; /* initialize daemon's work queue */
103 dqend = NULL;
104
105 #if 0
106 dev_ops_add(&vinum_ops, 0, 0);
107 #endif
108
109 vinum_initconf();
110
111 /*
112 * Create superdev, wrongsuperdev, and controld devices.
113 */
114 vinum_super_dev = make_dev(&vinum_ops, VINUM_SUPERDEV,
115 UID_ROOT, GID_WHEEL, 0600,
116 VINUM_SUPERDEV_BASE);
117 vinum_wsuper_dev = make_dev(&vinum_ops, VINUM_WRONGSUPERDEV,
118 UID_ROOT, GID_WHEEL, 0600,
119 VINUM_WRONGSUPERDEV_BASE);
120 vinum_daemon_dev = make_dev(&vinum_ops, VINUM_DAEMON_DEV,
121 UID_ROOT, GID_WHEEL, 0600,
122 VINUM_DAEMON_DEV_BASE);
123
124 /*
125 * See if the loader has passed us a disk to
126 * read the initial configuration from.
127 */
128 if ((cp = kgetenv("vinum.drives")) != NULL) {
129 for (cp1 = cp, i = 0, drives = NULL; *cp1 != '\0'; i++) {
130 cp2 = cp1;
131 while (*cp1 != '\0' && *cp1 != ',' && *cp1 != ' ')
132 cp1++;
133 if (*cp1 != '\0')
134 *cp1++ = '\0';
135 drives = krealloc(drives, (unsigned long)((i + 1) * sizeof(char *)),
136 M_TEMP, M_WAITOK);
137 drives[i] = cp2;
138 }
139 if (i == 0)
140 goto bailout;
141 rv = vinum_scandisk(drives, i);
142 if (rv)
143 log(LOG_NOTICE, "vinum_scandisk() returned %d", rv);
144 bailout:
145 kfree(drives, M_TEMP);
146 }
147 if ((cp = kgetenv("vinum.root")) != NULL) {
148 for (i = 0; i < vinum_conf.volumes_used; i++) {
149 vol = &vinum_conf.volume[i];
150 if ((vol->state == volume_up)
151 && (strcmp (vol->name, cp) == 0)
152 ) {
153 rootdev = make_dev(&vinum_ops, i, UID_ROOT, GID_OPERATOR,
154 0640, VINUM_BASE "vinumroot");
155 udev_dict_set_cstr(rootdev, "subsystem", "raid");
156 udev_dict_set_cstr(rootdev, "disk-type", "raid");
157 log(LOG_INFO, "vinum: using volume %s for root device\n", cp);
158 break;
159 }
160 }
161 }
162 }
163
164 /*
165 * Check if we have anything open. If confopen is != 0,
166 * that goes for the super device as well, otherwise
167 * only for volumes.
168 *
169 * Return 0 if not inactive, 1 if inactive.
170 */
171 int
vinum_inactive(int confopen)172 vinum_inactive(int confopen)
173 {
174 int i;
175 int can_do = 1; /* assume we can do it */
176
177 if (confopen && (vinum_conf.flags & VF_OPEN)) /* open by vinum(8)? */
178 return 0; /* can't do it while we're open */
179 lock_config();
180 for (i = 0; i < vinum_conf.volumes_allocated; i++) {
181 if ((VOL[i].state > volume_down)
182 && (VOL[i].flags & VF_OPEN)) { /* volume is open */
183 can_do = 0;
184 break;
185 }
186 }
187 unlock_config();
188 return can_do;
189 }
190
191 /*
192 * Free all structures.
193 * If cleardrive is 0, save the configuration; otherwise
194 * remove the configuration from the drive.
195 *
196 * Before coming here, ensure that no volumes are open.
197 */
198 void
free_vinum(int cleardrive)199 free_vinum(int cleardrive)
200 {
201 union daemoninfo di = { .nothing = 0 };
202 int i;
203 int drives_allocated = vinum_conf.drives_allocated;
204
205 if (DRIVE != NULL) {
206 if (cleardrive) { /* remove the vinum config */
207 for (i = 0; i < drives_allocated; i++)
208 remove_drive(i); /* remove the drive */
209 } else { /* keep the config */
210 for (i = 0; i < drives_allocated; i++)
211 free_drive(&DRIVE[i]); /* close files and things */
212 }
213 Free(DRIVE);
214 }
215 while ((vinum_conf.flags & (VF_STOPPING | VF_DAEMONOPEN))
216 == (VF_STOPPING | VF_DAEMONOPEN)) { /* at least one daemon open, we're stopping */
217 queue_daemon_request(daemonrq_return, di); /* stop the daemon */
218 tsleep(&vinumclose, 0, "vstop", 1); /* and wait for it */
219 }
220 if (SD != NULL) {
221 for (i = 0; i < vinum_conf.subdisks_allocated; i++) {
222 struct sd *sd = &vinum_conf.sd[i];
223 if (sd->sd_dev) {
224 destroy_dev(sd->sd_dev);
225 sd->sd_dev = NULL;
226 }
227 }
228 Free(SD);
229 }
230 if (PLEX != NULL) {
231 for (i = 0; i < vinum_conf.plexes_allocated; i++) {
232 struct plex *plex = &vinum_conf.plex[i];
233
234 if (plex->plex_dev) {
235 destroy_dev(plex->plex_dev);
236 plex->plex_dev = NULL;
237 }
238
239 if (plex->state != plex_unallocated) { /* we have real data there */
240 if (plex->sdnos)
241 Free(plex->sdnos);
242 }
243 }
244 Free(PLEX);
245 }
246 if (VOL != NULL) {
247 for (i = 0; i < vinum_conf.volumes_allocated; i++) {
248 struct volume *vol = &vinum_conf.volume[i];
249
250 if (vol->vol_dev) {
251 destroy_dev(vol->vol_dev);
252 vol->vol_dev = NULL;
253 }
254 }
255 Free(VOL);
256 }
257 bzero(&vinum_conf, sizeof(vinum_conf));
258 vinum_initconf();
259 }
260
261 STATIC void
vinum_initconf(void)262 vinum_initconf(void)
263 {
264 vinum_conf.physbufs = nswbuf_kva / 2 + 1;
265
266 /* allocate space: drives... */
267 DRIVE = (struct drive *) Malloc(sizeof(struct drive) * INITIAL_DRIVES);
268 CHECKALLOC(DRIVE, "vinum: no memory\n");
269 bzero(DRIVE, sizeof(struct drive) * INITIAL_DRIVES);
270 vinum_conf.drives_allocated = INITIAL_DRIVES;
271 vinum_conf.drives_used = 0;
272
273 /* volumes, ... */
274 VOL = (struct volume *) Malloc(sizeof(struct volume) * INITIAL_VOLUMES);
275 CHECKALLOC(VOL, "vinum: no memory\n");
276 bzero(VOL, sizeof(struct volume) * INITIAL_VOLUMES);
277 vinum_conf.volumes_allocated = INITIAL_VOLUMES;
278 vinum_conf.volumes_used = 0;
279
280 /* plexes, ... */
281 PLEX = (struct plex *) Malloc(sizeof(struct plex) * INITIAL_PLEXES);
282 CHECKALLOC(PLEX, "vinum: no memory\n");
283 bzero(PLEX, sizeof(struct plex) * INITIAL_PLEXES);
284 vinum_conf.plexes_allocated = INITIAL_PLEXES;
285 vinum_conf.plexes_used = 0;
286
287 /* and subdisks */
288 SD = (struct sd *) Malloc(sizeof(struct sd) * INITIAL_SUBDISKS);
289 CHECKALLOC(SD, "vinum: no memory\n");
290 bzero(SD, sizeof(struct sd) * INITIAL_SUBDISKS);
291 vinum_conf.subdisks_allocated = INITIAL_SUBDISKS;
292 vinum_conf.subdisks_used = 0;
293 }
294
295 STATIC int
vinum_modevent(module_t mod,modeventtype_t type,void * unused)296 vinum_modevent(module_t mod, modeventtype_t type, void *unused)
297 {
298 switch (type) {
299 case MOD_LOAD:
300 vinumattach(NULL);
301 return 0; /* OK */
302 case MOD_UNLOAD:
303 if (!vinum_inactive(1)) /* is anything open? */
304 return EBUSY; /* yes, we can't do it */
305 vinum_conf.flags |= VF_STOPPING; /* note that we want to stop */
306 sys_sync(NULL, NULL); /* write out buffers */
307 free_vinum(0); /* clean up */
308
309 if (vinum_super_dev) {
310 destroy_dev(vinum_super_dev);
311 vinum_super_dev = NULL;
312 }
313 if (vinum_wsuper_dev) {
314 destroy_dev(vinum_wsuper_dev);
315 vinum_wsuper_dev = NULL;
316 }
317 if (vinum_daemon_dev) {
318 destroy_dev(vinum_daemon_dev);
319 vinum_daemon_dev = NULL;
320 }
321
322 sync_devs();
323 #ifdef VINUMDEBUG
324 if (total_malloced) {
325 int i;
326 #ifdef INVARIANTS
327 int *poke;
328 #endif
329
330 for (i = 0; i < malloccount; i++) {
331 if (debug & DEBUG_WARNINGS) /* want to hear about them */
332 log(LOG_WARNING,
333 "vinum: exiting with %d bytes malloced from %s:%d\n",
334 malloced[i].size,
335 malloced[i].file,
336 malloced[i].line);
337 #ifdef INVARIANTS
338 poke = &((int *) malloced[i].address)
339 [malloced[i].size / (2 * sizeof(int))]; /* middle of the area */
340 if (*poke == 0xdeadc0de) /* already freed */
341 log(LOG_ERR,
342 "vinum: exiting with malloc table inconsistency at %p from %s:%d\n",
343 malloced[i].address,
344 malloced[i].file,
345 malloced[i].line);
346 #endif
347 Free(malloced[i].address);
348 }
349 }
350 #endif
351 dev_ops_remove_all(&vinum_ops);
352 log(LOG_INFO, "vinum: unloaded\n"); /* tell the world */
353 return 0;
354 default:
355 break;
356 }
357 return 0;
358 }
359
/*
 * Module registration: the module is named "vinum", its events are
 * delivered to vinum_modevent(), and no extra argument is passed.
 */
moduledata_t vinum_mod =
{
    "vinum",
    (modeventhand_t) vinum_modevent,
    0
};
DECLARE_MODULE(vinum, vinum_mod, SI_SUB_RAID, SI_ORDER_MIDDLE);
MODULE_VERSION(vinum, 1);
368
369 /* ARGSUSED */
370 /* Open a vinum object */
371 int
vinumopen(struct dev_open_args * ap)372 vinumopen(struct dev_open_args *ap)
373 {
374 cdev_t dev = ap->a_head.a_dev;
375 int error;
376 unsigned int index;
377 struct volume *vol;
378 struct plex *plex;
379 struct sd *sd;
380 int devminor; /* minor number */
381
382 devminor = minor(dev);
383 error = 0;
384 /* First, decide what we're looking at */
385 switch (DEVTYPE(dev)) {
386 case VINUM_VOLUME_TYPE:
387 index = Volno(dev);
388 if (index >= vinum_conf.volumes_allocated)
389 return ENXIO; /* no such device */
390 vol = &VOL[index];
391
392 switch (vol->state) {
393 case volume_unallocated:
394 case volume_uninit:
395 return ENXIO;
396
397 case volume_up:
398 vol->flags |= VF_OPEN; /* note we're open */
399 return 0;
400
401 case volume_down:
402 return EIO;
403
404 default:
405 return EINVAL;
406 }
407
408 case VINUM_PLEX_TYPE:
409 if (Volno(dev) >= vinum_conf.volumes_allocated)
410 return ENXIO;
411 /* FALLTHROUGH */
412
413 case VINUM_RAWPLEX_TYPE:
414 index = Plexno(dev); /* get plex index in vinum_conf */
415 if (index >= vinum_conf.plexes_allocated)
416 return ENXIO; /* no such device */
417 plex = &PLEX[index];
418
419 switch (plex->state) {
420 case plex_referenced:
421 case plex_unallocated:
422 return EINVAL;
423
424 default:
425 plex->flags |= VF_OPEN; /* note we're open */
426 return 0;
427 }
428
429 case VINUM_SD_TYPE:
430 if ((Volno(dev) >= vinum_conf.volumes_allocated) /* no such volume */
431 ||(Plexno(dev) >= vinum_conf.plexes_allocated)) /* or no such plex */
432 return ENXIO; /* no such device */
433
434 /* FALLTHROUGH */
435
436 case VINUM_RAWSD_TYPE:
437 index = Sdno(dev); /* get the subdisk number */
438 if ((index >= vinum_conf.subdisks_allocated) /* not a valid SD entry */
439 ||(SD[index].state < sd_init)) /* or SD is not real */
440 return ENXIO; /* no such device */
441 sd = &SD[index];
442
443 /*
444 * Opening a subdisk is always a special operation, so we
445 * ignore the state as long as it represents a real subdisk
446 */
447 switch (sd->state) {
448 case sd_unallocated:
449 case sd_uninit:
450 return EINVAL;
451
452 default:
453 sd->flags |= VF_OPEN; /* note we're open */
454 return 0;
455 }
456
457 case VINUM_SUPERDEV_TYPE:
458 /* are we root? */
459 error = caps_priv_check(ap->a_cred, SYSCAP_RESTRICTEDROOT);
460 if (error == 0) { /* yes, can do */
461 if (devminor == VINUM_DAEMON_DEV) /* daemon device */
462 vinum_conf.flags |= VF_DAEMONOPEN; /* we're open */
463 else if (devminor == VINUM_SUPERDEV)
464 vinum_conf.flags |= VF_OPEN; /* we're open */
465 else
466 error = ENODEV; /* nothing, maybe a debug mismatch */
467 }
468 return error;
469
470 /* Vinum drives are disks. We already have a disk
471 * driver, so don't handle them here */
472 case VINUM_DRIVE_TYPE:
473 default:
474 return ENODEV; /* don't know what to do with these */
475 }
476 }
477
478 /* ARGSUSED */
479 int
vinumclose(struct dev_close_args * ap)480 vinumclose(struct dev_close_args *ap)
481 {
482 cdev_t dev = ap->a_head.a_dev;
483 unsigned int index;
484 struct volume *vol;
485 int devminor;
486
487 devminor = minor(dev);
488 index = Volno(dev);
489 /* First, decide what we're looking at */
490 switch (DEVTYPE(dev)) {
491 case VINUM_VOLUME_TYPE:
492 if (index >= vinum_conf.volumes_allocated)
493 return ENXIO; /* no such device */
494 vol = &VOL[index];
495
496 switch (vol->state) {
497 case volume_unallocated:
498 case volume_uninit:
499 return ENXIO;
500
501 case volume_up:
502 vol->flags &= ~VF_OPEN; /* reset our flags */
503 return 0;
504
505 case volume_down:
506 return EIO;
507
508 default:
509 return EINVAL;
510 }
511
512 case VINUM_PLEX_TYPE:
513 if (Volno(dev) >= vinum_conf.volumes_allocated)
514 return ENXIO;
515 /* FALLTHROUGH */
516
517 case VINUM_RAWPLEX_TYPE:
518 index = Plexno(dev); /* get plex index in vinum_conf */
519 if (index >= vinum_conf.plexes_allocated)
520 return ENXIO; /* no such device */
521 PLEX[index].flags &= ~VF_OPEN; /* reset our flags */
522 return 0;
523
524 case VINUM_SD_TYPE:
525 if ((Volno(dev) >= vinum_conf.volumes_allocated) || /* no such volume */
526 (Plexno(dev) >= vinum_conf.plexes_allocated)) /* or no such plex */
527 return ENXIO; /* no such device */
528 /* FALLTHROUGH */
529
530 case VINUM_RAWSD_TYPE:
531 index = Sdno(dev); /* get the subdisk number */
532 if (index >= vinum_conf.subdisks_allocated)
533 return ENXIO; /* no such device */
534 SD[index].flags &= ~VF_OPEN; /* reset our flags */
535 return 0;
536
537 case VINUM_SUPERDEV_TYPE:
538 /*
539 * don't worry about whether we're root:
540 * nobody else would get this far.
541 */
542 if (devminor == VINUM_SUPERDEV) /* normal superdev */
543 vinum_conf.flags &= ~VF_OPEN; /* no longer open */
544 else if (devminor == VINUM_DAEMON_DEV) { /* the daemon device */
545 vinum_conf.flags &= ~VF_DAEMONOPEN; /* no longer open */
546 if (vinum_conf.flags & VF_STOPPING) /* we're stopping, */
547 wakeup(&vinumclose); /* we can continue stopping now */
548 }
549 return 0;
550
551 case VINUM_DRIVE_TYPE:
552 default:
553 return ENODEV; /* don't know what to do with these */
554 }
555 }
556
557 /* size routine */
558 int
vinumsize(struct dev_psize_args * ap)559 vinumsize(struct dev_psize_args *ap)
560 {
561 cdev_t dev = ap->a_head.a_dev;
562 struct volume *vol;
563
564 vol = &VOL[Volno(dev)];
565
566 if (vol->state == volume_up) {
567 ap->a_result = (int64_t)vol->size;
568 return(0);
569 } else {
570 return(ENXIO);
571 }
572 }
573
/*
 * Crash dump entry point (d_dump).  Dumping to a vinum device is not
 * implemented, so every request is refused with ENXIO.
 */
int
vinumdump(struct dev_dump_args *ap)
{
    return (ENXIO);
}
580
/*
 * kqueue detach hook shared by the read and write filters.
 * vinumkqfilter() attaches no per-knote state, so nothing is undone here.
 */
void
vinumfilt_detach(struct knote *kn)
{
}
583
584 int
vinumfilt_rd(struct knote * kn,long hint)585 vinumfilt_rd(struct knote *kn, long hint)
586 {
587 cdev_t dev = (cdev_t)kn->kn_hook;
588
589 if (seltrue(dev, POLLIN | POLLRDNORM))
590 return (1);
591
592 return (0);
593 }
594
/*
 * kqueue write filter.  Writing is always considered possible, so the
 * event is reported as ready unconditionally.  Neither kn nor hint is
 * looked at.
 */
int
vinumfilt_wr(struct knote *kn, long hint)
{
    return (1);
}
601
/*
 * Filter operation tables installed by vinumkqfilter() for EVFILT_READ
 * and EVFILT_WRITE.  Both carry the FILTEROP_ISFD flag, leave the second
 * slot NULL (presumably the attach hook -- confirm against struct
 * filterops) and share vinumfilt_detach(); only the event routine
 * differs.
 */
struct filterops vinumfiltops_rd =
{ FILTEROP_ISFD, NULL, vinumfilt_detach, vinumfilt_rd };
struct filterops vinumfiltops_wr =
{ FILTEROP_ISFD, NULL, vinumfilt_detach, vinumfilt_wr };
606
607 int
vinumkqfilter(struct dev_kqfilter_args * ap)608 vinumkqfilter(struct dev_kqfilter_args *ap)
609 {
610 if (ap->a_kn->kn_filter == EVFILT_READ) {
611 ap->a_kn->kn_fop = &vinumfiltops_rd;
612 ap->a_kn->kn_hook = (caddr_t)ap->a_head.a_dev;
613 ap->a_result = 0;
614 } else if (ap->a_kn->kn_filter == EVFILT_WRITE) {
615 ap->a_kn->kn_fop = &vinumfiltops_wr;
616 ap->a_result = 0;
617 } else {
618 ap->a_result = EOPNOTSUPP;
619 }
620
621 return (0);
622 }
623