1 /*- 2 * Copyright (c) 1997, 1998 3 * Nan Yang Computer Services Limited. All rights reserved. 4 * 5 * Written by Greg Lehey 6 * 7 * This software is distributed under the so-called ``Berkeley 8 * License'': 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by Nan Yang Computer 21 * Services Limited. 22 * 4. Neither the name of the Company nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * This software is provided ``as is'', and any express or implied 27 * warranties, including, but not limited to, the implied warranties of 28 * merchantability and fitness for a particular purpose are disclaimed. 29 * In no event shall the company or contributors be liable for any 30 * direct, indirect, incidental, special, exemplary, or consequential 31 * damages (including, but not limited to, procurement of substitute 32 * goods or services; loss of use, data, or profits; or business 33 * interruption) however caused and on any theory of liability, whether 34 * in contract, strict liability, or tort (including negligence or 35 * otherwise) arising in any way out of the use of this software, even if 36 * advised of the possibility of such damage. 37 * 38 * $Id: vinum.c,v 1.33 2001/01/09 06:19:15 grog Exp grog $ 39 * $FreeBSD: src/sys/dev/vinum/vinum.c,v 1.38.2.3 2003/01/07 12:14:16 joerg Exp $ 40 */ 41 42 #define STATIC static /* nothing while we're testing XXX */ 43 44 #include "vinumhdr.h" 45 #include <sys/sysmsg.h> /* for sync(2) */ 46 #include <sys/poll.h> /* XXX: poll ops used in kq filters */ 47 #include <sys/event.h> 48 #include <sys/udev.h> 49 #ifdef VINUMDEBUG 50 #include <sys/reboot.h> 51 int debug = 0; 52 extern int total_malloced; 53 extern int malloccount; 54 extern struct mc malloced[]; 55 #endif 56 #include "request.h" 57 58 struct dev_ops vinum_ops = 59 { 60 { "vinum", 0, D_DISK }, 61 .d_open = vinumopen, 62 .d_close = vinumclose, 63 .d_read = physread, 64 .d_write = physwrite, 65 .d_ioctl = vinumioctl, 66 .d_kqfilter = vinumkqfilter, 67 .d_strategy = vinumstrategy, 68 .d_dump = vinumdump, 69 .d_psize = vinumsize, 70 }; 71 72 /* Called by main() during pseudo-device attachment. */ 73 STATIC void vinumattach(void *); 74 75 STATIC int vinum_modevent(module_t mod, modeventtype_t type, void *unused); 76 STATIC void vinum_initconf(void); 77 78 struct _vinum_conf vinum_conf; /* configuration information */ 79 cdev_t vinum_super_dev; 80 cdev_t vinum_wsuper_dev; 81 cdev_t vinum_daemon_dev; 82 83 /* 84 * Called by main() during pseudo-device attachment. All we need 85 * to do is allocate enough space for devices to be configured later, and 86 * add devsw entries. 87 */ 88 static void 89 vinumattach(void *dummy) 90 { 91 char *cp, *cp1, *cp2, **drives; 92 int i, rv; 93 struct volume *vol; 94 95 /* modload should prevent multiple loads, so this is worth a panic */ 96 if ((vinum_conf.flags & VF_LOADED) != 0) 97 panic("vinum: already loaded"); 98 99 log(LOG_INFO, "vinum: loaded\n"); 100 vinum_conf.flags |= VF_LOADED; /* we're loaded now */ 101 102 daemonq = NULL; /* initialize daemon's work queue */ 103 dqend = NULL; 104 105 #if 0 106 dev_ops_add(&vinum_ops, 0, 0); 107 #endif 108 109 vinum_initconf(); 110 111 /* 112 * Create superdev, wrongsuperdev, and controld devices. 113 */ 114 vinum_super_dev = make_dev(&vinum_ops, VINUM_SUPERDEV, 115 UID_ROOT, GID_WHEEL, 0600, 116 VINUM_SUPERDEV_BASE); 117 vinum_wsuper_dev = make_dev(&vinum_ops, VINUM_WRONGSUPERDEV, 118 UID_ROOT, GID_WHEEL, 0600, 119 VINUM_WRONGSUPERDEV_BASE); 120 vinum_daemon_dev = make_dev(&vinum_ops, VINUM_DAEMON_DEV, 121 UID_ROOT, GID_WHEEL, 0600, 122 VINUM_DAEMON_DEV_BASE); 123 124 /* 125 * See if the loader has passed us a disk to 126 * read the initial configuration from. 127 */ 128 if ((cp = kgetenv("vinum.drives")) != NULL) { 129 for (cp1 = cp, i = 0, drives = NULL; *cp1 != '\0'; i++) { 130 cp2 = cp1; 131 while (*cp1 != '\0' && *cp1 != ',' && *cp1 != ' ') 132 cp1++; 133 if (*cp1 != '\0') 134 *cp1++ = '\0'; 135 drives = krealloc(drives, (unsigned long)((i + 1) * sizeof(char *)), 136 M_TEMP, M_WAITOK); 137 drives[i] = cp2; 138 } 139 if (i == 0) 140 goto bailout; 141 rv = vinum_scandisk(drives, i); 142 if (rv) 143 log(LOG_NOTICE, "vinum_scandisk() returned %d", rv); 144 bailout: 145 kfree(drives, M_TEMP); 146 } 147 if ((cp = kgetenv("vinum.root")) != NULL) { 148 for (i = 0; i < vinum_conf.volumes_used; i++) { 149 vol = &vinum_conf.volume[i]; 150 if ((vol->state == volume_up) 151 && (strcmp (vol->name, cp) == 0) 152 ) { 153 rootdev = make_dev(&vinum_ops, i, UID_ROOT, GID_OPERATOR, 154 0640, VINUM_BASE "vinumroot"); 155 udev_dict_set_cstr(rootdev, "subsystem", "raid"); 156 udev_dict_set_cstr(rootdev, "disk-type", "raid"); 157 log(LOG_INFO, "vinum: using volume %s for root device\n", cp); 158 break; 159 } 160 } 161 } 162 } 163 164 /* 165 * Check if we have anything open. If confopen is != 0, 166 * that goes for the super device as well, otherwise 167 * only for volumes. 168 * 169 * Return 0 if not inactive, 1 if inactive. 170 */ 171 int 172 vinum_inactive(int confopen) 173 { 174 int i; 175 int can_do = 1; /* assume we can do it */ 176 177 if (confopen && (vinum_conf.flags & VF_OPEN)) /* open by vinum(8)? */ 178 return 0; /* can't do it while we're open */ 179 lock_config(); 180 for (i = 0; i < vinum_conf.volumes_allocated; i++) { 181 if ((VOL[i].state > volume_down) 182 && (VOL[i].flags & VF_OPEN)) { /* volume is open */ 183 can_do = 0; 184 break; 185 } 186 } 187 unlock_config(); 188 return can_do; 189 } 190 191 /* 192 * Free all structures. 193 * If cleardrive is 0, save the configuration; otherwise 194 * remove the configuration from the drive. 195 * 196 * Before coming here, ensure that no volumes are open. 197 */ 198 void 199 free_vinum(int cleardrive) 200 { 201 union daemoninfo di = { .nothing = 0 }; 202 int i; 203 int drives_allocated = vinum_conf.drives_allocated; 204 205 if (DRIVE != NULL) { 206 if (cleardrive) { /* remove the vinum config */ 207 for (i = 0; i < drives_allocated; i++) 208 remove_drive(i); /* remove the drive */ 209 } else { /* keep the config */ 210 for (i = 0; i < drives_allocated; i++) 211 free_drive(&DRIVE[i]); /* close files and things */ 212 } 213 Free(DRIVE); 214 } 215 while ((vinum_conf.flags & (VF_STOPPING | VF_DAEMONOPEN)) 216 == (VF_STOPPING | VF_DAEMONOPEN)) { /* at least one daemon open, we're stopping */ 217 queue_daemon_request(daemonrq_return, di); /* stop the daemon */ 218 tsleep(&vinumclose, 0, "vstop", 1); /* and wait for it */ 219 } 220 if (SD != NULL) { 221 for (i = 0; i < vinum_conf.subdisks_allocated; i++) { 222 struct sd *sd = &vinum_conf.sd[i]; 223 if (sd->sd_dev) { 224 destroy_dev(sd->sd_dev); 225 sd->sd_dev = NULL; 226 } 227 } 228 Free(SD); 229 } 230 if (PLEX != NULL) { 231 for (i = 0; i < vinum_conf.plexes_allocated; i++) { 232 struct plex *plex = &vinum_conf.plex[i]; 233 234 if (plex->plex_dev) { 235 destroy_dev(plex->plex_dev); 236 plex->plex_dev = NULL; 237 } 238 239 if (plex->state != plex_unallocated) { /* we have real data there */ 240 if (plex->sdnos) 241 Free(plex->sdnos); 242 } 243 } 244 Free(PLEX); 245 } 246 if (VOL != NULL) { 247 for (i = 0; i < vinum_conf.volumes_allocated; i++) { 248 struct volume *vol = &vinum_conf.volume[i]; 249 250 if (vol->vol_dev) { 251 destroy_dev(vol->vol_dev); 252 vol->vol_dev = NULL; 253 } 254 } 255 Free(VOL); 256 } 257 bzero(&vinum_conf, sizeof(vinum_conf)); 258 vinum_initconf(); 259 } 260 261 STATIC void 262 vinum_initconf(void) 263 { 264 vinum_conf.physbufs = nswbuf_kva / 2 + 1; 265 266 /* allocate space: drives... */ 267 DRIVE = (struct drive *) Malloc(sizeof(struct drive) * INITIAL_DRIVES); 268 CHECKALLOC(DRIVE, "vinum: no memory\n"); 269 bzero(DRIVE, sizeof(struct drive) * INITIAL_DRIVES); 270 vinum_conf.drives_allocated = INITIAL_DRIVES; 271 vinum_conf.drives_used = 0; 272 273 /* volumes, ... */ 274 VOL = (struct volume *) Malloc(sizeof(struct volume) * INITIAL_VOLUMES); 275 CHECKALLOC(VOL, "vinum: no memory\n"); 276 bzero(VOL, sizeof(struct volume) * INITIAL_VOLUMES); 277 vinum_conf.volumes_allocated = INITIAL_VOLUMES; 278 vinum_conf.volumes_used = 0; 279 280 /* plexes, ... */ 281 PLEX = (struct plex *) Malloc(sizeof(struct plex) * INITIAL_PLEXES); 282 CHECKALLOC(PLEX, "vinum: no memory\n"); 283 bzero(PLEX, sizeof(struct plex) * INITIAL_PLEXES); 284 vinum_conf.plexes_allocated = INITIAL_PLEXES; 285 vinum_conf.plexes_used = 0; 286 287 /* and subdisks */ 288 SD = (struct sd *) Malloc(sizeof(struct sd) * INITIAL_SUBDISKS); 289 CHECKALLOC(SD, "vinum: no memory\n"); 290 bzero(SD, sizeof(struct sd) * INITIAL_SUBDISKS); 291 vinum_conf.subdisks_allocated = INITIAL_SUBDISKS; 292 vinum_conf.subdisks_used = 0; 293 } 294 295 STATIC int 296 vinum_modevent(module_t mod, modeventtype_t type, void *unused) 297 { 298 switch (type) { 299 case MOD_LOAD: 300 vinumattach(NULL); 301 return 0; /* OK */ 302 case MOD_UNLOAD: 303 if (!vinum_inactive(1)) /* is anything open? */ 304 return EBUSY; /* yes, we can't do it */ 305 vinum_conf.flags |= VF_STOPPING; /* note that we want to stop */ 306 sys_sync(NULL, NULL); /* write out buffers */ 307 free_vinum(0); /* clean up */ 308 309 if (vinum_super_dev) { 310 destroy_dev(vinum_super_dev); 311 vinum_super_dev = NULL; 312 } 313 if (vinum_wsuper_dev) { 314 destroy_dev(vinum_wsuper_dev); 315 vinum_wsuper_dev = NULL; 316 } 317 if (vinum_daemon_dev) { 318 destroy_dev(vinum_daemon_dev); 319 vinum_daemon_dev = NULL; 320 } 321 322 sync_devs(); 323 #ifdef VINUMDEBUG 324 if (total_malloced) { 325 int i; 326 #ifdef INVARIANTS 327 int *poke; 328 #endif 329 330 for (i = 0; i < malloccount; i++) { 331 if (debug & DEBUG_WARNINGS) /* want to hear about them */ 332 log(LOG_WARNING, 333 "vinum: exiting with %d bytes malloced from %s:%d\n", 334 malloced[i].size, 335 malloced[i].file, 336 malloced[i].line); 337 #ifdef INVARIANTS 338 poke = &((int *) malloced[i].address) 339 [malloced[i].size / (2 * sizeof(int))]; /* middle of the area */ 340 if (*poke == 0xdeadc0de) /* already freed */ 341 log(LOG_ERR, 342 "vinum: exiting with malloc table inconsistency at %p from %s:%d\n", 343 malloced[i].address, 344 malloced[i].file, 345 malloced[i].line); 346 #endif 347 Free(malloced[i].address); 348 } 349 } 350 #endif 351 dev_ops_remove_all(&vinum_ops); 352 log(LOG_INFO, "vinum: unloaded\n"); /* tell the world */ 353 return 0; 354 default: 355 break; 356 } 357 return 0; 358 } 359 360 moduledata_t vinum_mod = 361 { 362 "vinum", 363 (modeventhand_t) vinum_modevent, 364 0 365 }; 366 DECLARE_MODULE(vinum, vinum_mod, SI_SUB_RAID, SI_ORDER_MIDDLE); 367 MODULE_VERSION(vinum, 1); 368 369 /* ARGSUSED */ 370 /* Open a vinum object */ 371 int 372 vinumopen(struct dev_open_args *ap) 373 { 374 cdev_t dev = ap->a_head.a_dev; 375 int error; 376 unsigned int index; 377 struct volume *vol; 378 struct plex *plex; 379 struct sd *sd; 380 int devminor; /* minor number */ 381 382 devminor = minor(dev); 383 error = 0; 384 /* First, decide what we're looking at */ 385 switch (DEVTYPE(dev)) { 386 case VINUM_VOLUME_TYPE: 387 index = Volno(dev); 388 if (index >= vinum_conf.volumes_allocated) 389 return ENXIO; /* no such device */ 390 vol = &VOL[index]; 391 392 switch (vol->state) { 393 case volume_unallocated: 394 case volume_uninit: 395 return ENXIO; 396 397 case volume_up: 398 vol->flags |= VF_OPEN; /* note we're open */ 399 return 0; 400 401 case volume_down: 402 return EIO; 403 404 default: 405 return EINVAL; 406 } 407 408 case VINUM_PLEX_TYPE: 409 if (Volno(dev) >= vinum_conf.volumes_allocated) 410 return ENXIO; 411 /* FALLTHROUGH */ 412 413 case VINUM_RAWPLEX_TYPE: 414 index = Plexno(dev); /* get plex index in vinum_conf */ 415 if (index >= vinum_conf.plexes_allocated) 416 return ENXIO; /* no such device */ 417 plex = &PLEX[index]; 418 419 switch (plex->state) { 420 case plex_referenced: 421 case plex_unallocated: 422 return EINVAL; 423 424 default: 425 plex->flags |= VF_OPEN; /* note we're open */ 426 return 0; 427 } 428 429 case VINUM_SD_TYPE: 430 if ((Volno(dev) >= vinum_conf.volumes_allocated) /* no such volume */ 431 ||(Plexno(dev) >= vinum_conf.plexes_allocated)) /* or no such plex */ 432 return ENXIO; /* no such device */ 433 434 /* FALLTHROUGH */ 435 436 case VINUM_RAWSD_TYPE: 437 index = Sdno(dev); /* get the subdisk number */ 438 if ((index >= vinum_conf.subdisks_allocated) /* not a valid SD entry */ 439 ||(SD[index].state < sd_init)) /* or SD is not real */ 440 return ENXIO; /* no such device */ 441 sd = &SD[index]; 442 443 /* 444 * Opening a subdisk is always a special operation, so we 445 * ignore the state as long as it represents a real subdisk 446 */ 447 switch (sd->state) { 448 case sd_unallocated: 449 case sd_uninit: 450 return EINVAL; 451 452 default: 453 sd->flags |= VF_OPEN; /* note we're open */ 454 return 0; 455 } 456 457 case VINUM_SUPERDEV_TYPE: 458 error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0); /* are we root? */ 459 if (error == 0) { /* yes, can do */ 460 if (devminor == VINUM_DAEMON_DEV) /* daemon device */ 461 vinum_conf.flags |= VF_DAEMONOPEN; /* we're open */ 462 else if (devminor == VINUM_SUPERDEV) 463 vinum_conf.flags |= VF_OPEN; /* we're open */ 464 else 465 error = ENODEV; /* nothing, maybe a debug mismatch */ 466 } 467 return error; 468 469 /* Vinum drives are disks. We already have a disk 470 * driver, so don't handle them here */ 471 case VINUM_DRIVE_TYPE: 472 default: 473 return ENODEV; /* don't know what to do with these */ 474 } 475 } 476 477 /* ARGSUSED */ 478 int 479 vinumclose(struct dev_close_args *ap) 480 { 481 cdev_t dev = ap->a_head.a_dev; 482 unsigned int index; 483 struct volume *vol; 484 int devminor; 485 486 devminor = minor(dev); 487 index = Volno(dev); 488 /* First, decide what we're looking at */ 489 switch (DEVTYPE(dev)) { 490 case VINUM_VOLUME_TYPE: 491 if (index >= vinum_conf.volumes_allocated) 492 return ENXIO; /* no such device */ 493 vol = &VOL[index]; 494 495 switch (vol->state) { 496 case volume_unallocated: 497 case volume_uninit: 498 return ENXIO; 499 500 case volume_up: 501 vol->flags &= ~VF_OPEN; /* reset our flags */ 502 return 0; 503 504 case volume_down: 505 return EIO; 506 507 default: 508 return EINVAL; 509 } 510 511 case VINUM_PLEX_TYPE: 512 if (Volno(dev) >= vinum_conf.volumes_allocated) 513 return ENXIO; 514 /* FALLTHROUGH */ 515 516 case VINUM_RAWPLEX_TYPE: 517 index = Plexno(dev); /* get plex index in vinum_conf */ 518 if (index >= vinum_conf.plexes_allocated) 519 return ENXIO; /* no such device */ 520 PLEX[index].flags &= ~VF_OPEN; /* reset our flags */ 521 return 0; 522 523 case VINUM_SD_TYPE: 524 if ((Volno(dev) >= vinum_conf.volumes_allocated) || /* no such volume */ 525 (Plexno(dev) >= vinum_conf.plexes_allocated)) /* or no such plex */ 526 return ENXIO; /* no such device */ 527 /* FALLTHROUGH */ 528 529 case VINUM_RAWSD_TYPE: 530 index = Sdno(dev); /* get the subdisk number */ 531 if (index >= vinum_conf.subdisks_allocated) 532 return ENXIO; /* no such device */ 533 SD[index].flags &= ~VF_OPEN; /* reset our flags */ 534 return 0; 535 536 case VINUM_SUPERDEV_TYPE: 537 /* 538 * don't worry about whether we're root: 539 * nobody else would get this far. 540 */ 541 if (devminor == VINUM_SUPERDEV) /* normal superdev */ 542 vinum_conf.flags &= ~VF_OPEN; /* no longer open */ 543 else if (devminor == VINUM_DAEMON_DEV) { /* the daemon device */ 544 vinum_conf.flags &= ~VF_DAEMONOPEN; /* no longer open */ 545 if (vinum_conf.flags & VF_STOPPING) /* we're stopping, */ 546 wakeup(&vinumclose); /* we can continue stopping now */ 547 } 548 return 0; 549 550 case VINUM_DRIVE_TYPE: 551 default: 552 return ENODEV; /* don't know what to do with these */ 553 } 554 } 555 556 /* size routine */ 557 int 558 vinumsize(struct dev_psize_args *ap) 559 { 560 cdev_t dev = ap->a_head.a_dev; 561 struct volume *vol; 562 563 vol = &VOL[Volno(dev)]; 564 565 if (vol->state == volume_up) { 566 ap->a_result = (int64_t)vol->size; 567 return(0); 568 } else { 569 return(ENXIO); 570 } 571 } 572 573 int 574 vinumdump(struct dev_dump_args *ap) 575 { 576 /* Not implemented. */ 577 return ENXIO; 578 } 579 580 void 581 vinumfilt_detach(struct knote *kn) {} 582 583 int 584 vinumfilt_rd(struct knote *kn, long hint) 585 { 586 cdev_t dev = (cdev_t)kn->kn_hook; 587 588 if (seltrue(dev, POLLIN | POLLRDNORM)) 589 return (1); 590 591 return (0); 592 } 593 594 int 595 vinumfilt_wr(struct knote *kn, long hint) 596 { 597 /* Writing is always OK */ 598 return (1); 599 } 600 601 struct filterops vinumfiltops_rd = 602 { FILTEROP_ISFD, NULL, vinumfilt_detach, vinumfilt_rd }; 603 struct filterops vinumfiltops_wr = 604 { FILTEROP_ISFD, NULL, vinumfilt_detach, vinumfilt_wr }; 605 606 int 607 vinumkqfilter(struct dev_kqfilter_args *ap) 608 { 609 if (ap->a_kn->kn_filter == EVFILT_READ) { 610 ap->a_kn->kn_fop = &vinumfiltops_rd; 611 ap->a_kn->kn_hook = (caddr_t)ap->a_head.a_dev; 612 ap->a_result = 0; 613 } else if (ap->a_kn->kn_filter == EVFILT_WRITE) { 614 ap->a_kn->kn_fop = &vinumfiltops_wr; 615 ap->a_result = 0; 616 } else { 617 ap->a_result = EOPNOTSUPP; 618 } 619 620 return (0); 621 } 622