1 /*- 2 * Copyright (c) 1997, 1998 3 * Nan Yang Computer Services Limited. All rights reserved. 4 * 5 * Written by Greg Lehey 6 * 7 * This software is distributed under the so-called ``Berkeley 8 * License'': 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by Nan Yang Computer 21 * Services Limited. 22 * 4. Neither the name of the Company nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * This software is provided ``as is'', and any express or implied 27 * warranties, including, but not limited to, the implied warranties of 28 * merchantability and fitness for a particular purpose are disclaimed. 29 * In no event shall the company or contributors be liable for any 30 * direct, indirect, incidental, special, exemplary, or consequential 31 * damages (including, but not limited to, procurement of substitute 32 * goods or services; loss of use, data, or profits; or business 33 * interruption) however caused and on any theory of liability, whether 34 * in contract, strict liability, or tort (including negligence or 35 * otherwise) arising in any way out of the use of this software, even if 36 * advised of the possibility of such damage. 37 * 38 * $Id: vinum.c,v 1.33 2001/01/09 06:19:15 grog Exp grog $ 39 * $FreeBSD: src/sys/dev/vinum/vinum.c,v 1.38.2.3 2003/01/07 12:14:16 joerg Exp $ 40 */ 41 42 #define STATIC static /* nothing while we're testing XXX */ 43 44 #include "vinumhdr.h" 45 #include <sys/sysproto.h> /* for sync(2) */ 46 #include <sys/poll.h> /* XXX: poll ops used in kq filters */ 47 #include <sys/event.h> 48 #include <sys/udev.h> 49 #ifdef VINUMDEBUG 50 #include <sys/reboot.h> 51 int debug = 0; 52 extern int total_malloced; 53 extern int malloccount; 54 extern struct mc malloced[]; 55 #endif 56 #include "request.h" 57 58 struct dev_ops vinum_ops = 59 { 60 { "vinum", 0, D_DISK }, 61 .d_open = vinumopen, 62 .d_close = vinumclose, 63 .d_read = physread, 64 .d_write = physwrite, 65 .d_ioctl = vinumioctl, 66 .d_kqfilter = vinumkqfilter, 67 .d_strategy = vinumstrategy, 68 .d_dump = vinumdump, 69 .d_psize = vinumsize, 70 }; 71 72 /* Called by main() during pseudo-device attachment. */ 73 STATIC void vinumattach(void *); 74 75 STATIC int vinum_modevent(module_t mod, modeventtype_t type, void *unused); 76 STATIC void vinum_initconf(void); 77 78 struct _vinum_conf vinum_conf; /* configuration information */ 79 cdev_t vinum_super_dev; 80 cdev_t vinum_wsuper_dev; 81 cdev_t vinum_daemon_dev; 82 83 /* 84 * Called by main() during pseudo-device attachment. All we need 85 * to do is allocate enough space for devices to be configured later, and 86 * add devsw entries. 87 */ 88 static void 89 vinumattach(void *dummy) 90 { 91 char *cp, *cp1, *cp2, **drives; 92 int i, rv; 93 struct volume *vol; 94 95 /* modload should prevent multiple loads, so this is worth a panic */ 96 if ((vinum_conf.flags & VF_LOADED) != 0) 97 panic("vinum: already loaded"); 98 99 log(LOG_INFO, "vinum: loaded\n"); 100 vinum_conf.flags |= VF_LOADED; /* we're loaded now */ 101 102 daemonq = NULL; /* initialize daemon's work queue */ 103 dqend = NULL; 104 105 #if 0 106 dev_ops_add(&vinum_ops, 0, 0); 107 #endif 108 109 vinum_initconf(); 110 111 /* 112 * Create superdev, wrongsuperdev, and controld devices. 113 */ 114 vinum_super_dev = make_dev(&vinum_ops, VINUM_SUPERDEV, 115 UID_ROOT, GID_WHEEL, 0600, 116 VINUM_SUPERDEV_BASE); 117 vinum_wsuper_dev = make_dev(&vinum_ops, VINUM_WRONGSUPERDEV, 118 UID_ROOT, GID_WHEEL, 0600, 119 VINUM_WRONGSUPERDEV_BASE); 120 vinum_daemon_dev = make_dev(&vinum_ops, VINUM_DAEMON_DEV, 121 UID_ROOT, GID_WHEEL, 0600, 122 VINUM_DAEMON_DEV_BASE); 123 124 /* 125 * See if the loader has passed us a disk to 126 * read the initial configuration from. 127 */ 128 if ((cp = kgetenv("vinum.drives")) != NULL) { 129 for (cp1 = cp, i = 0, drives = NULL; *cp1 != '\0'; i++) { 130 cp2 = cp1; 131 while (*cp1 != '\0' && *cp1 != ',' && *cp1 != ' ') 132 cp1++; 133 if (*cp1 != '\0') 134 *cp1++ = '\0'; 135 drives = krealloc(drives, (unsigned long)((i + 1) * sizeof(char *)), 136 M_TEMP, M_WAITOK); 137 drives[i] = cp2; 138 } 139 if (i == 0) 140 goto bailout; 141 rv = vinum_scandisk(drives, i); 142 if (rv) 143 log(LOG_NOTICE, "vinum_scandisk() returned %d", rv); 144 bailout: 145 kfree(drives, M_TEMP); 146 } 147 if ((cp = kgetenv("vinum.root")) != NULL) { 148 for (i = 0; i < vinum_conf.volumes_used; i++) { 149 vol = &vinum_conf.volume[i]; 150 if ((vol->state == volume_up) 151 && (strcmp (vol->name, cp) == 0) 152 ) { 153 rootdev = make_dev(&vinum_ops, i, UID_ROOT, GID_OPERATOR, 154 0640, VINUM_BASE "vinumroot"); 155 udev_dict_set_cstr(rootdev, "subsystem", "raid"); 156 udev_dict_set_cstr(rootdev, "disk-type", "raid"); 157 log(LOG_INFO, "vinum: using volume %s for root device\n", cp); 158 break; 159 } 160 } 161 } 162 } 163 164 /* 165 * Check if we have anything open. If confopen is != 0, 166 * that goes for the super device as well, otherwise 167 * only for volumes. 168 * 169 * Return 0 if not inactive, 1 if inactive. 170 */ 171 int 172 vinum_inactive(int confopen) 173 { 174 int i; 175 int can_do = 1; /* assume we can do it */ 176 177 if (confopen && (vinum_conf.flags & VF_OPEN)) /* open by vinum(8)? */ 178 return 0; /* can't do it while we're open */ 179 lock_config(); 180 for (i = 0; i < vinum_conf.volumes_allocated; i++) { 181 if ((VOL[i].state > volume_down) 182 && (VOL[i].flags & VF_OPEN)) { /* volume is open */ 183 can_do = 0; 184 break; 185 } 186 } 187 unlock_config(); 188 return can_do; 189 } 190 191 /* 192 * Free all structures. 193 * If cleardrive is 0, save the configuration; otherwise 194 * remove the configuration from the drive. 195 * 196 * Before coming here, ensure that no volumes are open. 197 */ 198 void 199 free_vinum(int cleardrive) 200 { 201 union daemoninfo di = { .nothing = 0 }; 202 int i; 203 int drives_allocated = vinum_conf.drives_allocated; 204 205 if (DRIVE != NULL) { 206 if (cleardrive) { /* remove the vinum config */ 207 for (i = 0; i < drives_allocated; i++) 208 remove_drive(i); /* remove the drive */ 209 } else { /* keep the config */ 210 for (i = 0; i < drives_allocated; i++) 211 free_drive(&DRIVE[i]); /* close files and things */ 212 } 213 Free(DRIVE); 214 } 215 while ((vinum_conf.flags & (VF_STOPPING | VF_DAEMONOPEN)) 216 == (VF_STOPPING | VF_DAEMONOPEN)) { /* at least one daemon open, we're stopping */ 217 queue_daemon_request(daemonrq_return, di); /* stop the daemon */ 218 tsleep(&vinumclose, 0, "vstop", 1); /* and wait for it */ 219 } 220 if (SD != NULL) { 221 for (i = 0; i < vinum_conf.subdisks_allocated; i++) { 222 struct sd *sd = &vinum_conf.sd[i]; 223 if (sd->sd_dev) { 224 destroy_dev(sd->sd_dev); 225 sd->sd_dev = NULL; 226 } 227 } 228 Free(SD); 229 } 230 if (PLEX != NULL) { 231 for (i = 0; i < vinum_conf.plexes_allocated; i++) { 232 struct plex *plex = &vinum_conf.plex[i]; 233 234 if (plex->plex_dev) { 235 destroy_dev(plex->plex_dev); 236 plex->plex_dev = NULL; 237 } 238 239 if (plex->state != plex_unallocated) { /* we have real data there */ 240 if (plex->sdnos) 241 Free(plex->sdnos); 242 } 243 } 244 Free(PLEX); 245 } 246 if (VOL != NULL) { 247 for (i = 0; i < vinum_conf.volumes_allocated; i++) { 248 struct volume *vol = &vinum_conf.volume[i]; 249 250 if (vol->vol_dev) { 251 destroy_dev(vol->vol_dev); 252 vol->vol_dev = NULL; 253 } 254 } 255 Free(VOL); 256 } 257 bzero(&vinum_conf, sizeof(vinum_conf)); 258 vinum_initconf(); 259 } 260 261 STATIC void 262 vinum_initconf(void) 263 { 264 vinum_conf.physbufs = nswbuf_kva / 2 + 1; 265 266 /* allocate space: drives... */ 267 DRIVE = (struct drive *) Malloc(sizeof(struct drive) * INITIAL_DRIVES); 268 CHECKALLOC(DRIVE, "vinum: no memory\n"); 269 bzero(DRIVE, sizeof(struct drive) * INITIAL_DRIVES); 270 vinum_conf.drives_allocated = INITIAL_DRIVES; 271 vinum_conf.drives_used = 0; 272 273 /* volumes, ... */ 274 VOL = (struct volume *) Malloc(sizeof(struct volume) * INITIAL_VOLUMES); 275 CHECKALLOC(VOL, "vinum: no memory\n"); 276 bzero(VOL, sizeof(struct volume) * INITIAL_VOLUMES); 277 vinum_conf.volumes_allocated = INITIAL_VOLUMES; 278 vinum_conf.volumes_used = 0; 279 280 /* plexes, ... */ 281 PLEX = (struct plex *) Malloc(sizeof(struct plex) * INITIAL_PLEXES); 282 CHECKALLOC(PLEX, "vinum: no memory\n"); 283 bzero(PLEX, sizeof(struct plex) * INITIAL_PLEXES); 284 vinum_conf.plexes_allocated = INITIAL_PLEXES; 285 vinum_conf.plexes_used = 0; 286 287 /* and subdisks */ 288 SD = (struct sd *) Malloc(sizeof(struct sd) * INITIAL_SUBDISKS); 289 CHECKALLOC(SD, "vinum: no memory\n"); 290 bzero(SD, sizeof(struct sd) * INITIAL_SUBDISKS); 291 vinum_conf.subdisks_allocated = INITIAL_SUBDISKS; 292 vinum_conf.subdisks_used = 0; 293 } 294 295 STATIC int 296 vinum_modevent(module_t mod, modeventtype_t type, void *unused) 297 { 298 switch (type) { 299 case MOD_LOAD: 300 vinumattach(NULL); 301 return 0; /* OK */ 302 case MOD_UNLOAD: 303 if (!vinum_inactive(1)) /* is anything open? */ 304 return EBUSY; /* yes, we can't do it */ 305 vinum_conf.flags |= VF_STOPPING; /* note that we want to stop */ 306 sys_sync(NULL); /* write out buffers */ 307 free_vinum(0); /* clean up */ 308 309 if (vinum_super_dev) { 310 destroy_dev(vinum_super_dev); 311 vinum_super_dev = NULL; 312 } 313 if (vinum_wsuper_dev) { 314 destroy_dev(vinum_wsuper_dev); 315 vinum_wsuper_dev = NULL; 316 } 317 if (vinum_daemon_dev) { 318 destroy_dev(vinum_daemon_dev); 319 vinum_daemon_dev = NULL; 320 } 321 322 sync_devs(); 323 #ifdef VINUMDEBUG 324 if (total_malloced) { 325 int i; 326 #ifdef INVARIANTS 327 int *poke; 328 #endif 329 330 for (i = 0; i < malloccount; i++) { 331 if (debug & DEBUG_WARNINGS) /* want to hear about them */ 332 log(LOG_WARNING, 333 "vinum: exiting with %d bytes malloced from %s:%d\n", 334 malloced[i].size, 335 malloced[i].file, 336 malloced[i].line); 337 #ifdef INVARIANTS 338 poke = &((int *) malloced[i].address) 339 [malloced[i].size / (2 * sizeof(int))]; /* middle of the area */ 340 if (*poke == 0xdeadc0de) /* already freed */ 341 log(LOG_ERR, 342 "vinum: exiting with malloc table inconsistency at %p from %s:%d\n", 343 malloced[i].address, 344 malloced[i].file, 345 malloced[i].line); 346 #endif 347 Free(malloced[i].address); 348 } 349 } 350 #endif 351 dev_ops_remove_all(&vinum_ops); 352 log(LOG_INFO, "vinum: unloaded\n"); /* tell the world */ 353 return 0; 354 default: 355 break; 356 } 357 return 0; 358 } 359 360 moduledata_t vinum_mod = 361 { 362 "vinum", 363 (modeventhand_t) vinum_modevent, 364 0 365 }; 366 DECLARE_MODULE(vinum, vinum_mod, SI_SUB_RAID, SI_ORDER_MIDDLE); 367 368 /* ARGSUSED */ 369 /* Open a vinum object */ 370 int 371 vinumopen(struct dev_open_args *ap) 372 { 373 cdev_t dev = ap->a_head.a_dev; 374 int error; 375 unsigned int index; 376 struct volume *vol; 377 struct plex *plex; 378 struct sd *sd; 379 int devminor; /* minor number */ 380 381 devminor = minor(dev); 382 error = 0; 383 /* First, decide what we're looking at */ 384 switch (DEVTYPE(dev)) { 385 case VINUM_VOLUME_TYPE: 386 index = Volno(dev); 387 if (index >= vinum_conf.volumes_allocated) 388 return ENXIO; /* no such device */ 389 vol = &VOL[index]; 390 391 switch (vol->state) { 392 case volume_unallocated: 393 case volume_uninit: 394 return ENXIO; 395 396 case volume_up: 397 vol->flags |= VF_OPEN; /* note we're open */ 398 return 0; 399 400 case volume_down: 401 return EIO; 402 403 default: 404 return EINVAL; 405 } 406 407 case VINUM_PLEX_TYPE: 408 if (Volno(dev) >= vinum_conf.volumes_allocated) 409 return ENXIO; 410 /* FALLTHROUGH */ 411 412 case VINUM_RAWPLEX_TYPE: 413 index = Plexno(dev); /* get plex index in vinum_conf */ 414 if (index >= vinum_conf.plexes_allocated) 415 return ENXIO; /* no such device */ 416 plex = &PLEX[index]; 417 418 switch (plex->state) { 419 case plex_referenced: 420 case plex_unallocated: 421 return EINVAL; 422 423 default: 424 plex->flags |= VF_OPEN; /* note we're open */ 425 return 0; 426 } 427 428 case VINUM_SD_TYPE: 429 if ((Volno(dev) >= vinum_conf.volumes_allocated) /* no such volume */ 430 ||(Plexno(dev) >= vinum_conf.plexes_allocated)) /* or no such plex */ 431 return ENXIO; /* no such device */ 432 433 /* FALLTHROUGH */ 434 435 case VINUM_RAWSD_TYPE: 436 index = Sdno(dev); /* get the subdisk number */ 437 if ((index >= vinum_conf.subdisks_allocated) /* not a valid SD entry */ 438 ||(SD[index].state < sd_init)) /* or SD is not real */ 439 return ENXIO; /* no such device */ 440 sd = &SD[index]; 441 442 /* 443 * Opening a subdisk is always a special operation, so we 444 * ignore the state as long as it represents a real subdisk 445 */ 446 switch (sd->state) { 447 case sd_unallocated: 448 case sd_uninit: 449 return EINVAL; 450 451 default: 452 sd->flags |= VF_OPEN; /* note we're open */ 453 return 0; 454 } 455 456 case VINUM_SUPERDEV_TYPE: 457 error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0); /* are we root? */ 458 if (error == 0) { /* yes, can do */ 459 if (devminor == VINUM_DAEMON_DEV) /* daemon device */ 460 vinum_conf.flags |= VF_DAEMONOPEN; /* we're open */ 461 else if (devminor == VINUM_SUPERDEV) 462 vinum_conf.flags |= VF_OPEN; /* we're open */ 463 else 464 error = ENODEV; /* nothing, maybe a debug mismatch */ 465 } 466 return error; 467 468 /* Vinum drives are disks. We already have a disk 469 * driver, so don't handle them here */ 470 case VINUM_DRIVE_TYPE: 471 default: 472 return ENODEV; /* don't know what to do with these */ 473 } 474 } 475 476 /* ARGSUSED */ 477 int 478 vinumclose(struct dev_close_args *ap) 479 { 480 cdev_t dev = ap->a_head.a_dev; 481 unsigned int index; 482 struct volume *vol; 483 int devminor; 484 485 devminor = minor(dev); 486 index = Volno(dev); 487 /* First, decide what we're looking at */ 488 switch (DEVTYPE(dev)) { 489 case VINUM_VOLUME_TYPE: 490 if (index >= vinum_conf.volumes_allocated) 491 return ENXIO; /* no such device */ 492 vol = &VOL[index]; 493 494 switch (vol->state) { 495 case volume_unallocated: 496 case volume_uninit: 497 return ENXIO; 498 499 case volume_up: 500 vol->flags &= ~VF_OPEN; /* reset our flags */ 501 return 0; 502 503 case volume_down: 504 return EIO; 505 506 default: 507 return EINVAL; 508 } 509 510 case VINUM_PLEX_TYPE: 511 if (Volno(dev) >= vinum_conf.volumes_allocated) 512 return ENXIO; 513 /* FALLTHROUGH */ 514 515 case VINUM_RAWPLEX_TYPE: 516 index = Plexno(dev); /* get plex index in vinum_conf */ 517 if (index >= vinum_conf.plexes_allocated) 518 return ENXIO; /* no such device */ 519 PLEX[index].flags &= ~VF_OPEN; /* reset our flags */ 520 return 0; 521 522 case VINUM_SD_TYPE: 523 if ((Volno(dev) >= vinum_conf.volumes_allocated) || /* no such volume */ 524 (Plexno(dev) >= vinum_conf.plexes_allocated)) /* or no such plex */ 525 return ENXIO; /* no such device */ 526 /* FALLTHROUGH */ 527 528 case VINUM_RAWSD_TYPE: 529 index = Sdno(dev); /* get the subdisk number */ 530 if (index >= vinum_conf.subdisks_allocated) 531 return ENXIO; /* no such device */ 532 SD[index].flags &= ~VF_OPEN; /* reset our flags */ 533 return 0; 534 535 case VINUM_SUPERDEV_TYPE: 536 /* 537 * don't worry about whether we're root: 538 * nobody else would get this far. 539 */ 540 if (devminor == VINUM_SUPERDEV) /* normal superdev */ 541 vinum_conf.flags &= ~VF_OPEN; /* no longer open */ 542 else if (devminor == VINUM_DAEMON_DEV) { /* the daemon device */ 543 vinum_conf.flags &= ~VF_DAEMONOPEN; /* no longer open */ 544 if (vinum_conf.flags & VF_STOPPING) /* we're stopping, */ 545 wakeup(&vinumclose); /* we can continue stopping now */ 546 } 547 return 0; 548 549 case VINUM_DRIVE_TYPE: 550 default: 551 return ENODEV; /* don't know what to do with these */ 552 } 553 } 554 555 /* size routine */ 556 int 557 vinumsize(struct dev_psize_args *ap) 558 { 559 cdev_t dev = ap->a_head.a_dev; 560 struct volume *vol; 561 562 vol = &VOL[Volno(dev)]; 563 564 if (vol->state == volume_up) { 565 ap->a_result = (int64_t)vol->size; 566 return(0); 567 } else { 568 return(ENXIO); 569 } 570 } 571 572 int 573 vinumdump(struct dev_dump_args *ap) 574 { 575 /* Not implemented. */ 576 return ENXIO; 577 } 578 579 void 580 vinumfilt_detach(struct knote *kn) {} 581 582 int 583 vinumfilt_rd(struct knote *kn, long hint) 584 { 585 cdev_t dev = (cdev_t)kn->kn_hook; 586 587 if (seltrue(dev, POLLIN | POLLRDNORM)) 588 return (1); 589 590 return (0); 591 } 592 593 int 594 vinumfilt_wr(struct knote *kn, long hint) 595 { 596 /* Writing is always OK */ 597 return (1); 598 } 599 600 struct filterops vinumfiltops_rd = 601 { FILTEROP_ISFD, NULL, vinumfilt_detach, vinumfilt_rd }; 602 struct filterops vinumfiltops_wr = 603 { FILTEROP_ISFD, NULL, vinumfilt_detach, vinumfilt_wr }; 604 605 int 606 vinumkqfilter(struct dev_kqfilter_args *ap) 607 { 608 if (ap->a_kn->kn_filter == EVFILT_READ) { 609 ap->a_kn->kn_fop = &vinumfiltops_rd; 610 ap->a_kn->kn_hook = (caddr_t)ap->a_head.a_dev; 611 ap->a_result = 0; 612 } else if (ap->a_kn->kn_filter == EVFILT_WRITE) { 613 ap->a_kn->kn_fop = &vinumfiltops_wr; 614 ap->a_result = 0; 615 } else { 616 ap->a_result = EOPNOTSUPP; 617 } 618 619 return (0); 620 } 621