1 /*- 2 * Copyright (c) 1997, 1998 3 * Nan Yang Computer Services Limited. All rights reserved. 4 * 5 * This software is distributed under the so-called ``Berkeley 6 * License'': 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by Nan Yang Computer 19 * Services Limited. 20 * 4. Neither the name of the Company nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * This software is provided ``as is'', and any express or implied 25 * warranties, including, but not limited to, the implied warranties of 26 * merchantability and fitness for a particular purpose are disclaimed. 27 * In no event shall the company or contributors be liable for any 28 * direct, indirect, incidental, special, exemplary, or consequential 29 * damages (including, but not limited to, procurement of substitute 30 * goods or services; loss of use, data, or profits; or business 31 * interruption) however caused and on any theory of liability, whether 32 * in contract, strict liability, or tort (including negligence or 33 * otherwise) arising in any way out of the use of this software, even if 34 * advised of the possibility of such damage. 35 * 36 * $Id: vinumio.c,v 1.30 2000/05/10 23:23:30 grog Exp grog $ 37 * $FreeBSD: src/sys/dev/vinum/vinumio.c,v 1.52.2.6 2002/05/02 08:43:44 grog Exp $ 38 * $DragonFly: src/sys/dev/raid/vinum/vinumio.c,v 1.5 2003/08/07 21:17:09 dillon Exp $ 39 */ 40 41 #include "vinumhdr.h" 42 #include "request.h" 43 #include <vm/vm_zone.h> 44 45 static char *sappend(char *txt, char *s); 46 static int drivecmp(const void *va, const void *vb); 47 48 /* 49 * Open the device associated with the drive, and set drive's vp. 50 * Return an error number 51 */ 52 int 53 open_drive(struct drive *drive, struct proc *p, int verbose) 54 { 55 int devmajor; /* major devs for disk device */ 56 int devminor; /* minor devs for disk device */ 57 int unit; 58 char *dname; 59 60 if (bcmp(drive->devicename, "/dev/", 5)) /* device name doesn't start with /dev */ 61 return ENOENT; /* give up */ 62 if (drive->flags & VF_OPEN) /* open already, */ 63 return EBUSY; /* don't do it again */ 64 65 /* 66 * Yes, Bruce, I know this is horrible, but we 67 * don't have a root file system when we first 68 * try to do this. If you can come up with a 69 * better solution, I'd really like it. I'm 70 * just putting it in now to add ammuntion to 71 * moving the system to devfs. 72 */ 73 dname = &drive->devicename[5]; 74 drive->dev = NULL; /* no device yet */ 75 76 /* Find the device */ 77 if (bcmp(dname, "ad", 2) == 0) /* IDE disk */ 78 devmajor = 116; 79 else if (bcmp(dname, "wd", 2) == 0) /* IDE disk */ 80 devmajor = 3; 81 else if (bcmp(dname, "da", 2) == 0) 82 devmajor = 13; 83 else if (bcmp(dname, "vn", 2) == 0) 84 devmajor = 43; 85 else if (bcmp(dname, "md", 2) == 0) 86 devmajor = 95; 87 else if (bcmp(dname, "amrd", 4) == 0) { 88 devmajor = 133; 89 dname += 2; 90 } else if (bcmp(dname, "mlxd", 4) == 0) { 91 devmajor = 131; 92 dname += 2; 93 } else if (bcmp(dname, "idad", 4) == 0) { 94 devmajor = 109; 95 dname += 2; 96 } else if (bcmp(dname, "twed", 4) == 0) { /* 3ware raid */ 97 devmajor = 147; 98 dname += 2; 99 } else 100 return ENODEV; 101 dname += 2; /* point past */ 102 103 /* 104 * Found the device. We can expect one of 105 * two formats for the rest: a unit number, 106 * then either a partition letter for the 107 * compatiblity partition (e.g. h) or a 108 * slice ID and partition (e.g. s2e). 109 * Create a minor number for each of them. 110 */ 111 unit = 0; 112 while ((*dname >= '0') /* unit number */ 113 &&(*dname <= '9')) { 114 unit = unit * 10 + *dname - '0'; 115 dname++; 116 } 117 118 if (*dname == 's') { /* slice */ 119 if (((dname[1] < '1') || (dname[1] > '4')) /* invalid slice */ 120 ||((dname[2] < 'a') || (dname[2] > 'h'))) /* or invalid partition */ 121 return ENODEV; 122 devminor = ((unit & 31) << 3) /* unit */ 123 +(dname[2] - 'a') /* partition */ 124 +((dname[1] - '0' + 1) << 16) /* slice */ 125 +((unit & ~31) << 16); /* high-order unit bits */ 126 } else { /* compatibility partition */ 127 if ((*dname < 'a') || (*dname > 'h')) /* or invalid partition */ 128 return ENODEV; 129 devminor = (*dname - 'a') /* partition */ 130 +((unit & 31) << 3) /* unit */ 131 +((unit & ~31) << 16); /* high-order unit bits */ 132 } 133 134 if ((devminor & 7) == 2) /* partition c */ 135 return ENOTTY; /* not buying that */ 136 137 drive->dev = makedev(devmajor, devminor); /* find the device */ 138 if (drive->dev == NULL) /* didn't find anything */ 139 return ENODEV; 140 141 drive->dev->si_iosize_max = DFLTPHYS; 142 if (dev_dport(drive->dev) == NULL) 143 drive->lasterror = ENOENT; 144 else 145 drive->lasterror = dev_dopen(drive->dev, FWRITE, 0, NULL); 146 147 if (drive->lasterror != 0) { /* failed */ 148 drive->state = drive_down; /* just force it down */ 149 if (verbose) 150 log(LOG_WARNING, 151 "vinum open_drive %s: failed with error %d\n", 152 drive->devicename, drive->lasterror); 153 } else 154 drive->flags |= VF_OPEN; /* we're open now */ 155 156 return drive->lasterror; 157 } 158 159 /* 160 * Set some variables in the drive struct 161 * in more convenient form. Return error indication 162 */ 163 int 164 set_drive_parms(struct drive *drive) 165 { 166 drive->blocksize = BLKDEV_IOSIZE; /* do we need this? */ 167 drive->secsperblock = drive->blocksize /* number of sectors per block */ 168 / drive->partinfo.disklab->d_secsize; 169 170 /* Now update the label part */ 171 bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN); /* put in host name */ 172 getmicrotime(&drive->label.date_of_birth); /* and current time */ 173 drive->label.drive_size = ((u_int64_t) drive->partinfo.part->p_size) /* size of the drive in bytes */ 174 *((u_int64_t) drive->partinfo.disklab->d_secsize); 175 #if VINUMDEBUG 176 if (debug & DEBUG_BIGDRIVE) /* pretend we're 100 times as big */ 177 drive->label.drive_size *= 100; 178 #endif 179 180 /* number of sectors available for subdisks */ 181 drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART; 182 183 /* 184 * Bug in 3.0 as of January 1998: you can open 185 * non-existent slices. They have a length of 0. 186 */ 187 if (drive->label.drive_size < MINVINUMSLICE) { /* too small to worry about */ 188 set_drive_state(drive->driveno, drive_down, setstate_force); 189 drive->lasterror = ENOSPC; 190 return ENOSPC; 191 } 192 drive->freelist_size = INITIAL_DRIVE_FREELIST; /* initial number of entries */ 193 drive->freelist = (struct drive_freelist *) 194 Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist)); 195 if (drive->freelist == NULL) /* can't malloc, dammit */ 196 return ENOSPC; 197 drive->freelist_entries = 1; /* just (almost) the complete drive */ 198 drive->freelist[0].offset = DATASTART; /* starts here */ 199 drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART; /* and it's this long */ 200 if (drive->label.name[0] != '\0') /* got a name */ 201 set_drive_state(drive->driveno, drive_up, setstate_force); /* our drive is accessible */ 202 else /* we know about it, but that's all */ 203 drive->state = drive_referenced; 204 return 0; 205 } 206 207 /* 208 * Initialize a drive: open the device and add device 209 * information 210 */ 211 int 212 init_drive(struct drive *drive, int verbose) 213 { 214 if (drive->devicename[0] != '/') { 215 drive->lasterror = EINVAL; 216 log(LOG_ERR, "vinum: Can't open drive without drive name\n"); 217 return EINVAL; 218 } 219 drive->lasterror = open_drive(drive, curproc, verbose); /* open the drive */ 220 if (drive->lasterror) 221 return drive->lasterror; 222 223 drive->lasterror = dev_dioctl( 224 drive->dev, 225 DIOCGPART, 226 (caddr_t) & drive->partinfo, 227 FREAD, 228 curthread); 229 if (drive->lasterror) { 230 if (verbose) 231 log(LOG_WARNING, 232 "vinum open_drive %s: Can't get partition information, drive->lasterror %d\n", 233 drive->devicename, 234 drive->lasterror); 235 close_drive(drive); 236 return drive->lasterror; 237 } 238 if (drive->partinfo.part->p_fstype != FS_VINUM) { /* not Vinum */ 239 drive->lasterror = EFTYPE; 240 if (verbose) 241 log(LOG_WARNING, 242 "vinum open_drive %s: Wrong partition type for vinum\n", 243 drive->devicename); 244 close_drive(drive); 245 return EFTYPE; 246 } 247 return set_drive_parms(drive); /* set various odds and ends */ 248 } 249 250 /* Close a drive if it's open. */ 251 void 252 close_drive(struct drive *drive) 253 { 254 LOCKDRIVE(drive); /* keep the daemon out */ 255 if (drive->flags & VF_OPEN) 256 close_locked_drive(drive); /* and close it */ 257 if (drive->state > drive_down) /* if it's up */ 258 drive->state = drive_down; /* make sure it's down */ 259 unlockdrive(drive); 260 } 261 262 /* 263 * Real drive close code, called with drive already locked. 264 * We have also checked that the drive is open. No errors. 265 */ 266 void 267 close_locked_drive(struct drive *drive) 268 { 269 /* 270 * If we can't access the drive, we can't flush 271 * the queues, which spec_close() will try to 272 * do. Get rid of them here first. 273 */ 274 drive->lasterror = dev_dclose(drive->dev, 0, 0, NULL); 275 drive->flags &= ~VF_OPEN; /* no longer open */ 276 } 277 278 /* 279 * Remove drive from the configuration. 280 * Caller must ensure that it isn't active. 281 */ 282 void 283 remove_drive(int driveno) 284 { 285 struct drive *drive = &vinum_conf.drive[driveno]; 286 struct vinum_hdr *vhdr; /* buffer for header */ 287 int error; 288 289 if (drive->state > drive_referenced) { /* real drive */ 290 if (drive->state == drive_up) { 291 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffer */ 292 CHECKALLOC(vhdr, "Can't allocate memory"); 293 error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET); 294 if (error) 295 drive->lasterror = error; 296 else { 297 vhdr->magic = VINUM_NOMAGIC; /* obliterate the magic, but leave the rest */ 298 write_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET); 299 } 300 Free(vhdr); 301 } 302 free_drive(drive); /* close it and free resources */ 303 save_config(); /* and save the updated configuration */ 304 } 305 } 306 307 /* 308 * Transfer drive data. Usually called from one of these defines; 309 * #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ) 310 * #define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE) 311 * 312 * length and offset are in bytes, but must be multiples of sector 313 * size. The function *does not check* for this condition, and 314 * truncates ruthlessly. 315 * Return error number 316 */ 317 int 318 driveio(struct drive *drive, char *buf, size_t length, off_t offset, int flag) 319 { 320 int error; 321 struct buf *bp; 322 323 error = 0; /* to keep the compiler happy */ 324 while (length) { /* divide into small enough blocks */ 325 int len = min(length, MAXBSIZE); /* maximum block device transfer is MAXBSIZE */ 326 327 bp = geteblk(len); /* get a buffer header */ 328 bp->b_flags = flag; 329 bp->b_dev = drive->dev; /* device */ 330 bp->b_blkno = offset / drive->partinfo.disklab->d_secsize; /* block number */ 331 bp->b_saveaddr = bp->b_data; 332 bp->b_data = buf; 333 bp->b_bcount = len; 334 BUF_STRATEGY(bp, 0); /* initiate the transfer */ 335 error = biowait(bp); 336 bp->b_data = bp->b_saveaddr; 337 bp->b_flags |= B_INVAL | B_AGE; 338 bp->b_flags &= ~B_ERROR; 339 brelse(bp); 340 if (error) 341 break; 342 length -= len; /* update pointers */ 343 buf += len; 344 offset += len; 345 } 346 return error; 347 } 348 349 /* 350 * Check a drive for a vinum header. If found, 351 * update the drive information. We come here 352 * with a partially populated drive structure 353 * which includes the device name. 354 * 355 * Return information on what we found. 356 * 357 * This function is called from two places: check_drive, 358 * which wants to find out whether the drive is a 359 * Vinum drive, and config_drive, which asserts that 360 * it is a vinum drive. In the first case, we don't 361 * print error messages (verbose==0), in the second 362 * we do (verbose==1). 363 */ 364 enum drive_label_info 365 read_drive_label(struct drive *drive, int verbose) 366 { 367 int error; 368 int result; /* result of our search */ 369 struct vinum_hdr *vhdr; /* and as header */ 370 371 error = init_drive(drive, 0); /* find the drive */ 372 if (error) /* find the drive */ 373 return DL_CANT_OPEN; /* not ours */ 374 375 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */ 376 CHECKALLOC(vhdr, "Can't allocate memory"); 377 378 drive->state = drive_up; /* be optimistic */ 379 error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET); 380 if (vhdr->magic == VINUM_MAGIC) { /* ours! */ 381 if (drive->label.name[0] /* we have a name for this drive */ 382 &&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */ 383 drive->lasterror = EINVAL; 384 result = DL_WRONG_DRIVE; /* it's the wrong drive */ 385 drive->state = drive_unallocated; /* put it back, it's not ours */ 386 } else 387 result = DL_OURS; 388 /* 389 * We copy the drive anyway so that we have 390 * the correct name in the drive info. This 391 * may not be the name specified 392 */ 393 drive->label = vhdr->label; /* put in the label information */ 394 } else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */ 395 result = DL_DELETED_LABEL; /* and return the info */ 396 else 397 result = DL_NOT_OURS; /* we could have it, but we don't yet */ 398 Free(vhdr); /* that's all. */ 399 return result; 400 } 401 402 /* 403 * Check a drive for a vinum header. If found, 404 * read configuration information from the drive and 405 * incorporate the data into the configuration. 406 * 407 * Return drive number. 408 */ 409 struct drive * 410 check_drive(char *devicename) 411 { 412 int driveno; 413 int i; 414 struct drive *drive; 415 416 driveno = find_drive_by_dev(devicename, 1); /* if entry doesn't exist, create it */ 417 drive = &vinum_conf.drive[driveno]; /* and get a pointer */ 418 419 if (read_drive_label(drive, 0) == DL_OURS) { /* one of ours */ 420 for (i = 0; i < vinum_conf.drives_allocated; i++) { /* see if the name already exists */ 421 if ((i != driveno) /* not this drive */ 422 &&(DRIVE[i].state != drive_unallocated) /* and it's allocated */ 423 &&(strcmp(DRIVE[i].label.name, 424 DRIVE[driveno].label.name) == 0)) { /* and it has the same name */ 425 struct drive *mydrive = &DRIVE[i]; 426 427 if (mydrive->devicename[0] == '/') { /* we know a device name for it */ 428 /* 429 * set an error, but don't take the 430 * drive down: that would cause unneeded 431 * error messages. 432 */ 433 drive->lasterror = EEXIST; 434 break; 435 } else { /* it's just a place holder, */ 436 int sdno; 437 438 for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* look at each subdisk */ 439 if ((SD[sdno].driveno == i) /* it's pointing to this one, */ 440 &&(SD[sdno].state != sd_unallocated)) { /* and it's a real subdisk */ 441 SD[sdno].driveno = drive->driveno; /* point to the one we found */ 442 update_sd_state(sdno); /* and update its state */ 443 } 444 } 445 bzero(mydrive, sizeof(struct drive)); /* don't deallocate it, just remove it */ 446 } 447 } 448 } 449 } else { 450 if (drive->lasterror == 0) 451 drive->lasterror = ENODEV; 452 close_drive(drive); 453 drive->state = drive_down; 454 } 455 return drive; 456 } 457 458 static char * 459 sappend(char *txt, char *s) 460 { 461 while ((*s++ = *txt++) != 0); 462 return s - 1; 463 } 464 465 void 466 format_config(char *config, int len) 467 { 468 int i; 469 int j; 470 char *s = config; 471 char *configend = &config[len]; 472 473 bzero(config, len); 474 475 /* First write the volume configuration */ 476 for (i = 0; i < vinum_conf.volumes_allocated; i++) { 477 struct volume *vol; 478 479 vol = &vinum_conf.volume[i]; 480 if ((vol->state > volume_uninit) 481 && (vol->name[0] != '\0')) { /* paranoia */ 482 snprintf(s, 483 configend - s, 484 "volume %s state %s", 485 vol->name, 486 volume_state(vol->state)); 487 while (*s) 488 s++; /* find the end */ 489 if (vol->preferred_plex >= 0) /* preferences, */ 490 snprintf(s, 491 configend - s, 492 " readpol prefer %s", 493 vinum_conf.plex[vol->preferred_plex].name); 494 while (*s) 495 s++; /* find the end */ 496 s = sappend("\n", s); 497 } 498 } 499 500 /* Then the plex configuration */ 501 for (i = 0; i < vinum_conf.plexes_allocated; i++) { 502 struct plex *plex; 503 504 plex = &vinum_conf.plex[i]; 505 if ((plex->state > plex_referenced) 506 && (plex->name[0] != '\0')) { /* paranoia */ 507 snprintf(s, 508 configend - s, 509 "plex name %s state %s org %s ", 510 plex->name, 511 plex_state(plex->state), 512 plex_org(plex->organization)); 513 while (*s) 514 s++; /* find the end */ 515 if (isstriped(plex)) { 516 snprintf(s, 517 configend - s, 518 "%ds ", 519 (int) plex->stripesize); 520 while (*s) 521 s++; /* find the end */ 522 } 523 if (plex->volno >= 0) /* we have a volume */ 524 snprintf(s, 525 configend - s, 526 "vol %s ", 527 vinum_conf.volume[plex->volno].name); 528 while (*s) 529 s++; /* find the end */ 530 for (j = 0; j < plex->subdisks; j++) { 531 snprintf(s, 532 configend - s, 533 " sd %s", 534 vinum_conf.sd[plex->sdnos[j]].name); 535 } 536 s = sappend("\n", s); 537 } 538 } 539 540 /* And finally the subdisk configuration */ 541 for (i = 0; i < vinum_conf.subdisks_allocated; i++) { 542 struct sd *sd; 543 char *drivename; 544 545 sd = &SD[i]; 546 if ((sd->state != sd_referenced) 547 && (sd->state != sd_unallocated) 548 && (sd->name[0] != '\0')) { /* paranoia */ 549 drivename = vinum_conf.drive[sd->driveno].label.name; 550 /* 551 * XXX We've seen cases of dead subdisks 552 * which don't have a drive. If we let them 553 * through here, the drive name is null, so 554 * they get the drive named 'plex'. 555 * 556 * This is a breakage limiter, not a fix. 557 */ 558 if (drivename[0] == '\0') 559 drivename = "*invalid*"; 560 snprintf(s, 561 configend - s, 562 "sd name %s drive %s plex %s len %llus driveoffset %llus state %s", 563 sd->name, 564 drivename, 565 vinum_conf.plex[sd->plexno].name, 566 (unsigned long long) sd->sectors, 567 (unsigned long long) sd->driveoffset, 568 sd_state(sd->state)); 569 while (*s) 570 s++; /* find the end */ 571 if (sd->plexno >= 0) 572 snprintf(s, 573 configend - s, 574 " plexoffset %llds", 575 (long long) sd->plexoffset); 576 else 577 snprintf(s, configend - s, " detached"); 578 while (*s) 579 s++; /* find the end */ 580 if (sd->flags & VF_RETRYERRORS) { 581 snprintf(s, configend - s, " retryerrors"); 582 while (*s) 583 s++; /* find the end */ 584 } 585 snprintf(s, configend - s, " \n"); 586 while (*s) 587 s++; /* find the end */ 588 } 589 } 590 if (s > &config[len - 2]) 591 panic("vinum: configuration data overflow"); 592 } 593 594 /* 595 * issue a save config request to the d�mon. The actual work 596 * is done in process context by daemon_save_config 597 */ 598 void 599 save_config(void) 600 { 601 queue_daemon_request(daemonrq_saveconfig, (union daemoninfo) NULL); 602 } 603 604 /* 605 * Write the configuration to all vinum slices. This 606 * is performed by the d�mon only 607 */ 608 void 609 daemon_save_config(void) 610 { 611 int error; 612 int written_config; /* set when we first write the config to disk */ 613 int driveno; 614 struct drive *drive; /* point to current drive info */ 615 struct vinum_hdr *vhdr; /* and as header */ 616 char *config; /* point to config data */ 617 int wlabel_on; /* to set writing label on/off */ 618 619 /* don't save the configuration while we're still working on it */ 620 if (vinum_conf.flags & VF_CONFIGURING) 621 return; 622 written_config = 0; /* no config written yet */ 623 /* Build a volume header */ 624 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* get space for the config data */ 625 CHECKALLOC(vhdr, "Can't allocate config data"); 626 vhdr->magic = VINUM_MAGIC; /* magic number */ 627 vhdr->config_length = MAXCONFIG; /* length of following config info */ 628 629 config = Malloc(MAXCONFIG); /* get space for the config data */ 630 CHECKALLOC(config, "Can't allocate config data"); 631 632 format_config(config, MAXCONFIG); 633 error = 0; /* no errors yet */ 634 for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) { 635 drive = &vinum_conf.drive[driveno]; /* point to drive */ 636 if (drive->state > drive_referenced) { 637 LOCKDRIVE(drive); /* don't let it change */ 638 639 /* 640 * First, do some drive consistency checks. Some 641 * of these are kludges, others require a process 642 * context and couldn't be done before 643 */ 644 if ((drive->devicename[0] == '\0') 645 || (drive->label.name[0] == '\0')) { 646 unlockdrive(drive); 647 free_drive(drive); /* get rid of it */ 648 break; 649 } 650 if (((drive->flags & VF_OPEN) == 0) /* drive not open */ 651 &&(drive->state > drive_down)) { /* and it thinks it's not down */ 652 unlockdrive(drive); 653 set_drive_state(driveno, drive_down, setstate_force); /* tell it what's what */ 654 continue; 655 } 656 if ((drive->state == drive_down) /* it's down */ 657 &&(drive->flags & VF_OPEN)) { /* but open, */ 658 unlockdrive(drive); 659 close_drive(drive); /* close it */ 660 } else if (drive->state > drive_down) { 661 getmicrotime(&drive->label.last_update); /* time of last update is now */ 662 bcopy((char *) &drive->label, /* and the label info from the drive structure */ 663 (char *) &vhdr->label, 664 sizeof(vhdr->label)); 665 if ((drive->state != drive_unallocated) 666 && (drive->state != drive_referenced)) { /* and it's a real drive */ 667 wlabel_on = 1; /* enable writing the label */ 668 error = dev_dioctl(drive->dev, /* make the label writeable */ 669 DIOCWLABEL, 670 (caddr_t) & wlabel_on, 671 FWRITE, 672 curthread); 673 if (error == 0) 674 error = write_drive(drive, (char *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET); 675 if (error == 0) 676 error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET); /* first config copy */ 677 if (error == 0) 678 error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET + MAXCONFIG); /* second copy */ 679 wlabel_on = 0; /* enable writing the label */ 680 if (error == 0) 681 error = dev_dioctl(drive->dev, /* make the label non-writeable again */ 682 DIOCWLABEL, 683 (caddr_t) & wlabel_on, 684 FWRITE, 685 curthread); 686 unlockdrive(drive); 687 if (error) { 688 log(LOG_ERR, 689 "vinum: Can't write config to %s, error %d\n", 690 drive->devicename, 691 error); 692 set_drive_state(drive->driveno, drive_down, setstate_force); 693 } else 694 written_config = 1; /* we've written it on at least one drive */ 695 } 696 } else /* not worth looking at, */ 697 unlockdrive(drive); /* just unlock it again */ 698 } 699 } 700 Free(vhdr); 701 Free(config); 702 } 703 704 /* 705 * Disk labels are a mess. The correct way to 706 * access them is with the DIOC[GSW]DINFO ioctls, 707 * but some programs, such as newfs, access the 708 * disk directly, so we have to write things 709 * there. We do this only on request. If a user 710 * request tries to read it directly, we fake up 711 * one on the fly. 712 */ 713 714 /* 715 * get_volume_label returns a label structure to lp, which 716 * is allocated by the caller 717 */ 718 void 719 get_volume_label(char *name, int plexes, u_int64_t size, struct disklabel *lp) 720 { 721 bzero(lp, sizeof(struct disklabel)); 722 723 strncpy(lp->d_typename, "vinum", sizeof(lp->d_typename)); 724 lp->d_type = DTYPE_VINUM; 725 strncpy(lp->d_packname, name, min(sizeof(lp->d_packname), sizeof(name))); 726 lp->d_rpm = 14400 * plexes; /* to keep them guessing */ 727 lp->d_interleave = 1; 728 lp->d_flags = 0; 729 730 /* 731 * A Vinum volume has a single track with all 732 * its sectors. 733 */ 734 lp->d_secsize = DEV_BSIZE; /* bytes per sector */ 735 lp->d_nsectors = size; /* data sectors per track */ 736 lp->d_ntracks = 1; /* tracks per cylinder */ 737 lp->d_ncylinders = 1; /* data cylinders per unit */ 738 lp->d_secpercyl = size; /* data sectors per cylinder */ 739 lp->d_secperunit = size; /* data sectors per unit */ 740 741 lp->d_bbsize = BBSIZE; 742 lp->d_sbsize = SBSIZE; 743 744 lp->d_magic = DISKMAGIC; 745 lp->d_magic2 = DISKMAGIC; 746 747 /* 748 * Set up partitions a, b and c to be identical 749 * and the size of the volume. a is UFS, b is 750 * swap, c is nothing. 751 */ 752 lp->d_partitions[0].p_size = size; 753 lp->d_partitions[0].p_fsize = 1024; 754 lp->d_partitions[0].p_fstype = FS_BSDFFS; /* FreeBSD File System :-) */ 755 lp->d_partitions[0].p_fsize = 1024; /* FS fragment size */ 756 lp->d_partitions[0].p_frag = 8; /* and fragments per block */ 757 lp->d_partitions[SWAP_PART].p_size = size; 758 lp->d_partitions[SWAP_PART].p_fstype = FS_SWAP; /* swap partition */ 759 lp->d_partitions[LABEL_PART].p_size = size; 760 lp->d_npartitions = LABEL_PART + 1; 761 strncpy(lp->d_packname, name, min(sizeof(lp->d_packname), sizeof(name))); 762 lp->d_checksum = dkcksum(lp); 763 } 764 765 /* Write a volume label. This implements the VINUM_LABEL ioctl. */ 766 int 767 write_volume_label(int volno) 768 { 769 struct disklabel *lp; 770 struct buf *bp; 771 struct disklabel *dlp; 772 struct volume *vol; 773 int error; 774 775 lp = (struct disklabel *) Malloc((sizeof(struct disklabel) + (DEV_BSIZE - 1)) & (DEV_BSIZE - 1)); 776 if (lp == 0) 777 return ENOMEM; 778 779 if ((unsigned) (volno) >= (unsigned) vinum_conf.volumes_allocated) /* invalid volume */ 780 return ENOENT; 781 782 vol = &VOL[volno]; /* volume in question */ 783 if (vol->state <= volume_uninit) /* nothing there */ 784 return ENXIO; 785 else if (vol->state < volume_up) /* not accessible */ 786 return EIO; /* I/O error */ 787 788 get_volume_label(vol->name, vol->plexes, vol->size, lp); /* get the label */ 789 790 /* 791 * Now write to disk. This code is derived from the 792 * system writedisklabel (), which does silly things 793 * like reading the label and refusing to write 794 * unless it's already there. 795 */ 796 bp = geteblk((int) lp->d_secsize); /* get a buffer */ 797 bp->b_dev = makedev(VINUM_CDEV_MAJOR, vol->volno); /* our own raw volume */ 798 bp->b_blkno = LABELSECTOR * ((int) lp->d_secsize / DEV_BSIZE); 799 bp->b_bcount = lp->d_secsize; 800 bzero(bp->b_data, lp->d_secsize); 801 dlp = (struct disklabel *) bp->b_data; 802 *dlp = *lp; 803 bp->b_flags &= ~B_INVAL; 804 bp->b_flags |= B_WRITE; 805 806 /* 807 * This should read: 808 * 809 * vinumstrategy (bp); 810 * 811 * Negotiate with phk to get it fixed. 812 */ 813 BUF_STRATEGY(bp, 0); 814 error = biowait(bp); 815 bp->b_flags |= B_INVAL | B_AGE; 816 bp->b_flags &= ~B_ERROR; 817 818 brelse(bp); 819 return error; 820 } 821 822 /* Look at all disks on the system for vinum slices */ 823 int 824 vinum_scandisk(char *devicename[], int drives) 825 { 826 struct drive *volatile drive; 827 volatile int driveno; 828 int firstdrive; /* first drive in this list */ 829 volatile int gooddrives; /* number of usable drives found */ 830 int firsttime; /* set if we have never configured before */ 831 int error; 832 char *config_text; /* read the config info from disk into here */ 833 char *volatile cptr; /* pointer into config information */ 834 char *eptr; /* end pointer into config information */ 835 char *config_line; /* copy the config line to */ 836 volatile int status; 837 int *volatile drivelist; /* list of drive indices */ 838 #define DRIVENAMELEN 64 839 #define DRIVEPARTS 35 /* max partitions per drive, excluding c */ 840 char partname[DRIVENAMELEN]; /* for creating partition names */ 841 842 status = 0; /* success indication */ 843 vinum_conf.flags |= VF_READING_CONFIG; /* reading config from disk */ 844 845 gooddrives = 0; /* number of usable drives found */ 846 firstdrive = vinum_conf.drives_used; /* the first drive */ 847 firsttime = vinum_conf.drives_used == 0; /* are we a virgin? */ 848 849 /* allocate a drive pointer list */ 850 drivelist = (int *) Malloc(drives * DRIVEPARTS * sizeof(int)); 851 CHECKALLOC(drivelist, "Can't allocate memory"); 852 853 /* Open all drives and find which was modified most recently */ 854 for (driveno = 0; driveno < drives; driveno++) { 855 char part; /* UNIX partition */ 856 int slice; 857 int founddrive; /* flag when we find a vinum drive */ 858 859 founddrive = 0; /* no vinum drive found yet on this spindle */ 860 /* first try the partition table */ 861 for (slice = 1; slice < 5; slice++) 862 for (part = 'a'; part < 'i'; part++) { 863 if (part != 'c') { /* don't do the c partition */ 864 snprintf(partname, 865 DRIVENAMELEN, 866 "%ss%d%c", 867 devicename[driveno], 868 slice, 869 part); 870 drive = check_drive(partname); /* try to open it */ 871 if ((drive->lasterror != 0) /* didn't work, */ 872 ||(drive->state != drive_up)) 873 free_drive(drive); /* get rid of it */ 874 else if (drive->flags & VF_CONFIGURED) /* already read this config, */ 875 log(LOG_WARNING, 876 "vinum: already read config from %s\n", /* say so */ 877 drive->label.name); 878 else { 879 drivelist[gooddrives] = drive->driveno; /* keep the drive index */ 880 drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */ 881 gooddrives++; 882 founddrive++; 883 } 884 } 885 } 886 if (founddrive == 0) { /* didn't find anything, */ 887 for (part = 'a'; part < 'i'; part++) /* try the compatibility partition */ 888 if (part != 'c') { /* don't do the c partition */ 889 snprintf(partname, /* /dev/sd0a */ 890 DRIVENAMELEN, 891 "%s%c", 892 devicename[driveno], 893 part); 894 drive = check_drive(partname); /* try to open it */ 895 if ((drive->lasterror != 0) /* didn't work, */ 896 ||(drive->state != drive_up)) 897 free_drive(drive); /* get rid of it */ 898 else if (drive->flags & VF_CONFIGURED) /* already read this config, */ 899 log(LOG_WARNING, 900 "vinum: already read config from %s\n", /* say so */ 901 drive->label.name); 902 else { 903 drivelist[gooddrives] = drive->driveno; /* keep the drive index */ 904 drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */ 905 gooddrives++; 906 } 907 } 908 } 909 } 910 911 if (gooddrives == 0) { 912 if (firsttime) 913 log(LOG_WARNING, "vinum: no drives found\n"); 914 else 915 log(LOG_INFO, "vinum: no additional drives found\n"); 916 return ENOENT; 917 } 918 /* 919 * We now have at least one drive 920 * open. Sort them in order of config time 921 * and merge the config info with what we 922 * have already. 923 */ 924 qsort(drivelist, gooddrives, sizeof(int), drivecmp); 925 config_text = (char *) Malloc(MAXCONFIG * 2); /* allocate buffers */ 926 CHECKALLOC(config_text, "Can't allocate memory"); 927 config_line = (char *) Malloc(MAXCONFIGLINE * 2); /* allocate buffers */ 928 CHECKALLOC(config_line, "Can't allocate memory"); 929 for (driveno = 0; driveno < gooddrives; driveno++) { /* now include the config */ 930 drive = &DRIVE[drivelist[driveno]]; /* point to the drive */ 931 932 if (firsttime && (driveno == 0)) /* we've never configured before, */ 933 log(LOG_INFO, "vinum: reading configuration from %s\n", drive->devicename); 934 else 935 log(LOG_INFO, "vinum: updating configuration from %s\n", drive->devicename); 936 937 if (drive->state == drive_up) 938 /* Read in both copies of the configuration information */ 939 error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET); 940 else { 941 error = EIO; 942 printf("vinum_scandisk: %s is %s\n", drive->devicename, drive_state(drive->state)); 943 } 944 945 if (error != 0) { 946 log(LOG_ERR, "vinum: Can't read device %s, error %d\n", drive->devicename, error); 947 free_drive(drive); /* give it back */ 948 status = error; 949 } 950 /* 951 * At this point, check that the two copies 952 * are the same, and do something useful if 953 * not. In particular, consider which is 954 * newer, and what this means for the 955 * integrity of the data on the drive. 956 */ 957 else { 958 vinum_conf.drives_used++; /* another drive in use */ 959 /* Parse the configuration, and add it to the global configuration */ 960 for (cptr = config_text; *cptr != '\0';) { /* love this style(9) */ 961 volatile int parse_status; /* return value from parse_config */ 962 963 for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */ 964 *eptr++ = *cptr++; 965 *eptr = '\0'; /* and delimit */ 966 if (setjmp(command_fail) == 0) { /* come back here on error and continue */ 967 parse_status = parse_config(config_line, &keyword_set, 1); /* parse the config line */ 968 if (parse_status < 0) { /* error in config */ 969 /* 970 * This config should have been parsed in user 971 * space. If we run into problems here, something 972 * serious is afoot. Complain and let the user 973 * snarf the config to see what's wrong. 974 */ 975 log(LOG_ERR, 976 "vinum: Config error on %s, aborting integration\n", 977 drive->devicename); 978 free_drive(drive); /* give it back */ 979 status = EINVAL; 980 } 981 } 982 while (*cptr == '\n') 983 cptr++; /* skip to next line */ 984 } 985 } 986 drive->flags |= VF_CONFIGURED; /* read this drive's configuration */ 987 } 988 989 Free(config_text); 990 Free(drivelist); 991 vinum_conf.flags &= ~VF_READING_CONFIG; /* no longer reading from disk */ 992 if (status != 0) 993 printf("vinum: couldn't read configuration"); 994 else 995 updateconfig(VF_READING_CONFIG); /* update from disk config */ 996 return status; 997 } 998 999 /* 1000 * Compare the modification dates of the drives, for qsort. 1001 * Return 1 if a < b, 0 if a == b, 01 if a > b: in other 1002 * words, sort backwards. 1003 */ 1004 int 1005 drivecmp(const void *va, const void *vb) 1006 { 1007 const struct drive *a = &DRIVE[*(const int *) va]; 1008 const struct drive *b = &DRIVE[*(const int *) vb]; 1009 1010 if ((a->label.last_update.tv_sec == b->label.last_update.tv_sec) 1011 && (a->label.last_update.tv_usec == b->label.last_update.tv_usec)) 1012 return 0; 1013 else if ((a->label.last_update.tv_sec > b->label.last_update.tv_sec) 1014 || ((a->label.last_update.tv_sec == b->label.last_update.tv_sec) 1015 && (a->label.last_update.tv_usec > b->label.last_update.tv_usec))) 1016 return -1; 1017 else 1018 return 1; 1019 } 1020 /* Local Variables: */ 1021 /* fill-column: 50 */ 1022 /* End: */ 1023