1 /*- 2 * Copyright (c) 1997, 1998 3 * Nan Yang Computer Services Limited. All rights reserved. 4 * 5 * This software is distributed under the so-called ``Berkeley 6 * License'': 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by Nan Yang Computer 19 * Services Limited. 20 * 4. Neither the name of the Company nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * This software is provided ``as is'', and any express or implied 25 * warranties, including, but not limited to, the implied warranties of 26 * merchantability and fitness for a particular purpose are disclaimed. 27 * In no event shall the company or contributors be liable for any 28 * direct, indirect, incidental, special, exemplary, or consequential 29 * damages (including, but not limited to, procurement of substitute 30 * goods or services; loss of use, data, or profits; or business 31 * interruption) however caused and on any theory of liability, whether 32 * in contract, strict liability, or tort (including negligence or 33 * otherwise) arising in any way out of the use of this software, even if 34 * advised of the possibility of such damage. 35 * 36 * $Id: vinumio.c,v 1.30 2000/05/10 23:23:30 grog Exp grog $ 37 * $FreeBSD: src/sys/dev/vinum/vinumio.c,v 1.52.2.6 2002/05/02 08:43:44 grog Exp $ 38 * $DragonFly: src/sys/dev/raid/vinum/vinumio.c,v 1.2 2003/06/17 04:28:33 dillon Exp $ 39 */ 40 41 #include <dev/vinum/vinumhdr.h> 42 #include <dev/vinum/request.h> 43 #include <vm/vm_zone.h> 44 45 static char *sappend(char *txt, char *s); 46 static int drivecmp(const void *va, const void *vb); 47 48 /* 49 * Open the device associated with the drive, and set drive's vp. 50 * Return an error number 51 */ 52 int 53 open_drive(struct drive *drive, struct proc *p, int verbose) 54 { 55 int devmajor; /* major devs for disk device */ 56 int devminor; /* minor devs for disk device */ 57 int unit; 58 char *dname; 59 struct cdevsw *dsw; /* pointer to cdevsw entry */ 60 61 if (bcmp(drive->devicename, "/dev/", 5)) /* device name doesn't start with /dev */ 62 return ENOENT; /* give up */ 63 if (drive->flags & VF_OPEN) /* open already, */ 64 return EBUSY; /* don't do it again */ 65 66 /* 67 * Yes, Bruce, I know this is horrible, but we 68 * don't have a root file system when we first 69 * try to do this. If you can come up with a 70 * better solution, I'd really like it. I'm 71 * just putting it in now to add ammuntion to 72 * moving the system to devfs. 73 */ 74 dname = &drive->devicename[5]; 75 drive->dev = NULL; /* no device yet */ 76 77 /* Find the device */ 78 if (bcmp(dname, "ad", 2) == 0) /* IDE disk */ 79 devmajor = 116; 80 else if (bcmp(dname, "wd", 2) == 0) /* IDE disk */ 81 devmajor = 3; 82 else if (bcmp(dname, "da", 2) == 0) 83 devmajor = 13; 84 else if (bcmp(dname, "vn", 2) == 0) 85 devmajor = 43; 86 else if (bcmp(dname, "md", 2) == 0) 87 devmajor = 95; 88 else if (bcmp(dname, "amrd", 4) == 0) { 89 devmajor = 133; 90 dname += 2; 91 } else if (bcmp(dname, "mlxd", 4) == 0) { 92 devmajor = 131; 93 dname += 2; 94 } else if (bcmp(dname, "idad", 4) == 0) { 95 devmajor = 109; 96 dname += 2; 97 } else if (bcmp(dname, "twed", 4) == 0) { /* 3ware raid */ 98 devmajor = 147; 99 dname += 2; 100 } else 101 return ENODEV; 102 dname += 2; /* point past */ 103 104 /* 105 * Found the device. We can expect one of 106 * two formats for the rest: a unit number, 107 * then either a partition letter for the 108 * compatiblity partition (e.g. h) or a 109 * slice ID and partition (e.g. s2e). 110 * Create a minor number for each of them. 111 */ 112 unit = 0; 113 while ((*dname >= '0') /* unit number */ 114 &&(*dname <= '9')) { 115 unit = unit * 10 + *dname - '0'; 116 dname++; 117 } 118 119 if (*dname == 's') { /* slice */ 120 if (((dname[1] < '1') || (dname[1] > '4')) /* invalid slice */ 121 ||((dname[2] < 'a') || (dname[2] > 'h'))) /* or invalid partition */ 122 return ENODEV; 123 devminor = ((unit & 31) << 3) /* unit */ 124 +(dname[2] - 'a') /* partition */ 125 +((dname[1] - '0' + 1) << 16) /* slice */ 126 +((unit & ~31) << 16); /* high-order unit bits */ 127 } else { /* compatibility partition */ 128 if ((*dname < 'a') || (*dname > 'h')) /* or invalid partition */ 129 return ENODEV; 130 devminor = (*dname - 'a') /* partition */ 131 +((unit & 31) << 3) /* unit */ 132 +((unit & ~31) << 16); /* high-order unit bits */ 133 } 134 135 if ((devminor & 7) == 2) /* partition c */ 136 return ENOTTY; /* not buying that */ 137 138 drive->dev = makedev(devmajor, devminor); /* find the device */ 139 if (drive->dev == NULL) /* didn't find anything */ 140 return ENODEV; 141 142 drive->dev->si_iosize_max = DFLTPHYS; 143 dsw = devsw(drive->dev); 144 if (dsw == NULL) 145 drive->lasterror = ENOENT; 146 else 147 drive->lasterror = (dsw->d_open) (drive->dev, FWRITE, 0, NULL); 148 149 if (drive->lasterror != 0) { /* failed */ 150 drive->state = drive_down; /* just force it down */ 151 if (verbose) 152 log(LOG_WARNING, 153 "vinum open_drive %s: failed with error %d\n", 154 drive->devicename, drive->lasterror); 155 } else 156 drive->flags |= VF_OPEN; /* we're open now */ 157 158 return drive->lasterror; 159 } 160 161 /* 162 * Set some variables in the drive struct 163 * in more convenient form. Return error indication 164 */ 165 int 166 set_drive_parms(struct drive *drive) 167 { 168 drive->blocksize = BLKDEV_IOSIZE; /* do we need this? */ 169 drive->secsperblock = drive->blocksize /* number of sectors per block */ 170 / drive->partinfo.disklab->d_secsize; 171 172 /* Now update the label part */ 173 bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN); /* put in host name */ 174 getmicrotime(&drive->label.date_of_birth); /* and current time */ 175 drive->label.drive_size = ((u_int64_t) drive->partinfo.part->p_size) /* size of the drive in bytes */ 176 *((u_int64_t) drive->partinfo.disklab->d_secsize); 177 #if VINUMDEBUG 178 if (debug & DEBUG_BIGDRIVE) /* pretend we're 100 times as big */ 179 drive->label.drive_size *= 100; 180 #endif 181 182 /* number of sectors available for subdisks */ 183 drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART; 184 185 /* 186 * Bug in 3.0 as of January 1998: you can open 187 * non-existent slices. They have a length of 0. 188 */ 189 if (drive->label.drive_size < MINVINUMSLICE) { /* too small to worry about */ 190 set_drive_state(drive->driveno, drive_down, setstate_force); 191 drive->lasterror = ENOSPC; 192 return ENOSPC; 193 } 194 drive->freelist_size = INITIAL_DRIVE_FREELIST; /* initial number of entries */ 195 drive->freelist = (struct drive_freelist *) 196 Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist)); 197 if (drive->freelist == NULL) /* can't malloc, dammit */ 198 return ENOSPC; 199 drive->freelist_entries = 1; /* just (almost) the complete drive */ 200 drive->freelist[0].offset = DATASTART; /* starts here */ 201 drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART; /* and it's this long */ 202 if (drive->label.name[0] != '\0') /* got a name */ 203 set_drive_state(drive->driveno, drive_up, setstate_force); /* our drive is accessible */ 204 else /* we know about it, but that's all */ 205 drive->state = drive_referenced; 206 return 0; 207 } 208 209 /* 210 * Initialize a drive: open the device and add device 211 * information 212 */ 213 int 214 init_drive(struct drive *drive, int verbose) 215 { 216 if (drive->devicename[0] != '/') { 217 drive->lasterror = EINVAL; 218 log(LOG_ERR, "vinum: Can't open drive without drive name\n"); 219 return EINVAL; 220 } 221 drive->lasterror = open_drive(drive, curproc, verbose); /* open the drive */ 222 if (drive->lasterror) 223 return drive->lasterror; 224 225 drive->lasterror = (*devsw(drive->dev)->d_ioctl) (drive->dev, 226 DIOCGPART, 227 (caddr_t) & drive->partinfo, 228 FREAD, 229 curproc); 230 if (drive->lasterror) { 231 if (verbose) 232 log(LOG_WARNING, 233 "vinum open_drive %s: Can't get partition information, drive->lasterror %d\n", 234 drive->devicename, 235 drive->lasterror); 236 close_drive(drive); 237 return drive->lasterror; 238 } 239 if (drive->partinfo.part->p_fstype != FS_VINUM) { /* not Vinum */ 240 drive->lasterror = EFTYPE; 241 if (verbose) 242 log(LOG_WARNING, 243 "vinum open_drive %s: Wrong partition type for vinum\n", 244 drive->devicename); 245 close_drive(drive); 246 return EFTYPE; 247 } 248 return set_drive_parms(drive); /* set various odds and ends */ 249 } 250 251 /* Close a drive if it's open. */ 252 void 253 close_drive(struct drive *drive) 254 { 255 LOCKDRIVE(drive); /* keep the daemon out */ 256 if (drive->flags & VF_OPEN) 257 close_locked_drive(drive); /* and close it */ 258 if (drive->state > drive_down) /* if it's up */ 259 drive->state = drive_down; /* make sure it's down */ 260 unlockdrive(drive); 261 } 262 263 /* 264 * Real drive close code, called with drive already locked. 265 * We have also checked that the drive is open. No errors. 266 */ 267 void 268 close_locked_drive(struct drive *drive) 269 { 270 /* 271 * If we can't access the drive, we can't flush 272 * the queues, which spec_close() will try to 273 * do. Get rid of them here first. 274 */ 275 drive->lasterror = (*devsw(drive->dev)->d_close) (drive->dev, 0, 0, NULL); 276 drive->flags &= ~VF_OPEN; /* no longer open */ 277 } 278 279 /* 280 * Remove drive from the configuration. 281 * Caller must ensure that it isn't active. 282 */ 283 void 284 remove_drive(int driveno) 285 { 286 struct drive *drive = &vinum_conf.drive[driveno]; 287 struct vinum_hdr *vhdr; /* buffer for header */ 288 int error; 289 290 if (drive->state > drive_referenced) { /* real drive */ 291 if (drive->state == drive_up) { 292 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffer */ 293 CHECKALLOC(vhdr, "Can't allocate memory"); 294 error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET); 295 if (error) 296 drive->lasterror = error; 297 else { 298 vhdr->magic = VINUM_NOMAGIC; /* obliterate the magic, but leave the rest */ 299 write_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET); 300 } 301 Free(vhdr); 302 } 303 free_drive(drive); /* close it and free resources */ 304 save_config(); /* and save the updated configuration */ 305 } 306 } 307 308 /* 309 * Transfer drive data. Usually called from one of these defines; 310 * #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ) 311 * #define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE) 312 * 313 * length and offset are in bytes, but must be multiples of sector 314 * size. The function *does not check* for this condition, and 315 * truncates ruthlessly. 316 * Return error number 317 */ 318 int 319 driveio(struct drive *drive, char *buf, size_t length, off_t offset, int flag) 320 { 321 int error; 322 struct buf *bp; 323 324 error = 0; /* to keep the compiler happy */ 325 while (length) { /* divide into small enough blocks */ 326 int len = min(length, MAXBSIZE); /* maximum block device transfer is MAXBSIZE */ 327 328 bp = geteblk(len); /* get a buffer header */ 329 bp->b_flags = flag; 330 bp->b_dev = drive->dev; /* device */ 331 bp->b_blkno = offset / drive->partinfo.disklab->d_secsize; /* block number */ 332 bp->b_saveaddr = bp->b_data; 333 bp->b_data = buf; 334 bp->b_bcount = len; 335 BUF_STRATEGY(bp, 0); /* initiate the transfer */ 336 error = biowait(bp); 337 bp->b_data = bp->b_saveaddr; 338 bp->b_flags |= B_INVAL | B_AGE; 339 bp->b_flags &= ~B_ERROR; 340 brelse(bp); 341 if (error) 342 break; 343 length -= len; /* update pointers */ 344 buf += len; 345 offset += len; 346 } 347 return error; 348 } 349 350 /* 351 * Check a drive for a vinum header. If found, 352 * update the drive information. We come here 353 * with a partially populated drive structure 354 * which includes the device name. 355 * 356 * Return information on what we found. 357 * 358 * This function is called from two places: check_drive, 359 * which wants to find out whether the drive is a 360 * Vinum drive, and config_drive, which asserts that 361 * it is a vinum drive. In the first case, we don't 362 * print error messages (verbose==0), in the second 363 * we do (verbose==1). 364 */ 365 enum drive_label_info 366 read_drive_label(struct drive *drive, int verbose) 367 { 368 int error; 369 int result; /* result of our search */ 370 struct vinum_hdr *vhdr; /* and as header */ 371 372 error = init_drive(drive, 0); /* find the drive */ 373 if (error) /* find the drive */ 374 return DL_CANT_OPEN; /* not ours */ 375 376 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */ 377 CHECKALLOC(vhdr, "Can't allocate memory"); 378 379 drive->state = drive_up; /* be optimistic */ 380 error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET); 381 if (vhdr->magic == VINUM_MAGIC) { /* ours! */ 382 if (drive->label.name[0] /* we have a name for this drive */ 383 &&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */ 384 drive->lasterror = EINVAL; 385 result = DL_WRONG_DRIVE; /* it's the wrong drive */ 386 drive->state = drive_unallocated; /* put it back, it's not ours */ 387 } else 388 result = DL_OURS; 389 /* 390 * We copy the drive anyway so that we have 391 * the correct name in the drive info. This 392 * may not be the name specified 393 */ 394 drive->label = vhdr->label; /* put in the label information */ 395 } else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */ 396 result = DL_DELETED_LABEL; /* and return the info */ 397 else 398 result = DL_NOT_OURS; /* we could have it, but we don't yet */ 399 Free(vhdr); /* that's all. */ 400 return result; 401 } 402 403 /* 404 * Check a drive for a vinum header. If found, 405 * read configuration information from the drive and 406 * incorporate the data into the configuration. 407 * 408 * Return drive number. 409 */ 410 struct drive * 411 check_drive(char *devicename) 412 { 413 int driveno; 414 int i; 415 struct drive *drive; 416 417 driveno = find_drive_by_dev(devicename, 1); /* if entry doesn't exist, create it */ 418 drive = &vinum_conf.drive[driveno]; /* and get a pointer */ 419 420 if (read_drive_label(drive, 0) == DL_OURS) { /* one of ours */ 421 for (i = 0; i < vinum_conf.drives_allocated; i++) { /* see if the name already exists */ 422 if ((i != driveno) /* not this drive */ 423 &&(DRIVE[i].state != drive_unallocated) /* and it's allocated */ 424 &&(strcmp(DRIVE[i].label.name, 425 DRIVE[driveno].label.name) == 0)) { /* and it has the same name */ 426 struct drive *mydrive = &DRIVE[i]; 427 428 if (mydrive->devicename[0] == '/') { /* we know a device name for it */ 429 /* 430 * set an error, but don't take the 431 * drive down: that would cause unneeded 432 * error messages. 433 */ 434 drive->lasterror = EEXIST; 435 break; 436 } else { /* it's just a place holder, */ 437 int sdno; 438 439 for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* look at each subdisk */ 440 if ((SD[sdno].driveno == i) /* it's pointing to this one, */ 441 &&(SD[sdno].state != sd_unallocated)) { /* and it's a real subdisk */ 442 SD[sdno].driveno = drive->driveno; /* point to the one we found */ 443 update_sd_state(sdno); /* and update its state */ 444 } 445 } 446 bzero(mydrive, sizeof(struct drive)); /* don't deallocate it, just remove it */ 447 } 448 } 449 } 450 } else { 451 if (drive->lasterror == 0) 452 drive->lasterror = ENODEV; 453 close_drive(drive); 454 drive->state = drive_down; 455 } 456 return drive; 457 } 458 459 static char * 460 sappend(char *txt, char *s) 461 { 462 while ((*s++ = *txt++) != 0); 463 return s - 1; 464 } 465 466 void 467 format_config(char *config, int len) 468 { 469 int i; 470 int j; 471 char *s = config; 472 char *configend = &config[len]; 473 474 bzero(config, len); 475 476 /* First write the volume configuration */ 477 for (i = 0; i < vinum_conf.volumes_allocated; i++) { 478 struct volume *vol; 479 480 vol = &vinum_conf.volume[i]; 481 if ((vol->state > volume_uninit) 482 && (vol->name[0] != '\0')) { /* paranoia */ 483 snprintf(s, 484 configend - s, 485 "volume %s state %s", 486 vol->name, 487 volume_state(vol->state)); 488 while (*s) 489 s++; /* find the end */ 490 if (vol->preferred_plex >= 0) /* preferences, */ 491 snprintf(s, 492 configend - s, 493 " readpol prefer %s", 494 vinum_conf.plex[vol->preferred_plex].name); 495 while (*s) 496 s++; /* find the end */ 497 s = sappend("\n", s); 498 } 499 } 500 501 /* Then the plex configuration */ 502 for (i = 0; i < vinum_conf.plexes_allocated; i++) { 503 struct plex *plex; 504 505 plex = &vinum_conf.plex[i]; 506 if ((plex->state > plex_referenced) 507 && (plex->name[0] != '\0')) { /* paranoia */ 508 snprintf(s, 509 configend - s, 510 "plex name %s state %s org %s ", 511 plex->name, 512 plex_state(plex->state), 513 plex_org(plex->organization)); 514 while (*s) 515 s++; /* find the end */ 516 if (isstriped(plex)) { 517 snprintf(s, 518 configend - s, 519 "%ds ", 520 (int) plex->stripesize); 521 while (*s) 522 s++; /* find the end */ 523 } 524 if (plex->volno >= 0) /* we have a volume */ 525 snprintf(s, 526 configend - s, 527 "vol %s ", 528 vinum_conf.volume[plex->volno].name); 529 while (*s) 530 s++; /* find the end */ 531 for (j = 0; j < plex->subdisks; j++) { 532 snprintf(s, 533 configend - s, 534 " sd %s", 535 vinum_conf.sd[plex->sdnos[j]].name); 536 } 537 s = sappend("\n", s); 538 } 539 } 540 541 /* And finally the subdisk configuration */ 542 for (i = 0; i < vinum_conf.subdisks_allocated; i++) { 543 struct sd *sd; 544 char *drivename; 545 546 sd = &SD[i]; 547 if ((sd->state != sd_referenced) 548 && (sd->state != sd_unallocated) 549 && (sd->name[0] != '\0')) { /* paranoia */ 550 drivename = vinum_conf.drive[sd->driveno].label.name; 551 /* 552 * XXX We've seen cases of dead subdisks 553 * which don't have a drive. If we let them 554 * through here, the drive name is null, so 555 * they get the drive named 'plex'. 556 * 557 * This is a breakage limiter, not a fix. 558 */ 559 if (drivename[0] == '\0') 560 drivename = "*invalid*"; 561 snprintf(s, 562 configend - s, 563 "sd name %s drive %s plex %s len %llus driveoffset %llus state %s", 564 sd->name, 565 drivename, 566 vinum_conf.plex[sd->plexno].name, 567 (unsigned long long) sd->sectors, 568 (unsigned long long) sd->driveoffset, 569 sd_state(sd->state)); 570 while (*s) 571 s++; /* find the end */ 572 if (sd->plexno >= 0) 573 snprintf(s, 574 configend - s, 575 " plexoffset %llds", 576 (long long) sd->plexoffset); 577 else 578 snprintf(s, configend - s, " detached"); 579 while (*s) 580 s++; /* find the end */ 581 if (sd->flags & VF_RETRYERRORS) { 582 snprintf(s, configend - s, " retryerrors"); 583 while (*s) 584 s++; /* find the end */ 585 } 586 snprintf(s, configend - s, " \n"); 587 while (*s) 588 s++; /* find the end */ 589 } 590 } 591 if (s > &config[len - 2]) 592 panic("vinum: configuration data overflow"); 593 } 594 595 /* 596 * issue a save config request to the d�mon. The actual work 597 * is done in process context by daemon_save_config 598 */ 599 void 600 save_config(void) 601 { 602 queue_daemon_request(daemonrq_saveconfig, (union daemoninfo) NULL); 603 } 604 605 /* 606 * Write the configuration to all vinum slices. This 607 * is performed by the d�mon only 608 */ 609 void 610 daemon_save_config(void) 611 { 612 int error; 613 int written_config; /* set when we first write the config to disk */ 614 int driveno; 615 struct drive *drive; /* point to current drive info */ 616 struct vinum_hdr *vhdr; /* and as header */ 617 char *config; /* point to config data */ 618 int wlabel_on; /* to set writing label on/off */ 619 620 /* don't save the configuration while we're still working on it */ 621 if (vinum_conf.flags & VF_CONFIGURING) 622 return; 623 written_config = 0; /* no config written yet */ 624 /* Build a volume header */ 625 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* get space for the config data */ 626 CHECKALLOC(vhdr, "Can't allocate config data"); 627 vhdr->magic = VINUM_MAGIC; /* magic number */ 628 vhdr->config_length = MAXCONFIG; /* length of following config info */ 629 630 config = Malloc(MAXCONFIG); /* get space for the config data */ 631 CHECKALLOC(config, "Can't allocate config data"); 632 633 format_config(config, MAXCONFIG); 634 error = 0; /* no errors yet */ 635 for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) { 636 drive = &vinum_conf.drive[driveno]; /* point to drive */ 637 if (drive->state > drive_referenced) { 638 LOCKDRIVE(drive); /* don't let it change */ 639 640 /* 641 * First, do some drive consistency checks. Some 642 * of these are kludges, others require a process 643 * context and couldn't be done before 644 */ 645 if ((drive->devicename[0] == '\0') 646 || (drive->label.name[0] == '\0')) { 647 unlockdrive(drive); 648 free_drive(drive); /* get rid of it */ 649 break; 650 } 651 if (((drive->flags & VF_OPEN) == 0) /* drive not open */ 652 &&(drive->state > drive_down)) { /* and it thinks it's not down */ 653 unlockdrive(drive); 654 set_drive_state(driveno, drive_down, setstate_force); /* tell it what's what */ 655 continue; 656 } 657 if ((drive->state == drive_down) /* it's down */ 658 &&(drive->flags & VF_OPEN)) { /* but open, */ 659 unlockdrive(drive); 660 close_drive(drive); /* close it */ 661 } else if (drive->state > drive_down) { 662 getmicrotime(&drive->label.last_update); /* time of last update is now */ 663 bcopy((char *) &drive->label, /* and the label info from the drive structure */ 664 (char *) &vhdr->label, 665 sizeof(vhdr->label)); 666 if ((drive->state != drive_unallocated) 667 && (drive->state != drive_referenced)) { /* and it's a real drive */ 668 wlabel_on = 1; /* enable writing the label */ 669 error = (*devsw(drive->dev)->d_ioctl) (drive->dev, /* make the label writeable */ 670 DIOCWLABEL, 671 (caddr_t) & wlabel_on, 672 FWRITE, 673 curproc); 674 if (error == 0) 675 error = write_drive(drive, (char *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET); 676 if (error == 0) 677 error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET); /* first config copy */ 678 if (error == 0) 679 error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET + MAXCONFIG); /* second copy */ 680 wlabel_on = 0; /* enable writing the label */ 681 if (error == 0) 682 error = (*devsw(drive->dev)->d_ioctl) (drive->dev, /* make the label non-writeable again */ 683 DIOCWLABEL, 684 (caddr_t) & wlabel_on, 685 FWRITE, 686 curproc); 687 unlockdrive(drive); 688 if (error) { 689 log(LOG_ERR, 690 "vinum: Can't write config to %s, error %d\n", 691 drive->devicename, 692 error); 693 set_drive_state(drive->driveno, drive_down, setstate_force); 694 } else 695 written_config = 1; /* we've written it on at least one drive */ 696 } 697 } else /* not worth looking at, */ 698 unlockdrive(drive); /* just unlock it again */ 699 } 700 } 701 Free(vhdr); 702 Free(config); 703 } 704 705 /* 706 * Disk labels are a mess. The correct way to 707 * access them is with the DIOC[GSW]DINFO ioctls, 708 * but some programs, such as newfs, access the 709 * disk directly, so we have to write things 710 * there. We do this only on request. If a user 711 * request tries to read it directly, we fake up 712 * one on the fly. 713 */ 714 715 /* 716 * get_volume_label returns a label structure to lp, which 717 * is allocated by the caller 718 */ 719 void 720 get_volume_label(char *name, int plexes, u_int64_t size, struct disklabel *lp) 721 { 722 bzero(lp, sizeof(struct disklabel)); 723 724 strncpy(lp->d_typename, "vinum", sizeof(lp->d_typename)); 725 lp->d_type = DTYPE_VINUM; 726 strncpy(lp->d_packname, name, min(sizeof(lp->d_packname), sizeof(name))); 727 lp->d_rpm = 14400 * plexes; /* to keep them guessing */ 728 lp->d_interleave = 1; 729 lp->d_flags = 0; 730 731 /* 732 * A Vinum volume has a single track with all 733 * its sectors. 734 */ 735 lp->d_secsize = DEV_BSIZE; /* bytes per sector */ 736 lp->d_nsectors = size; /* data sectors per track */ 737 lp->d_ntracks = 1; /* tracks per cylinder */ 738 lp->d_ncylinders = 1; /* data cylinders per unit */ 739 lp->d_secpercyl = size; /* data sectors per cylinder */ 740 lp->d_secperunit = size; /* data sectors per unit */ 741 742 lp->d_bbsize = BBSIZE; 743 lp->d_sbsize = SBSIZE; 744 745 lp->d_magic = DISKMAGIC; 746 lp->d_magic2 = DISKMAGIC; 747 748 /* 749 * Set up partitions a, b and c to be identical 750 * and the size of the volume. a is UFS, b is 751 * swap, c is nothing. 752 */ 753 lp->d_partitions[0].p_size = size; 754 lp->d_partitions[0].p_fsize = 1024; 755 lp->d_partitions[0].p_fstype = FS_BSDFFS; /* FreeBSD File System :-) */ 756 lp->d_partitions[0].p_fsize = 1024; /* FS fragment size */ 757 lp->d_partitions[0].p_frag = 8; /* and fragments per block */ 758 lp->d_partitions[SWAP_PART].p_size = size; 759 lp->d_partitions[SWAP_PART].p_fstype = FS_SWAP; /* swap partition */ 760 lp->d_partitions[LABEL_PART].p_size = size; 761 lp->d_npartitions = LABEL_PART + 1; 762 strncpy(lp->d_packname, name, min(sizeof(lp->d_packname), sizeof(name))); 763 lp->d_checksum = dkcksum(lp); 764 } 765 766 /* Write a volume label. This implements the VINUM_LABEL ioctl. */ 767 int 768 write_volume_label(int volno) 769 { 770 struct disklabel *lp; 771 struct buf *bp; 772 struct disklabel *dlp; 773 struct volume *vol; 774 int error; 775 776 lp = (struct disklabel *) Malloc((sizeof(struct disklabel) + (DEV_BSIZE - 1)) & (DEV_BSIZE - 1)); 777 if (lp == 0) 778 return ENOMEM; 779 780 if ((unsigned) (volno) >= (unsigned) vinum_conf.volumes_allocated) /* invalid volume */ 781 return ENOENT; 782 783 vol = &VOL[volno]; /* volume in question */ 784 if (vol->state <= volume_uninit) /* nothing there */ 785 return ENXIO; 786 else if (vol->state < volume_up) /* not accessible */ 787 return EIO; /* I/O error */ 788 789 get_volume_label(vol->name, vol->plexes, vol->size, lp); /* get the label */ 790 791 /* 792 * Now write to disk. This code is derived from the 793 * system writedisklabel (), which does silly things 794 * like reading the label and refusing to write 795 * unless it's already there. 796 */ 797 bp = geteblk((int) lp->d_secsize); /* get a buffer */ 798 bp->b_dev = makedev(VINUM_CDEV_MAJOR, vol->volno); /* our own raw volume */ 799 bp->b_blkno = LABELSECTOR * ((int) lp->d_secsize / DEV_BSIZE); 800 bp->b_bcount = lp->d_secsize; 801 bzero(bp->b_data, lp->d_secsize); 802 dlp = (struct disklabel *) bp->b_data; 803 *dlp = *lp; 804 bp->b_flags &= ~B_INVAL; 805 bp->b_flags |= B_WRITE; 806 807 /* 808 * This should read: 809 * 810 * vinumstrategy (bp); 811 * 812 * Negotiate with phk to get it fixed. 813 */ 814 BUF_STRATEGY(bp, 0); 815 error = biowait(bp); 816 bp->b_flags |= B_INVAL | B_AGE; 817 bp->b_flags &= ~B_ERROR; 818 819 brelse(bp); 820 return error; 821 } 822 823 /* Look at all disks on the system for vinum slices */ 824 int 825 vinum_scandisk(char *devicename[], int drives) 826 { 827 struct drive *volatile drive; 828 volatile int driveno; 829 int firstdrive; /* first drive in this list */ 830 volatile int gooddrives; /* number of usable drives found */ 831 int firsttime; /* set if we have never configured before */ 832 int error; 833 char *config_text; /* read the config info from disk into here */ 834 char *volatile cptr; /* pointer into config information */ 835 char *eptr; /* end pointer into config information */ 836 char *config_line; /* copy the config line to */ 837 volatile int status; 838 int *volatile drivelist; /* list of drive indices */ 839 #define DRIVENAMELEN 64 840 #define DRIVEPARTS 35 /* max partitions per drive, excluding c */ 841 char partname[DRIVENAMELEN]; /* for creating partition names */ 842 843 status = 0; /* success indication */ 844 vinum_conf.flags |= VF_READING_CONFIG; /* reading config from disk */ 845 846 gooddrives = 0; /* number of usable drives found */ 847 firstdrive = vinum_conf.drives_used; /* the first drive */ 848 firsttime = vinum_conf.drives_used == 0; /* are we a virgin? */ 849 850 /* allocate a drive pointer list */ 851 drivelist = (int *) Malloc(drives * DRIVEPARTS * sizeof(int)); 852 CHECKALLOC(drivelist, "Can't allocate memory"); 853 854 /* Open all drives and find which was modified most recently */ 855 for (driveno = 0; driveno < drives; driveno++) { 856 char part; /* UNIX partition */ 857 int slice; 858 int founddrive; /* flag when we find a vinum drive */ 859 860 founddrive = 0; /* no vinum drive found yet on this spindle */ 861 /* first try the partition table */ 862 for (slice = 1; slice < 5; slice++) 863 for (part = 'a'; part < 'i'; part++) { 864 if (part != 'c') { /* don't do the c partition */ 865 snprintf(partname, 866 DRIVENAMELEN, 867 "%ss%d%c", 868 devicename[driveno], 869 slice, 870 part); 871 drive = check_drive(partname); /* try to open it */ 872 if ((drive->lasterror != 0) /* didn't work, */ 873 ||(drive->state != drive_up)) 874 free_drive(drive); /* get rid of it */ 875 else if (drive->flags & VF_CONFIGURED) /* already read this config, */ 876 log(LOG_WARNING, 877 "vinum: already read config from %s\n", /* say so */ 878 drive->label.name); 879 else { 880 drivelist[gooddrives] = drive->driveno; /* keep the drive index */ 881 drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */ 882 gooddrives++; 883 founddrive++; 884 } 885 } 886 } 887 if (founddrive == 0) { /* didn't find anything, */ 888 for (part = 'a'; part < 'i'; part++) /* try the compatibility partition */ 889 if (part != 'c') { /* don't do the c partition */ 890 snprintf(partname, /* /dev/sd0a */ 891 DRIVENAMELEN, 892 "%s%c", 893 devicename[driveno], 894 part); 895 drive = check_drive(partname); /* try to open it */ 896 if ((drive->lasterror != 0) /* didn't work, */ 897 ||(drive->state != drive_up)) 898 free_drive(drive); /* get rid of it */ 899 else if (drive->flags & VF_CONFIGURED) /* already read this config, */ 900 log(LOG_WARNING, 901 "vinum: already read config from %s\n", /* say so */ 902 drive->label.name); 903 else { 904 drivelist[gooddrives] = drive->driveno; /* keep the drive index */ 905 drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */ 906 gooddrives++; 907 } 908 } 909 } 910 } 911 912 if (gooddrives == 0) { 913 if (firsttime) 914 log(LOG_WARNING, "vinum: no drives found\n"); 915 else 916 log(LOG_INFO, "vinum: no additional drives found\n"); 917 return ENOENT; 918 } 919 /* 920 * We now have at least one drive 921 * open. Sort them in order of config time 922 * and merge the config info with what we 923 * have already. 924 */ 925 qsort(drivelist, gooddrives, sizeof(int), drivecmp); 926 config_text = (char *) Malloc(MAXCONFIG * 2); /* allocate buffers */ 927 CHECKALLOC(config_text, "Can't allocate memory"); 928 config_line = (char *) Malloc(MAXCONFIGLINE * 2); /* allocate buffers */ 929 CHECKALLOC(config_line, "Can't allocate memory"); 930 for (driveno = 0; driveno < gooddrives; driveno++) { /* now include the config */ 931 drive = &DRIVE[drivelist[driveno]]; /* point to the drive */ 932 933 if (firsttime && (driveno == 0)) /* we've never configured before, */ 934 log(LOG_INFO, "vinum: reading configuration from %s\n", drive->devicename); 935 else 936 log(LOG_INFO, "vinum: updating configuration from %s\n", drive->devicename); 937 938 if (drive->state == drive_up) 939 /* Read in both copies of the configuration information */ 940 error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET); 941 else { 942 error = EIO; 943 printf("vinum_scandisk: %s is %s\n", drive->devicename, drive_state(drive->state)); 944 } 945 946 if (error != 0) { 947 log(LOG_ERR, "vinum: Can't read device %s, error %d\n", drive->devicename, error); 948 free_drive(drive); /* give it back */ 949 status = error; 950 } 951 /* 952 * At this point, check that the two copies 953 * are the same, and do something useful if 954 * not. In particular, consider which is 955 * newer, and what this means for the 956 * integrity of the data on the drive. 957 */ 958 else { 959 vinum_conf.drives_used++; /* another drive in use */ 960 /* Parse the configuration, and add it to the global configuration */ 961 for (cptr = config_text; *cptr != '\0';) { /* love this style(9) */ 962 volatile int parse_status; /* return value from parse_config */ 963 964 for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */ 965 *eptr++ = *cptr++; 966 *eptr = '\0'; /* and delimit */ 967 if (setjmp(command_fail) == 0) { /* come back here on error and continue */ 968 parse_status = parse_config(config_line, &keyword_set, 1); /* parse the config line */ 969 if (parse_status < 0) { /* error in config */ 970 /* 971 * This config should have been parsed in user 972 * space. If we run into problems here, something 973 * serious is afoot. Complain and let the user 974 * snarf the config to see what's wrong. 975 */ 976 log(LOG_ERR, 977 "vinum: Config error on %s, aborting integration\n", 978 drive->devicename); 979 free_drive(drive); /* give it back */ 980 status = EINVAL; 981 } 982 } 983 while (*cptr == '\n') 984 cptr++; /* skip to next line */ 985 } 986 } 987 drive->flags |= VF_CONFIGURED; /* read this drive's configuration */ 988 } 989 990 Free(config_text); 991 Free(drivelist); 992 vinum_conf.flags &= ~VF_READING_CONFIG; /* no longer reading from disk */ 993 if (status != 0) 994 printf("vinum: couldn't read configuration"); 995 else 996 updateconfig(VF_READING_CONFIG); /* update from disk config */ 997 return status; 998 } 999 1000 /* 1001 * Compare the modification dates of the drives, for qsort. 1002 * Return 1 if a < b, 0 if a == b, 01 if a > b: in other 1003 * words, sort backwards. 1004 */ 1005 int 1006 drivecmp(const void *va, const void *vb) 1007 { 1008 const struct drive *a = &DRIVE[*(const int *) va]; 1009 const struct drive *b = &DRIVE[*(const int *) vb]; 1010 1011 if ((a->label.last_update.tv_sec == b->label.last_update.tv_sec) 1012 && (a->label.last_update.tv_usec == b->label.last_update.tv_usec)) 1013 return 0; 1014 else if ((a->label.last_update.tv_sec > b->label.last_update.tv_sec) 1015 || ((a->label.last_update.tv_sec == b->label.last_update.tv_sec) 1016 && (a->label.last_update.tv_usec > b->label.last_update.tv_usec))) 1017 return -1; 1018 else 1019 return 1; 1020 } 1021 /* Local Variables: */ 1022 /* fill-column: 50 */ 1023 /* End: */ 1024