1 /* $OpenBSD: subr_disk.c,v 1.97 2009/08/13 15:23:11 deraadt Exp $ */ 2 /* $NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1995 Jason R. Thorpe. All rights reserved. 6 * Copyright (c) 1982, 1986, 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/kernel.h> 44 #include <sys/malloc.h> 45 #include <sys/fcntl.h> 46 #include <sys/buf.h> 47 #include <sys/stat.h> 48 #include <sys/syslog.h> 49 #include <sys/device.h> 50 #include <sys/time.h> 51 #include <sys/disklabel.h> 52 #include <sys/conf.h> 53 #include <sys/lock.h> 54 #include <sys/disk.h> 55 #include <sys/reboot.h> 56 #include <sys/dkio.h> 57 #include <sys/dkstat.h> /* XXX */ 58 #include <sys/proc.h> 59 #include <sys/vnode.h> 60 #include <uvm/uvm_extern.h> 61 62 #include <sys/socket.h> 63 #include <sys/socketvar.h> 64 65 #include <net/if.h> 66 67 #include <dev/rndvar.h> 68 #include <dev/cons.h> 69 70 /* 71 * A global list of all disks attached to the system. May grow or 72 * shrink over time. 73 */ 74 struct disklist_head disklist; /* TAILQ_HEAD */ 75 int disk_count; /* number of drives in global disklist */ 76 int disk_change; /* set if a disk has been attached/detached 77 * since last we looked at this variable. This 78 * is reset by hw_sysctl() 79 */ 80 81 /* softraid callback, do not use! */ 82 void (*softraid_disk_attach)(struct disk *, int); 83 84 /* 85 * Seek sort for disks. We depend on the driver which calls us using b_resid 86 * as the current cylinder number. 87 * 88 * The argument ap structure holds a b_actf activity chain pointer on which we 89 * keep two queues, sorted in ascending cylinder order. The first queue holds 90 * those requests which are positioned after the current cylinder (in the first 91 * request); the second holds requests which came in after their cylinder number 92 * was passed. Thus we implement a one way scan, retracting after reaching the 93 * end of the drive to the first request on the second queue, at which time it 94 * becomes the first queue. 95 * 96 * A one-way scan is natural because of the way UNIX read-ahead blocks are 97 * allocated. 98 */ 99 100 void 101 disksort(struct buf *ap, struct buf *bp) 102 { 103 struct buf *bq; 104 105 /* If the queue is empty, then it's easy. */ 106 if (ap->b_actf == NULL) { 107 bp->b_actf = NULL; 108 ap->b_actf = bp; 109 return; 110 } 111 112 /* 113 * If we lie after the first (currently active) request, then we 114 * must locate the second request list and add ourselves to it. 115 */ 116 bq = ap->b_actf; 117 if (bp->b_cylinder < bq->b_cylinder) { 118 while (bq->b_actf) { 119 /* 120 * Check for an ``inversion'' in the normally ascending 121 * cylinder numbers, indicating the start of the second 122 * request list. 123 */ 124 if (bq->b_actf->b_cylinder < bq->b_cylinder) { 125 /* 126 * Search the second request list for the first 127 * request at a larger cylinder number. We go 128 * before that; if there is no such request, we 129 * go at end. 130 */ 131 do { 132 if (bp->b_cylinder < 133 bq->b_actf->b_cylinder) 134 goto insert; 135 if (bp->b_cylinder == 136 bq->b_actf->b_cylinder && 137 bp->b_blkno < bq->b_actf->b_blkno) 138 goto insert; 139 bq = bq->b_actf; 140 } while (bq->b_actf); 141 goto insert; /* after last */ 142 } 143 bq = bq->b_actf; 144 } 145 /* 146 * No inversions... we will go after the last, and 147 * be the first request in the second request list. 148 */ 149 goto insert; 150 } 151 /* 152 * Request is at/after the current request... 153 * sort in the first request list. 154 */ 155 while (bq->b_actf) { 156 /* 157 * We want to go after the current request if there is an 158 * inversion after it (i.e. it is the end of the first 159 * request list), or if the next request is a larger cylinder 160 * than our request. 161 */ 162 if (bq->b_actf->b_cylinder < bq->b_cylinder || 163 bp->b_cylinder < bq->b_actf->b_cylinder || 164 (bp->b_cylinder == bq->b_actf->b_cylinder && 165 bp->b_blkno < bq->b_actf->b_blkno)) 166 goto insert; 167 bq = bq->b_actf; 168 } 169 /* 170 * Neither a second list nor a larger request... we go at the end of 171 * the first list, which is the same as the end of the whole schebang. 172 */ 173 insert: bp->b_actf = bq->b_actf; 174 bq->b_actf = bp; 175 } 176 177 /* 178 * Compute checksum for disk label. 179 */ 180 u_int 181 dkcksum(struct disklabel *lp) 182 { 183 u_int16_t *start, *end; 184 u_int16_t sum = 0; 185 186 start = (u_int16_t *)lp; 187 end = (u_int16_t *)&lp->d_partitions[lp->d_npartitions]; 188 while (start < end) 189 sum ^= *start++; 190 return (sum); 191 } 192 193 int 194 initdisklabel(struct disklabel *lp) 195 { 196 int i; 197 198 /* minimal requirements for archetypal disk label */ 199 if (lp->d_secsize < DEV_BSIZE) 200 lp->d_secsize = DEV_BSIZE; 201 if (DL_GETDSIZE(lp) == 0) 202 DL_SETDSIZE(lp, MAXDISKSIZE); 203 if (lp->d_secpercyl == 0) 204 return (ERANGE); 205 lp->d_npartitions = MAXPARTITIONS; 206 for (i = 0; i < RAW_PART; i++) { 207 DL_SETPSIZE(&lp->d_partitions[i], 0); 208 DL_SETPOFFSET(&lp->d_partitions[i], 0); 209 } 210 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) == 0) 211 DL_SETPSIZE(&lp->d_partitions[RAW_PART], DL_GETDSIZE(lp)); 212 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 213 DL_SETBSTART(lp, 0); 214 DL_SETBEND(lp, DL_GETDSIZE(lp)); 215 lp->d_version = 1; 216 lp->d_bbsize = 8192; 217 lp->d_sbsize = 64*1024; /* XXX ? */ 218 return (0); 219 } 220 221 /* 222 * Check an incoming block to make sure it is a disklabel, convert it to 223 * a newer version if needed, etc etc. 224 */ 225 int 226 checkdisklabel(void *rlp, struct disklabel *lp, 227 u_int64_t boundstart, u_int64_t boundend) 228 { 229 struct disklabel *dlp = rlp; 230 struct __partitionv0 *v0pp; 231 struct partition *pp; 232 daddr64_t disksize; 233 int error = 0; 234 int i; 235 236 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) 237 error = ENOENT; /* no disk label */ 238 else if (dlp->d_npartitions > MAXPARTITIONS) 239 error = E2BIG; /* too many partitions */ 240 else if (dlp->d_secpercyl == 0) 241 error = EINVAL; /* invalid label */ 242 else if (dlp->d_secsize == 0) 243 error = ENOSPC; /* disk too small */ 244 else if (dkcksum(dlp) != 0) 245 error = EINVAL; /* incorrect checksum */ 246 247 if (error) { 248 u_int16_t *start, *end, sum = 0; 249 250 /* If it is byte-swapped, attempt to convert it */ 251 if (swap32(dlp->d_magic) != DISKMAGIC || 252 swap32(dlp->d_magic2) != DISKMAGIC || 253 swap16(dlp->d_npartitions) > MAXPARTITIONS) 254 return (error); 255 256 /* 257 * Need a byte-swap aware dkcksum varient 258 * inlined, because dkcksum uses a sub-field 259 */ 260 start = (u_int16_t *)dlp; 261 end = (u_int16_t *)&dlp->d_partitions[ 262 swap16(dlp->d_npartitions)]; 263 while (start < end) 264 sum ^= *start++; 265 if (sum != 0) 266 return (error); 267 268 dlp->d_magic = swap32(dlp->d_magic); 269 dlp->d_type = swap16(dlp->d_type); 270 dlp->d_subtype = swap16(dlp->d_subtype); 271 272 /* d_typename and d_packname are strings */ 273 274 dlp->d_secsize = swap32(dlp->d_secsize); 275 dlp->d_nsectors = swap32(dlp->d_nsectors); 276 dlp->d_ntracks = swap32(dlp->d_ntracks); 277 dlp->d_ncylinders = swap32(dlp->d_ncylinders); 278 dlp->d_secpercyl = swap32(dlp->d_secpercyl); 279 dlp->d_secperunit = swap32(dlp->d_secperunit); 280 281 dlp->d_sparespertrack = swap16(dlp->d_sparespertrack); 282 dlp->d_sparespercyl = swap16(dlp->d_sparespercyl); 283 284 dlp->d_acylinders = swap32(dlp->d_acylinders); 285 286 dlp->d_rpm = swap16(dlp->d_rpm); 287 dlp->d_interleave = swap16(dlp->d_interleave); 288 dlp->d_flags = swap32(dlp->d_flags); 289 290 for (i = 0; i < NDDATA; i++) 291 dlp->d_drivedata[i] = swap32(dlp->d_drivedata[i]); 292 293 dlp->d_secperunith = swap16(dlp->d_secperunith); 294 dlp->d_version = swap16(dlp->d_version); 295 296 for (i = 0; i < NSPARE; i++) 297 dlp->d_spare[i] = swap32(dlp->d_spare[i]); 298 299 dlp->d_magic2 = swap32(dlp->d_magic2); 300 dlp->d_checksum = swap16(dlp->d_checksum); 301 302 dlp->d_npartitions = swap16(dlp->d_npartitions); 303 dlp->d_bbsize = swap32(dlp->d_bbsize); 304 dlp->d_sbsize = swap32(dlp->d_sbsize); 305 306 for (i = 0; i < MAXPARTITIONS; i++) { 307 pp = &dlp->d_partitions[i]; 308 pp->p_size = swap32(pp->p_size); 309 pp->p_offset = swap32(pp->p_offset); 310 if (dlp->d_version == 0) { 311 v0pp = (struct __partitionv0 *)pp; 312 v0pp->p_fsize = swap32(v0pp->p_fsize); 313 } else { 314 pp->p_offseth = swap16(pp->p_offseth); 315 pp->p_sizeh = swap16(pp->p_sizeh); 316 } 317 pp->p_cpg = swap16(pp->p_cpg); 318 } 319 320 dlp->d_checksum = 0; 321 dlp->d_checksum = dkcksum(dlp); 322 error = 0; 323 } 324 325 /* XXX should verify lots of other fields and whine a lot */ 326 327 if (error) 328 return (error); 329 330 /* Initial passed in lp contains the real disk size. */ 331 disksize = DL_GETDSIZE(lp); 332 333 if (lp != dlp) 334 *lp = *dlp; 335 336 if (lp->d_version == 0) { 337 lp->d_version = 1; 338 lp->d_secperunith = 0; 339 340 v0pp = (struct __partitionv0 *)lp->d_partitions; 341 pp = lp->d_partitions; 342 for (i = 0; i < lp->d_npartitions; i++, pp++, v0pp++) { 343 pp->p_fragblock = DISKLABELV1_FFS_FRAGBLOCK(v0pp-> 344 p_fsize, v0pp->p_frag); 345 pp->p_offseth = 0; 346 pp->p_sizeh = 0; 347 } 348 } 349 350 #ifdef DEBUG 351 if (DL_GETDSIZE(lp) != disksize) 352 printf("on-disk disklabel has incorrect disksize (%lld)\n", 353 DL_GETDSIZE(lp)); 354 if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) != disksize) 355 printf("on-disk disklabel RAW_PART has incorrect size (%lld)\n", 356 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 357 if (DL_GETPOFFSET(&lp->d_partitions[RAW_PART]) != 0) 358 printf("on-disk disklabel RAW_PART offset != 0 (%lld)\n", 359 DL_GETPOFFSET(&lp->d_partitions[RAW_PART])); 360 #endif 361 DL_SETDSIZE(lp, disksize); 362 DL_SETPSIZE(&lp->d_partitions[RAW_PART], disksize); 363 DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0); 364 DL_SETBSTART(lp, boundstart); 365 DL_SETBEND(lp, boundend < DL_GETDSIZE(lp) ? boundend : DL_GETDSIZE(lp)); 366 367 lp->d_checksum = 0; 368 lp->d_checksum = dkcksum(lp); 369 return (0); 370 } 371 372 /* 373 * If dos partition table requested, attempt to load it and 374 * find disklabel inside a DOS partition. Return buffer 375 * for use in signalling errors if requested. 376 * 377 * We would like to check if each MBR has a valid BOOT_MAGIC, but 378 * we cannot because it doesn't always exist. So.. we assume the 379 * MBR is valid. 380 */ 381 int 382 readdoslabel(struct buf *bp, void (*strat)(struct buf *), 383 struct disklabel *lp, int *partoffp, int spoofonly) 384 { 385 u_int64_t dospartoff = 0, dospartend = DL_GETBEND(lp); 386 int i, ourpart = -1, wander = 1, n = 0, loop = 0, offset; 387 struct dos_partition dp[NDOSPART], *dp2; 388 daddr64_t part_blkno = DOSBBSECTOR; 389 u_int32_t extoff = 0; 390 int error; 391 392 if (lp->d_secpercyl == 0) 393 return (EINVAL); /* invalid label */ 394 if (lp->d_secsize == 0) 395 return (ENOSPC); /* disk too small */ 396 397 /* do DOS partitions in the process of getting disklabel? */ 398 399 /* 400 * Read dos partition table, follow extended partitions. 401 * Map the partitions to disklabel entries i-p 402 */ 403 while (wander && n < 8 && loop < 8) { 404 loop++; 405 wander = 0; 406 if (part_blkno < extoff) 407 part_blkno = extoff; 408 409 /* read boot record */ 410 bp->b_blkno = DL_BLKTOSEC(lp, part_blkno) * DL_BLKSPERSEC(lp); 411 offset = DL_BLKOFFSET(lp, part_blkno) + DOSPARTOFF; 412 bp->b_bcount = lp->d_secsize; 413 bp->b_flags = B_BUSY | B_READ | B_RAW; 414 (*strat)(bp); 415 error = biowait(bp); 416 if (error) { 417 /*wrong*/ if (partoffp) 418 /*wrong*/ *partoffp = -1; 419 return (error); 420 } 421 422 bcopy(bp->b_data + offset, dp, sizeof(dp)); 423 424 if (n == 0 && part_blkno == DOSBBSECTOR) { 425 u_int16_t fattest; 426 427 /* Check the end of sector marker. */ 428 fattest = ((bp->b_data[510] << 8) & 0xff00) | 429 (bp->b_data[511] & 0xff); 430 if (fattest != 0x55aa) 431 goto notfat; 432 } 433 434 if (ourpart == -1) { 435 /* Search for our MBR partition */ 436 for (dp2=dp, i=0; i < NDOSPART && ourpart == -1; 437 i++, dp2++) 438 if (letoh32(dp2->dp_size) && 439 dp2->dp_typ == DOSPTYP_OPENBSD) 440 ourpart = i; 441 if (ourpart == -1) 442 goto donot; 443 /* 444 * This is our MBR partition. need sector 445 * address for SCSI/IDE, cylinder for 446 * ESDI/ST506/RLL 447 */ 448 dp2 = &dp[ourpart]; 449 dospartoff = letoh32(dp2->dp_start) + part_blkno; 450 dospartend = dospartoff + letoh32(dp2->dp_size); 451 452 /* found our OpenBSD partition, finish up */ 453 if (partoffp) 454 goto notfat; 455 456 if (lp->d_ntracks == 0) 457 lp->d_ntracks = dp2->dp_ehd + 1; 458 if (lp->d_nsectors == 0) 459 lp->d_nsectors = DPSECT(dp2->dp_esect); 460 if (lp->d_secpercyl == 0) 461 lp->d_secpercyl = lp->d_ntracks * 462 lp->d_nsectors; 463 } 464 donot: 465 /* 466 * In case the disklabel read below fails, we want to 467 * provide a fake label in i-p. 468 */ 469 for (dp2=dp, i=0; i < NDOSPART && n < 8; i++, dp2++) { 470 struct partition *pp = &lp->d_partitions[8+n]; 471 u_int8_t fstype; 472 473 if (dp2->dp_typ == DOSPTYP_OPENBSD) 474 continue; 475 if (letoh32(dp2->dp_size) > DL_GETDSIZE(lp)) 476 continue; 477 if (letoh32(dp2->dp_start) > DL_GETDSIZE(lp)) 478 continue; 479 if (letoh32(dp2->dp_size) == 0) 480 continue; 481 482 switch (dp2->dp_typ) { 483 case DOSPTYP_UNUSED: 484 fstype = FS_UNUSED; 485 n++; 486 break; 487 488 case DOSPTYP_LINUX: 489 fstype = FS_EXT2FS; 490 n++; 491 break; 492 493 case DOSPTYP_NTFS: 494 fstype = FS_NTFS; 495 n++; 496 break; 497 498 case DOSPTYP_FAT12: 499 case DOSPTYP_FAT16S: 500 case DOSPTYP_FAT16B: 501 case DOSPTYP_FAT16L: 502 case DOSPTYP_FAT32: 503 case DOSPTYP_FAT32L: 504 fstype = FS_MSDOS; 505 n++; 506 break; 507 case DOSPTYP_EXTEND: 508 case DOSPTYP_EXTENDL: 509 part_blkno = letoh32(dp2->dp_start) + extoff; 510 if (!extoff) { 511 extoff = letoh32(dp2->dp_start); 512 part_blkno = 0; 513 } 514 wander = 1; 515 continue; 516 break; 517 default: 518 fstype = FS_OTHER; 519 n++; 520 break; 521 } 522 523 /* 524 * Don't set fstype/offset/size when just looking for 525 * the offset of the OpenBSD partition. It would 526 * invalidate the disklabel checksum! 527 */ 528 if (partoffp) 529 continue; 530 531 pp->p_fstype = fstype; 532 if (letoh32(dp2->dp_start)) 533 DL_SETPOFFSET(pp, 534 letoh32(dp2->dp_start) + part_blkno); 535 DL_SETPSIZE(pp, letoh32(dp2->dp_size)); 536 } 537 } 538 if (partoffp) 539 /* dospartoff has been set and we must not modify *lp. */ 540 goto notfat; 541 542 lp->d_npartitions = MAXPARTITIONS; 543 544 if (n == 0 && part_blkno == DOSBBSECTOR) { 545 u_int16_t fattest; 546 547 /* Check for a valid initial jmp instruction. */ 548 switch ((u_int8_t)bp->b_data[0]) { 549 case 0xeb: 550 /* 551 * Two-byte jmp instruction. The 2nd byte is the number 552 * of bytes to jmp and the 3rd byte must be a NOP. 553 */ 554 if ((u_int8_t)bp->b_data[2] != 0x90) 555 goto notfat; 556 break; 557 case 0xe9: 558 /* 559 * Three-byte jmp instruction. The next two bytes are a 560 * little-endian 16 bit value. 561 */ 562 break; 563 default: 564 goto notfat; 565 break; 566 } 567 568 /* Check for a valid bytes per sector value. */ 569 fattest = ((bp->b_data[12] << 8) & 0xff00) | 570 (bp->b_data[11] & 0xff); 571 if (fattest < 512 || fattest > 4096 || (fattest % 512 != 0)) 572 goto notfat; 573 574 /* Looks like a FAT filesystem. Spoof 'i'. */ 575 DL_SETPSIZE(&lp->d_partitions['i' - 'a'], 576 DL_GETPSIZE(&lp->d_partitions[RAW_PART])); 577 DL_SETPOFFSET(&lp->d_partitions['i' - 'a'], 0); 578 lp->d_partitions['i' - 'a'].p_fstype = FS_MSDOS; 579 } 580 notfat: 581 /* record the OpenBSD partition's placement for the caller */ 582 if (partoffp) 583 *partoffp = dospartoff; 584 else { 585 DL_SETBSTART(lp, dospartoff); 586 DL_SETBEND(lp, 587 dospartend < DL_GETDSIZE(lp) ? dospartend : DL_GETDSIZE(lp)); 588 } 589 590 /* don't read the on-disk label if we are in spoofed-only mode */ 591 if (spoofonly) 592 return (0); 593 594 bp->b_blkno = DL_BLKTOSEC(lp, dospartoff + DOS_LABELSECTOR) * 595 DL_BLKSPERSEC(lp); 596 offset = DL_BLKOFFSET(lp, dospartoff + DOS_LABELSECTOR); 597 bp->b_bcount = lp->d_secsize; 598 bp->b_flags = B_BUSY | B_READ | B_RAW; 599 (*strat)(bp); 600 if (biowait(bp)) 601 return (bp->b_error); 602 603 /* sub-MBR disklabels are always at a LABELOFFSET of 0 */ 604 return checkdisklabel(bp->b_data + offset, lp, dospartoff, dospartend); 605 } 606 607 /* 608 * Check new disk label for sensibility 609 * before setting it. 610 */ 611 int 612 setdisklabel(struct disklabel *olp, struct disklabel *nlp, u_int openmask) 613 { 614 int i; 615 struct partition *opp, *npp; 616 617 /* sanity clause */ 618 if (nlp->d_secpercyl == 0 || nlp->d_secsize == 0 || 619 (nlp->d_secsize % DEV_BSIZE) != 0) 620 return (EINVAL); 621 622 /* special case to allow disklabel to be invalidated */ 623 if (nlp->d_magic == 0xffffffff) { 624 *olp = *nlp; 625 return (0); 626 } 627 628 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC || 629 dkcksum(nlp) != 0) 630 return (EINVAL); 631 632 /* XXX missing check if other dos partitions will be overwritten */ 633 634 while (openmask != 0) { 635 i = ffs(openmask) - 1; 636 openmask &= ~(1 << i); 637 if (nlp->d_npartitions <= i) 638 return (EBUSY); 639 opp = &olp->d_partitions[i]; 640 npp = &nlp->d_partitions[i]; 641 if (DL_GETPOFFSET(npp) != DL_GETPOFFSET(opp) || 642 DL_GETPSIZE(npp) < DL_GETPSIZE(opp)) 643 return (EBUSY); 644 /* 645 * Copy internally-set partition information 646 * if new label doesn't include it. XXX 647 */ 648 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) { 649 npp->p_fstype = opp->p_fstype; 650 npp->p_fragblock = opp->p_fragblock; 651 npp->p_cpg = opp->p_cpg; 652 } 653 } 654 nlp->d_checksum = 0; 655 nlp->d_checksum = dkcksum(nlp); 656 *olp = *nlp; 657 return (0); 658 } 659 660 /* 661 * Determine the size of the transfer, and make sure it is within the 662 * boundaries of the partition. Adjust transfer if needed, and signal errors or 663 * early completion. 664 */ 665 int 666 bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel) 667 { 668 struct partition *p = &lp->d_partitions[DISKPART(bp->b_dev)]; 669 daddr64_t sz = howmany(bp->b_bcount, DEV_BSIZE); 670 671 /* avoid division by zero */ 672 if (lp->d_secpercyl == 0) 673 goto bad; 674 675 if (bp->b_blkno < 0 || sz < 0) 676 panic("bounds_check_with_label %lld %lld\n", bp->b_blkno, sz); 677 678 /* beyond partition? */ 679 if (bp->b_blkno + sz > DL_SECTOBLK(lp, DL_GETPSIZE(p))) { 680 sz = DL_SECTOBLK(lp, DL_GETPSIZE(p)) - bp->b_blkno; 681 if (sz == 0) { 682 /* If exactly at end of disk, return EOF. */ 683 bp->b_resid = bp->b_bcount; 684 return (-1); 685 } 686 if (sz < 0) 687 /* If past end of disk, return EINVAL. */ 688 goto bad; 689 690 /* Otherwise, truncate request. */ 691 bp->b_bcount = sz << DEV_BSHIFT; 692 } 693 694 /* calculate cylinder for disksort to order transfers with */ 695 bp->b_cylinder = (bp->b_blkno + DL_SECTOBLK(lp, DL_GETPOFFSET(p))) / 696 DL_SECTOBLK(lp, lp->d_secpercyl); 697 return (1); 698 699 bad: 700 bp->b_error = EINVAL; 701 bp->b_flags |= B_ERROR; 702 return (-1); 703 } 704 705 /* 706 * Disk error is the preface to plaintive error messages 707 * about failing disk transfers. It prints messages of the form 708 709 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 710 711 * if the offset of the error in the transfer and a disk label 712 * are both available. blkdone should be -1 if the position of the error 713 * is unknown; the disklabel pointer may be null from drivers that have not 714 * been converted to use them. The message is printed with printf 715 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 716 * The message should be completed (with at least a newline) with printf 717 * or addlog, respectively. There is no trailing space. 718 */ 719 void 720 diskerr(struct buf *bp, char *dname, char *what, int pri, int blkdone, 721 struct disklabel *lp) 722 { 723 int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev); 724 int (*pr)(const char *, ...); 725 char partname = 'a' + part; 726 daddr64_t sn; 727 728 if (pri != LOG_PRINTF) { 729 static const char fmt[] = ""; 730 log(pri, fmt); 731 pr = addlog; 732 } else 733 pr = printf; 734 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 735 bp->b_flags & B_READ ? "read" : "writ"); 736 sn = bp->b_blkno; 737 if (bp->b_bcount <= DEV_BSIZE) 738 (*pr)("%lld", sn); 739 else { 740 if (blkdone >= 0) { 741 sn += blkdone; 742 (*pr)("%lld of ", sn); 743 } 744 (*pr)("%lld-%lld", bp->b_blkno, 745 bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE); 746 } 747 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 748 sn += DL_GETPOFFSET(&lp->d_partitions[part]); 749 (*pr)(" (%s%d bn %lld; cn %lld", dname, unit, sn, 750 sn / lp->d_secpercyl); 751 sn %= lp->d_secpercyl; 752 (*pr)(" tn %lld sn %lld)", sn / lp->d_nsectors, 753 sn % lp->d_nsectors); 754 } 755 } 756 757 /* 758 * Initialize the disklist. Called by main() before autoconfiguration. 759 */ 760 void 761 disk_init(void) 762 { 763 764 TAILQ_INIT(&disklist); 765 disk_count = disk_change = 0; 766 } 767 768 int 769 disk_construct(struct disk *diskp, char *lockname) 770 { 771 rw_init(&diskp->dk_lock, "dklk"); 772 mtx_init(&diskp->dk_mtx, IPL_BIO); 773 774 diskp->dk_flags |= DKF_CONSTRUCTED; 775 776 return (0); 777 } 778 779 /* 780 * Attach a disk. 781 */ 782 void 783 disk_attach(struct disk *diskp) 784 { 785 786 if (!ISSET(diskp->dk_flags, DKF_CONSTRUCTED)) 787 disk_construct(diskp, diskp->dk_name); 788 789 /* 790 * Allocate and initialize the disklabel structures. Note that 791 * it's not safe to sleep here, since we're probably going to be 792 * called during autoconfiguration. 793 */ 794 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, 795 M_NOWAIT|M_ZERO); 796 if (diskp->dk_label == NULL) 797 panic("disk_attach: can't allocate storage for disklabel"); 798 799 /* 800 * Set the attached timestamp. 801 */ 802 microuptime(&diskp->dk_attachtime); 803 804 /* 805 * Link into the disklist. 806 */ 807 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 808 ++disk_count; 809 disk_change = 1; 810 811 if (softraid_disk_attach) 812 softraid_disk_attach(diskp, 1); 813 } 814 815 /* 816 * Detach a disk. 817 */ 818 void 819 disk_detach(struct disk *diskp) 820 { 821 822 if (softraid_disk_attach) 823 softraid_disk_attach(diskp, -1); 824 825 /* 826 * Free the space used by the disklabel structures. 827 */ 828 free(diskp->dk_label, M_DEVBUF); 829 830 /* 831 * Remove from the disklist. 832 */ 833 TAILQ_REMOVE(&disklist, diskp, dk_link); 834 disk_change = 1; 835 if (--disk_count < 0) 836 panic("disk_detach: disk_count < 0"); 837 } 838 839 /* 840 * Increment a disk's busy counter. If the counter is going from 841 * 0 to 1, set the timestamp. 842 */ 843 void 844 disk_busy(struct disk *diskp) 845 { 846 847 /* 848 * XXX We'd like to use something as accurate as microtime(), 849 * but that doesn't depend on the system TOD clock. 850 */ 851 mtx_enter(&diskp->dk_mtx); 852 if (diskp->dk_busy++ == 0) 853 microuptime(&diskp->dk_timestamp); 854 mtx_leave(&diskp->dk_mtx); 855 } 856 857 /* 858 * Decrement a disk's busy counter, increment the byte count, total busy 859 * time, and reset the timestamp. 860 */ 861 void 862 disk_unbusy(struct disk *diskp, long bcount, int read) 863 { 864 struct timeval dv_time, diff_time; 865 866 mtx_enter(&diskp->dk_mtx); 867 868 if (diskp->dk_busy-- == 0) 869 printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name); 870 871 microuptime(&dv_time); 872 873 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 874 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 875 876 diskp->dk_timestamp = dv_time; 877 if (bcount > 0) { 878 if (read) { 879 diskp->dk_rbytes += bcount; 880 diskp->dk_rxfer++; 881 } else { 882 diskp->dk_wbytes += bcount; 883 diskp->dk_wxfer++; 884 } 885 } else 886 diskp->dk_seek++; 887 888 mtx_leave(&diskp->dk_mtx); 889 890 add_disk_randomness(bcount ^ diff_time.tv_usec); 891 } 892 893 int 894 disk_lock(struct disk *dk) 895 { 896 int error; 897 898 error = rw_enter(&dk->dk_lock, RW_WRITE|RW_INTR); 899 900 return (error); 901 } 902 903 void 904 disk_unlock(struct disk *dk) 905 { 906 rw_exit(&dk->dk_lock); 907 } 908 909 int 910 dk_mountroot(void) 911 { 912 dev_t rawdev, rrootdev; 913 int part = DISKPART(rootdev); 914 int (*mountrootfn)(void); 915 struct disklabel dl; 916 struct vnode *vn; 917 int error; 918 919 rrootdev = blktochr(rootdev); 920 rawdev = MAKEDISKDEV(major(rrootdev), DISKUNIT(rootdev), RAW_PART); 921 #ifdef DEBUG 922 printf("rootdev=0x%x rrootdev=0x%x rawdev=0x%x\n", rootdev, 923 rrootdev, rawdev); 924 #endif 925 926 /* 927 * open device, ioctl for the disklabel, and close it. 928 */ 929 if (cdevvp(rawdev, &vn)) 930 panic("cannot obtain vnode for 0x%x/0x%x", rootdev, rrootdev); 931 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 932 if (error) 933 panic("cannot open disk, 0x%x/0x%x, error %d", 934 rootdev, rrootdev, error); 935 error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&dl, FREAD, NOCRED, 0); 936 if (error) 937 panic("cannot read disk label, 0x%x/0x%x, error %d", 938 rootdev, rrootdev, error); 939 error = VOP_CLOSE(vn, FREAD, NOCRED, 0); 940 if (error) 941 panic("cannot close disk , 0x%x/0x%x, error %d", 942 rootdev, rrootdev, error); 943 vput(vn); 944 945 if (DL_GETPSIZE(&dl.d_partitions[part]) == 0) 946 panic("root filesystem has size 0"); 947 switch (dl.d_partitions[part].p_fstype) { 948 #ifdef EXT2FS 949 case FS_EXT2FS: 950 { 951 extern int ext2fs_mountroot(void); 952 mountrootfn = ext2fs_mountroot; 953 } 954 break; 955 #endif 956 #ifdef FFS 957 case FS_BSDFFS: 958 { 959 extern int ffs_mountroot(void); 960 mountrootfn = ffs_mountroot; 961 } 962 break; 963 #endif 964 #ifdef CD9660 965 case FS_ISO9660: 966 { 967 extern int cd9660_mountroot(void); 968 mountrootfn = cd9660_mountroot; 969 } 970 break; 971 #endif 972 default: 973 #ifdef FFS 974 { 975 extern int ffs_mountroot(void); 976 977 printf("filesystem type %d not known.. assuming ffs\n", 978 dl.d_partitions[part].p_fstype); 979 mountrootfn = ffs_mountroot; 980 } 981 #else 982 panic("disk 0x%x/0x%x filesystem type %d not known", 983 rootdev, rrootdev, dl.d_partitions[part].p_fstype); 984 #endif 985 } 986 return (*mountrootfn)(); 987 } 988 989 struct device * 990 getdisk(char *str, int len, int defpart, dev_t *devp) 991 { 992 struct device *dv; 993 994 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) { 995 printf("use one of: exit"); 996 TAILQ_FOREACH(dv, &alldevs, dv_list) { 997 if (dv->dv_class == DV_DISK) 998 printf(" %s[a-p]", dv->dv_xname); 999 #if defined(NFSCLIENT) 1000 if (dv->dv_class == DV_IFNET) 1001 printf(" %s", dv->dv_xname); 1002 #endif 1003 } 1004 printf("\n"); 1005 } 1006 return (dv); 1007 } 1008 1009 struct device * 1010 parsedisk(char *str, int len, int defpart, dev_t *devp) 1011 { 1012 struct device *dv; 1013 int majdev, part = defpart; 1014 char c; 1015 1016 if (len == 0) 1017 return (NULL); 1018 c = str[len-1]; 1019 if (c >= 'a' && (c - 'a') < MAXPARTITIONS) { 1020 part = c - 'a'; 1021 len -= 1; 1022 } 1023 1024 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1025 if (dv->dv_class == DV_DISK && 1026 strncmp(str, dv->dv_xname, len) == 0 && 1027 dv->dv_xname[len] == '\0') { 1028 majdev = findblkmajor(dv); 1029 if (majdev < 0) 1030 panic("parsedisk"); 1031 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part); 1032 break; 1033 } 1034 #if defined(NFSCLIENT) 1035 if (dv->dv_class == DV_IFNET && 1036 strncmp(str, dv->dv_xname, len) == 0 && 1037 dv->dv_xname[len] == '\0') { 1038 *devp = NODEV; 1039 break; 1040 } 1041 #endif 1042 } 1043 1044 return (dv); 1045 } 1046 1047 void 1048 setroot(struct device *bootdv, int part, int exitflags) 1049 { 1050 int majdev, unit, len, s; 1051 struct swdevt *swp; 1052 struct device *rootdv, *dv; 1053 dev_t nrootdev, nswapdev = NODEV, temp = NODEV; 1054 struct ifnet *ifp = NULL; 1055 char buf[128]; 1056 #if defined(NFSCLIENT) 1057 extern char *nfsbootdevname; 1058 #endif 1059 1060 /* 1061 * If `swap generic' and we couldn't determine boot device, 1062 * ask the user. 1063 */ 1064 if (mountroot == NULL && bootdv == NULL) 1065 boothowto |= RB_ASKNAME; 1066 if (boothowto & RB_ASKNAME) { 1067 while (1) { 1068 printf("root device"); 1069 if (bootdv != NULL) { 1070 printf(" (default %s", bootdv->dv_xname); 1071 if (bootdv->dv_class == DV_DISK) 1072 printf("%c", 'a' + part); 1073 printf(")"); 1074 } 1075 printf(": "); 1076 s = splhigh(); 1077 cnpollc(TRUE); 1078 len = getsn(buf, sizeof(buf)); 1079 cnpollc(FALSE); 1080 splx(s); 1081 if (strcmp(buf, "exit") == 0) 1082 boot(exitflags); 1083 if (len == 0 && bootdv != NULL) { 1084 strlcpy(buf, bootdv->dv_xname, sizeof buf); 1085 len = strlen(buf); 1086 } 1087 if (len > 0 && buf[len - 1] == '*') { 1088 buf[--len] = '\0'; 1089 dv = getdisk(buf, len, part, &nrootdev); 1090 if (dv != NULL) { 1091 rootdv = dv; 1092 nswapdev = nrootdev; 1093 goto gotswap; 1094 } 1095 } 1096 dv = getdisk(buf, len, part, &nrootdev); 1097 if (dv != NULL) { 1098 rootdv = dv; 1099 break; 1100 } 1101 } 1102 1103 if (rootdv->dv_class == DV_IFNET) 1104 goto gotswap; 1105 1106 /* try to build swap device out of new root device */ 1107 while (1) { 1108 printf("swap device"); 1109 if (rootdv != NULL) 1110 printf(" (default %s%s)", rootdv->dv_xname, 1111 rootdv->dv_class == DV_DISK ? "b" : ""); 1112 printf(": "); 1113 s = splhigh(); 1114 cnpollc(TRUE); 1115 len = getsn(buf, sizeof(buf)); 1116 cnpollc(FALSE); 1117 splx(s); 1118 if (strcmp(buf, "exit") == 0) 1119 boot(exitflags); 1120 if (len == 0 && rootdv != NULL) { 1121 switch (rootdv->dv_class) { 1122 case DV_IFNET: 1123 nswapdev = NODEV; 1124 break; 1125 case DV_DISK: 1126 nswapdev = MAKEDISKDEV(major(nrootdev), 1127 DISKUNIT(nrootdev), 1); 1128 if (nswapdev == nrootdev) 1129 continue; 1130 break; 1131 default: 1132 break; 1133 } 1134 break; 1135 } 1136 dv = getdisk(buf, len, 1, &nswapdev); 1137 if (dv) { 1138 if (dv->dv_class == DV_IFNET) 1139 nswapdev = NODEV; 1140 if (nswapdev == nrootdev) 1141 continue; 1142 break; 1143 } 1144 } 1145 gotswap: 1146 rootdev = nrootdev; 1147 dumpdev = nswapdev; 1148 swdevt[0].sw_dev = nswapdev; 1149 swdevt[1].sw_dev = NODEV; 1150 #if defined(NFSCLIENT) 1151 } else if (mountroot == nfs_mountroot) { 1152 rootdv = bootdv; 1153 rootdev = dumpdev = swapdev = NODEV; 1154 #endif 1155 } else if (mountroot == NULL && rootdev == NODEV) { 1156 /* 1157 * `swap generic' 1158 */ 1159 rootdv = bootdv; 1160 majdev = findblkmajor(rootdv); 1161 if (majdev >= 0) { 1162 /* 1163 * Root and swap are on the disk. 1164 * Assume swap is on partition b. 1165 */ 1166 rootdev = MAKEDISKDEV(majdev, rootdv->dv_unit, part); 1167 nswapdev = MAKEDISKDEV(majdev, rootdv->dv_unit, 1); 1168 } else { 1169 /* 1170 * Root and swap are on a net. 1171 */ 1172 nswapdev = NODEV; 1173 } 1174 dumpdev = nswapdev; 1175 swdevt[0].sw_dev = nswapdev; 1176 /* swdevt[1].sw_dev = NODEV; */ 1177 } else { 1178 /* Completely pre-configured, but we want rootdv .. */ 1179 majdev = major(rootdev); 1180 if (findblkname(majdev) == NULL) 1181 return; 1182 unit = DISKUNIT(rootdev); 1183 part = DISKPART(rootdev); 1184 snprintf(buf, sizeof buf, "%s%d%c", 1185 findblkname(majdev), unit, 'a' + part); 1186 rootdv = parsedisk(buf, strlen(buf), 0, &nrootdev); 1187 if (rootdv == NULL) 1188 panic("root device (%s) not found", buf); 1189 } 1190 1191 if (rootdv && rootdv == bootdv && rootdv->dv_class == DV_IFNET) 1192 ifp = ifunit(rootdv->dv_xname); 1193 else if (bootdv && bootdv->dv_class == DV_IFNET) 1194 ifp = ifunit(bootdv->dv_xname); 1195 1196 if (ifp) 1197 if_addgroup(ifp, "netboot"); 1198 1199 switch (rootdv->dv_class) { 1200 #if defined(NFSCLIENT) 1201 case DV_IFNET: 1202 mountroot = nfs_mountroot; 1203 nfsbootdevname = rootdv->dv_xname; 1204 return; 1205 #endif 1206 case DV_DISK: 1207 mountroot = dk_mountroot; 1208 part = DISKPART(rootdev); 1209 break; 1210 default: 1211 printf("can't figure root, hope your kernel is right\n"); 1212 return; 1213 } 1214 1215 printf("root on %s%c", rootdv->dv_xname, 'a' + part); 1216 1217 /* 1218 * Make the swap partition on the root drive the primary swap. 1219 */ 1220 for (swp = swdevt; swp->sw_dev != NODEV; swp++) { 1221 if (major(rootdev) == major(swp->sw_dev) && 1222 DISKUNIT(rootdev) == DISKUNIT(swp->sw_dev)) { 1223 temp = swdevt[0].sw_dev; 1224 swdevt[0].sw_dev = swp->sw_dev; 1225 swp->sw_dev = temp; 1226 break; 1227 } 1228 } 1229 if (swp->sw_dev != NODEV) { 1230 /* 1231 * If dumpdev was the same as the old primary swap device, 1232 * move it to the new primary swap device. 1233 */ 1234 if (temp == dumpdev) 1235 dumpdev = swdevt[0].sw_dev; 1236 } 1237 if (swdevt[0].sw_dev != NODEV) 1238 printf(" swap on %s%d%c", findblkname(major(swdevt[0].sw_dev)), 1239 DISKUNIT(swdevt[0].sw_dev), 1240 'a' + DISKPART(swdevt[0].sw_dev)); 1241 if (dumpdev != NODEV) 1242 printf(" dump on %s%d%c", findblkname(major(dumpdev)), 1243 DISKUNIT(dumpdev), 'a' + DISKPART(dumpdev)); 1244 printf("\n"); 1245 } 1246 1247 extern struct nam2blk nam2blk[]; 1248 1249 int 1250 findblkmajor(struct device *dv) 1251 { 1252 char buf[16], *p; 1253 int i; 1254 1255 if (strlcpy(buf, dv->dv_xname, sizeof buf) >= sizeof buf) 1256 return (-1); 1257 for (p = buf; *p; p++) 1258 if (*p >= '0' && *p <= '9') 1259 *p = '\0'; 1260 1261 for (i = 0; nam2blk[i].name; i++) 1262 if (!strcmp(buf, nam2blk[i].name)) 1263 return (nam2blk[i].maj); 1264 return (-1); 1265 } 1266 1267 char * 1268 findblkname(int maj) 1269 { 1270 int i; 1271 1272 for (i = 0; nam2blk[i].name; i++) 1273 if (nam2blk[i].maj == maj) 1274 return (nam2blk[i].name); 1275 return (NULL); 1276 } 1277