1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/conf.h> 39 #include <sys/disklabel.h> 40 #include <sys/disklabel64.h> 41 #include <sys/diskslice.h> 42 #include <sys/disk.h> 43 #include <sys/kern_syscall.h> 44 #include <sys/buf2.h> 45 46 /* 47 * Alignment against physical start (verses slice start). We use a megabyte 48 * here. Why do we use a megabyte? Because SSDs already use large 128K 49 * blocks internally (for MLC) and who the hell knows in the future. 50 * 51 * This way if the sysop picks sane values for partition sizes everything 52 * will be nicely aligned, particularly swap for e.g. swapcache, and 53 * clustered operations against larger physical sector sizes for newer HDs, 54 * and so forth. 55 */ 56 #define PALIGN_SIZE (1024 * 1024) 57 #define PALIGN_MASK (PALIGN_SIZE - 1) 58 59 /* 60 * Retrieve the partition start and extent, in blocks. Return 0 on success, 61 * EINVAL on error. 62 */ 63 static int 64 l64_getpartbounds(struct diskslices *ssp, disklabel_t lp, u_int32_t part, 65 u_int64_t *start, u_int64_t *blocks) 66 { 67 struct partition64 *pp; 68 69 if (part >= lp.lab64->d_npartitions) 70 return (EINVAL); 71 72 pp = &lp.lab64->d_partitions[part]; 73 74 if ((pp->p_boffset & (ssp->dss_secsize - 1)) || 75 (pp->p_bsize & (ssp->dss_secsize - 1))) { 76 return (EINVAL); 77 } 78 *start = pp->p_boffset / ssp->dss_secsize; 79 *blocks = pp->p_bsize / ssp->dss_secsize; 80 return(0); 81 } 82 83 /* 84 * Get the filesystem type XXX - diskslices code needs to use uuids 85 */ 86 static void 87 l64_loadpartinfo(disklabel_t lp, u_int32_t part, struct partinfo *dpart) 88 { 89 struct partition64 *pp; 90 const size_t uuid_size = sizeof(struct uuid); 91 92 if (part < lp.lab64->d_npartitions) { 93 pp = &lp.lab64->d_partitions[part]; 94 dpart->fstype_uuid = pp->p_type_uuid; 95 dpart->storage_uuid = pp->p_stor_uuid; 96 dpart->fstype = pp->p_fstype; 97 } else { 98 bzero(&dpart->fstype_uuid, uuid_size); 99 bzero(&dpart->storage_uuid, uuid_size); 100 dpart->fstype = 0; 101 } 102 } 103 104 /* 105 * Get the number of partitions 106 */ 107 static u_int32_t 108 l64_getnumparts(disklabel_t lp) 109 { 110 return(lp.lab64->d_npartitions); 111 } 112 113 static int 114 l64_getpackname(disklabel_t lp, char *buf, size_t bytes) 115 { 116 size_t slen; 117 118 if (lp.lab64->d_packname[0] == 0) { 119 buf[0] = 0; 120 return -1; 121 } 122 slen = strnlen(lp.lab64->d_packname, sizeof(lp.lab64->d_packname)); 123 if (slen >= bytes) 124 slen = bytes - 1; 125 bcopy(lp.lab64->d_packname, buf, slen); 126 buf[slen] = 0; 127 128 return 0; 129 } 130 131 static void 132 l64_freedisklabel(disklabel_t *lpp) 133 { 134 kfree((*lpp).lab64, M_DEVBUF); 135 (*lpp).lab64 = NULL; 136 } 137 138 /* 139 * Attempt to read a disk label from a device. 64 bit disklabels are 140 * sector-agnostic and begin at offset 0 on the device. 141 * 142 * Returns NULL on sucess, and an error string on failure. 143 */ 144 static const char * 145 l64_readdisklabel(cdev_t dev, struct diskslice *sp, disklabel_t *lpp, 146 struct disk_info *info) 147 { 148 struct buf *bp; 149 struct disklabel64 *dlp; 150 const char *msg; 151 uint32_t savecrc; 152 size_t dlpcrcsize; 153 size_t bpsize; 154 int secsize; 155 156 /* 157 * XXX I/O size is subject to device DMA limitations 158 */ 159 secsize = info->d_media_blksize; 160 bpsize = roundup2(sizeof(*dlp), secsize); 161 162 bp = getpbuf_mem(NULL); 163 KKASSERT(bpsize <= bp->b_bufsize); 164 bp->b_bio1.bio_offset = 0; 165 bp->b_bio1.bio_done = biodone_sync; 166 bp->b_bio1.bio_flags |= BIO_SYNC; 167 bp->b_bcount = bpsize; 168 bp->b_flags &= ~B_INVAL; 169 bp->b_flags |= B_FAILONDIS; 170 bp->b_cmd = BUF_CMD_READ; 171 dev_dstrategy(dev, &bp->b_bio1); 172 173 if (biowait(&bp->b_bio1, "labrd")) { 174 msg = "I/O error"; 175 } else { 176 dlp = (struct disklabel64 *)bp->b_data; 177 dlpcrcsize = offsetof(struct disklabel64, 178 d_partitions[dlp->d_npartitions]) - 179 offsetof(struct disklabel64, d_magic); 180 savecrc = dlp->d_crc; 181 dlp->d_crc = 0; 182 if (dlp->d_magic != DISKMAGIC64) { 183 msg = "no disk label"; 184 } else if (dlp->d_npartitions > MAXPARTITIONS64) { 185 msg = "disklabel64 corrupted, too many partitions"; 186 } else if (savecrc != crc32(&dlp->d_magic, dlpcrcsize)) { 187 msg = "disklabel64 corrupted, bad CRC"; 188 } else { 189 dlp->d_crc = savecrc; 190 (*lpp).lab64 = kmalloc(sizeof(*dlp), 191 M_DEVBUF, M_WAITOK|M_ZERO); 192 *(*lpp).lab64 = *dlp; 193 msg = NULL; 194 } 195 } 196 bp->b_flags |= B_INVAL | B_AGE; 197 relpbuf(bp, NULL); 198 199 return (msg); 200 } 201 202 /* 203 * If everything is good, copy olpx to nlpx. Check to see if any 204 * open partitions would change. 205 */ 206 static int 207 l64_setdisklabel(disklabel_t olpx, disklabel_t nlpx, struct diskslices *ssp, 208 struct diskslice *sp, u_int32_t *openmask) 209 { 210 struct disklabel64 *olp, *nlp; 211 struct partition64 *opp, *npp; 212 uint32_t savecrc; 213 uint64_t slicebsize; 214 size_t nlpcrcsize; 215 int i; 216 217 olp = olpx.lab64; 218 nlp = nlpx.lab64; 219 220 slicebsize = (uint64_t)sp->ds_size * ssp->dss_secsize; 221 222 if (nlp->d_magic != DISKMAGIC64) 223 return (EINVAL); 224 if (nlp->d_npartitions > MAXPARTITIONS64) 225 return (EINVAL); 226 savecrc = nlp->d_crc; 227 nlp->d_crc = 0; 228 nlpcrcsize = offsetof(struct disklabel64, 229 d_partitions[nlp->d_npartitions]) - 230 offsetof(struct disklabel64, d_magic); 231 if (crc32(&nlp->d_magic, nlpcrcsize) != savecrc) { 232 nlp->d_crc = savecrc; 233 return (EINVAL); 234 } 235 nlp->d_crc = savecrc; 236 237 /* 238 * Check if open partitions have changed 239 */ 240 i = 0; 241 while (i < MAXPARTITIONS64) { 242 if (openmask[i >> 5] == 0) { 243 i += 32; 244 continue; 245 } 246 if ((openmask[i >> 5] & (1 << (i & 31))) == 0) { 247 ++i; 248 continue; 249 } 250 if (nlp->d_npartitions <= i) 251 return (EBUSY); 252 opp = &olp->d_partitions[i]; 253 npp = &nlp->d_partitions[i]; 254 if (npp->p_boffset != opp->p_boffset || 255 npp->p_bsize < opp->p_bsize) { 256 return (EBUSY); 257 } 258 259 /* 260 * Do not allow p_type_uuid or p_stor_uuid to change if 261 * the partition is currently open. 262 */ 263 if (bcmp(&npp->p_type_uuid, &opp->p_type_uuid, 264 sizeof(npp->p_type_uuid)) != 0) { 265 return (EBUSY); 266 } 267 if (bcmp(&npp->p_stor_uuid, &opp->p_stor_uuid, 268 sizeof(npp->p_stor_uuid)) != 0) { 269 return (EBUSY); 270 } 271 ++i; 272 } 273 274 /* 275 * Make sure the label and partition offsets and sizes are sane. 276 */ 277 if (nlp->d_total_size > slicebsize) 278 return (ENOSPC); 279 if (nlp->d_total_size & (ssp->dss_secsize - 1)) 280 return (EINVAL); 281 if (nlp->d_bbase & (ssp->dss_secsize - 1)) 282 return (EINVAL); 283 if (nlp->d_pbase & (ssp->dss_secsize - 1)) 284 return (EINVAL); 285 if (nlp->d_pstop & (ssp->dss_secsize - 1)) 286 return (EINVAL); 287 if (nlp->d_abase & (ssp->dss_secsize - 1)) 288 return (EINVAL); 289 290 for (i = 0; i < nlp->d_npartitions; ++i) { 291 npp = &nlp->d_partitions[i]; 292 if (npp->p_bsize == 0) { 293 if (npp->p_boffset != 0) 294 return (EINVAL); 295 continue; 296 } 297 if (npp->p_boffset & (ssp->dss_secsize - 1)) 298 return (EINVAL); 299 if (npp->p_bsize & (ssp->dss_secsize - 1)) 300 return (EINVAL); 301 if (npp->p_boffset < nlp->d_pbase) 302 return (ENOSPC); 303 if (npp->p_boffset + npp->p_bsize > nlp->d_total_size) 304 return (ENOSPC); 305 } 306 307 /* 308 * Structurally we may add code to make modifications above in the 309 * future, so regenerate the crc anyway. 310 */ 311 nlp->d_crc = 0; 312 nlp->d_crc = crc32(&nlp->d_magic, nlpcrcsize); 313 *olp = *nlp; 314 315 return (0); 316 } 317 318 /* 319 * Write disk label back to device after modification. 320 */ 321 static int 322 l64_writedisklabel(cdev_t dev, struct diskslices *ssp, 323 struct diskslice *sp, disklabel_t lpx) 324 { 325 struct disklabel64 *lp; 326 struct disklabel64 *dlp; 327 struct buf *bp; 328 int error = 0; 329 size_t bpsize; 330 int secsize; 331 332 lp = lpx.lab64; 333 334 /* 335 * XXX I/O size is subject to device DMA limitations 336 */ 337 secsize = ssp->dss_secsize; 338 bpsize = roundup2(sizeof(*lp), secsize); 339 340 bp = getpbuf_mem(NULL); 341 KKASSERT(bpsize <= bp->b_bufsize); 342 bp->b_bio1.bio_offset = 0; 343 bp->b_bio1.bio_done = biodone_sync; 344 bp->b_bio1.bio_flags |= BIO_SYNC; 345 bp->b_bcount = bpsize; 346 bp->b_flags |= B_FAILONDIS; 347 348 /* 349 * Because our I/O is larger then the label, and because we do not 350 * write the d_reserved0[] area, do a read-modify-write. 351 */ 352 bp->b_flags &= ~B_INVAL; 353 bp->b_cmd = BUF_CMD_READ; 354 KKASSERT(dkpart(dev) == WHOLE_SLICE_PART); 355 dev_dstrategy(dev, &bp->b_bio1); 356 error = biowait(&bp->b_bio1, "labrd"); 357 if (error) 358 goto done; 359 360 dlp = (void *)bp->b_data; 361 bcopy(&lp->d_magic, &dlp->d_magic, 362 sizeof(*lp) - offsetof(struct disklabel64, d_magic)); 363 bp->b_cmd = BUF_CMD_WRITE; 364 bp->b_bio1.bio_done = biodone_sync; 365 bp->b_bio1.bio_flags |= BIO_SYNC; 366 KKASSERT(dkpart(dev) == WHOLE_SLICE_PART); 367 dev_dstrategy(dev, &bp->b_bio1); 368 error = biowait(&bp->b_bio1, "labwr"); 369 done: 370 bp->b_flags |= B_INVAL | B_AGE; 371 relpbuf(bp, NULL); 372 373 return (error); 374 } 375 376 /* 377 * Create a disklabel based on a disk_info structure for the purposes of 378 * DSO_COMPATLABEL - cases where no real label exists on the storage medium. 379 * 380 * If a diskslice is passed, the label is truncated to the slice. 381 * 382 * NOTE! This is not a legal label because d_bbase and d_pbase are both 383 * set to 0. 384 */ 385 static disklabel_t 386 l64_clone_label(struct disk_info *info, struct diskslice *sp) 387 { 388 struct disklabel64 *lp; 389 disklabel_t res; 390 uint32_t blksize = info->d_media_blksize; 391 size_t lpcrcsize; 392 393 lp = kmalloc(sizeof *lp, M_DEVBUF, M_WAITOK | M_ZERO); 394 395 if (sp) 396 lp->d_total_size = (uint64_t)sp->ds_size * blksize; 397 else 398 lp->d_total_size = info->d_media_blocks * blksize; 399 400 lp->d_magic = DISKMAGIC64; 401 lp->d_align = blksize; 402 lp->d_npartitions = MAXPARTITIONS64; 403 lp->d_pstop = lp->d_total_size; 404 405 /* 406 * Create a dummy 'c' part and a dummy 'a' part (if requested). 407 * Note that the 'c' part is really a hack. 64 bit disklabels 408 * do not use 'c' to mean the raw partition. 409 */ 410 411 lp->d_partitions[2].p_boffset = 0; 412 lp->d_partitions[2].p_bsize = lp->d_total_size; 413 /* XXX SET FS TYPE */ 414 415 if (info->d_dsflags & DSO_COMPATPARTA) { 416 lp->d_partitions[0].p_boffset = 0; 417 lp->d_partitions[0].p_bsize = lp->d_total_size; 418 /* XXX SET FS TYPE */ 419 } 420 421 lpcrcsize = offsetof(struct disklabel64, 422 d_partitions[lp->d_npartitions]) - 423 offsetof(struct disklabel64, d_magic); 424 425 lp->d_crc = crc32(&lp->d_magic, lpcrcsize); 426 res.lab64 = lp; 427 return (res); 428 } 429 430 /* 431 * Create a virgin disklabel64 suitable for writing to the media. 432 * 433 * disklabel64 always reserves 32KB for a boot area and leaves room 434 * for up to RESPARTITIONS64 partitions. 435 */ 436 static void 437 l64_makevirginlabel(disklabel_t lpx, struct diskslices *ssp, 438 struct diskslice *sp, struct disk_info *info) 439 { 440 struct disklabel64 *lp = lpx.lab64; 441 struct partition64 *pp; 442 uint32_t blksize; 443 uint32_t ressize; 444 uint64_t blkmask; /* 64 bits so we can ~ */ 445 uint64_t doffset; 446 size_t lpcrcsize; 447 448 doffset = sp->ds_offset * info->d_media_blksize; 449 450 /* 451 * Setup the initial label. Use of a block size of at least 4KB 452 * for calculating the initial reserved areas to allow some degree 453 * of portability between media with different sector sizes. 454 * 455 * Note that the modified blksize is stored in d_align as a hint 456 * to the disklabeling program. 457 */ 458 bzero(lp, sizeof(*lp)); 459 if ((blksize = info->d_media_blksize) < 4096) 460 blksize = 4096; 461 blkmask = blksize - 1; 462 463 if (sp) 464 lp->d_total_size = (uint64_t)sp->ds_size * ssp->dss_secsize; 465 else 466 lp->d_total_size = info->d_media_blocks * info->d_media_blksize; 467 468 lp->d_magic = DISKMAGIC64; 469 lp->d_align = blksize; 470 lp->d_npartitions = MAXPARTITIONS64; 471 kern_uuidgen(&lp->d_stor_uuid, 1); 472 473 ressize = offsetof(struct disklabel64, d_partitions[RESPARTITIONS64]); 474 ressize = (ressize + (uint32_t)blkmask) & ~blkmask; 475 476 /* Reserve space for the stage2 boot code */ 477 lp->d_bbase = ressize; 478 lp->d_pbase = lp->d_bbase + ((BOOT2SIZE64 + blkmask) & ~blkmask); 479 480 /* Reserve space for the backup label at the slice end */ 481 lp->d_abase = lp->d_total_size - ressize; 482 483 /* 484 * NOTE: The pbase and pstop are calculated to align to PALIGN_SIZE 485 * and adjusted with the slice offset, so the partitions are 486 * aligned relative to the start of the physical disk. 487 */ 488 lp->d_pbase = ((doffset + lp->d_pbase + PALIGN_MASK) & 489 ~(uint64_t)PALIGN_MASK) - doffset; 490 lp->d_pstop = ((lp->d_abase - lp->d_pbase) & 491 ~(uint64_t)PALIGN_MASK) + lp->d_pbase; 492 493 /* 494 * All partitions are left empty unless DSO_COMPATPARTA is set 495 */ 496 497 if (info->d_dsflags & DSO_COMPATPARTA) { 498 pp = &lp->d_partitions[0]; 499 pp->p_boffset = lp->d_pbase; 500 pp->p_bsize = lp->d_pstop - lp->d_pbase; 501 /* XXX SET FS TYPE */ 502 } 503 504 lpcrcsize = offsetof(struct disklabel64, 505 d_partitions[lp->d_npartitions]) - 506 offsetof(struct disklabel64, d_magic); 507 lp->d_crc = crc32(&lp->d_magic, lpcrcsize); 508 } 509 510 /* 511 * Set the number of blocks at the beginning of the slice which have 512 * been reserved for label operations. This area will be write-protected 513 * when accessed via the slice. 514 * 515 * For now just protect the label area proper. Do not protect the 516 * boot area. Note partitions in 64 bit disklabels do not overlap 517 * the disklabel or boot area. 518 */ 519 static void 520 l64_adjust_label_reserved(struct diskslices *ssp, int slice, 521 struct diskslice *sp) 522 { 523 struct disklabel64 *lp = sp->ds_label.lab64; 524 525 sp->ds_reserved = lp->d_bbase / ssp->dss_secsize; 526 } 527 528 struct disklabel_ops disklabel64_ops = { 529 .labelsize = sizeof(struct disklabel64), 530 .op_readdisklabel = l64_readdisklabel, 531 .op_setdisklabel = l64_setdisklabel, 532 .op_writedisklabel = l64_writedisklabel, 533 .op_clone_label = l64_clone_label, 534 .op_adjust_label_reserved = l64_adjust_label_reserved, 535 .op_getpartbounds = l64_getpartbounds, 536 .op_loadpartinfo = l64_loadpartinfo, 537 .op_getnumparts = l64_getnumparts, 538 .op_getpackname = l64_getpackname, 539 .op_makevirginlabel = l64_makevirginlabel, 540 .op_freedisklabel = l64_freedisklabel 541 }; 542 543