1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/malloc.h> 39 #include <sys/conf.h> 40 #include <sys/disklabel.h> 41 #include <sys/disklabel64.h> 42 #include <sys/diskslice.h> 43 #include <sys/disk.h> 44 #include <sys/kern_syscall.h> 45 #include <sys/buf2.h> 46 47 /* 48 * Alignment against physical start (verses slice start). We use a megabyte 49 * here. Why do we use a megabyte? Because SSDs already use large 128K 50 * blocks internally (for MLC) and who the hell knows in the future. 51 * 52 * This way if the sysop picks sane values for partition sizes everything 53 * will be nicely aligned, particularly swap for e.g. swapcache, and 54 * clustered operations against larger physical sector sizes for newer HDs, 55 * and so forth. 56 */ 57 #define PALIGN_SIZE (1024 * 1024) 58 #define PALIGN_MASK (PALIGN_SIZE - 1) 59 60 /* 61 * Retrieve the partition start and extent, in blocks. Return 0 on success, 62 * EINVAL on error. 63 */ 64 static int 65 l64_getpartbounds(struct diskslices *ssp, disklabel_t lp, u_int32_t part, 66 u_int64_t *start, u_int64_t *blocks) 67 { 68 struct partition64 *pp; 69 70 if (part >= lp.lab64->d_npartitions) 71 return (EINVAL); 72 73 pp = &lp.lab64->d_partitions[part]; 74 75 if ((pp->p_boffset & (ssp->dss_secsize - 1)) || 76 (pp->p_bsize & (ssp->dss_secsize - 1))) { 77 return (EINVAL); 78 } 79 *start = pp->p_boffset / ssp->dss_secsize; 80 *blocks = pp->p_bsize / ssp->dss_secsize; 81 return(0); 82 } 83 84 /* 85 * Get the filesystem type XXX - diskslices code needs to use uuids 86 */ 87 static void 88 l64_loadpartinfo(disklabel_t lp, u_int32_t part, struct partinfo *dpart) 89 { 90 struct partition64 *pp; 91 const size_t uuid_size = sizeof(struct uuid); 92 93 if (part < lp.lab64->d_npartitions) { 94 pp = &lp.lab64->d_partitions[part]; 95 dpart->fstype_uuid = pp->p_type_uuid; 96 dpart->storage_uuid = pp->p_stor_uuid; 97 dpart->fstype = pp->p_fstype; 98 } else { 99 bzero(&dpart->fstype_uuid, uuid_size); 100 bzero(&dpart->storage_uuid, uuid_size); 101 dpart->fstype = 0; 102 } 103 } 104 105 /* 106 * Get the number of partitions 107 */ 108 static u_int32_t 109 l64_getnumparts(disklabel_t lp) 110 { 111 return(lp.lab64->d_npartitions); 112 } 113 114 static int 115 l64_getpackname(disklabel_t lp, char *buf, size_t bytes) 116 { 117 size_t slen; 118 119 if (lp.lab64->d_packname[0] == 0) { 120 buf[0] = 0; 121 return -1; 122 } 123 slen = strnlen(lp.lab64->d_packname, sizeof(lp.lab64->d_packname)); 124 if (slen >= bytes) 125 slen = bytes - 1; 126 bcopy(lp.lab64->d_packname, buf, slen); 127 buf[slen] = 0; 128 129 return 0; 130 } 131 132 static void 133 l64_freedisklabel(disklabel_t *lpp) 134 { 135 kfree((*lpp).lab64, M_DEVBUF); 136 (*lpp).lab64 = NULL; 137 } 138 139 /* 140 * Attempt to read a disk label from a device. 64 bit disklabels are 141 * sector-agnostic and begin at offset 0 on the device. 142 * 143 * Returns NULL on sucess, and an error string on failure. 144 */ 145 static const char * 146 l64_readdisklabel(cdev_t dev, struct diskslice *sp, disklabel_t *lpp, 147 struct disk_info *info) 148 { 149 struct buf *bp; 150 struct disklabel64 *dlp; 151 const char *msg; 152 uint32_t savecrc; 153 size_t dlpcrcsize; 154 size_t bpsize; 155 int secsize; 156 157 /* 158 * XXX I/O size is subject to device DMA limitations 159 */ 160 secsize = info->d_media_blksize; 161 bpsize = roundup2(sizeof(*dlp), secsize); 162 163 bp = getpbuf_mem(NULL); 164 KKASSERT(bpsize <= bp->b_bufsize); 165 bp->b_bio1.bio_offset = 0; 166 bp->b_bio1.bio_done = biodone_sync; 167 bp->b_bio1.bio_flags |= BIO_SYNC; 168 bp->b_bcount = bpsize; 169 bp->b_flags &= ~B_INVAL; 170 bp->b_flags |= B_FAILONDIS; 171 bp->b_cmd = BUF_CMD_READ; 172 dev_dstrategy(dev, &bp->b_bio1); 173 174 if (biowait(&bp->b_bio1, "labrd")) { 175 msg = "I/O error"; 176 } else { 177 dlp = (struct disklabel64 *)bp->b_data; 178 dlpcrcsize = offsetof(struct disklabel64, 179 d_partitions[dlp->d_npartitions]) - 180 offsetof(struct disklabel64, d_magic); 181 savecrc = dlp->d_crc; 182 dlp->d_crc = 0; 183 if (dlp->d_magic != DISKMAGIC64) { 184 msg = "no disk label"; 185 } else if (dlp->d_npartitions > MAXPARTITIONS64) { 186 msg = "disklabel64 corrupted, too many partitions"; 187 } else if (savecrc != crc32(&dlp->d_magic, dlpcrcsize)) { 188 msg = "disklabel64 corrupted, bad CRC"; 189 } else { 190 dlp->d_crc = savecrc; 191 (*lpp).lab64 = kmalloc(sizeof(*dlp), 192 M_DEVBUF, M_WAITOK|M_ZERO); 193 *(*lpp).lab64 = *dlp; 194 msg = NULL; 195 } 196 } 197 bp->b_flags |= B_INVAL | B_AGE; 198 relpbuf(bp, NULL); 199 200 return (msg); 201 } 202 203 /* 204 * If everything is good, copy olpx to nlpx. Check to see if any 205 * open partitions would change. 206 */ 207 static int 208 l64_setdisklabel(disklabel_t olpx, disklabel_t nlpx, struct diskslices *ssp, 209 struct diskslice *sp, u_int32_t *openmask) 210 { 211 struct disklabel64 *olp, *nlp; 212 struct partition64 *opp, *npp; 213 uint32_t savecrc; 214 uint64_t slicebsize; 215 size_t nlpcrcsize; 216 int i; 217 218 olp = olpx.lab64; 219 nlp = nlpx.lab64; 220 221 slicebsize = (uint64_t)sp->ds_size * ssp->dss_secsize; 222 223 if (nlp->d_magic != DISKMAGIC64) 224 return (EINVAL); 225 if (nlp->d_npartitions > MAXPARTITIONS64) 226 return (EINVAL); 227 savecrc = nlp->d_crc; 228 nlp->d_crc = 0; 229 nlpcrcsize = offsetof(struct disklabel64, 230 d_partitions[nlp->d_npartitions]) - 231 offsetof(struct disklabel64, d_magic); 232 if (crc32(&nlp->d_magic, nlpcrcsize) != savecrc) { 233 nlp->d_crc = savecrc; 234 return (EINVAL); 235 } 236 nlp->d_crc = savecrc; 237 238 /* 239 * Check if open partitions have changed 240 */ 241 i = 0; 242 while (i < MAXPARTITIONS64) { 243 if (openmask[i >> 5] == 0) { 244 i += 32; 245 continue; 246 } 247 if ((openmask[i >> 5] & (1 << (i & 31))) == 0) { 248 ++i; 249 continue; 250 } 251 if (nlp->d_npartitions <= i) 252 return (EBUSY); 253 opp = &olp->d_partitions[i]; 254 npp = &nlp->d_partitions[i]; 255 if (npp->p_boffset != opp->p_boffset || 256 npp->p_bsize < opp->p_bsize) { 257 return (EBUSY); 258 } 259 260 /* 261 * Do not allow p_type_uuid or p_stor_uuid to change if 262 * the partition is currently open. 263 */ 264 if (bcmp(&npp->p_type_uuid, &opp->p_type_uuid, 265 sizeof(npp->p_type_uuid)) != 0) { 266 return (EBUSY); 267 } 268 if (bcmp(&npp->p_stor_uuid, &opp->p_stor_uuid, 269 sizeof(npp->p_stor_uuid)) != 0) { 270 return (EBUSY); 271 } 272 ++i; 273 } 274 275 /* 276 * Make sure the label and partition offsets and sizes are sane. 277 */ 278 if (nlp->d_total_size > slicebsize) 279 return (ENOSPC); 280 if (nlp->d_total_size & (ssp->dss_secsize - 1)) 281 return (EINVAL); 282 if (nlp->d_bbase & (ssp->dss_secsize - 1)) 283 return (EINVAL); 284 if (nlp->d_pbase & (ssp->dss_secsize - 1)) 285 return (EINVAL); 286 if (nlp->d_pstop & (ssp->dss_secsize - 1)) 287 return (EINVAL); 288 if (nlp->d_abase & (ssp->dss_secsize - 1)) 289 return (EINVAL); 290 291 for (i = 0; i < nlp->d_npartitions; ++i) { 292 npp = &nlp->d_partitions[i]; 293 if (npp->p_bsize == 0) { 294 if (npp->p_boffset != 0) 295 return (EINVAL); 296 continue; 297 } 298 if (npp->p_boffset & (ssp->dss_secsize - 1)) 299 return (EINVAL); 300 if (npp->p_bsize & (ssp->dss_secsize - 1)) 301 return (EINVAL); 302 if (npp->p_boffset < nlp->d_pbase) 303 return (ENOSPC); 304 if (npp->p_boffset + npp->p_bsize > nlp->d_total_size) 305 return (ENOSPC); 306 } 307 308 /* 309 * Structurally we may add code to make modifications above in the 310 * future, so regenerate the crc anyway. 311 */ 312 nlp->d_crc = 0; 313 nlp->d_crc = crc32(&nlp->d_magic, nlpcrcsize); 314 *olp = *nlp; 315 316 return (0); 317 } 318 319 /* 320 * Write disk label back to device after modification. 321 */ 322 static int 323 l64_writedisklabel(cdev_t dev, struct diskslices *ssp, 324 struct diskslice *sp, disklabel_t lpx) 325 { 326 struct disklabel64 *lp; 327 struct disklabel64 *dlp; 328 struct buf *bp; 329 int error = 0; 330 size_t bpsize; 331 int secsize; 332 333 lp = lpx.lab64; 334 335 /* 336 * XXX I/O size is subject to device DMA limitations 337 */ 338 secsize = ssp->dss_secsize; 339 bpsize = roundup2(sizeof(*lp), secsize); 340 341 bp = getpbuf_mem(NULL); 342 KKASSERT(bpsize <= bp->b_bufsize); 343 bp->b_bio1.bio_offset = 0; 344 bp->b_bio1.bio_done = biodone_sync; 345 bp->b_bio1.bio_flags |= BIO_SYNC; 346 bp->b_bcount = bpsize; 347 bp->b_flags |= B_FAILONDIS; 348 349 /* 350 * Because our I/O is larger then the label, and because we do not 351 * write the d_reserved0[] area, do a read-modify-write. 352 */ 353 bp->b_flags &= ~B_INVAL; 354 bp->b_cmd = BUF_CMD_READ; 355 KKASSERT(dkpart(dev) == WHOLE_SLICE_PART); 356 dev_dstrategy(dev, &bp->b_bio1); 357 error = biowait(&bp->b_bio1, "labrd"); 358 if (error) 359 goto done; 360 361 dlp = (void *)bp->b_data; 362 bcopy(&lp->d_magic, &dlp->d_magic, 363 sizeof(*lp) - offsetof(struct disklabel64, d_magic)); 364 bp->b_cmd = BUF_CMD_WRITE; 365 bp->b_bio1.bio_done = biodone_sync; 366 bp->b_bio1.bio_flags |= BIO_SYNC; 367 KKASSERT(dkpart(dev) == WHOLE_SLICE_PART); 368 dev_dstrategy(dev, &bp->b_bio1); 369 error = biowait(&bp->b_bio1, "labwr"); 370 done: 371 bp->b_flags |= B_INVAL | B_AGE; 372 relpbuf(bp, NULL); 373 374 return (error); 375 } 376 377 /* 378 * Create a disklabel based on a disk_info structure for the purposes of 379 * DSO_COMPATLABEL - cases where no real label exists on the storage medium. 380 * 381 * If a diskslice is passed, the label is truncated to the slice. 382 * 383 * NOTE! This is not a legal label because d_bbase and d_pbase are both 384 * set to 0. 385 */ 386 static disklabel_t 387 l64_clone_label(struct disk_info *info, struct diskslice *sp) 388 { 389 struct disklabel64 *lp; 390 disklabel_t res; 391 uint32_t blksize = info->d_media_blksize; 392 size_t lpcrcsize; 393 394 lp = kmalloc(sizeof *lp, M_DEVBUF, M_WAITOK | M_ZERO); 395 396 if (sp) 397 lp->d_total_size = (uint64_t)sp->ds_size * blksize; 398 else 399 lp->d_total_size = info->d_media_blocks * blksize; 400 401 lp->d_magic = DISKMAGIC64; 402 lp->d_align = blksize; 403 lp->d_npartitions = MAXPARTITIONS64; 404 lp->d_pstop = lp->d_total_size; 405 406 /* 407 * Create a dummy 'c' part and a dummy 'a' part (if requested). 408 * Note that the 'c' part is really a hack. 64 bit disklabels 409 * do not use 'c' to mean the raw partition. 410 */ 411 412 lp->d_partitions[2].p_boffset = 0; 413 lp->d_partitions[2].p_bsize = lp->d_total_size; 414 /* XXX SET FS TYPE */ 415 416 if (info->d_dsflags & DSO_COMPATPARTA) { 417 lp->d_partitions[0].p_boffset = 0; 418 lp->d_partitions[0].p_bsize = lp->d_total_size; 419 /* XXX SET FS TYPE */ 420 } 421 422 lpcrcsize = offsetof(struct disklabel64, 423 d_partitions[lp->d_npartitions]) - 424 offsetof(struct disklabel64, d_magic); 425 426 lp->d_crc = crc32(&lp->d_magic, lpcrcsize); 427 res.lab64 = lp; 428 return (res); 429 } 430 431 /* 432 * Create a virgin disklabel64 suitable for writing to the media. 433 * 434 * disklabel64 always reserves 32KB for a boot area and leaves room 435 * for up to RESPARTITIONS64 partitions. 436 */ 437 static void 438 l64_makevirginlabel(disklabel_t lpx, struct diskslices *ssp, 439 struct diskslice *sp, struct disk_info *info) 440 { 441 struct disklabel64 *lp = lpx.lab64; 442 struct partition64 *pp; 443 uint32_t blksize; 444 uint32_t ressize; 445 uint64_t blkmask; /* 64 bits so we can ~ */ 446 uint64_t doffset; 447 size_t lpcrcsize; 448 449 doffset = sp->ds_offset * info->d_media_blksize; 450 451 /* 452 * Setup the initial label. Use of a block size of at least 4KB 453 * for calculating the initial reserved areas to allow some degree 454 * of portability between media with different sector sizes. 455 * 456 * Note that the modified blksize is stored in d_align as a hint 457 * to the disklabeling program. 458 */ 459 bzero(lp, sizeof(*lp)); 460 if ((blksize = info->d_media_blksize) < 4096) 461 blksize = 4096; 462 blkmask = blksize - 1; 463 464 if (sp) 465 lp->d_total_size = (uint64_t)sp->ds_size * ssp->dss_secsize; 466 else 467 lp->d_total_size = info->d_media_blocks * info->d_media_blksize; 468 469 lp->d_magic = DISKMAGIC64; 470 lp->d_align = blksize; 471 lp->d_npartitions = MAXPARTITIONS64; 472 kern_uuidgen(&lp->d_stor_uuid, 1); 473 474 ressize = offsetof(struct disklabel64, d_partitions[RESPARTITIONS64]); 475 ressize = (ressize + (uint32_t)blkmask) & ~blkmask; 476 477 /* Reserve space for the stage2 boot code */ 478 lp->d_bbase = ressize; 479 lp->d_pbase = lp->d_bbase + ((BOOT2SIZE64 + blkmask) & ~blkmask); 480 481 /* Reserve space for the backup label at the slice end */ 482 lp->d_abase = lp->d_total_size - ressize; 483 484 /* 485 * NOTE: The pbase and pstop are calculated to align to PALIGN_SIZE 486 * and adjusted with the slice offset, so the partitions are 487 * aligned relative to the start of the physical disk. 488 */ 489 lp->d_pbase = ((doffset + lp->d_pbase + PALIGN_MASK) & 490 ~(uint64_t)PALIGN_MASK) - doffset; 491 lp->d_pstop = ((lp->d_abase - lp->d_pbase) & 492 ~(uint64_t)PALIGN_MASK) + lp->d_pbase; 493 494 /* 495 * All partitions are left empty unless DSO_COMPATPARTA is set 496 */ 497 498 if (info->d_dsflags & DSO_COMPATPARTA) { 499 pp = &lp->d_partitions[0]; 500 pp->p_boffset = lp->d_pbase; 501 pp->p_bsize = lp->d_pstop - lp->d_pbase; 502 /* XXX SET FS TYPE */ 503 } 504 505 lpcrcsize = offsetof(struct disklabel64, 506 d_partitions[lp->d_npartitions]) - 507 offsetof(struct disklabel64, d_magic); 508 lp->d_crc = crc32(&lp->d_magic, lpcrcsize); 509 } 510 511 /* 512 * Set the number of blocks at the beginning of the slice which have 513 * been reserved for label operations. This area will be write-protected 514 * when accessed via the slice. 515 * 516 * For now just protect the label area proper. Do not protect the 517 * boot area. Note partitions in 64 bit disklabels do not overlap 518 * the disklabel or boot area. 519 */ 520 static void 521 l64_adjust_label_reserved(struct diskslices *ssp, int slice, 522 struct diskslice *sp) 523 { 524 struct disklabel64 *lp = sp->ds_label.lab64; 525 526 sp->ds_reserved = lp->d_bbase / ssp->dss_secsize; 527 } 528 529 struct disklabel_ops disklabel64_ops = { 530 .labelsize = sizeof(struct disklabel64), 531 .op_readdisklabel = l64_readdisklabel, 532 .op_setdisklabel = l64_setdisklabel, 533 .op_writedisklabel = l64_writedisklabel, 534 .op_clone_label = l64_clone_label, 535 .op_adjust_label_reserved = l64_adjust_label_reserved, 536 .op_getpartbounds = l64_getpartbounds, 537 .op_loadpartinfo = l64_loadpartinfo, 538 .op_getnumparts = l64_getnumparts, 539 .op_getpackname = l64_getpackname, 540 .op_makevirginlabel = l64_makevirginlabel, 541 .op_freedisklabel = l64_freedisklabel 542 }; 543 544