1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/conf.h> 39 #include <sys/disklabel.h> 40 #include <sys/disklabel64.h> 41 #include <sys/diskslice.h> 42 #include <sys/disk.h> 43 #include <sys/kern_syscall.h> 44 #include <sys/buf2.h> 45 46 /* 47 * Alignment against physical start (verses slice start). We use a megabyte 48 * here. Why do we use a megabyte? Because SSDs already use large 128K 49 * blocks internally (for MLC) and who the hell knows in the future. 50 * 51 * This way if the sysop picks sane values for partition sizes everything 52 * will be nicely aligned, particularly swap for e.g. swapcache, and 53 * clustered operations against larger physical sector sizes for newer HDs, 54 * and so forth. 55 */ 56 #define PALIGN_SIZE (1024 * 1024) 57 #define PALIGN_MASK (PALIGN_SIZE - 1) 58 59 /* 60 * Retrieve the partition start and extent, in blocks. Return 0 on success, 61 * EINVAL on error. 62 */ 63 static int 64 l64_getpartbounds(struct diskslices *ssp, disklabel_t lp, u_int32_t part, 65 u_int64_t *start, u_int64_t *blocks) 66 { 67 struct partition64 *pp; 68 69 if (part >= lp.lab64->d_npartitions) 70 return (EINVAL); 71 72 pp = &lp.lab64->d_partitions[part]; 73 74 if ((pp->p_boffset & (ssp->dss_secsize - 1)) || 75 (pp->p_bsize & (ssp->dss_secsize - 1))) { 76 return (EINVAL); 77 } 78 *start = pp->p_boffset / ssp->dss_secsize; 79 *blocks = pp->p_bsize / ssp->dss_secsize; 80 return(0); 81 } 82 83 /* 84 * Get the filesystem type XXX - diskslices code needs to use uuids 85 */ 86 static void 87 l64_loadpartinfo(disklabel_t lp, u_int32_t part, struct partinfo *dpart) 88 { 89 struct partition64 *pp; 90 const size_t uuid_size = sizeof(struct uuid); 91 92 if (part < lp.lab64->d_npartitions) { 93 pp = &lp.lab64->d_partitions[part]; 94 dpart->fstype_uuid = pp->p_type_uuid; 95 dpart->storage_uuid = pp->p_stor_uuid; 96 dpart->fstype = pp->p_fstype; 97 } else { 98 bzero(&dpart->fstype_uuid, uuid_size); 99 bzero(&dpart->storage_uuid, uuid_size); 100 dpart->fstype = 0; 101 } 102 } 103 104 /* 105 * Get the number of partitions 106 */ 107 static u_int32_t 108 l64_getnumparts(disklabel_t lp) 109 { 110 return(lp.lab64->d_npartitions); 111 } 112 113 static void 114 l64_freedisklabel(disklabel_t *lpp) 115 { 116 kfree((*lpp).lab64, M_DEVBUF); 117 (*lpp).lab64 = NULL; 118 } 119 120 /* 121 * Attempt to read a disk label from a device. 64 bit disklabels are 122 * sector-agnostic and begin at offset 0 on the device. 123 * 124 * Returns NULL on sucess, and an error string on failure. 125 */ 126 static const char * 127 l64_readdisklabel(cdev_t dev, struct diskslice *sp, disklabel_t *lpp, 128 struct disk_info *info) 129 { 130 struct buf *bp; 131 struct disklabel64 *dlp; 132 const char *msg; 133 uint32_t savecrc; 134 size_t dlpcrcsize; 135 size_t bpsize; 136 int secsize; 137 138 /* 139 * XXX I/O size is subject to device DMA limitations 140 */ 141 secsize = info->d_media_blksize; 142 bpsize = roundup2(sizeof(*dlp), secsize); 143 144 bp = getpbuf_mem(NULL); 145 KKASSERT(bpsize <= bp->b_bufsize); 146 bp->b_bio1.bio_offset = 0; 147 bp->b_bio1.bio_done = biodone_sync; 148 bp->b_bio1.bio_flags |= BIO_SYNC; 149 bp->b_bcount = bpsize; 150 bp->b_flags &= ~B_INVAL; 151 bp->b_flags |= B_FAILONDIS; 152 bp->b_cmd = BUF_CMD_READ; 153 dev_dstrategy(dev, &bp->b_bio1); 154 155 if (biowait(&bp->b_bio1, "labrd")) { 156 msg = "I/O error"; 157 } else { 158 dlp = (struct disklabel64 *)bp->b_data; 159 dlpcrcsize = offsetof(struct disklabel64, 160 d_partitions[dlp->d_npartitions]) - 161 offsetof(struct disklabel64, d_magic); 162 savecrc = dlp->d_crc; 163 dlp->d_crc = 0; 164 if (dlp->d_magic != DISKMAGIC64) { 165 msg = "no disk label"; 166 } else if (dlp->d_npartitions > MAXPARTITIONS64) { 167 msg = "disklabel64 corrupted, too many partitions"; 168 } else if (savecrc != crc32(&dlp->d_magic, dlpcrcsize)) { 169 msg = "disklabel64 corrupted, bad CRC"; 170 } else { 171 dlp->d_crc = savecrc; 172 (*lpp).lab64 = kmalloc(sizeof(*dlp), 173 M_DEVBUF, M_WAITOK|M_ZERO); 174 *(*lpp).lab64 = *dlp; 175 msg = NULL; 176 } 177 } 178 bp->b_flags |= B_INVAL | B_AGE; 179 relpbuf(bp, NULL); 180 181 return (msg); 182 } 183 184 /* 185 * If everything is good, copy olpx to nlpx. Check to see if any 186 * open partitions would change. 187 */ 188 static int 189 l64_setdisklabel(disklabel_t olpx, disklabel_t nlpx, struct diskslices *ssp, 190 struct diskslice *sp, u_int32_t *openmask) 191 { 192 struct disklabel64 *olp, *nlp; 193 struct partition64 *opp, *npp; 194 uint32_t savecrc; 195 uint64_t slicebsize; 196 size_t nlpcrcsize; 197 int i; 198 199 olp = olpx.lab64; 200 nlp = nlpx.lab64; 201 202 slicebsize = (uint64_t)sp->ds_size * ssp->dss_secsize; 203 204 if (nlp->d_magic != DISKMAGIC64) 205 return (EINVAL); 206 if (nlp->d_npartitions > MAXPARTITIONS64) 207 return (EINVAL); 208 savecrc = nlp->d_crc; 209 nlp->d_crc = 0; 210 nlpcrcsize = offsetof(struct disklabel64, 211 d_partitions[nlp->d_npartitions]) - 212 offsetof(struct disklabel64, d_magic); 213 if (crc32(&nlp->d_magic, nlpcrcsize) != savecrc) { 214 nlp->d_crc = savecrc; 215 return (EINVAL); 216 } 217 nlp->d_crc = savecrc; 218 219 /* 220 * Check if open partitions have changed 221 */ 222 i = 0; 223 while (i < MAXPARTITIONS64) { 224 if (openmask[i >> 5] == 0) { 225 i += 32; 226 continue; 227 } 228 if ((openmask[i >> 5] & (1 << (i & 31))) == 0) { 229 ++i; 230 continue; 231 } 232 if (nlp->d_npartitions <= i) 233 return (EBUSY); 234 opp = &olp->d_partitions[i]; 235 npp = &nlp->d_partitions[i]; 236 if (npp->p_boffset != opp->p_boffset || 237 npp->p_bsize < opp->p_bsize) { 238 return (EBUSY); 239 } 240 241 /* 242 * Do not allow p_type_uuid or p_stor_uuid to change if 243 * the partition is currently open. 244 */ 245 if (bcmp(&npp->p_type_uuid, &opp->p_type_uuid, 246 sizeof(npp->p_type_uuid)) != 0) { 247 return (EBUSY); 248 } 249 if (bcmp(&npp->p_stor_uuid, &opp->p_stor_uuid, 250 sizeof(npp->p_stor_uuid)) != 0) { 251 return (EBUSY); 252 } 253 ++i; 254 } 255 256 /* 257 * Make sure the label and partition offsets and sizes are sane. 258 */ 259 if (nlp->d_total_size > slicebsize) 260 return (ENOSPC); 261 if (nlp->d_total_size & (ssp->dss_secsize - 1)) 262 return (EINVAL); 263 if (nlp->d_bbase & (ssp->dss_secsize - 1)) 264 return (EINVAL); 265 if (nlp->d_pbase & (ssp->dss_secsize - 1)) 266 return (EINVAL); 267 if (nlp->d_pstop & (ssp->dss_secsize - 1)) 268 return (EINVAL); 269 if (nlp->d_abase & (ssp->dss_secsize - 1)) 270 return (EINVAL); 271 272 for (i = 0; i < nlp->d_npartitions; ++i) { 273 npp = &nlp->d_partitions[i]; 274 if (npp->p_bsize == 0) { 275 if (npp->p_boffset != 0) 276 return (EINVAL); 277 continue; 278 } 279 if (npp->p_boffset & (ssp->dss_secsize - 1)) 280 return (EINVAL); 281 if (npp->p_bsize & (ssp->dss_secsize - 1)) 282 return (EINVAL); 283 if (npp->p_boffset < nlp->d_pbase) 284 return (ENOSPC); 285 if (npp->p_boffset + npp->p_bsize > nlp->d_total_size) 286 return (ENOSPC); 287 } 288 289 /* 290 * Structurally we may add code to make modifications above in the 291 * future, so regenerate the crc anyway. 292 */ 293 nlp->d_crc = 0; 294 nlp->d_crc = crc32(&nlp->d_magic, nlpcrcsize); 295 *olp = *nlp; 296 297 return (0); 298 } 299 300 /* 301 * Write disk label back to device after modification. 302 */ 303 static int 304 l64_writedisklabel(cdev_t dev, struct diskslices *ssp, 305 struct diskslice *sp, disklabel_t lpx) 306 { 307 struct disklabel64 *lp; 308 struct disklabel64 *dlp; 309 struct buf *bp; 310 int error = 0; 311 size_t bpsize; 312 int secsize; 313 314 lp = lpx.lab64; 315 316 /* 317 * XXX I/O size is subject to device DMA limitations 318 */ 319 secsize = ssp->dss_secsize; 320 bpsize = roundup2(sizeof(*lp), secsize); 321 322 bp = getpbuf_mem(NULL); 323 KKASSERT(bpsize <= bp->b_bufsize); 324 bp->b_bio1.bio_offset = 0; 325 bp->b_bio1.bio_done = biodone_sync; 326 bp->b_bio1.bio_flags |= BIO_SYNC; 327 bp->b_bcount = bpsize; 328 bp->b_flags |= B_FAILONDIS; 329 330 /* 331 * Because our I/O is larger then the label, and because we do not 332 * write the d_reserved0[] area, do a read-modify-write. 333 */ 334 bp->b_flags &= ~B_INVAL; 335 bp->b_cmd = BUF_CMD_READ; 336 KKASSERT(dkpart(dev) == WHOLE_SLICE_PART); 337 dev_dstrategy(dev, &bp->b_bio1); 338 error = biowait(&bp->b_bio1, "labrd"); 339 if (error) 340 goto done; 341 342 dlp = (void *)bp->b_data; 343 bcopy(&lp->d_magic, &dlp->d_magic, 344 sizeof(*lp) - offsetof(struct disklabel64, d_magic)); 345 bp->b_cmd = BUF_CMD_WRITE; 346 bp->b_bio1.bio_done = biodone_sync; 347 bp->b_bio1.bio_flags |= BIO_SYNC; 348 KKASSERT(dkpart(dev) == WHOLE_SLICE_PART); 349 dev_dstrategy(dev, &bp->b_bio1); 350 error = biowait(&bp->b_bio1, "labwr"); 351 done: 352 bp->b_flags |= B_INVAL | B_AGE; 353 relpbuf(bp, NULL); 354 355 return (error); 356 } 357 358 /* 359 * Create a disklabel based on a disk_info structure for the purposes of 360 * DSO_COMPATLABEL - cases where no real label exists on the storage medium. 361 * 362 * If a diskslice is passed, the label is truncated to the slice. 363 * 364 * NOTE! This is not a legal label because d_bbase and d_pbase are both 365 * set to 0. 366 */ 367 static disklabel_t 368 l64_clone_label(struct disk_info *info, struct diskslice *sp) 369 { 370 struct disklabel64 *lp; 371 disklabel_t res; 372 uint32_t blksize = info->d_media_blksize; 373 size_t lpcrcsize; 374 375 lp = kmalloc(sizeof *lp, M_DEVBUF, M_WAITOK | M_ZERO); 376 377 if (sp) 378 lp->d_total_size = (uint64_t)sp->ds_size * blksize; 379 else 380 lp->d_total_size = info->d_media_blocks * blksize; 381 382 lp->d_magic = DISKMAGIC64; 383 lp->d_align = blksize; 384 lp->d_npartitions = MAXPARTITIONS64; 385 lp->d_pstop = lp->d_total_size; 386 387 /* 388 * Create a dummy 'c' part and a dummy 'a' part (if requested). 389 * Note that the 'c' part is really a hack. 64 bit disklabels 390 * do not use 'c' to mean the raw partition. 391 */ 392 393 lp->d_partitions[2].p_boffset = 0; 394 lp->d_partitions[2].p_bsize = lp->d_total_size; 395 /* XXX SET FS TYPE */ 396 397 if (info->d_dsflags & DSO_COMPATPARTA) { 398 lp->d_partitions[0].p_boffset = 0; 399 lp->d_partitions[0].p_bsize = lp->d_total_size; 400 /* XXX SET FS TYPE */ 401 } 402 403 lpcrcsize = offsetof(struct disklabel64, 404 d_partitions[lp->d_npartitions]) - 405 offsetof(struct disklabel64, d_magic); 406 407 lp->d_crc = crc32(&lp->d_magic, lpcrcsize); 408 res.lab64 = lp; 409 return (res); 410 } 411 412 /* 413 * Create a virgin disklabel64 suitable for writing to the media. 414 * 415 * disklabel64 always reserves 32KB for a boot area and leaves room 416 * for up to RESPARTITIONS64 partitions. 417 */ 418 static void 419 l64_makevirginlabel(disklabel_t lpx, struct diskslices *ssp, 420 struct diskslice *sp, struct disk_info *info) 421 { 422 struct disklabel64 *lp = lpx.lab64; 423 struct partition64 *pp; 424 uint32_t blksize; 425 uint32_t ressize; 426 uint64_t blkmask; /* 64 bits so we can ~ */ 427 uint64_t doffset; 428 size_t lpcrcsize; 429 430 doffset = sp->ds_offset * info->d_media_blksize; 431 432 /* 433 * Setup the initial label. Use of a block size of at least 4KB 434 * for calculating the initial reserved areas to allow some degree 435 * of portability between media with different sector sizes. 436 * 437 * Note that the modified blksize is stored in d_align as a hint 438 * to the disklabeling program. 439 */ 440 bzero(lp, sizeof(*lp)); 441 if ((blksize = info->d_media_blksize) < 4096) 442 blksize = 4096; 443 blkmask = blksize - 1; 444 445 if (sp) 446 lp->d_total_size = (uint64_t)sp->ds_size * ssp->dss_secsize; 447 else 448 lp->d_total_size = info->d_media_blocks * info->d_media_blksize; 449 450 lp->d_magic = DISKMAGIC64; 451 lp->d_align = blksize; 452 lp->d_npartitions = MAXPARTITIONS64; 453 kern_uuidgen(&lp->d_stor_uuid, 1); 454 455 ressize = offsetof(struct disklabel64, d_partitions[RESPARTITIONS64]); 456 ressize = (ressize + (uint32_t)blkmask) & ~blkmask; 457 458 /* Reserve space for the stage2 boot code */ 459 lp->d_bbase = ressize; 460 lp->d_pbase = lp->d_bbase + ((BOOT2SIZE64 + blkmask) & ~blkmask); 461 462 /* Reserve space for the backup label at the slice end */ 463 lp->d_abase = lp->d_total_size - ressize; 464 465 /* 466 * NOTE: The pbase and pstop are calculated to align to PALIGN_SIZE 467 * and adjusted with the slice offset, so the partitions are 468 * aligned relative to the start of the physical disk. 469 */ 470 lp->d_pbase = ((doffset + lp->d_pbase + PALIGN_MASK) & 471 ~(uint64_t)PALIGN_MASK) - doffset; 472 lp->d_pstop = ((lp->d_abase - lp->d_pbase) & 473 ~(uint64_t)PALIGN_MASK) + lp->d_pbase; 474 475 /* 476 * All partitions are left empty unless DSO_COMPATPARTA is set 477 */ 478 479 if (info->d_dsflags & DSO_COMPATPARTA) { 480 pp = &lp->d_partitions[0]; 481 pp->p_boffset = lp->d_pbase; 482 pp->p_bsize = lp->d_pstop - lp->d_pbase; 483 /* XXX SET FS TYPE */ 484 } 485 486 lpcrcsize = offsetof(struct disklabel64, 487 d_partitions[lp->d_npartitions]) - 488 offsetof(struct disklabel64, d_magic); 489 lp->d_crc = crc32(&lp->d_magic, lpcrcsize); 490 } 491 492 /* 493 * Set the number of blocks at the beginning of the slice which have 494 * been reserved for label operations. This area will be write-protected 495 * when accessed via the slice. 496 * 497 * For now just protect the label area proper. Do not protect the 498 * boot area. Note partitions in 64 bit disklabels do not overlap 499 * the disklabel or boot area. 500 */ 501 static void 502 l64_adjust_label_reserved(struct diskslices *ssp, int slice, 503 struct diskslice *sp) 504 { 505 struct disklabel64 *lp = sp->ds_label.lab64; 506 507 sp->ds_reserved = lp->d_bbase / ssp->dss_secsize; 508 } 509 510 struct disklabel_ops disklabel64_ops = { 511 .labelsize = sizeof(struct disklabel64), 512 .op_readdisklabel = l64_readdisklabel, 513 .op_setdisklabel = l64_setdisklabel, 514 .op_writedisklabel = l64_writedisklabel, 515 .op_clone_label = l64_clone_label, 516 .op_adjust_label_reserved = l64_adjust_label_reserved, 517 .op_getpartbounds = l64_getpartbounds, 518 .op_loadpartinfo = l64_loadpartinfo, 519 .op_getnumparts = l64_getnumparts, 520 .op_makevirginlabel = l64_makevirginlabel, 521 .op_freedisklabel = l64_freedisklabel 522 }; 523 524