1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/kern/subr_disklabel64.c,v 1.5 2007/07/20 17:21:51 dillon Exp $ 35 */ 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/kernel.h> 40 #include <sys/conf.h> 41 #include <sys/disklabel.h> 42 #include <sys/disklabel64.h> 43 #include <sys/diskslice.h> 44 #include <sys/disk.h> 45 #include <sys/kern_syscall.h> 46 #include <sys/buf2.h> 47 48 /* 49 * Alignment against physical start (verses slice start). We use a megabyte 50 * here. Why do we use a megabyte? Because SSDs already use large 128K 51 * blocks internally (for MLC) and who the hell knows in the future. 52 * 53 * This way if the sysop picks sane values for partition sizes everything 54 * will be nicely aligned, particularly swap for e.g. swapcache, and 55 * clustered operations against larger physical sector sizes for newer HDs, 56 * and so forth. 57 */ 58 #define PALIGN_SIZE (1024 * 1024) 59 #define PALIGN_MASK (PALIGN_SIZE - 1) 60 61 /* 62 * Retrieve the partition start and extent, in blocks. Return 0 on success, 63 * EINVAL on error. 64 */ 65 static int 66 l64_getpartbounds(struct diskslices *ssp, disklabel_t lp, u_int32_t part, 67 u_int64_t *start, u_int64_t *blocks) 68 { 69 struct partition64 *pp; 70 71 if (part >= lp.lab64->d_npartitions) 72 return (EINVAL); 73 74 pp = &lp.lab64->d_partitions[part]; 75 76 if ((pp->p_boffset & (ssp->dss_secsize - 1)) || 77 (pp->p_bsize & (ssp->dss_secsize - 1))) { 78 return (EINVAL); 79 } 80 *start = pp->p_boffset / ssp->dss_secsize; 81 *blocks = pp->p_bsize / ssp->dss_secsize; 82 return(0); 83 } 84 85 /* 86 * Get the filesystem type XXX - diskslices code needs to use uuids 87 */ 88 static void 89 l64_loadpartinfo(disklabel_t lp, u_int32_t part, struct partinfo *dpart) 90 { 91 struct partition64 *pp; 92 const size_t uuid_size = sizeof(struct uuid); 93 94 if (part < lp.lab64->d_npartitions) { 95 pp = &lp.lab64->d_partitions[part]; 96 dpart->fstype_uuid = pp->p_type_uuid; 97 dpart->storage_uuid = pp->p_stor_uuid; 98 dpart->fstype = pp->p_fstype; 99 } else { 100 bzero(&dpart->fstype_uuid, uuid_size); 101 bzero(&dpart->storage_uuid, uuid_size); 102 dpart->fstype = 0; 103 } 104 } 105 106 /* 107 * Get the number of partitions 108 */ 109 static u_int32_t 110 l64_getnumparts(disklabel_t lp) 111 { 112 return(lp.lab64->d_npartitions); 113 } 114 115 /* 116 * Attempt to read a disk label from a device. 64 bit disklabels are 117 * sector-agnostic and begin at offset 0 on the device. 64 bit disklabels 118 * may only be used with GPT partitioning schemes. 119 * 120 * Returns NULL on sucess, and an error string on failure. 121 */ 122 static const char * 123 l64_readdisklabel(cdev_t dev, struct diskslice *sp, disklabel_t *lpp, 124 struct disk_info *info) 125 { 126 struct buf *bp; 127 struct disklabel64 *dlp; 128 const char *msg; 129 uint32_t savecrc; 130 size_t dlpcrcsize; 131 size_t bpsize; 132 int secsize; 133 134 /* 135 * XXX I/O size is subject to device DMA limitations 136 */ 137 secsize = info->d_media_blksize; 138 bpsize = (sizeof(*dlp) + secsize - 1) & ~(secsize - 1); 139 140 bp = geteblk(bpsize); 141 bp->b_bio1.bio_offset = 0; 142 bp->b_bio1.bio_done = biodone_sync; 143 bp->b_bio1.bio_flags |= BIO_SYNC; 144 bp->b_bcount = bpsize; 145 bp->b_flags &= ~B_INVAL; 146 bp->b_cmd = BUF_CMD_READ; 147 dev_dstrategy(dev, &bp->b_bio1); 148 149 if (biowait(&bp->b_bio1, "labrd")) { 150 msg = "I/O error"; 151 } else { 152 dlp = (struct disklabel64 *)bp->b_data; 153 dlpcrcsize = offsetof(struct disklabel64, 154 d_partitions[dlp->d_npartitions]) - 155 offsetof(struct disklabel64, d_magic); 156 savecrc = dlp->d_crc; 157 dlp->d_crc = 0; 158 if (dlp->d_magic != DISKMAGIC64) { 159 msg = "no disk label"; 160 } else if (dlp->d_npartitions > MAXPARTITIONS64) { 161 msg = "disklabel64 corrupted, too many partitions"; 162 } else if (savecrc != crc32(&dlp->d_magic, dlpcrcsize)) { 163 msg = "disklabel64 corrupted, bad CRC"; 164 } else { 165 dlp->d_crc = savecrc; 166 (*lpp).lab64 = kmalloc(sizeof(*dlp), 167 M_DEVBUF, M_WAITOK|M_ZERO); 168 *(*lpp).lab64 = *dlp; 169 msg = NULL; 170 } 171 } 172 bp->b_flags |= B_INVAL | B_AGE; 173 brelse(bp); 174 return (msg); 175 } 176 177 /* 178 * If everything is good, copy olpx to nlpx. Check to see if any 179 * open partitions would change. 180 */ 181 static int 182 l64_setdisklabel(disklabel_t olpx, disklabel_t nlpx, struct diskslices *ssp, 183 struct diskslice *sp, u_int32_t *openmask) 184 { 185 struct disklabel64 *olp, *nlp; 186 struct partition64 *opp, *npp; 187 uint32_t savecrc; 188 uint64_t slicebsize; 189 size_t nlpcrcsize; 190 int i; 191 192 olp = olpx.lab64; 193 nlp = nlpx.lab64; 194 195 slicebsize = (uint64_t)sp->ds_size * ssp->dss_secsize; 196 197 if (nlp->d_magic != DISKMAGIC64) 198 return (EINVAL); 199 if (nlp->d_npartitions > MAXPARTITIONS64) 200 return (EINVAL); 201 savecrc = nlp->d_crc; 202 nlp->d_crc = 0; 203 nlpcrcsize = offsetof(struct disklabel64, 204 d_partitions[nlp->d_npartitions]) - 205 offsetof(struct disklabel64, d_magic); 206 if (crc32(&nlp->d_magic, nlpcrcsize) != savecrc) { 207 nlp->d_crc = savecrc; 208 return (EINVAL); 209 } 210 nlp->d_crc = savecrc; 211 212 /* 213 * Check if open partitions have changed 214 */ 215 i = 0; 216 while (i < MAXPARTITIONS64) { 217 if (openmask[i >> 5] == 0) { 218 i += 32; 219 continue; 220 } 221 if ((openmask[i >> 5] & (1 << (i & 31))) == 0) { 222 ++i; 223 continue; 224 } 225 if (nlp->d_npartitions <= i) 226 return (EBUSY); 227 opp = &olp->d_partitions[i]; 228 npp = &nlp->d_partitions[i]; 229 if (npp->p_boffset != opp->p_boffset || 230 npp->p_bsize < opp->p_bsize) { 231 return (EBUSY); 232 } 233 234 /* 235 * Do not allow p_type_uuid or p_stor_uuid to change if 236 * the partition is currently open. 237 */ 238 if (bcmp(&npp->p_type_uuid, &opp->p_type_uuid, 239 sizeof(npp->p_type_uuid)) != 0) { 240 return (EBUSY); 241 } 242 if (bcmp(&npp->p_stor_uuid, &opp->p_stor_uuid, 243 sizeof(npp->p_stor_uuid)) != 0) { 244 return (EBUSY); 245 } 246 ++i; 247 } 248 249 /* 250 * Make sure the label and partition offsets and sizes are sane. 251 */ 252 if (nlp->d_total_size > slicebsize) 253 return (ENOSPC); 254 if (nlp->d_total_size & (ssp->dss_secsize - 1)) 255 return (EINVAL); 256 if (nlp->d_bbase & (ssp->dss_secsize - 1)) 257 return (EINVAL); 258 if (nlp->d_pbase & (ssp->dss_secsize - 1)) 259 return (EINVAL); 260 if (nlp->d_pstop & (ssp->dss_secsize - 1)) 261 return (EINVAL); 262 if (nlp->d_abase & (ssp->dss_secsize - 1)) 263 return (EINVAL); 264 265 for (i = 0; i < nlp->d_npartitions; ++i) { 266 npp = &nlp->d_partitions[i]; 267 if (npp->p_bsize == 0) { 268 if (npp->p_boffset != 0) 269 return (EINVAL); 270 continue; 271 } 272 if (npp->p_boffset & (ssp->dss_secsize - 1)) 273 return (EINVAL); 274 if (npp->p_bsize & (ssp->dss_secsize - 1)) 275 return (EINVAL); 276 if (npp->p_boffset < nlp->d_pbase) 277 return (ENOSPC); 278 if (npp->p_boffset + npp->p_bsize > nlp->d_total_size) 279 return (ENOSPC); 280 } 281 282 /* 283 * Structurally we may add code to make modifications above in the 284 * future, so regenerate the crc anyway. 285 */ 286 nlp->d_crc = 0; 287 nlp->d_crc = crc32(&nlp->d_magic, nlpcrcsize); 288 *olp = *nlp; 289 290 return (0); 291 } 292 293 /* 294 * Write disk label back to device after modification. 295 */ 296 static int 297 l64_writedisklabel(cdev_t dev, struct diskslices *ssp, 298 struct diskslice *sp, disklabel_t lpx) 299 { 300 struct disklabel64 *lp; 301 struct disklabel64 *dlp; 302 struct buf *bp; 303 int error = 0; 304 size_t bpsize; 305 int secsize; 306 307 lp = lpx.lab64; 308 309 /* 310 * XXX I/O size is subject to device DMA limitations 311 */ 312 secsize = ssp->dss_secsize; 313 bpsize = (sizeof(*lp) + secsize - 1) & ~(secsize - 1); 314 315 bp = geteblk(bpsize); 316 bp->b_bio1.bio_offset = 0; 317 bp->b_bio1.bio_done = biodone_sync; 318 bp->b_bio1.bio_flags |= BIO_SYNC; 319 bp->b_bcount = bpsize; 320 321 /* 322 * Because our I/O is larger then the label, and because we do not 323 * write the d_reserved0[] area, do a read-modify-write. 324 */ 325 bp->b_flags &= ~B_INVAL; 326 bp->b_cmd = BUF_CMD_READ; 327 KKASSERT(dkpart(dev) == WHOLE_SLICE_PART); 328 dev_dstrategy(dev, &bp->b_bio1); 329 error = biowait(&bp->b_bio1, "labrd"); 330 if (error) 331 goto done; 332 333 dlp = (void *)bp->b_data; 334 bcopy(&lp->d_magic, &dlp->d_magic, 335 sizeof(*lp) - offsetof(struct disklabel64, d_magic)); 336 bp->b_cmd = BUF_CMD_WRITE; 337 bp->b_bio1.bio_done = biodone_sync; 338 bp->b_bio1.bio_flags |= BIO_SYNC; 339 KKASSERT(dkpart(dev) == WHOLE_SLICE_PART); 340 dev_dstrategy(dev, &bp->b_bio1); 341 error = biowait(&bp->b_bio1, "labwr"); 342 done: 343 bp->b_flags |= B_INVAL | B_AGE; 344 brelse(bp); 345 return (error); 346 } 347 348 /* 349 * Create a disklabel based on a disk_info structure for the purposes of 350 * DSO_COMPATLABEL - cases where no real label exists on the storage medium. 351 * 352 * If a diskslice is passed, the label is truncated to the slice. 353 * 354 * NOTE! This is not a legal label because d_bbase and d_pbase are both 355 * set to 0. 356 */ 357 static disklabel_t 358 l64_clone_label(struct disk_info *info, struct diskslice *sp) 359 { 360 struct disklabel64 *lp; 361 disklabel_t res; 362 uint32_t blksize = info->d_media_blksize; 363 size_t lpcrcsize; 364 365 lp = kmalloc(sizeof *lp, M_DEVBUF, M_WAITOK | M_ZERO); 366 367 if (sp) 368 lp->d_total_size = (uint64_t)sp->ds_size * blksize; 369 else 370 lp->d_total_size = info->d_media_blocks * blksize; 371 372 lp->d_magic = DISKMAGIC64; 373 lp->d_align = blksize; 374 lp->d_npartitions = MAXPARTITIONS64; 375 lp->d_pstop = lp->d_total_size; 376 377 /* 378 * Create a dummy 'c' part and a dummy 'a' part (if requested). 379 * Note that the 'c' part is really a hack. 64 bit disklabels 380 * do not use 'c' to mean the raw partition. 381 */ 382 383 lp->d_partitions[2].p_boffset = 0; 384 lp->d_partitions[2].p_bsize = lp->d_total_size; 385 /* XXX SET FS TYPE */ 386 387 if (info->d_dsflags & DSO_COMPATPARTA) { 388 lp->d_partitions[0].p_boffset = 0; 389 lp->d_partitions[0].p_bsize = lp->d_total_size; 390 /* XXX SET FS TYPE */ 391 } 392 393 lpcrcsize = offsetof(struct disklabel64, 394 d_partitions[lp->d_npartitions]) - 395 offsetof(struct disklabel64, d_magic); 396 397 lp->d_crc = crc32(&lp->d_magic, lpcrcsize); 398 res.lab64 = lp; 399 return (res); 400 } 401 402 /* 403 * Create a virgin disklabel64 suitable for writing to the media. 404 * 405 * disklabel64 always reserves 32KB for a boot area and leaves room 406 * for up to RESPARTITIONS64 partitions. 407 */ 408 static void 409 l64_makevirginlabel(disklabel_t lpx, struct diskslices *ssp, 410 struct diskslice *sp, struct disk_info *info) 411 { 412 struct disklabel64 *lp = lpx.lab64; 413 struct partition64 *pp; 414 uint32_t blksize; 415 uint32_t ressize; 416 uint64_t blkmask; /* 64 bits so we can ~ */ 417 size_t lpcrcsize; 418 419 /* 420 * Setup the initial label. Use of a block size of at least 4KB 421 * for calculating the initial reserved areas to allow some degree 422 * of portability between media with different sector sizes. 423 * 424 * Note that the modified blksize is stored in d_align as a hint 425 * to the disklabeling program. 426 */ 427 bzero(lp, sizeof(*lp)); 428 if ((blksize = info->d_media_blksize) < 4096) 429 blksize = 4096; 430 blkmask = blksize - 1; 431 432 if (sp) 433 lp->d_total_size = (uint64_t)sp->ds_size * ssp->dss_secsize; 434 else 435 lp->d_total_size = info->d_media_blocks * info->d_media_blksize; 436 437 lp->d_magic = DISKMAGIC64; 438 lp->d_align = blksize; 439 lp->d_npartitions = MAXPARTITIONS64; 440 kern_uuidgen(&lp->d_stor_uuid, 1); 441 442 ressize = offsetof(struct disklabel64, d_partitions[RESPARTITIONS64]); 443 ressize = (ressize + (uint32_t)blkmask) & ~blkmask; 444 445 /* 446 * NOTE: When calculating pbase take into account the slice offset 447 * so the partitions are at least 32K-aligned relative to the 448 * start of the physical disk. This will accomodate efficient 449 * access to 4096 byte physical sector drives. 450 */ 451 lp->d_bbase = ressize; 452 lp->d_pbase = lp->d_bbase + ((32768 + blkmask) & ~blkmask); 453 lp->d_pbase = (lp->d_pbase + PALIGN_MASK) & ~(uint64_t)PALIGN_MASK; 454 455 /* adjust for slice offset so we are physically aligned */ 456 lp->d_pbase += 32768 - (sp->ds_offset * info->d_media_blksize) % 32768; 457 458 lp->d_pstop = (lp->d_total_size - lp->d_bbase) & ~blkmask; 459 lp->d_abase = lp->d_pstop; 460 461 /* 462 * All partitions are left empty unless DSO_COMPATPARTA is set 463 */ 464 465 if (info->d_dsflags & DSO_COMPATPARTA) { 466 pp = &lp->d_partitions[0]; 467 pp->p_boffset = lp->d_pbase; 468 pp->p_bsize = lp->d_pstop - lp->d_pbase; 469 /* XXX SET FS TYPE */ 470 } 471 472 lpcrcsize = offsetof(struct disklabel64, 473 d_partitions[lp->d_npartitions]) - 474 offsetof(struct disklabel64, d_magic); 475 lp->d_crc = crc32(&lp->d_magic, lpcrcsize); 476 } 477 478 /* 479 * Set the number of blocks at the beginning of the slice which have 480 * been reserved for label operations. This area will be write-protected 481 * when accessed via the slice. 482 * 483 * For now just protect the label area proper. Do not protect the 484 * boot area. Note partitions in 64 bit disklabels do not overlap 485 * the disklabel or boot area. 486 */ 487 static void 488 l64_adjust_label_reserved(struct diskslices *ssp, int slice, 489 struct diskslice *sp) 490 { 491 struct disklabel64 *lp = sp->ds_label.lab64; 492 493 sp->ds_reserved = lp->d_bbase / ssp->dss_secsize; 494 } 495 496 struct disklabel_ops disklabel64_ops = { 497 .labelsize = sizeof(struct disklabel64), 498 .op_readdisklabel = l64_readdisklabel, 499 .op_setdisklabel = l64_setdisklabel, 500 .op_writedisklabel = l64_writedisklabel, 501 .op_clone_label = l64_clone_label, 502 .op_adjust_label_reserved = l64_adjust_label_reserved, 503 .op_getpartbounds = l64_getpartbounds, 504 .op_loadpartinfo = l64_loadpartinfo, 505 .op_getnumparts = l64_getnumparts, 506 .op_makevirginlabel = l64_makevirginlabel 507 }; 508 509