1 /* $OpenBSD: softraid_amd64.c,v 1.7 2021/06/02 22:44:26 krw Exp $ */ 2 3 /* 4 * Copyright (c) 2012 Joel Sing <jsing@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/param.h> 20 #include <sys/queue.h> 21 #include <sys/disklabel.h> 22 #include <sys/reboot.h> 23 24 #include <dev/biovar.h> 25 #include <dev/softraidvar.h> 26 27 #include <lib/libsa/aes_xts.h> 28 #include <lib/libsa/softraid.h> 29 #include <lib/libz/zlib.h> 30 31 #include "libsa.h" 32 #include "disk.h" 33 #include "softraid_amd64.h" 34 35 static int gpt_chk_mbr(struct dos_partition *, u_int64_t); 36 static uint64_t findopenbsd_gpt(struct sr_boot_volume *, const char **); 37 38 void 39 srprobe_meta_opt_load(struct sr_metadata *sm, struct sr_meta_opt_head *som) 40 { 41 struct sr_meta_opt_hdr *omh; 42 struct sr_meta_opt_item *omi; 43 #if 0 44 u_int8_t checksum[MD5_DIGEST_LENGTH]; 45 #endif 46 int i; 47 48 /* Process optional metadata. */ 49 omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(sm + 1) + 50 sizeof(struct sr_meta_chunk) * sm->ssdi.ssd_chunk_no); 51 for (i = 0; i < sm->ssdi.ssd_opt_no; i++) { 52 53 #ifdef BIOS_DEBUG 54 printf("Found optional metadata of type %u, length %u\n", 55 omh->som_type, omh->som_length); 56 #endif 57 58 /* Unsupported old fixed length optional metadata. */ 59 if (omh->som_length == 0) { 60 omh = (struct sr_meta_opt_hdr *)((void *)omh + 61 SR_OLD_META_OPT_SIZE); 62 continue; 63 } 64 65 /* Load variable length optional metadata. */ 66 omi = alloc(sizeof(struct sr_meta_opt_item)); 67 bzero(omi, sizeof(struct sr_meta_opt_item)); 68 SLIST_INSERT_HEAD(som, omi, omi_link); 69 omi->omi_som = alloc(omh->som_length); 70 bzero(omi->omi_som, omh->som_length); 71 bcopy(omh, omi->omi_som, omh->som_length); 72 73 #if 0 74 /* XXX - Validate checksum. */ 75 bcopy(&omi->omi_som->som_checksum, &checksum, 76 MD5_DIGEST_LENGTH); 77 bzero(&omi->omi_som->som_checksum, MD5_DIGEST_LENGTH); 78 sr_checksum(sc, omi->omi_som, 79 &omi->omi_som->som_checksum, omh->som_length); 80 if (bcmp(&checksum, &omi->omi_som->som_checksum, 81 sizeof(checksum))) 82 panic("%s: invalid optional metadata checksum", 83 DEVNAME(sc)); 84 #endif 85 86 omh = (struct sr_meta_opt_hdr *)((void *)omh + 87 omh->som_length); 88 } 89 } 90 91 void 92 srprobe_keydisk_load(struct sr_metadata *sm) 93 { 94 struct sr_meta_opt_hdr *omh; 95 struct sr_meta_keydisk *skm; 96 struct sr_boot_keydisk *kd; 97 int i; 98 99 /* Process optional metadata. */ 100 omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(sm + 1) + 101 sizeof(struct sr_meta_chunk) * sm->ssdi.ssd_chunk_no); 102 for (i = 0; i < sm->ssdi.ssd_opt_no; i++) { 103 104 /* Unsupported old fixed length optional metadata. */ 105 if (omh->som_length == 0) { 106 omh = (struct sr_meta_opt_hdr *)((void *)omh + 107 SR_OLD_META_OPT_SIZE); 108 continue; 109 } 110 111 if (omh->som_type != SR_OPT_KEYDISK) { 112 omh = (struct sr_meta_opt_hdr *)((void *)omh + 113 omh->som_length); 114 continue; 115 } 116 117 kd = alloc(sizeof(struct sr_boot_keydisk)); 118 bcopy(&sm->ssdi.ssd_uuid, &kd->kd_uuid, sizeof(kd->kd_uuid)); 119 skm = (struct sr_meta_keydisk*)omh; 120 bcopy(&skm->skm_maskkey, &kd->kd_key, sizeof(kd->kd_key)); 121 SLIST_INSERT_HEAD(&sr_keydisks, kd, kd_link); 122 } 123 } 124 125 void 126 srprobe(void) 127 { 128 struct sr_boot_volume *bv, *bv1, *bv2; 129 struct sr_boot_chunk *bc, *bc1, *bc2; 130 struct sr_meta_chunk *mc; 131 struct sr_metadata *md; 132 struct diskinfo *dip; 133 struct partition *pp; 134 int i, error, volno; 135 dev_t bsd_dev; 136 daddr_t off; 137 138 /* Probe for softraid volumes. */ 139 SLIST_INIT(&sr_volumes); 140 SLIST_INIT(&sr_keydisks); 141 142 md = alloc(SR_META_SIZE * DEV_BSIZE); 143 144 TAILQ_FOREACH(dip, &disklist, list) { 145 146 /* Only check hard disks, skip those with I/O errors. */ 147 if ((dip->bios_info.bios_number & 0x80) == 0 || 148 (dip->bios_info.flags & BDI_INVALID)) 149 continue; 150 151 /* Make sure disklabel has been read. */ 152 if ((dip->bios_info.flags & (BDI_BADLABEL|BDI_GOODLABEL)) == 0) 153 continue; 154 155 for (i = 0; i < MAXPARTITIONS; i++) { 156 157 pp = &dip->disklabel.d_partitions[i]; 158 if (pp->p_fstype != FS_RAID || pp->p_size == 0) 159 continue; 160 161 /* Read softraid metadata. */ 162 bzero(md, SR_META_SIZE * DEV_BSIZE); 163 off = DL_SECTOBLK(&dip->disklabel, DL_GETPOFFSET(pp)); 164 off += SR_META_OFFSET; 165 error = dip->diskio(F_READ, dip, off, SR_META_SIZE, md); 166 if (error) 167 continue; 168 169 /* Is this valid softraid metadata? */ 170 if (md->ssdi.ssd_magic != SR_MAGIC) 171 continue; 172 173 /* XXX - validate checksum. */ 174 175 /* Handle key disks separately... */ 176 if (md->ssdi.ssd_level == SR_KEYDISK_LEVEL) { 177 srprobe_keydisk_load(md); 178 continue; 179 } 180 181 /* Locate chunk-specific metadata for this chunk. */ 182 mc = (struct sr_meta_chunk *)(md + 1); 183 mc += md->ssdi.ssd_chunk_id; 184 185 bc = alloc(sizeof(struct sr_boot_chunk)); 186 bc->sbc_diskinfo = dip; 187 bc->sbc_disk = dip->bios_info.bios_number; 188 bc->sbc_part = 'a' + i; 189 190 bsd_dev = dip->bios_info.bsd_dev; 191 bc->sbc_mm = MAKEBOOTDEV(B_TYPE(bsd_dev), 192 B_ADAPTOR(bsd_dev), B_CONTROLLER(bsd_dev), 193 B_UNIT(bsd_dev), bc->sbc_part - 'a'); 194 195 bc->sbc_chunk_id = md->ssdi.ssd_chunk_id; 196 bc->sbc_ondisk = md->ssd_ondisk; 197 bc->sbc_state = mc->scm_status; 198 199 SLIST_FOREACH(bv, &sr_volumes, sbv_link) { 200 if (bcmp(&md->ssdi.ssd_uuid, &bv->sbv_uuid, 201 sizeof(md->ssdi.ssd_uuid)) == 0) 202 break; 203 } 204 205 if (bv == NULL) { 206 bv = alloc(sizeof(struct sr_boot_volume)); 207 bzero(bv, sizeof(struct sr_boot_volume)); 208 bv->sbv_level = md->ssdi.ssd_level; 209 bv->sbv_volid = md->ssdi.ssd_volid; 210 bv->sbv_chunk_no = md->ssdi.ssd_chunk_no; 211 bv->sbv_flags = md->ssdi.ssd_vol_flags; 212 bv->sbv_size = md->ssdi.ssd_size; 213 bv->sbv_secsize = md->ssdi.ssd_secsize; 214 bv->sbv_data_blkno = md->ssd_data_blkno; 215 bcopy(&md->ssdi.ssd_uuid, &bv->sbv_uuid, 216 sizeof(md->ssdi.ssd_uuid)); 217 SLIST_INIT(&bv->sbv_chunks); 218 SLIST_INIT(&bv->sbv_meta_opt); 219 220 /* Load optional metadata for this volume. */ 221 srprobe_meta_opt_load(md, &bv->sbv_meta_opt); 222 223 /* Maintain volume order. */ 224 bv2 = NULL; 225 SLIST_FOREACH(bv1, &sr_volumes, sbv_link) { 226 if (bv1->sbv_volid > bv->sbv_volid) 227 break; 228 bv2 = bv1; 229 } 230 if (bv2 == NULL) 231 SLIST_INSERT_HEAD(&sr_volumes, bv, 232 sbv_link); 233 else 234 SLIST_INSERT_AFTER(bv2, bv, sbv_link); 235 } 236 237 /* Maintain chunk order. */ 238 bc2 = NULL; 239 SLIST_FOREACH(bc1, &bv->sbv_chunks, sbc_link) { 240 if (bc1->sbc_chunk_id > bc->sbc_chunk_id) 241 break; 242 bc2 = bc1; 243 } 244 if (bc2 == NULL) 245 SLIST_INSERT_HEAD(&bv->sbv_chunks, 246 bc, sbc_link); 247 else 248 SLIST_INSERT_AFTER(bc2, bc, sbc_link); 249 250 bv->sbv_chunks_found++; 251 } 252 } 253 254 /* 255 * Assemble RAID volumes. 256 */ 257 volno = 0; 258 SLIST_FOREACH(bv, &sr_volumes, sbv_link) { 259 260 /* Skip if this is a hotspare "volume". */ 261 if (bv->sbv_level == SR_HOTSPARE_LEVEL && 262 bv->sbv_chunk_no == 1) 263 continue; 264 265 /* Determine current ondisk version. */ 266 bv->sbv_ondisk = 0; 267 SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link) { 268 if (bc->sbc_ondisk > bv->sbv_ondisk) 269 bv->sbv_ondisk = bc->sbc_ondisk; 270 } 271 SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link) { 272 if (bc->sbc_ondisk != bv->sbv_ondisk) 273 bc->sbc_state = BIOC_SDOFFLINE; 274 } 275 276 /* XXX - Check for duplicate chunks. */ 277 278 /* 279 * Validate that volume has sufficient chunks for 280 * read-only access. 281 * 282 * XXX - check chunk states. 283 */ 284 bv->sbv_state = BIOC_SVOFFLINE; 285 switch (bv->sbv_level) { 286 case 0: 287 case 'C': 288 case 'c': 289 if (bv->sbv_chunk_no == bv->sbv_chunks_found) 290 bv->sbv_state = BIOC_SVONLINE; 291 break; 292 293 case 1: 294 if (bv->sbv_chunk_no == bv->sbv_chunks_found) 295 bv->sbv_state = BIOC_SVONLINE; 296 else if (bv->sbv_chunks_found > 0) 297 bv->sbv_state = BIOC_SVDEGRADED; 298 break; 299 } 300 301 bv->sbv_unit = volno++; 302 if (bv->sbv_state != BIOC_SVOFFLINE) 303 printf(" sr%d%s", bv->sbv_unit, 304 bv->sbv_flags & BIOC_SCBOOTABLE ? "*" : ""); 305 } 306 307 explicit_bzero(md, SR_META_SIZE * DEV_BSIZE); 308 free(md, SR_META_SIZE * DEV_BSIZE); 309 } 310 311 int 312 sr_strategy(struct sr_boot_volume *bv, int rw, daddr_t blk, size_t size, 313 void *buf, size_t *rsize) 314 { 315 struct diskinfo *sr_dip, *dip; 316 struct sr_boot_chunk *bc; 317 struct aes_xts_ctx ctx; 318 size_t i, j, nsect; 319 daddr_t blkno; 320 u_char iv[8]; 321 u_char *bp; 322 int err; 323 324 /* We only support read-only softraid. */ 325 if (rw != F_READ) 326 return ENOTSUP; 327 328 /* Partition offset within softraid volume. */ 329 sr_dip = (struct diskinfo *)bv->sbv_diskinfo; 330 blk += DL_SECTOBLK(&sr_dip->disklabel, 331 sr_dip->disklabel.d_partitions[bv->sbv_part - 'a'].p_offset); 332 333 if (bv->sbv_level == 0) { 334 return ENOTSUP; 335 } else if (bv->sbv_level == 1) { 336 337 /* Select first online chunk. */ 338 SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link) 339 if (bc->sbc_state == BIOC_SDONLINE) 340 break; 341 if (bc == NULL) 342 return EIO; 343 344 dip = (struct diskinfo *)bc->sbc_diskinfo; 345 dip->bsddev = bc->sbc_mm; 346 blk += bv->sbv_data_blkno; 347 348 /* XXX - If I/O failed we should try another chunk... */ 349 return dip->strategy(dip, rw, blk, size, buf, rsize); 350 351 } else if (bv->sbv_level == 'C') { 352 353 /* Select first online chunk. */ 354 SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link) 355 if (bc->sbc_state == BIOC_SDONLINE) 356 break; 357 if (bc == NULL) 358 return EIO; 359 360 dip = (struct diskinfo *)bc->sbc_diskinfo; 361 dip->bsddev = bc->sbc_mm; 362 363 /* XXX - select correct key. */ 364 aes_xts_setkey(&ctx, (u_char *)bv->sbv_keys, 64); 365 366 nsect = (size + DEV_BSIZE - 1) / DEV_BSIZE; 367 for (i = 0; i < nsect; i++) { 368 blkno = blk + i; 369 bp = ((u_char *)buf) + i * DEV_BSIZE; 370 err = dip->strategy(dip, rw, bv->sbv_data_blkno + blkno, 371 DEV_BSIZE, bp, NULL); 372 if (err != 0) 373 return err; 374 375 bcopy(&blkno, iv, sizeof(blkno)); 376 aes_xts_reinit(&ctx, iv); 377 for (j = 0; j < DEV_BSIZE; j += AES_XTS_BLOCKSIZE) 378 aes_xts_decrypt(&ctx, bp + j); 379 } 380 if (rsize != NULL) 381 *rsize = nsect * DEV_BSIZE; 382 383 return err; 384 385 } else 386 return ENOTSUP; 387 } 388 389 /* 390 * Returns 0 if the MBR with the provided partition array is a GPT protective 391 * MBR, and returns 1 otherwise. A GPT protective MBR would have one and only 392 * one MBR partition, an EFI partition that either covers the whole disk or as 393 * much of it as is possible with a 32bit size field. 394 * 395 * Taken from kern/subr_disk.c. 396 * 397 * NOTE: MS always uses a size of UINT32_MAX for the EFI partition!** 398 */ 399 static int 400 gpt_chk_mbr(struct dos_partition *dp, u_int64_t dsize) 401 { 402 struct dos_partition *dp2; 403 int efi, found, i; 404 u_int32_t psize; 405 406 found = efi = 0; 407 for (dp2=dp, i=0; i < NDOSPART; i++, dp2++) { 408 if (dp2->dp_typ == DOSPTYP_UNUSED) 409 continue; 410 found++; 411 if (dp2->dp_typ != DOSPTYP_EFI) 412 continue; 413 if (letoh32(dp2->dp_start) != GPTSECTOR) 414 continue; 415 psize = letoh32(dp2->dp_size); 416 if (psize <= (dsize - GPTSECTOR) || psize == UINT32_MAX) 417 efi++; 418 } 419 if (found == 1 && efi == 1) 420 return (0); 421 422 return (1); 423 } 424 425 static uint64_t 426 findopenbsd_gpt(struct sr_boot_volume *bv, const char **err) 427 { 428 struct gpt_header gh; 429 int i, part, found; 430 uint64_t lba; 431 uint32_t orig_csum, new_csum; 432 uint32_t ghsize, ghpartsize, ghpartnum, ghpartspersec; 433 uint32_t gpsectors; 434 const char openbsd_uuid_code[] = GPT_UUID_OPENBSD; 435 struct gpt_partition gp; 436 static struct uuid *openbsd_uuid = NULL, openbsd_uuid_space; 437 u_char *buf; 438 439 /* Prepare OpenBSD UUID */ 440 if (openbsd_uuid == NULL) { 441 /* XXX: should be replaced by uuid_dec_be() */ 442 memcpy(&openbsd_uuid_space, openbsd_uuid_code, 443 sizeof(openbsd_uuid_space)); 444 openbsd_uuid_space.time_low = 445 betoh32(openbsd_uuid_space.time_low); 446 openbsd_uuid_space.time_mid = 447 betoh16(openbsd_uuid_space.time_mid); 448 openbsd_uuid_space.time_hi_and_version = 449 betoh16(openbsd_uuid_space.time_hi_and_version); 450 451 openbsd_uuid = &openbsd_uuid_space; 452 } 453 454 if (bv->sbv_secsize > 4096) { 455 *err = "disk sector > 4096 bytes\n"; 456 return (-1); 457 } 458 buf = alloc(bv->sbv_secsize); 459 if (buf == NULL) { 460 *err = "out of memory\n"; 461 return (-1); 462 } 463 bzero(buf, bv->sbv_secsize); 464 465 /* GPT Header */ 466 lba = GPTSECTOR; 467 sr_strategy(bv, F_READ, lba * (bv->sbv_secsize / DEV_BSIZE), DEV_BSIZE, 468 buf, NULL); 469 memcpy(&gh, buf, sizeof(gh)); 470 471 /* Check signature */ 472 if (letoh64(gh.gh_sig) != GPTSIGNATURE) { 473 *err = "bad GPT signature\n"; 474 free(buf, bv->sbv_secsize); 475 return (-1); 476 } 477 478 if (letoh32(gh.gh_rev) != GPTREVISION) { 479 *err = "bad GPT revision\n"; 480 free(buf, bv->sbv_secsize); 481 return (-1); 482 } 483 484 ghsize = letoh32(gh.gh_size); 485 if (ghsize < GPTMINHDRSIZE || ghsize > sizeof(struct gpt_header)) { 486 *err = "bad GPT header size\n"; 487 free(buf, bv->sbv_secsize); 488 return (-1); 489 } 490 491 /* Check checksum */ 492 orig_csum = gh.gh_csum; 493 gh.gh_csum = 0; 494 new_csum = crc32(0, (unsigned char *)&gh, ghsize); 495 gh.gh_csum = orig_csum; 496 if (letoh32(orig_csum) != new_csum) { 497 *err = "bad GPT header checksum\n"; 498 free(buf, bv->sbv_secsize); 499 return (-1); 500 } 501 502 lba = letoh64(gh.gh_part_lba); 503 ghpartsize = letoh32(gh.gh_part_size); 504 ghpartspersec = bv->sbv_secsize / ghpartsize; 505 ghpartnum = letoh32(gh.gh_part_num); 506 gpsectors = (ghpartnum + ghpartspersec - 1) / ghpartspersec; 507 new_csum = crc32(0L, Z_NULL, 0); 508 found = 0; 509 for (i = 0; i < gpsectors; i++, lba++) { 510 sr_strategy(bv, F_READ, lba * (bv->sbv_secsize / DEV_BSIZE), 511 bv->sbv_secsize, buf, NULL); 512 for (part = 0; part < ghpartspersec; part++) { 513 if (ghpartnum == 0) 514 break; 515 new_csum = crc32(new_csum, buf + part * sizeof(gp), 516 sizeof(gp)); 517 ghpartnum--; 518 if (found) 519 continue; 520 memcpy(&gp, buf + part * sizeof(gp), sizeof(gp)); 521 if (memcmp(&gp.gp_type, openbsd_uuid, 522 sizeof(struct uuid)) == 0) 523 found = 1; 524 } 525 } 526 527 free(buf, bv->sbv_secsize); 528 529 if (new_csum != letoh32(gh.gh_part_csum)) { 530 *err = "bad GPT entries checksum\n"; 531 return (-1); 532 } 533 if (found) 534 return (letoh64(gp.gp_lba_start)); 535 536 return (-1); 537 } 538 539 const char * 540 sr_getdisklabel(struct sr_boot_volume *bv, struct disklabel *label) 541 { 542 struct dos_partition *dp; 543 struct dos_mbr mbr; 544 const char *err = NULL; 545 u_int start = 0; 546 char buf[DEV_BSIZE]; 547 int i; 548 549 /* Check for MBR to determine partition offset. */ 550 bzero(&mbr, sizeof(mbr)); 551 sr_strategy(bv, F_READ, DOSBBSECTOR, sizeof(mbr), &mbr, NULL); 552 if (gpt_chk_mbr(mbr.dmbr_parts, bv->sbv_size / 553 (bv->sbv_secsize / DEV_BSIZE)) == 0) { 554 start = findopenbsd_gpt(bv, &err); 555 if (start == (u_int)-1) { 556 if (err != NULL) 557 return (err); 558 return "no OpenBSD partition\n"; 559 } 560 } else if (mbr.dmbr_sign == DOSMBR_SIGNATURE) { 561 562 /* Search for OpenBSD partition */ 563 for (i = 0; i < NDOSPART; i++) { 564 dp = &mbr.dmbr_parts[i]; 565 if (!dp->dp_size) 566 continue; 567 if (dp->dp_typ == DOSPTYP_OPENBSD) { 568 start = dp->dp_start; 569 break; 570 } 571 } 572 } 573 574 /* Read the disklabel. */ 575 sr_strategy(bv, F_READ, 576 start * (bv->sbv_secsize / DEV_BSIZE) + DOS_LABELSECTOR, 577 sizeof(struct disklabel), buf, NULL); 578 579 #ifdef BIOS_DEBUG 580 printf("sr_getdisklabel: magic %lx\n", 581 ((struct disklabel *)buf)->d_magic); 582 for (i = 0; i < MAXPARTITIONS; i++) 583 printf("part %c: type = %d, size = %d, offset = %d\n", 'a' + i, 584 (int)((struct disklabel *)buf)->d_partitions[i].p_fstype, 585 (int)((struct disklabel *)buf)->d_partitions[i].p_size, 586 (int)((struct disklabel *)buf)->d_partitions[i].p_offset); 587 #endif 588 589 /* Fill in disklabel */ 590 return (getdisklabel(buf, label)); 591 } 592