1 /* $NetBSD: disk-rep.c,v 1.1.1.2 2009/12/02 00:26:48 haad Exp $ */ 2 3 /* 4 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. 5 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 6 * 7 * This file is part of LVM2. 8 * 9 * This copyrighted material is made available to anyone wishing to use, 10 * modify, copy, or redistribute it subject to the terms and conditions 11 * of the GNU Lesser General Public License v.2.1. 12 * 13 * You should have received a copy of the GNU Lesser General Public License 14 * along with this program; if not, write to the Free Software Foundation, 15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 */ 17 18 #include "lib.h" 19 #include "disk-rep.h" 20 #include "xlate.h" 21 #include "filter.h" 22 #include "lvmcache.h" 23 24 #include <fcntl.h> 25 26 #define xx16(v) disk->v = xlate16(disk->v) 27 #define xx32(v) disk->v = xlate32(disk->v) 28 #define xx64(v) disk->v = xlate64(disk->v) 29 30 /* 31 * Functions to perform the endian conversion 32 * between disk and core. The same code works 33 * both ways of course. 34 */ 35 static void _xlate_pvd(struct pv_disk *disk) 36 { 37 xx16(version); 38 39 xx32(pv_on_disk.base); 40 xx32(pv_on_disk.size); 41 xx32(vg_on_disk.base); 42 xx32(vg_on_disk.size); 43 xx32(pv_uuidlist_on_disk.base); 44 xx32(pv_uuidlist_on_disk.size); 45 xx32(lv_on_disk.base); 46 xx32(lv_on_disk.size); 47 xx32(pe_on_disk.base); 48 xx32(pe_on_disk.size); 49 50 xx32(pv_major); 51 xx32(pv_number); 52 xx32(pv_status); 53 xx32(pv_allocatable); 54 xx32(pv_size); 55 xx32(lv_cur); 56 xx32(pe_size); 57 xx32(pe_total); 58 xx32(pe_allocated); 59 xx32(pe_start); 60 } 61 62 static void _xlate_lvd(struct lv_disk *disk) 63 { 64 xx32(lv_access); 65 xx32(lv_status); 66 xx32(lv_open); 67 xx32(lv_dev); 68 xx32(lv_number); 69 xx32(lv_mirror_copies); 70 xx32(lv_recovery); 71 xx32(lv_schedule); 72 xx32(lv_size); 73 xx32(lv_snapshot_minor); 74 xx16(lv_chunk_size); 75 xx16(dummy); 76 xx32(lv_allocated_le); 77 xx32(lv_stripes); 78 xx32(lv_stripesize); 79 xx32(lv_badblock); 80 xx32(lv_allocation); 81 xx32(lv_io_timeout); 82 xx32(lv_read_ahead); 83 } 84 85 static void _xlate_vgd(struct vg_disk *disk) 86 { 87 xx32(vg_number); 88 xx32(vg_access); 89 xx32(vg_status); 90 xx32(lv_max); 91 xx32(lv_cur); 92 xx32(lv_open); 93 xx32(pv_max); 94 xx32(pv_cur); 95 xx32(pv_act); 96 xx32(dummy); 97 xx32(vgda); 98 xx32(pe_size); 99 xx32(pe_total); 100 xx32(pe_allocated); 101 xx32(pvg_total); 102 } 103 104 static void _xlate_extents(struct pe_disk *extents, uint32_t count) 105 { 106 unsigned i; 107 108 for (i = 0; i < count; i++) { 109 extents[i].lv_num = xlate16(extents[i].lv_num); 110 extents[i].le_num = xlate16(extents[i].le_num); 111 } 112 } 113 114 /* 115 * Handle both minor metadata formats. 116 */ 117 static int _munge_formats(struct pv_disk *pvd) 118 { 119 uint32_t pe_start; 120 unsigned b, e; 121 122 switch (pvd->version) { 123 case 1: 124 pvd->pe_start = ((pvd->pe_on_disk.base + 125 pvd->pe_on_disk.size) >> SECTOR_SHIFT); 126 break; 127 128 case 2: 129 pvd->version = 1; 130 pe_start = pvd->pe_start << SECTOR_SHIFT; 131 pvd->pe_on_disk.size = pe_start - pvd->pe_on_disk.base; 132 break; 133 134 default: 135 return 0; 136 } 137 138 /* UUID too long? */ 139 if (pvd->pv_uuid[ID_LEN]) { 140 /* Retain ID_LEN chars from end */ 141 for (e = ID_LEN; e < sizeof(pvd->pv_uuid); e++) { 142 if (!pvd->pv_uuid[e]) { 143 e--; 144 break; 145 } 146 } 147 for (b = 0; b < ID_LEN; b++) { 148 pvd->pv_uuid[b] = pvd->pv_uuid[++e - ID_LEN]; 149 /* FIXME Remove all invalid chars */ 150 if (pvd->pv_uuid[b] == '/') 151 pvd->pv_uuid[b] = '#'; 152 } 153 memset(&pvd->pv_uuid[ID_LEN], 0, sizeof(pvd->pv_uuid) - ID_LEN); 154 } 155 156 /* If UUID is missing, create one */ 157 if (pvd->pv_uuid[0] == '\0') { 158 uuid_from_num((char *)pvd->pv_uuid, pvd->pv_number); 159 pvd->pv_uuid[ID_LEN] = '\0'; 160 } 161 162 return 1; 163 } 164 165 /* 166 * If exported, remove "PV_EXP" from end of VG name 167 */ 168 static void _munge_exported_vg(struct pv_disk *pvd) 169 { 170 int l; 171 size_t s; 172 173 /* Return if PV not in a VG */ 174 if ((!*pvd->vg_name)) 175 return; 176 /* FIXME also check vgd->status & VG_EXPORTED? */ 177 178 l = strlen((char *)pvd->vg_name); 179 s = sizeof(EXPORTED_TAG); 180 if (!strncmp((char *)pvd->vg_name + l - s + 1, EXPORTED_TAG, s)) { 181 pvd->vg_name[l - s + 1] = '\0'; 182 pvd->pv_status |= VG_EXPORTED; 183 } 184 } 185 186 int munge_pvd(struct device *dev, struct pv_disk *pvd) 187 { 188 _xlate_pvd(pvd); 189 190 if (pvd->id[0] != 'H' || pvd->id[1] != 'M') { 191 log_very_verbose("%s does not have a valid LVM1 PV identifier", 192 dev_name(dev)); 193 return 0; 194 } 195 196 if (!_munge_formats(pvd)) { 197 log_very_verbose("format1: Unknown metadata version %d " 198 "found on %s", pvd->version, dev_name(dev)); 199 return 0; 200 } 201 202 /* If VG is exported, set VG name back to the real name */ 203 _munge_exported_vg(pvd); 204 205 return 1; 206 } 207 208 static int _read_pvd(struct device *dev, struct pv_disk *pvd) 209 { 210 if (!dev_read(dev, UINT64_C(0), sizeof(*pvd), pvd)) { 211 log_very_verbose("Failed to read PV data from %s", 212 dev_name(dev)); 213 return 0; 214 } 215 216 return munge_pvd(dev, pvd); 217 } 218 219 static int _read_lvd(struct device *dev, uint64_t pos, struct lv_disk *disk) 220 { 221 if (!dev_read(dev, pos, sizeof(*disk), disk)) 222 return_0; 223 224 _xlate_lvd(disk); 225 226 return 1; 227 } 228 229 int read_vgd(struct device *dev, struct vg_disk *vgd, struct pv_disk *pvd) 230 { 231 uint64_t pos = pvd->vg_on_disk.base; 232 233 if (!dev_read(dev, pos, sizeof(*vgd), vgd)) 234 return_0; 235 236 _xlate_vgd(vgd); 237 238 if ((vgd->lv_max > MAX_LV) || (vgd->pv_max > MAX_PV)) 239 return_0; 240 241 /* If UUID is missing, create one */ 242 if (vgd->vg_uuid[0] == '\0') 243 uuid_from_num((char *)vgd->vg_uuid, vgd->vg_number); 244 245 return 1; 246 } 247 248 static int _read_uuids(struct disk_list *data) 249 { 250 unsigned num_read = 0; 251 struct uuid_list *ul; 252 char buffer[NAME_LEN] __attribute((aligned(8))); 253 uint64_t pos = data->pvd.pv_uuidlist_on_disk.base; 254 uint64_t end = pos + data->pvd.pv_uuidlist_on_disk.size; 255 256 while (pos < end && num_read < data->vgd.pv_cur) { 257 if (!dev_read(data->dev, pos, sizeof(buffer), buffer)) 258 return_0; 259 260 if (!(ul = dm_pool_alloc(data->mem, sizeof(*ul)))) 261 return_0; 262 263 memcpy(ul->uuid, buffer, NAME_LEN); 264 ul->uuid[NAME_LEN - 1] = '\0'; 265 266 dm_list_add(&data->uuids, &ul->list); 267 268 pos += NAME_LEN; 269 num_read++; 270 } 271 272 return 1; 273 } 274 275 static int _check_lvd(struct lv_disk *lvd) 276 { 277 return !(lvd->lv_name[0] == '\0'); 278 } 279 280 static int _read_lvs(struct disk_list *data) 281 { 282 unsigned int i, lvs_read = 0; 283 uint64_t pos; 284 struct lvd_list *ll; 285 struct vg_disk *vgd = &data->vgd; 286 287 for (i = 0; (i < vgd->lv_max) && (lvs_read < vgd->lv_cur); i++) { 288 pos = data->pvd.lv_on_disk.base + (i * sizeof(struct lv_disk)); 289 ll = dm_pool_alloc(data->mem, sizeof(*ll)); 290 291 if (!ll) 292 return_0; 293 294 if (!_read_lvd(data->dev, pos, &ll->lvd)) 295 return_0; 296 297 if (!_check_lvd(&ll->lvd)) 298 continue; 299 300 lvs_read++; 301 dm_list_add(&data->lvds, &ll->list); 302 } 303 304 return 1; 305 } 306 307 static int _read_extents(struct disk_list *data) 308 { 309 size_t len = sizeof(struct pe_disk) * data->pvd.pe_total; 310 struct pe_disk *extents = dm_pool_alloc(data->mem, len); 311 uint64_t pos = data->pvd.pe_on_disk.base; 312 313 if (!extents) 314 return_0; 315 316 if (!dev_read(data->dev, pos, len, extents)) 317 return_0; 318 319 _xlate_extents(extents, data->pvd.pe_total); 320 data->extents = extents; 321 322 return 1; 323 } 324 325 static void __update_lvmcache(const struct format_type *fmt, 326 struct disk_list *dl, 327 struct device *dev, const char *vgid, 328 unsigned exported) 329 { 330 struct lvmcache_info *info; 331 const char *vgname = *((char *)dl->pvd.vg_name) ? 332 (char *)dl->pvd.vg_name : fmt->orphan_vg_name; 333 334 if (!(info = lvmcache_add(fmt->labeller, (char *)dl->pvd.pv_uuid, dev, 335 vgname, vgid, exported ? EXPORTED_VG : 0))) { 336 stack; 337 return; 338 } 339 340 info->device_size = xlate32(dl->pvd.pv_size) << SECTOR_SHIFT; 341 dm_list_init(&info->mdas); 342 info->status &= ~CACHE_INVALID; 343 } 344 345 static struct disk_list *__read_disk(const struct format_type *fmt, 346 struct device *dev, struct dm_pool *mem, 347 const char *vg_name) 348 { 349 struct disk_list *dl = dm_pool_zalloc(mem, sizeof(*dl)); 350 const char *name = dev_name(dev); 351 352 if (!dl) 353 return_NULL; 354 355 dl->dev = dev; 356 dl->mem = mem; 357 dm_list_init(&dl->uuids); 358 dm_list_init(&dl->lvds); 359 360 if (!_read_pvd(dev, &dl->pvd)) 361 goto_bad; 362 363 /* 364 * is it an orphan ? 365 */ 366 if (!*dl->pvd.vg_name) { 367 log_very_verbose("%s is not a member of any format1 VG", name); 368 369 __update_lvmcache(fmt, dl, dev, fmt->orphan_vg_name, 0); 370 return (vg_name) ? NULL : dl; 371 } 372 373 if (!read_vgd(dl->dev, &dl->vgd, &dl->pvd)) { 374 log_error("Failed to read VG data from PV (%s)", name); 375 __update_lvmcache(fmt, dl, dev, fmt->orphan_vg_name, 0); 376 goto bad; 377 } 378 379 if (vg_name && strcmp(vg_name, (char *)dl->pvd.vg_name)) { 380 log_very_verbose("%s is not a member of the VG %s", 381 name, vg_name); 382 __update_lvmcache(fmt, dl, dev, fmt->orphan_vg_name, 0); 383 goto bad; 384 } 385 386 __update_lvmcache(fmt, dl, dev, (char *)dl->vgd.vg_uuid, 387 dl->vgd.vg_status & VG_EXPORTED); 388 389 if (!_read_uuids(dl)) { 390 log_error("Failed to read PV uuid list from %s", name); 391 goto bad; 392 } 393 394 if (!_read_lvs(dl)) { 395 log_error("Failed to read LV's from %s", name); 396 goto bad; 397 } 398 399 if (!_read_extents(dl)) { 400 log_error("Failed to read extents from %s", name); 401 goto bad; 402 } 403 404 log_very_verbose("Found %s in %sVG %s", name, 405 (dl->vgd.vg_status & VG_EXPORTED) ? "exported " : "", 406 dl->pvd.vg_name); 407 408 return dl; 409 410 bad: 411 dm_pool_free(dl->mem, dl); 412 return NULL; 413 } 414 415 struct disk_list *read_disk(const struct format_type *fmt, struct device *dev, 416 struct dm_pool *mem, const char *vg_name) 417 { 418 struct disk_list *dl; 419 420 if (!dev_open(dev)) 421 return_NULL; 422 423 dl = __read_disk(fmt, dev, mem, vg_name); 424 425 if (!dev_close(dev)) 426 stack; 427 428 return dl; 429 } 430 431 static void _add_pv_to_list(struct dm_list *head, struct disk_list *data) 432 { 433 struct pv_disk *pvd; 434 struct disk_list *diskl; 435 436 dm_list_iterate_items(diskl, head) { 437 pvd = &diskl->pvd; 438 if (!strncmp((char *)data->pvd.pv_uuid, (char *)pvd->pv_uuid, 439 sizeof(pvd->pv_uuid))) { 440 if (!dev_subsystem_part_major(data->dev)) { 441 log_very_verbose("Ignoring duplicate PV %s on " 442 "%s", pvd->pv_uuid, 443 dev_name(data->dev)); 444 return; 445 } 446 log_very_verbose("Duplicate PV %s - using %s %s", 447 pvd->pv_uuid, dev_subsystem_name(data->dev), 448 dev_name(data->dev)); 449 dm_list_del(&diskl->list); 450 break; 451 } 452 } 453 dm_list_add(head, &data->list); 454 } 455 456 /* 457 * Build a list of pv_d's structures, allocated from mem. 458 * We keep track of the first object allocated from the pool 459 * so we can free off all the memory if something goes wrong. 460 */ 461 int read_pvs_in_vg(const struct format_type *fmt, const char *vg_name, 462 struct dev_filter *filter, struct dm_pool *mem, 463 struct dm_list *head) 464 { 465 struct dev_iter *iter; 466 struct device *dev; 467 struct disk_list *data = NULL; 468 struct lvmcache_vginfo *vginfo; 469 struct lvmcache_info *info; 470 471 /* Fast path if we already saw this VG and cached the list of PVs */ 472 if (vg_name && (vginfo = vginfo_from_vgname(vg_name, NULL)) && 473 vginfo->infos.n) { 474 dm_list_iterate_items(info, &vginfo->infos) { 475 dev = info->dev; 476 if (dev && !(data = read_disk(fmt, dev, mem, vg_name))) 477 break; 478 _add_pv_to_list(head, data); 479 } 480 481 /* Did we find the whole VG? */ 482 if (!vg_name || is_orphan_vg(vg_name) || 483 (data && *data->pvd.vg_name && 484 dm_list_size(head) == data->vgd.pv_cur)) 485 return 1; 486 487 /* Failed */ 488 dm_list_init(head); 489 /* vgcache_del(vg_name); */ 490 } 491 492 if (!(iter = dev_iter_create(filter, 1))) { 493 log_error("read_pvs_in_vg: dev_iter_create failed"); 494 return 0; 495 } 496 497 /* Otherwise do a complete scan */ 498 for (dev = dev_iter_get(iter); dev; dev = dev_iter_get(iter)) { 499 if ((data = read_disk(fmt, dev, mem, vg_name))) { 500 _add_pv_to_list(head, data); 501 } 502 } 503 dev_iter_destroy(iter); 504 505 if (dm_list_empty(head)) 506 return 0; 507 508 return 1; 509 } 510 511 static int _write_vgd(struct disk_list *data) 512 { 513 struct vg_disk *vgd = &data->vgd; 514 uint64_t pos = data->pvd.vg_on_disk.base; 515 516 log_debug("Writing %s VG metadata to %s at %" PRIu64 " len %" PRIsize_t, 517 data->pvd.vg_name, dev_name(data->dev), pos, sizeof(*vgd)); 518 519 _xlate_vgd(vgd); 520 if (!dev_write(data->dev, pos, sizeof(*vgd), vgd)) 521 return_0; 522 523 _xlate_vgd(vgd); 524 525 return 1; 526 } 527 528 static int _write_uuids(struct disk_list *data) 529 { 530 struct uuid_list *ul; 531 uint64_t pos = data->pvd.pv_uuidlist_on_disk.base; 532 uint64_t end = pos + data->pvd.pv_uuidlist_on_disk.size; 533 534 dm_list_iterate_items(ul, &data->uuids) { 535 if (pos >= end) { 536 log_error("Too many uuids to fit on %s", 537 dev_name(data->dev)); 538 return 0; 539 } 540 541 log_debug("Writing %s uuidlist to %s at %" PRIu64 " len %d", 542 data->pvd.vg_name, dev_name(data->dev), 543 pos, NAME_LEN); 544 545 if (!dev_write(data->dev, pos, NAME_LEN, ul->uuid)) 546 return_0; 547 548 pos += NAME_LEN; 549 } 550 551 return 1; 552 } 553 554 static int _write_lvd(struct device *dev, uint64_t pos, struct lv_disk *disk) 555 { 556 log_debug("Writing %s LV %s metadata to %s at %" PRIu64 " len %" 557 PRIsize_t, disk->vg_name, disk->lv_name, dev_name(dev), 558 pos, sizeof(*disk)); 559 560 _xlate_lvd(disk); 561 if (!dev_write(dev, pos, sizeof(*disk), disk)) 562 return_0; 563 564 _xlate_lvd(disk); 565 566 return 1; 567 } 568 569 static int _write_lvs(struct disk_list *data) 570 { 571 struct lvd_list *ll; 572 uint64_t pos, offset; 573 574 pos = data->pvd.lv_on_disk.base; 575 576 if (!dev_set(data->dev, pos, data->pvd.lv_on_disk.size, 0)) { 577 log_error("Couldn't zero lv area on device '%s'", 578 dev_name(data->dev)); 579 return 0; 580 } 581 582 dm_list_iterate_items(ll, &data->lvds) { 583 offset = sizeof(struct lv_disk) * ll->lvd.lv_number; 584 if (offset + sizeof(struct lv_disk) > data->pvd.lv_on_disk.size) { 585 log_error("lv_number %d too large", ll->lvd.lv_number); 586 return 0; 587 } 588 589 if (!_write_lvd(data->dev, pos + offset, &ll->lvd)) 590 return_0; 591 } 592 593 return 1; 594 } 595 596 static int _write_extents(struct disk_list *data) 597 { 598 size_t len = sizeof(struct pe_disk) * data->pvd.pe_total; 599 struct pe_disk *extents = data->extents; 600 uint64_t pos = data->pvd.pe_on_disk.base; 601 602 log_debug("Writing %s extents metadata to %s at %" PRIu64 " len %" 603 PRIsize_t, data->pvd.vg_name, dev_name(data->dev), 604 pos, len); 605 606 _xlate_extents(extents, data->pvd.pe_total); 607 if (!dev_write(data->dev, pos, len, extents)) 608 return_0; 609 610 _xlate_extents(extents, data->pvd.pe_total); 611 612 return 1; 613 } 614 615 static int _write_pvd(struct disk_list *data) 616 { 617 char *buf; 618 uint64_t pos = data->pvd.pv_on_disk.base; 619 size_t size = data->pvd.pv_on_disk.size; 620 621 if (size < sizeof(struct pv_disk)) { 622 log_error("Invalid PV structure size."); 623 return 0; 624 } 625 626 /* Make sure that the gap between the PV structure and 627 the next one is zeroed in order to make non LVM tools 628 happy (idea from AED) */ 629 buf = dm_malloc(size); 630 if (!buf) { 631 log_error("Couldn't allocate temporary PV buffer."); 632 return 0; 633 } 634 635 memset(buf, 0, size); 636 memcpy(buf, &data->pvd, sizeof(struct pv_disk)); 637 638 log_debug("Writing %s PV metadata to %s at %" PRIu64 " len %" 639 PRIsize_t, data->pvd.vg_name, dev_name(data->dev), 640 pos, size); 641 642 _xlate_pvd((struct pv_disk *) buf); 643 if (!dev_write(data->dev, pos, size, buf)) { 644 dm_free(buf); 645 return_0; 646 } 647 648 dm_free(buf); 649 return 1; 650 } 651 652 /* 653 * assumes the device has been opened. 654 */ 655 static int __write_all_pvd(const struct format_type *fmt __attribute((unused)), 656 struct disk_list *data) 657 { 658 const char *pv_name = dev_name(data->dev); 659 660 if (!_write_pvd(data)) { 661 log_error("Failed to write PV structure onto %s", pv_name); 662 return 0; 663 } 664 665 /* vgcache_add(data->pvd.vg_name, data->vgd.vg_uuid, data->dev, fmt); */ 666 /* 667 * Stop here for orphan pv's. 668 */ 669 if (data->pvd.vg_name[0] == '\0') { 670 /* if (!test_mode()) 671 vgcache_add(data->pvd.vg_name, NULL, data->dev, fmt); */ 672 return 1; 673 } 674 675 /* if (!test_mode()) 676 vgcache_add(data->pvd.vg_name, data->vgd.vg_uuid, data->dev, 677 fmt); */ 678 679 if (!_write_vgd(data)) { 680 log_error("Failed to write VG data to %s", pv_name); 681 return 0; 682 } 683 684 if (!_write_uuids(data)) { 685 log_error("Failed to write PV uuid list to %s", pv_name); 686 return 0; 687 } 688 689 if (!_write_lvs(data)) { 690 log_error("Failed to write LV's to %s", pv_name); 691 return 0; 692 } 693 694 if (!_write_extents(data)) { 695 log_error("Failed to write extents to %s", pv_name); 696 return 0; 697 } 698 699 return 1; 700 } 701 702 /* 703 * opens the device and hands to the above fn. 704 */ 705 static int _write_all_pvd(const struct format_type *fmt, struct disk_list *data) 706 { 707 int r; 708 709 if (!dev_open(data->dev)) 710 return_0; 711 712 r = __write_all_pvd(fmt, data); 713 714 if (!dev_close(data->dev)) 715 stack; 716 717 return r; 718 } 719 720 /* 721 * Writes all the given pv's to disk. Does very 722 * little sanity checking, so make sure correct 723 * data is passed to here. 724 */ 725 int write_disks(const struct format_type *fmt, struct dm_list *pvs) 726 { 727 struct disk_list *dl; 728 729 dm_list_iterate_items(dl, pvs) { 730 if (!(_write_all_pvd(fmt, dl))) 731 return_0; 732 733 log_very_verbose("Successfully wrote data to %s", 734 dev_name(dl->dev)); 735 } 736 737 return 1; 738 } 739