1e8d4e5ffSJeff Cody /* 2e8d4e5ffSJeff Cody * Block driver for Hyper-V VHDX Images 3e8d4e5ffSJeff Cody * 4e8d4e5ffSJeff Cody * Copyright (c) 2013 Red Hat, Inc., 5e8d4e5ffSJeff Cody * 6e8d4e5ffSJeff Cody * Authors: 7e8d4e5ffSJeff Cody * Jeff Cody <jcody@redhat.com> 8e8d4e5ffSJeff Cody * 96e9d290bSJeff Cody * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 10e8d4e5ffSJeff Cody * by Microsoft: 116e9d290bSJeff Cody * https://www.microsoft.com/en-us/download/details.aspx?id=34750 12e8d4e5ffSJeff Cody * 13e8d4e5ffSJeff Cody * This work is licensed under the terms of the GNU LGPL, version 2 or later. 14e8d4e5ffSJeff Cody * See the COPYING.LIB file in the top-level directory. 15e8d4e5ffSJeff Cody * 16e8d4e5ffSJeff Cody */ 17e8d4e5ffSJeff Cody 1880c71a24SPeter Maydell #include "qemu/osdep.h" 19da34e65cSMarkus Armbruster #include "qapi/error.h" 20e8d4e5ffSJeff Cody #include "block/block_int.h" 21609f45eaSMax Reitz #include "block/qdict.h" 2210bf03afSKevin Wolf #include "sysemu/block-backend.h" 23e8d4e5ffSJeff Cody #include "qemu/module.h" 24922a01a0SMarkus Armbruster #include "qemu/option.h" 25e8d4e5ffSJeff Cody #include "qemu/crc32c.h" 2658369e22SPaolo Bonzini #include "qemu/bswap.h" 276caaad46SPeter Lieven #include "qemu/error-report.h" 285df022cfSPeter Maydell #include "qemu/memalign.h" 290d8c41daSMichael S. 
Tsirkin #include "vhdx.h" 30795c40b8SJuan Quintela #include "migration/blocker.h" 31cb6414dfSFam Zheng #include "qemu/uuid.h" 3209b68dabSKevin Wolf #include "qapi/qmp/qdict.h" 3309b68dabSKevin Wolf #include "qapi/qobject-input-visitor.h" 3409b68dabSKevin Wolf #include "qapi/qapi-visit-block-core.h" 353412f7b1SJeff Cody 363412f7b1SJeff Cody /* Options for VHDX creation */ 373412f7b1SJeff Cody 383412f7b1SJeff Cody #define VHDX_BLOCK_OPT_LOG_SIZE "log_size" 393412f7b1SJeff Cody #define VHDX_BLOCK_OPT_BLOCK_SIZE "block_size" 403412f7b1SJeff Cody #define VHDX_BLOCK_OPT_ZERO "block_state_zero" 413412f7b1SJeff Cody 423412f7b1SJeff Cody typedef enum VHDXImageType { 433412f7b1SJeff Cody VHDX_TYPE_DYNAMIC = 0, 443412f7b1SJeff Cody VHDX_TYPE_FIXED, 453412f7b1SJeff Cody VHDX_TYPE_DIFFERENCING, /* Currently unsupported */ 463412f7b1SJeff Cody } VHDXImageType; 47e8d4e5ffSJeff Cody 4809b68dabSKevin Wolf static QemuOptsList vhdx_create_opts; 4909b68dabSKevin Wolf 50e8d4e5ffSJeff Cody /* Several metadata and region table data entries are identified by 51e8d4e5ffSJeff Cody * guids in a MS-specific GUID format. 
*/ 52e8d4e5ffSJeff Cody 53e8d4e5ffSJeff Cody 54e8d4e5ffSJeff Cody /* ------- Known Region Table GUIDs ---------------------- */ 55e8d4e5ffSJeff Cody static const MSGUID bat_guid = { .data1 = 0x2dc27766, 56e8d4e5ffSJeff Cody .data2 = 0xf623, 57e8d4e5ffSJeff Cody .data3 = 0x4200, 58e8d4e5ffSJeff Cody .data4 = { 0x9d, 0x64, 0x11, 0x5e, 59e8d4e5ffSJeff Cody 0x9b, 0xfd, 0x4a, 0x08} }; 60e8d4e5ffSJeff Cody 61e8d4e5ffSJeff Cody static const MSGUID metadata_guid = { .data1 = 0x8b7ca206, 62e8d4e5ffSJeff Cody .data2 = 0x4790, 63e8d4e5ffSJeff Cody .data3 = 0x4b9a, 64e8d4e5ffSJeff Cody .data4 = { 0xb8, 0xfe, 0x57, 0x5f, 65e8d4e5ffSJeff Cody 0x05, 0x0f, 0x88, 0x6e} }; 66e8d4e5ffSJeff Cody 67e8d4e5ffSJeff Cody 68e8d4e5ffSJeff Cody 69e8d4e5ffSJeff Cody /* ------- Known Metadata Entry GUIDs ---------------------- */ 70e8d4e5ffSJeff Cody static const MSGUID file_param_guid = { .data1 = 0xcaa16737, 71e8d4e5ffSJeff Cody .data2 = 0xfa36, 72e8d4e5ffSJeff Cody .data3 = 0x4d43, 73e8d4e5ffSJeff Cody .data4 = { 0xb3, 0xb6, 0x33, 0xf0, 74e8d4e5ffSJeff Cody 0xaa, 0x44, 0xe7, 0x6b} }; 75e8d4e5ffSJeff Cody 76e8d4e5ffSJeff Cody static const MSGUID virtual_size_guid = { .data1 = 0x2FA54224, 77e8d4e5ffSJeff Cody .data2 = 0xcd1b, 78e8d4e5ffSJeff Cody .data3 = 0x4876, 79e8d4e5ffSJeff Cody .data4 = { 0xb2, 0x11, 0x5d, 0xbe, 80e8d4e5ffSJeff Cody 0xd8, 0x3b, 0xf4, 0xb8} }; 81e8d4e5ffSJeff Cody 82e8d4e5ffSJeff Cody static const MSGUID page83_guid = { .data1 = 0xbeca12ab, 83e8d4e5ffSJeff Cody .data2 = 0xb2e6, 84e8d4e5ffSJeff Cody .data3 = 0x4523, 85e8d4e5ffSJeff Cody .data4 = { 0x93, 0xef, 0xc3, 0x09, 86e8d4e5ffSJeff Cody 0xe0, 0x00, 0xc7, 0x46} }; 87e8d4e5ffSJeff Cody 88e8d4e5ffSJeff Cody 89e8d4e5ffSJeff Cody static const MSGUID phys_sector_guid = { .data1 = 0xcda348c7, 90e8d4e5ffSJeff Cody .data2 = 0x445d, 91e8d4e5ffSJeff Cody .data3 = 0x4471, 92e8d4e5ffSJeff Cody .data4 = { 0x9c, 0xc9, 0xe9, 0x88, 93e8d4e5ffSJeff Cody 0x52, 0x51, 0xc5, 0x56} }; 94e8d4e5ffSJeff Cody 95e8d4e5ffSJeff Cody static const 
MSGUID parent_locator_guid = { .data1 = 0xa8d35f2d, 96e8d4e5ffSJeff Cody .data2 = 0xb30b, 97e8d4e5ffSJeff Cody .data3 = 0x454d, 98e8d4e5ffSJeff Cody .data4 = { 0xab, 0xf7, 0xd3, 99e8d4e5ffSJeff Cody 0xd8, 0x48, 0x34, 100e8d4e5ffSJeff Cody 0xab, 0x0c} }; 101e8d4e5ffSJeff Cody 102e8d4e5ffSJeff Cody static const MSGUID logical_sector_guid = { .data1 = 0x8141bf1d, 103e8d4e5ffSJeff Cody .data2 = 0xa96f, 104e8d4e5ffSJeff Cody .data3 = 0x4709, 105e8d4e5ffSJeff Cody .data4 = { 0xba, 0x47, 0xf2, 106e8d4e5ffSJeff Cody 0x33, 0xa8, 0xfa, 107e8d4e5ffSJeff Cody 0xab, 0x5f} }; 108e8d4e5ffSJeff Cody 109e8d4e5ffSJeff Cody /* Each parent type must have a valid GUID; this is for parent images 110e8d4e5ffSJeff Cody * of type 'VHDX'. If we were to allow e.g. a QCOW2 parent, we would 111e8d4e5ffSJeff Cody * need to make up our own QCOW2 GUID type */ 112c2ebb05eSPeter Maydell static const MSGUID parent_vhdx_guid __attribute__((unused)) 113c2ebb05eSPeter Maydell = { .data1 = 0xb04aefb7, 114e8d4e5ffSJeff Cody .data2 = 0xd19e, 115e8d4e5ffSJeff Cody .data3 = 0x4a81, 116e8d4e5ffSJeff Cody .data4 = { 0xb7, 0x89, 0x25, 0xb8, 117e8d4e5ffSJeff Cody 0xe9, 0x44, 0x59, 0x13} }; 118e8d4e5ffSJeff Cody 119e8d4e5ffSJeff Cody 120e8d4e5ffSJeff Cody #define META_FILE_PARAMETER_PRESENT 0x01 121e8d4e5ffSJeff Cody #define META_VIRTUAL_DISK_SIZE_PRESENT 0x02 122e8d4e5ffSJeff Cody #define META_PAGE_83_PRESENT 0x04 123e8d4e5ffSJeff Cody #define META_LOGICAL_SECTOR_SIZE_PRESENT 0x08 124e8d4e5ffSJeff Cody #define META_PHYS_SECTOR_SIZE_PRESENT 0x10 125e8d4e5ffSJeff Cody #define META_PARENT_LOCATOR_PRESENT 0x20 126e8d4e5ffSJeff Cody 127e8d4e5ffSJeff Cody #define META_ALL_PRESENT \ 128e8d4e5ffSJeff Cody (META_FILE_PARAMETER_PRESENT | META_VIRTUAL_DISK_SIZE_PRESENT | \ 129e8d4e5ffSJeff Cody META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \ 130e8d4e5ffSJeff Cody META_PHYS_SECTOR_SIZE_PRESENT) 131e8d4e5ffSJeff Cody 132e8d4e5ffSJeff Cody 133059e2fbbSJeff Cody typedef struct VHDXSectorInfo { 134059e2fbbSJeff 
Cody uint32_t bat_idx; /* BAT entry index */ 135059e2fbbSJeff Cody uint32_t sectors_avail; /* sectors available in payload block */ 136059e2fbbSJeff Cody uint32_t bytes_left; /* bytes left in the block after data to r/w */ 137059e2fbbSJeff Cody uint32_t bytes_avail; /* bytes available in payload block */ 138059e2fbbSJeff Cody uint64_t file_offset; /* absolute offset in bytes, in file */ 139059e2fbbSJeff Cody uint64_t block_offset; /* block offset, in bytes */ 140059e2fbbSJeff Cody } VHDXSectorInfo; 141059e2fbbSJeff Cody 1424f18b782SJeff Cody /* Calculates new checksum. 1434f18b782SJeff Cody * 1444f18b782SJeff Cody * Zero is substituted during crc calculation for the original crc field 1454f18b782SJeff Cody * crc_offset: byte offset in buf of the buffer crc 1464f18b782SJeff Cody * buf: buffer pointer 1474f18b782SJeff Cody * size: size of buffer (must be > crc_offset+4) 1484f18b782SJeff Cody * 1494f75b52aSJeff Cody * Note: The buffer should have all multi-byte data in little-endian format, 1504f75b52aSJeff Cody * and the resulting checksum is in little endian format. 
1514f18b782SJeff Cody */ 1524f18b782SJeff Cody uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset) 1534f18b782SJeff Cody { 1544f18b782SJeff Cody uint32_t crc; 1554f18b782SJeff Cody 1564f18b782SJeff Cody assert(buf != NULL); 1574f18b782SJeff Cody assert(size > (crc_offset + sizeof(crc))); 1584f18b782SJeff Cody 1594f18b782SJeff Cody memset(buf + crc_offset, 0, sizeof(crc)); 1604f18b782SJeff Cody crc = crc32c(0xffffffff, buf, size); 1611229e46dSPeter Maydell crc = cpu_to_le32(crc); 1624f18b782SJeff Cody memcpy(buf + crc_offset, &crc, sizeof(crc)); 1634f18b782SJeff Cody 1644f18b782SJeff Cody return crc; 1654f18b782SJeff Cody } 1664f18b782SJeff Cody 167e8d4e5ffSJeff Cody uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, 168e8d4e5ffSJeff Cody int crc_offset) 169e8d4e5ffSJeff Cody { 170e8d4e5ffSJeff Cody uint32_t crc_new; 171e8d4e5ffSJeff Cody uint32_t crc_orig; 172e8d4e5ffSJeff Cody assert(buf != NULL); 173e8d4e5ffSJeff Cody 174e8d4e5ffSJeff Cody if (crc_offset > 0) { 175e8d4e5ffSJeff Cody memcpy(&crc_orig, buf + crc_offset, sizeof(crc_orig)); 176e8d4e5ffSJeff Cody memset(buf + crc_offset, 0, sizeof(crc_orig)); 177e8d4e5ffSJeff Cody } 178e8d4e5ffSJeff Cody 179e8d4e5ffSJeff Cody crc_new = crc32c(crc, buf, size); 180e8d4e5ffSJeff Cody if (crc_offset > 0) { 181e8d4e5ffSJeff Cody memcpy(buf + crc_offset, &crc_orig, sizeof(crc_orig)); 182e8d4e5ffSJeff Cody } 183e8d4e5ffSJeff Cody 184e8d4e5ffSJeff Cody return crc_new; 185e8d4e5ffSJeff Cody } 186e8d4e5ffSJeff Cody 187e8d4e5ffSJeff Cody /* Validates the checksum of the buffer, with an in-place CRC. 188e8d4e5ffSJeff Cody * 189e8d4e5ffSJeff Cody * Zero is substituted during crc calculation for the original crc field, 19050d6a8a3SStefan Weil * and the crc field is restored afterwards. But the buffer will be modified 191e8d4e5ffSJeff Cody * during the calculation, so this may not be not suitable for multi-threaded 192e8d4e5ffSJeff Cody * use. 
193e8d4e5ffSJeff Cody * 194e8d4e5ffSJeff Cody * crc_offset: byte offset in buf of the buffer crc 195e8d4e5ffSJeff Cody * buf: buffer pointer 196e8d4e5ffSJeff Cody * size: size of buffer (must be > crc_offset+4) 197e8d4e5ffSJeff Cody * 198e8d4e5ffSJeff Cody * returns true if checksum is valid, false otherwise 199e8d4e5ffSJeff Cody */ 200e8d4e5ffSJeff Cody bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset) 201e8d4e5ffSJeff Cody { 202e8d4e5ffSJeff Cody uint32_t crc_orig; 203e8d4e5ffSJeff Cody uint32_t crc; 204e8d4e5ffSJeff Cody 205e8d4e5ffSJeff Cody assert(buf != NULL); 206e8d4e5ffSJeff Cody assert(size > (crc_offset + 4)); 207e8d4e5ffSJeff Cody 208e8d4e5ffSJeff Cody memcpy(&crc_orig, buf + crc_offset, sizeof(crc_orig)); 209e8d4e5ffSJeff Cody crc_orig = le32_to_cpu(crc_orig); 210e8d4e5ffSJeff Cody 211e8d4e5ffSJeff Cody crc = vhdx_checksum_calc(0xffffffff, buf, size, crc_offset); 212e8d4e5ffSJeff Cody 213e8d4e5ffSJeff Cody return crc == crc_orig; 214e8d4e5ffSJeff Cody } 215e8d4e5ffSJeff Cody 216e8d4e5ffSJeff Cody 217e8d4e5ffSJeff Cody /* 2184f18b782SJeff Cody * This generates a UUID that is compliant with the MS GUIDs used 2194f18b782SJeff Cody * in the VHDX spec (and elsewhere). 
2204f18b782SJeff Cody */ 2214f18b782SJeff Cody void vhdx_guid_generate(MSGUID *guid) 2224f18b782SJeff Cody { 223cb6414dfSFam Zheng QemuUUID uuid; 2244f18b782SJeff Cody assert(guid != NULL); 2254f18b782SJeff Cody 226cb6414dfSFam Zheng qemu_uuid_generate(&uuid); 227cb6414dfSFam Zheng memcpy(guid, &uuid, sizeof(MSGUID)); 2284f18b782SJeff Cody } 2294f18b782SJeff Cody 2301a848fd4SJeff Cody /* Check for region overlaps inside the VHDX image */ 2311a848fd4SJeff Cody static int vhdx_region_check(BDRVVHDXState *s, uint64_t start, uint64_t length) 2321a848fd4SJeff Cody { 2331a848fd4SJeff Cody int ret = 0; 2341a848fd4SJeff Cody uint64_t end; 2351a848fd4SJeff Cody VHDXRegionEntry *r; 2361a848fd4SJeff Cody 2371a848fd4SJeff Cody end = start + length; 2381a848fd4SJeff Cody QLIST_FOREACH(r, &s->regions, entries) { 2391a848fd4SJeff Cody if (!((start >= r->end) || (end <= r->start))) { 2406caaad46SPeter Lieven error_report("VHDX region %" PRIu64 "-%" PRIu64 " overlaps with " 2416caaad46SPeter Lieven "region %" PRIu64 "-%." 
PRIu64, start, end, r->start, 2426caaad46SPeter Lieven r->end); 2431a848fd4SJeff Cody ret = -EINVAL; 2441a848fd4SJeff Cody goto exit; 2451a848fd4SJeff Cody } 2461a848fd4SJeff Cody } 2471a848fd4SJeff Cody 2481a848fd4SJeff Cody exit: 2491a848fd4SJeff Cody return ret; 2501a848fd4SJeff Cody } 2511a848fd4SJeff Cody 2521a848fd4SJeff Cody /* Register a region for future checks */ 2531a848fd4SJeff Cody static void vhdx_region_register(BDRVVHDXState *s, 2541a848fd4SJeff Cody uint64_t start, uint64_t length) 2551a848fd4SJeff Cody { 2561a848fd4SJeff Cody VHDXRegionEntry *r; 2571a848fd4SJeff Cody 2581a848fd4SJeff Cody r = g_malloc0(sizeof(*r)); 2591a848fd4SJeff Cody 2601a848fd4SJeff Cody r->start = start; 2611a848fd4SJeff Cody r->end = start + length; 2621a848fd4SJeff Cody 2631a848fd4SJeff Cody QLIST_INSERT_HEAD(&s->regions, r, entries); 2641a848fd4SJeff Cody } 2651a848fd4SJeff Cody 2661a848fd4SJeff Cody /* Free all registered regions */ 2671a848fd4SJeff Cody static void vhdx_region_unregister_all(BDRVVHDXState *s) 2681a848fd4SJeff Cody { 2691a848fd4SJeff Cody VHDXRegionEntry *r, *r_next; 2701a848fd4SJeff Cody 2711a848fd4SJeff Cody QLIST_FOREACH_SAFE(r, &s->regions, entries, r_next) { 2721a848fd4SJeff Cody QLIST_REMOVE(r, entries); 2731a848fd4SJeff Cody g_free(r); 2741a848fd4SJeff Cody } 2751a848fd4SJeff Cody } 2761a848fd4SJeff Cody 2771e74a971SJeff Cody static void vhdx_set_shift_bits(BDRVVHDXState *s) 2781e74a971SJeff Cody { 27904a36158SMax Reitz s->logical_sector_size_bits = ctz32(s->logical_sector_size); 28004a36158SMax Reitz s->sectors_per_block_bits = ctz32(s->sectors_per_block); 28104a36158SMax Reitz s->chunk_ratio_bits = ctz64(s->chunk_ratio); 28204a36158SMax Reitz s->block_size_bits = ctz32(s->block_size); 2831e74a971SJeff Cody } 2841e74a971SJeff Cody 2854f18b782SJeff Cody /* 286e8d4e5ffSJeff Cody * Per the MS VHDX Specification, for every VHDX file: 287e8d4e5ffSJeff Cody * - The header section is fixed size - 1 MB 288e8d4e5ffSJeff Cody * - The header section is 
always the first "object" 289e8d4e5ffSJeff Cody * - The first 64KB of the header is the File Identifier 290e8d4e5ffSJeff Cody * - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile") 291e8d4e5ffSJeff Cody * - The following 512 bytes constitute a UTF-16 string identifiying the 292e8d4e5ffSJeff Cody * software that created the file, and is optional and diagnostic only. 293e8d4e5ffSJeff Cody * 294e8d4e5ffSJeff Cody * Therefore, we probe by looking for the vhdxfile signature "vhdxfile" 295e8d4e5ffSJeff Cody */ 296e8d4e5ffSJeff Cody static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename) 297e8d4e5ffSJeff Cody { 298e8d4e5ffSJeff Cody if (buf_size >= 8 && !memcmp(buf, "vhdxfile", 8)) { 299e8d4e5ffSJeff Cody return 100; 300e8d4e5ffSJeff Cody } 301e8d4e5ffSJeff Cody return 0; 302e8d4e5ffSJeff Cody } 303e8d4e5ffSJeff Cody 3041e74a971SJeff Cody /* 3051e74a971SJeff Cody * Writes the header to the specified offset. 3061e74a971SJeff Cody * 3071e74a971SJeff Cody * This will optionally read in buffer data from disk (otherwise zero-fill), 3081e74a971SJeff Cody * and then update the header checksum. Header is converted to proper 3091e74a971SJeff Cody * endianness before being written to the specified file offset 3101e74a971SJeff Cody */ 311cf2ab8fcSKevin Wolf static int vhdx_write_header(BdrvChild *file, VHDXHeader *hdr, 3121e74a971SJeff Cody uint64_t offset, bool read) 3131e74a971SJeff Cody { 314cf2ab8fcSKevin Wolf BlockDriverState *bs_file = file->bs; 3151e74a971SJeff Cody uint8_t *buffer = NULL; 3161e74a971SJeff Cody int ret; 3174f75b52aSJeff Cody VHDXHeader *header_le; 3181e74a971SJeff Cody 3191e74a971SJeff Cody assert(bs_file != NULL); 3201e74a971SJeff Cody assert(hdr != NULL); 3211e74a971SJeff Cody 3221e74a971SJeff Cody /* the header checksum is not over just the packed size of VHDXHeader, 3231e74a971SJeff Cody * but rather over the entire 'reserved' range for the header, which is 3241e74a971SJeff Cody * 4KB (VHDX_HEADER_SIZE). 
*/ 3251e74a971SJeff Cody 3261e74a971SJeff Cody buffer = qemu_blockalign(bs_file, VHDX_HEADER_SIZE); 3271e74a971SJeff Cody if (read) { 3281e74a971SJeff Cody /* if true, we can't assume the extra reserved bytes are 0 */ 32932cc71deSAlberto Faria ret = bdrv_pread(file, offset, VHDX_HEADER_SIZE, buffer, 0); 3301e74a971SJeff Cody if (ret < 0) { 3311e74a971SJeff Cody goto exit; 3321e74a971SJeff Cody } 3331e74a971SJeff Cody } else { 3341e74a971SJeff Cody memset(buffer, 0, VHDX_HEADER_SIZE); 3351e74a971SJeff Cody } 3361e74a971SJeff Cody 3371e74a971SJeff Cody /* overwrite the actual VHDXHeader portion */ 3384f75b52aSJeff Cody header_le = (VHDXHeader *)buffer; 3394f75b52aSJeff Cody memcpy(header_le, hdr, sizeof(VHDXHeader)); 3404f75b52aSJeff Cody vhdx_header_le_export(hdr, header_le); 3414f75b52aSJeff Cody vhdx_update_checksum(buffer, VHDX_HEADER_SIZE, 3421e74a971SJeff Cody offsetof(VHDXHeader, checksum)); 34332cc71deSAlberto Faria ret = bdrv_pwrite_sync(file, offset, sizeof(VHDXHeader), header_le, 0); 3441e74a971SJeff Cody 3451e74a971SJeff Cody exit: 3461e74a971SJeff Cody qemu_vfree(buffer); 3471e74a971SJeff Cody return ret; 3481e74a971SJeff Cody } 3491e74a971SJeff Cody 3504f18b782SJeff Cody /* Update the VHDX headers 3514f18b782SJeff Cody * 3524f18b782SJeff Cody * This follows the VHDX spec procedures for header updates. 
3534f18b782SJeff Cody * 3544f18b782SJeff Cody * - non-current header is updated with largest sequence number 3554f18b782SJeff Cody */ 3564f18b782SJeff Cody static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s, 357c3906c5eSJeff Cody bool generate_data_write_guid, MSGUID *log_guid) 3584f18b782SJeff Cody { 3594f18b782SJeff Cody int ret = 0; 3604f18b782SJeff Cody int hdr_idx = 0; 3614f18b782SJeff Cody uint64_t header_offset = VHDX_HEADER1_OFFSET; 3624f18b782SJeff Cody 3634f18b782SJeff Cody VHDXHeader *active_header; 3644f18b782SJeff Cody VHDXHeader *inactive_header; 3654f18b782SJeff Cody 3664f18b782SJeff Cody /* operate on the non-current header */ 3674f18b782SJeff Cody if (s->curr_header == 0) { 3684f18b782SJeff Cody hdr_idx = 1; 3694f18b782SJeff Cody header_offset = VHDX_HEADER2_OFFSET; 3704f18b782SJeff Cody } 3714f18b782SJeff Cody 3724f18b782SJeff Cody active_header = s->headers[s->curr_header]; 3734f18b782SJeff Cody inactive_header = s->headers[hdr_idx]; 3744f18b782SJeff Cody 3754f18b782SJeff Cody inactive_header->sequence_number = active_header->sequence_number + 1; 3764f18b782SJeff Cody 3774f18b782SJeff Cody /* a new file guid must be generated before any file write, including 3784f18b782SJeff Cody * headers */ 3794f18b782SJeff Cody inactive_header->file_write_guid = s->session_guid; 3804f18b782SJeff Cody 3814f18b782SJeff Cody /* a new data guid only needs to be generated before any guest-visible 3824f18b782SJeff Cody * writes (i.e. 
something observable via virtual disk read) */ 3834f18b782SJeff Cody if (generate_data_write_guid) { 3844f18b782SJeff Cody vhdx_guid_generate(&inactive_header->data_write_guid); 3854f18b782SJeff Cody } 3864f18b782SJeff Cody 387c3906c5eSJeff Cody /* update the log guid if present */ 388c3906c5eSJeff Cody if (log_guid) { 389c3906c5eSJeff Cody inactive_header->log_guid = *log_guid; 390c3906c5eSJeff Cody } 391c3906c5eSJeff Cody 392cf2ab8fcSKevin Wolf ret = vhdx_write_header(bs->file, inactive_header, header_offset, true); 3934f18b782SJeff Cody if (ret < 0) { 3944f18b782SJeff Cody goto exit; 3954f18b782SJeff Cody } 3964f18b782SJeff Cody s->curr_header = hdr_idx; 3974f18b782SJeff Cody 3984f18b782SJeff Cody exit: 3994f18b782SJeff Cody return ret; 4004f18b782SJeff Cody } 4014f18b782SJeff Cody 4024f18b782SJeff Cody /* 4034f18b782SJeff Cody * The VHDX spec calls for header updates to be performed twice, so that both 4044f18b782SJeff Cody * the current and non-current header have valid info 4054f18b782SJeff Cody */ 406c3906c5eSJeff Cody int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, 407c3906c5eSJeff Cody bool generate_data_write_guid, MSGUID *log_guid) 4084f18b782SJeff Cody { 4094f18b782SJeff Cody int ret; 4104f18b782SJeff Cody 411c3906c5eSJeff Cody ret = vhdx_update_header(bs, s, generate_data_write_guid, log_guid); 4124f18b782SJeff Cody if (ret < 0) { 4134f18b782SJeff Cody return ret; 4144f18b782SJeff Cody } 415b3ac2b94SSimran Singhal return vhdx_update_header(bs, s, generate_data_write_guid, log_guid); 4164f18b782SJeff Cody } 417e8d4e5ffSJeff Cody 418e8d4e5ffSJeff Cody /* opens the specified header block from the VHDX file header section */ 4196890aad4SPaolo Bonzini static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s, 4206890aad4SPaolo Bonzini Error **errp) 421e8d4e5ffSJeff Cody { 4226890aad4SPaolo Bonzini int ret; 423e8d4e5ffSJeff Cody VHDXHeader *header1; 424e8d4e5ffSJeff Cody VHDXHeader *header2; 425e8d4e5ffSJeff Cody bool h1_valid = 
false; 426e8d4e5ffSJeff Cody bool h2_valid = false; 427e8d4e5ffSJeff Cody uint64_t h1_seq = 0; 428e8d4e5ffSJeff Cody uint64_t h2_seq = 0; 429e8d4e5ffSJeff Cody uint8_t *buffer; 430e8d4e5ffSJeff Cody 4316e9d290bSJeff Cody /* header1 & header2 are freed in vhdx_close() */ 432e8d4e5ffSJeff Cody header1 = qemu_blockalign(bs, sizeof(VHDXHeader)); 433e8d4e5ffSJeff Cody header2 = qemu_blockalign(bs, sizeof(VHDXHeader)); 434e8d4e5ffSJeff Cody 435e8d4e5ffSJeff Cody buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE); 436e8d4e5ffSJeff Cody 437e8d4e5ffSJeff Cody s->headers[0] = header1; 438e8d4e5ffSJeff Cody s->headers[1] = header2; 439e8d4e5ffSJeff Cody 440e8d4e5ffSJeff Cody /* We have to read the whole VHDX_HEADER_SIZE instead of 441e8d4e5ffSJeff Cody * sizeof(VHDXHeader), because the checksum is over the whole 442e8d4e5ffSJeff Cody * region */ 44332cc71deSAlberto Faria ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, VHDX_HEADER_SIZE, buffer, 44453fb7844SAlberto Faria 0); 445e8d4e5ffSJeff Cody if (ret < 0) { 446e8d4e5ffSJeff Cody goto fail; 447e8d4e5ffSJeff Cody } 448e8d4e5ffSJeff Cody /* copy over just the relevant portion that we need */ 449e8d4e5ffSJeff Cody memcpy(header1, buffer, sizeof(VHDXHeader)); 450e8d4e5ffSJeff Cody 4514f75b52aSJeff Cody if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) { 4524f75b52aSJeff Cody vhdx_header_le_import(header1); 4534f75b52aSJeff Cody if (header1->signature == VHDX_HEADER_SIGNATURE && 454e8d4e5ffSJeff Cody header1->version == 1) { 455e8d4e5ffSJeff Cody h1_seq = header1->sequence_number; 456e8d4e5ffSJeff Cody h1_valid = true; 457e8d4e5ffSJeff Cody } 4584f75b52aSJeff Cody } 459e8d4e5ffSJeff Cody 46032cc71deSAlberto Faria ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, VHDX_HEADER_SIZE, buffer, 46153fb7844SAlberto Faria 0); 462e8d4e5ffSJeff Cody if (ret < 0) { 463e8d4e5ffSJeff Cody goto fail; 464e8d4e5ffSJeff Cody } 465e8d4e5ffSJeff Cody /* copy over just the relevant portion that we need */ 466e8d4e5ffSJeff Cody memcpy(header2, 
buffer, sizeof(VHDXHeader)); 467e8d4e5ffSJeff Cody 4684f75b52aSJeff Cody if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) { 4694f75b52aSJeff Cody vhdx_header_le_import(header2); 4704f75b52aSJeff Cody if (header2->signature == VHDX_HEADER_SIGNATURE && 471e8d4e5ffSJeff Cody header2->version == 1) { 472e8d4e5ffSJeff Cody h2_seq = header2->sequence_number; 473e8d4e5ffSJeff Cody h2_valid = true; 474e8d4e5ffSJeff Cody } 4754f75b52aSJeff Cody } 476e8d4e5ffSJeff Cody 477e8d4e5ffSJeff Cody /* If there is only 1 valid header (or no valid headers), we 478e8d4e5ffSJeff Cody * don't care what the sequence numbers are */ 479e8d4e5ffSJeff Cody if (h1_valid && !h2_valid) { 480e8d4e5ffSJeff Cody s->curr_header = 0; 481e8d4e5ffSJeff Cody } else if (!h1_valid && h2_valid) { 482e8d4e5ffSJeff Cody s->curr_header = 1; 483e8d4e5ffSJeff Cody } else if (!h1_valid && !h2_valid) { 484e8d4e5ffSJeff Cody goto fail; 485e8d4e5ffSJeff Cody } else { 486e8d4e5ffSJeff Cody /* If both headers are valid, then we choose the active one by the 487e8d4e5ffSJeff Cody * highest sequence number. If the sequence numbers are equal, that is 488e8d4e5ffSJeff Cody * invalid */ 489e8d4e5ffSJeff Cody if (h1_seq > h2_seq) { 490e8d4e5ffSJeff Cody s->curr_header = 0; 491e8d4e5ffSJeff Cody } else if (h2_seq > h1_seq) { 492e8d4e5ffSJeff Cody s->curr_header = 1; 493e8d4e5ffSJeff Cody } else { 49469060461SJeff Cody /* The Microsoft Disk2VHD tool will create 2 identical 49569060461SJeff Cody * headers, with identical sequence numbers. 
If the headers are 49669060461SJeff Cody * identical, don't consider the file corrupt */ 49769060461SJeff Cody if (!memcmp(header1, header2, sizeof(VHDXHeader))) { 49869060461SJeff Cody s->curr_header = 0; 49969060461SJeff Cody } else { 500e8d4e5ffSJeff Cody goto fail; 501e8d4e5ffSJeff Cody } 502e8d4e5ffSJeff Cody } 50369060461SJeff Cody } 504e8d4e5ffSJeff Cody 5051a848fd4SJeff Cody vhdx_region_register(s, s->headers[s->curr_header]->log_offset, 5061a848fd4SJeff Cody s->headers[s->curr_header]->log_length); 507e8d4e5ffSJeff Cody goto exit; 508e8d4e5ffSJeff Cody 509e8d4e5ffSJeff Cody fail: 5106890aad4SPaolo Bonzini error_setg_errno(errp, -ret, "No valid VHDX header found"); 511e8d4e5ffSJeff Cody qemu_vfree(header1); 512e8d4e5ffSJeff Cody qemu_vfree(header2); 513e8d4e5ffSJeff Cody s->headers[0] = NULL; 514e8d4e5ffSJeff Cody s->headers[1] = NULL; 515e8d4e5ffSJeff Cody exit: 516e8d4e5ffSJeff Cody qemu_vfree(buffer); 517e8d4e5ffSJeff Cody } 518e8d4e5ffSJeff Cody 519e8d4e5ffSJeff Cody 520e8d4e5ffSJeff Cody static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s) 521e8d4e5ffSJeff Cody { 522e8d4e5ffSJeff Cody int ret = 0; 523e8d4e5ffSJeff Cody uint8_t *buffer; 524e8d4e5ffSJeff Cody int offset = 0; 525e8d4e5ffSJeff Cody VHDXRegionTableEntry rt_entry; 526e8d4e5ffSJeff Cody uint32_t i; 527e8d4e5ffSJeff Cody bool bat_rt_found = false; 528e8d4e5ffSJeff Cody bool metadata_rt_found = false; 529e8d4e5ffSJeff Cody 530e8d4e5ffSJeff Cody /* We have to read the whole 64KB block, because the crc32 is over the 531e8d4e5ffSJeff Cody * whole block */ 532e8d4e5ffSJeff Cody buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE); 533e8d4e5ffSJeff Cody 53432cc71deSAlberto Faria ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, 53532cc71deSAlberto Faria VHDX_HEADER_BLOCK_SIZE, buffer, 0); 536e8d4e5ffSJeff Cody if (ret < 0) { 537e8d4e5ffSJeff Cody goto fail; 538e8d4e5ffSJeff Cody } 539e8d4e5ffSJeff Cody memcpy(&s->rt, buffer, sizeof(s->rt)); 540e8d4e5ffSJeff Cody offset += 
sizeof(s->rt); 541e8d4e5ffSJeff Cody 5424f75b52aSJeff Cody if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4)) { 543e8d4e5ffSJeff Cody ret = -EINVAL; 544e8d4e5ffSJeff Cody goto fail; 545e8d4e5ffSJeff Cody } 546e8d4e5ffSJeff Cody 5474f75b52aSJeff Cody vhdx_region_header_le_import(&s->rt); 5484f75b52aSJeff Cody 5494f75b52aSJeff Cody if (s->rt.signature != VHDX_REGION_SIGNATURE) { 5504f75b52aSJeff Cody ret = -EINVAL; 5514f75b52aSJeff Cody goto fail; 5524f75b52aSJeff Cody } 5534f75b52aSJeff Cody 5544f75b52aSJeff Cody 555e8d4e5ffSJeff Cody /* Per spec, maximum region table entry count is 2047 */ 556e8d4e5ffSJeff Cody if (s->rt.entry_count > 2047) { 557e8d4e5ffSJeff Cody ret = -EINVAL; 558e8d4e5ffSJeff Cody goto fail; 559e8d4e5ffSJeff Cody } 560e8d4e5ffSJeff Cody 561e8d4e5ffSJeff Cody for (i = 0; i < s->rt.entry_count; i++) { 562e8d4e5ffSJeff Cody memcpy(&rt_entry, buffer + offset, sizeof(rt_entry)); 563e8d4e5ffSJeff Cody offset += sizeof(rt_entry); 564e8d4e5ffSJeff Cody 565c325ee1dSJeff Cody vhdx_region_entry_le_import(&rt_entry); 566e8d4e5ffSJeff Cody 5671a848fd4SJeff Cody /* check for region overlap between these entries, and any 5681a848fd4SJeff Cody * other memory regions in the file */ 5691a848fd4SJeff Cody ret = vhdx_region_check(s, rt_entry.file_offset, rt_entry.length); 5701a848fd4SJeff Cody if (ret < 0) { 5711a848fd4SJeff Cody goto fail; 5721a848fd4SJeff Cody } 5731a848fd4SJeff Cody 5741a848fd4SJeff Cody vhdx_region_register(s, rt_entry.file_offset, rt_entry.length); 5751a848fd4SJeff Cody 576e8d4e5ffSJeff Cody /* see if we recognize the entry */ 577e8d4e5ffSJeff Cody if (guid_eq(rt_entry.guid, bat_guid)) { 578e8d4e5ffSJeff Cody /* must be unique; if we have already found it this is invalid */ 579e8d4e5ffSJeff Cody if (bat_rt_found) { 580e8d4e5ffSJeff Cody ret = -EINVAL; 581e8d4e5ffSJeff Cody goto fail; 582e8d4e5ffSJeff Cody } 583e8d4e5ffSJeff Cody bat_rt_found = true; 584e8d4e5ffSJeff Cody s->bat_rt = rt_entry; 585e8d4e5ffSJeff Cody continue; 
/* Metadata initial parser
 *
 * This loads all the metadata entry fields.  This may cause additional
 * fields to be processed (e.g. parent locator, etc..).
 *
 * There are 5 Metadata items that are always required:
 *      - File Parameters (block size, has a parent)
 *      - Virtual Disk Size (size, in bytes, of the virtual drive)
 *      - Page 83 Data (scsi page 83 guid)
 *      - Logical Sector Size (logical sector size in bytes, either 512 or
 *                             4096.  We only support 512 currently)
 *      - Physical Sector Size (512 or 4096)
 *
 * Also, if the File Parameters indicate this is a differencing file,
 * we must also look for the Parent Locator metadata item.
 *
 * The metadata table is read from s->metadata_rt.file_offset in bs->file.
 * On success the following fields of @s have been filled in:
 * metadata_hdr, metadata_entries, params, virtual_disk_size,
 * logical_sector_size, physical_sector_size, sectors_per_block,
 * chunk_ratio and block_size; vhdx_set_shift_bits() is then called on @s.
 *
 * Returns 0 on success.  On failure returns the negative value from
 * bdrv_pread(), -EINVAL for a malformed table / duplicate item /
 * out-of-range or non-power-of-2 value, or -ENOTSUP for a required item
 * we do not understand, a set of present items that differs from
 * META_ALL_PRESENT, a differencing image, or a logical sector size other
 * than 512.
 */
static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
{
    int ret = 0;
    uint8_t *buffer;
    int offset = 0;
    uint32_t i = 0;
    VHDXMetadataTableEntry md_entry;

    /* scratch copy of the whole metadata table; released at 'exit' */
    buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);

    ret = bdrv_pread(bs->file, s->metadata_rt.file_offset,
                     VHDX_METADATA_TABLE_MAX_SIZE, buffer, 0);
    if (ret < 0) {
        goto exit;
    }
    /* the table header sits at the very start of the buffer */
    memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr));
    offset += sizeof(s->metadata_hdr);

    vhdx_metadata_header_le_import(&s->metadata_hdr);

    if (s->metadata_hdr.signature != VHDX_METADATA_SIGNATURE) {
        ret = -EINVAL;
        goto exit;
    }

    s->metadata_entries.present = 0;

    /* all advertised entries must fit in what we read after the header */
    if ((s->metadata_hdr.entry_count * sizeof(md_entry)) >
        (VHDX_METADATA_TABLE_MAX_SIZE - offset)) {
        ret = -EINVAL;
        goto exit;
    }

    /*
     * Walk the table entries.  Each known item may appear only once: its
     * bit in metadata_entries.present is tested before the entry is
     * stored, and a repeat is rejected with -EINVAL.
     */
    for (i = 0; i < s->metadata_hdr.entry_count; i++) {
        memcpy(&md_entry, buffer + offset, sizeof(md_entry));
        offset += sizeof(md_entry);

        vhdx_metadata_entry_le_import(&md_entry);

        if (guid_eq(md_entry.item_id, file_param_guid)) {
            if (s->metadata_entries.present & META_FILE_PARAMETER_PRESENT) {
                ret = -EINVAL;
                goto exit;
            }
            s->metadata_entries.file_parameters_entry = md_entry;
            s->metadata_entries.present |= META_FILE_PARAMETER_PRESENT;
            continue;
        }

        if (guid_eq(md_entry.item_id, virtual_size_guid)) {
            if (s->metadata_entries.present & META_VIRTUAL_DISK_SIZE_PRESENT) {
                ret = -EINVAL;
                goto exit;
            }
            s->metadata_entries.virtual_disk_size_entry = md_entry;
            s->metadata_entries.present |= META_VIRTUAL_DISK_SIZE_PRESENT;
            continue;
        }

        if (guid_eq(md_entry.item_id, page83_guid)) {
            if (s->metadata_entries.present & META_PAGE_83_PRESENT) {
                ret = -EINVAL;
                goto exit;
            }
            s->metadata_entries.page83_data_entry = md_entry;
            s->metadata_entries.present |= META_PAGE_83_PRESENT;
            continue;
        }

        if (guid_eq(md_entry.item_id, logical_sector_guid)) {
            if (s->metadata_entries.present &
                META_LOGICAL_SECTOR_SIZE_PRESENT) {
                ret = -EINVAL;
                goto exit;
            }
            s->metadata_entries.logical_sector_size_entry = md_entry;
            s->metadata_entries.present |=
                META_LOGICAL_SECTOR_SIZE_PRESENT;
            continue;
        }

        if (guid_eq(md_entry.item_id, phys_sector_guid)) {
            if (s->metadata_entries.present & META_PHYS_SECTOR_SIZE_PRESENT) {
                ret = -EINVAL;
                goto exit;
            }
            s->metadata_entries.phys_sector_size_entry = md_entry;
            s->metadata_entries.present |= META_PHYS_SECTOR_SIZE_PRESENT;
            continue;
        }

        if (guid_eq(md_entry.item_id, parent_locator_guid)) {
            if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) {
                ret = -EINVAL;
                goto exit;
            }
            s->metadata_entries.parent_locator_entry = md_entry;
            s->metadata_entries.present |= META_PARENT_LOCATOR_PRESENT;
            continue;
        }

        /* reached only for item ids that matched none of the above */
        if (md_entry.data_bits & VHDX_META_FLAGS_IS_REQUIRED) {
            /* cannot read vhdx file - required metadata entry that
             * we do not understand.  per spec, we must fail to open */
            ret = -ENOTSUP;
            goto exit;
        }
    }

    /*
     * Exact comparison, not a subset test: any 'present' value other than
     * META_ALL_PRESENT is refused.
     * NOTE(review): META_ALL_PRESENT is defined outside this view; if it
     * does not include META_PARENT_LOCATOR_PRESENT, an image carrying a
     * parent locator is already rejected here - confirm against vhdx.h.
     */
    if (s->metadata_entries.present != META_ALL_PRESENT) {
        ret = -ENOTSUP;
        goto exit;
    }

    /* item offsets are applied relative to the metadata region start */
    ret = bdrv_pread(bs->file,
                     s->metadata_entries.file_parameters_entry.offset
                                         + s->metadata_rt.file_offset,
                     sizeof(s->params),
                     &s->params,
                     0);

    if (ret < 0) {
        goto exit;
    }

    /* on-disk values are little-endian */
    s->params.block_size = le32_to_cpu(s->params.block_size);
    s->params.data_bits = le32_to_cpu(s->params.data_bits);


    /* We now have the file parameters, so we can tell if this is a
     * differencing file (i.e. has_parent), is dynamic or fixed
     * sized (leave_blocks_allocated), and the block size */

    /* The parent locator required iff the file parameters has_parent set */
    if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
        if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) {
            /* TODO: parse parent locator fields */
            ret = -ENOTSUP; /* temp, until differencing files are supported */
            goto exit;
        } else {
            /* if has_parent is set, but there is no parent locator present,
             * then that is an invalid combination */
            ret = -EINVAL;
            goto exit;
        }
    }

    /* determine virtual disk size, logical sector size,
     * and phys sector size */

    ret = bdrv_pread(bs->file,
                     s->metadata_entries.virtual_disk_size_entry.offset
                                           + s->metadata_rt.file_offset,
                     sizeof(uint64_t),
                     &s->virtual_disk_size,
                     0);
    if (ret < 0) {
        goto exit;
    }
    ret = bdrv_pread(bs->file,
                     s->metadata_entries.logical_sector_size_entry.offset
                                             + s->metadata_rt.file_offset,
                     sizeof(uint32_t),
                     &s->logical_sector_size,
                     0);
    if (ret < 0) {
        goto exit;
    }
    ret = bdrv_pread(bs->file,
                     s->metadata_entries.phys_sector_size_entry.offset
                                          + s->metadata_rt.file_offset,
                     sizeof(uint32_t),
                     &s->physical_sector_size,
                     0);
    if (ret < 0) {
        goto exit;
    }

    s->virtual_disk_size = le64_to_cpu(s->virtual_disk_size);
    s->logical_sector_size = le32_to_cpu(s->logical_sector_size);
    s->physical_sector_size = le32_to_cpu(s->physical_sector_size);

    if (s->params.block_size < VHDX_BLOCK_SIZE_MIN ||
        s->params.block_size > VHDX_BLOCK_SIZE_MAX) {
        ret = -EINVAL;
        goto exit;
    }

    /* Currently we only support 512 */
    if (s->logical_sector_size != 512) {
        ret = -ENOTSUP;
        goto exit;
    }

    /* Both block_size and sector_size are guaranteed powers of 2, below.
       Due to range checks above, s->sectors_per_block can never be < 256 */
    s->sectors_per_block = s->params.block_size / s->logical_sector_size;
    s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
                     (uint64_t)s->logical_sector_size /
                     (uint64_t)s->params.block_size;

    /* These values are ones we will want to use for division / multiplication
     * later on, and they are all guaranteed (per the spec) to be powers of 2,
     * so we can take advantage of that for shift operations during
     * reads/writes */
    if (s->logical_sector_size & (s->logical_sector_size - 1)) {
        ret = -EINVAL;
        goto exit;
    }
    if (s->sectors_per_block & (s->sectors_per_block - 1)) {
        ret = -EINVAL;
        goto exit;
    }
    if (s->chunk_ratio & (s->chunk_ratio - 1)) {
        ret = -EINVAL;
        goto exit;
    }
    s->block_size = s->params.block_size;
    if (s->block_size & (s->block_size - 1)) {
        ret = -EINVAL;
        goto exit;
    }

    vhdx_set_shift_bits(s);

    ret = 0;

exit:
    /* single cleanup point: the scratch buffer is released on every path */
    qemu_vfree(buffer);
    return ret;
}
/*
 * Sanity-check the BAT entries held in s->bat[0 .. s->bat_entries - 1].
 *
 * Only entries whose state bits equal PAYLOAD_BLOCK_FULLY_PRESENT are
 * examined.  For each of them the file offset is checked for overflow,
 * checked against the current length of bs->file, and passed to
 * vhdx_region_check().
 *
 * @errcnt: if NULL, checking stops at the first problem found.  Otherwise
 *          *errcnt is incremented once per problem and the scan continues.
 *
 * Returns 0 if no problem was found, -EINVAL if at least one was, or the
 * negative value from bdrv_getlength() if the file size is unavailable.
 */
static int vhdx_check_bat_entries(BlockDriverState *bs, int *errcnt)
{
    BDRVVHDXState *s = bs->opaque;
    int64_t image_file_size = bdrv_getlength(bs->file->bs);
    /* countdown used to tell payload entries from sector bitmap entries */
    uint64_t payblocks = s->chunk_ratio;
    uint64_t i;
    int ret = 0;

    if (image_file_size < 0) {
        error_report("Could not determinate VHDX image file size.");
        return image_file_size;
    }

    for (i = 0; i < s->bat_entries; i++) {
        if ((s->bat[i] & VHDX_BAT_STATE_BIT_MASK) ==
            PAYLOAD_BLOCK_FULLY_PRESENT) {
            uint64_t offset = s->bat[i] & VHDX_BAT_FILE_OFF_MASK;
            /*
             * Allow that the last block exists only partially. The VHDX spec
             * states that the image file can only grow in blocksize increments,
             * but QEMU created images with partial last blocks in the past.
             */
            uint32_t block_length = MIN(s->block_size,
                bs->total_sectors * BDRV_SECTOR_SIZE - i * s->block_size);
            /*
             * Check for BAT entry overflow.
             */
            if (offset > INT64_MAX - s->block_size) {
                error_report("VHDX BAT entry %" PRIu64 " offset overflow.", i);
                ret = -EINVAL;
                if (!errcnt) {
                    break;
                }
                (*errcnt)++;
            }
            /*
             * Check if fully allocated BAT entries do not reside after
             * end of the image file.
             */
            if (offset >= image_file_size) {
                error_report("VHDX BAT entry %" PRIu64 " start offset %" PRIu64
                             " points after end of file (%" PRIi64 "). Image"
                             " has probably been truncated.",
                             i, offset, image_file_size);
                ret = -EINVAL;
                if (!errcnt) {
                    break;
                }
                (*errcnt)++;
            } else if (offset + block_length > image_file_size) {
                error_report("VHDX BAT entry %" PRIu64 " end offset %" PRIu64
                             " points after end of file (%" PRIi64 "). Image"
                             " has probably been truncated.",
                             i, offset + block_length - 1, image_file_size);
                ret = -EINVAL;
                if (!errcnt) {
                    break;
                }
                (*errcnt)++;
            }

            /*
             * verify populated BAT field file offsets against
             * region table and log entries
             *
             * NOTE(review): payblocks is only decremented inside the
             * FULLY_PRESENT branch, so it counts fully-present entries
             * rather than BAT index positions; likewise block_length above
             * is derived from the raw BAT index i.  Confirm both are
             * intended for images with more than chunk_ratio blocks.
             */
            if (payblocks--) {
                /* payload bat entries */
                int ret2;
                ret2 = vhdx_region_check(s, offset, s->block_size);
                if (ret2 < 0) {
                    ret = -EINVAL;
                    if (!errcnt) {
                        break;
                    }
                    (*errcnt)++;
                }
            } else {
                /* countdown hit zero: restart it for the next chunk */
                payblocks = s->chunk_ratio;
                /*
                 * Once differencing files are supported, verify sector bitmap
                 * blocks here
                 */
            }
        }
    }

    return ret;
}
Cody migrate_del_blocker(s->migration_blocker); 989c46415afSJeff Cody error_free(s->migration_blocker); 9900a43a1b5SJeff Cody qemu_vfree(s->log.hdr); 9910a43a1b5SJeff Cody s->log.hdr = NULL; 9921a848fd4SJeff Cody vhdx_region_unregister_all(s); 993c46415afSJeff Cody } 994c46415afSJeff Cody 995015a1036SMax Reitz static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, 996015a1036SMax Reitz Error **errp) 997e8d4e5ffSJeff Cody { 998e8d4e5ffSJeff Cody BDRVVHDXState *s = bs->opaque; 999e8d4e5ffSJeff Cody int ret = 0; 1000e8d4e5ffSJeff Cody uint32_t i; 1001e8d4e5ffSJeff Cody uint64_t signature; 10026890aad4SPaolo Bonzini Error *local_err = NULL; 1003e8d4e5ffSJeff Cody 100483930780SVladimir Sementsov-Ogievskiy ret = bdrv_open_file_child(NULL, options, "file", bs, errp); 100583930780SVladimir Sementsov-Ogievskiy if (ret < 0) { 100683930780SVladimir Sementsov-Ogievskiy return ret; 10074e4bf5c4SKevin Wolf } 10084e4bf5c4SKevin Wolf 1009e8d4e5ffSJeff Cody s->bat = NULL; 1010c3906c5eSJeff Cody s->first_visible_write = true; 1011e8d4e5ffSJeff Cody 1012e8d4e5ffSJeff Cody qemu_co_mutex_init(&s->lock); 10131a848fd4SJeff Cody QLIST_INIT(&s->regions); 1014e8d4e5ffSJeff Cody 1015e8d4e5ffSJeff Cody /* validate the file signature */ 101632cc71deSAlberto Faria ret = bdrv_pread(bs->file, 0, sizeof(uint64_t), &signature, 0); 1017e8d4e5ffSJeff Cody if (ret < 0) { 1018e8d4e5ffSJeff Cody goto fail; 1019e8d4e5ffSJeff Cody } 1020e8d4e5ffSJeff Cody if (memcmp(&signature, "vhdxfile", 8)) { 1021e8d4e5ffSJeff Cody ret = -EINVAL; 1022e8d4e5ffSJeff Cody goto fail; 1023e8d4e5ffSJeff Cody } 1024e8d4e5ffSJeff Cody 10254f18b782SJeff Cody /* This is used for any header updates, for the file_write_guid. 
10264f18b782SJeff Cody * The spec dictates that a new value should be used for the first 10274f18b782SJeff Cody * header update */ 10284f18b782SJeff Cody vhdx_guid_generate(&s->session_guid); 10294f18b782SJeff Cody 10306890aad4SPaolo Bonzini vhdx_parse_header(bs, s, &local_err); 10316890aad4SPaolo Bonzini if (local_err != NULL) { 10326890aad4SPaolo Bonzini error_propagate(errp, local_err); 10336890aad4SPaolo Bonzini ret = -EINVAL; 1034e8d4e5ffSJeff Cody goto fail; 1035e8d4e5ffSJeff Cody } 1036e8d4e5ffSJeff Cody 10377e30e6a6SJeff Cody ret = vhdx_parse_log(bs, s, &s->log_replayed_on_open, errp); 10380a43a1b5SJeff Cody if (ret < 0) { 1039e8d4e5ffSJeff Cody goto fail; 1040e8d4e5ffSJeff Cody } 1041e8d4e5ffSJeff Cody 1042e8d4e5ffSJeff Cody ret = vhdx_open_region_tables(bs, s); 10430a43a1b5SJeff Cody if (ret < 0) { 1044e8d4e5ffSJeff Cody goto fail; 1045e8d4e5ffSJeff Cody } 1046e8d4e5ffSJeff Cody 1047e8d4e5ffSJeff Cody ret = vhdx_parse_metadata(bs, s); 10480a43a1b5SJeff Cody if (ret < 0) { 1049e8d4e5ffSJeff Cody goto fail; 1050e8d4e5ffSJeff Cody } 10510a43a1b5SJeff Cody 1052e8d4e5ffSJeff Cody s->block_size = s->params.block_size; 1053e8d4e5ffSJeff Cody 1054e8d4e5ffSJeff Cody /* the VHDX spec dictates that virtual_disk_size is always a multiple of 1055e8d4e5ffSJeff Cody * logical_sector_size */ 1056e8d4e5ffSJeff Cody bs->total_sectors = s->virtual_disk_size >> s->logical_sector_size_bits; 1057e8d4e5ffSJeff Cody 10581e74a971SJeff Cody vhdx_calc_bat_entries(s); 1059e8d4e5ffSJeff Cody 1060e8d4e5ffSJeff Cody s->bat_offset = s->bat_rt.file_offset; 1061e8d4e5ffSJeff Cody 1062e8d4e5ffSJeff Cody if (s->bat_entries > s->bat_rt.length / sizeof(VHDXBatEntry)) { 1063e8d4e5ffSJeff Cody /* BAT allocation is not large enough for all entries */ 1064e8d4e5ffSJeff Cody ret = -EINVAL; 1065e8d4e5ffSJeff Cody goto fail; 1066e8d4e5ffSJeff Cody } 1067e8d4e5ffSJeff Cody 10686e9d290bSJeff Cody /* s->bat is freed in vhdx_close() */ 10699a4f4c31SKevin Wolf s->bat = qemu_try_blockalign(bs->file->bs, 
s->bat_rt.length); 1070a67e128aSKevin Wolf if (s->bat == NULL) { 1071a67e128aSKevin Wolf ret = -ENOMEM; 1072a67e128aSKevin Wolf goto fail; 1073a67e128aSKevin Wolf } 1074a67e128aSKevin Wolf 107532cc71deSAlberto Faria ret = bdrv_pread(bs->file, s->bat_offset, s->bat_rt.length, s->bat, 0); 1076e8d4e5ffSJeff Cody if (ret < 0) { 1077e8d4e5ffSJeff Cody goto fail; 1078e8d4e5ffSJeff Cody } 1079e8d4e5ffSJeff Cody 10806caaad46SPeter Lieven /* endian convert populated BAT field entires */ 1081e8d4e5ffSJeff Cody for (i = 0; i < s->bat_entries; i++) { 10821229e46dSPeter Maydell s->bat[i] = le64_to_cpu(s->bat[i]); 10836caaad46SPeter Lieven } 10846caaad46SPeter Lieven 10856caaad46SPeter Lieven if (!(flags & BDRV_O_CHECK)) { 10866caaad46SPeter Lieven ret = vhdx_check_bat_entries(bs, NULL); 10871a848fd4SJeff Cody if (ret < 0) { 10881a848fd4SJeff Cody goto fail; 10891a848fd4SJeff Cody } 10901a848fd4SJeff Cody } 1091e8d4e5ffSJeff Cody 1092fe44dc91SAshijeet Acharya /* Disable migration when VHDX images are used */ 1093fe44dc91SAshijeet Acharya error_setg(&s->migration_blocker, "The vhdx format used by node '%s' " 1094fe44dc91SAshijeet Acharya "does not support live migration", 1095fe44dc91SAshijeet Acharya bdrv_get_device_or_node_name(bs)); 1096386f6c07SMarkus Armbruster ret = migrate_add_blocker(s->migration_blocker, errp); 1097386f6c07SMarkus Armbruster if (ret < 0) { 1098fe44dc91SAshijeet Acharya error_free(s->migration_blocker); 1099fe44dc91SAshijeet Acharya goto fail; 1100fe44dc91SAshijeet Acharya } 1101fe44dc91SAshijeet Acharya 1102d92aa883SJeff Cody /* TODO: differencing files */ 1103e8d4e5ffSJeff Cody 1104e8d4e5ffSJeff Cody return 0; 1105e8d4e5ffSJeff Cody fail: 11060a43a1b5SJeff Cody vhdx_close(bs); 1107e8d4e5ffSJeff Cody return ret; 1108e8d4e5ffSJeff Cody } 1109e8d4e5ffSJeff Cody 1110e8d4e5ffSJeff Cody static int vhdx_reopen_prepare(BDRVReopenState *state, 1111e8d4e5ffSJeff Cody BlockReopenQueue *queue, Error **errp) 1112e8d4e5ffSJeff Cody { 1113e8d4e5ffSJeff Cody return 0; 
1114e8d4e5ffSJeff Cody } 1115e8d4e5ffSJeff Cody 1116e8d4e5ffSJeff Cody 1117059e2fbbSJeff Cody /* 1118059e2fbbSJeff Cody * Perform sector to block offset translations, to get various 1119059e2fbbSJeff Cody * sector and file offsets into the image. See VHDXSectorInfo 1120059e2fbbSJeff Cody */ 1121059e2fbbSJeff Cody static void vhdx_block_translate(BDRVVHDXState *s, int64_t sector_num, 1122059e2fbbSJeff Cody int nb_sectors, VHDXSectorInfo *sinfo) 1123059e2fbbSJeff Cody { 1124059e2fbbSJeff Cody uint32_t block_offset; 1125059e2fbbSJeff Cody 1126059e2fbbSJeff Cody sinfo->bat_idx = sector_num >> s->sectors_per_block_bits; 1127059e2fbbSJeff Cody /* effectively a modulo - this gives us the offset into the block 1128059e2fbbSJeff Cody * (in sector sizes) for our sector number */ 1129059e2fbbSJeff Cody block_offset = sector_num - (sinfo->bat_idx << s->sectors_per_block_bits); 1130059e2fbbSJeff Cody /* the chunk ratio gives us the interleaving of the sector 1131059e2fbbSJeff Cody * bitmaps, so we need to advance our page block index by the 1132059e2fbbSJeff Cody * sector bitmaps entry number */ 1133059e2fbbSJeff Cody sinfo->bat_idx += sinfo->bat_idx >> s->chunk_ratio_bits; 1134059e2fbbSJeff Cody 1135059e2fbbSJeff Cody /* the number of sectors we can read/write in this cycle */ 1136059e2fbbSJeff Cody sinfo->sectors_avail = s->sectors_per_block - block_offset; 1137059e2fbbSJeff Cody 1138059e2fbbSJeff Cody sinfo->bytes_left = sinfo->sectors_avail << s->logical_sector_size_bits; 1139059e2fbbSJeff Cody 1140059e2fbbSJeff Cody if (sinfo->sectors_avail > nb_sectors) { 1141059e2fbbSJeff Cody sinfo->sectors_avail = nb_sectors; 1142059e2fbbSJeff Cody } 1143059e2fbbSJeff Cody 1144059e2fbbSJeff Cody sinfo->bytes_avail = sinfo->sectors_avail << s->logical_sector_size_bits; 1145059e2fbbSJeff Cody 11460b7da092SJeff Cody sinfo->file_offset = s->bat[sinfo->bat_idx] & VHDX_BAT_FILE_OFF_MASK; 1147059e2fbbSJeff Cody 1148059e2fbbSJeff Cody sinfo->block_offset = block_offset << 
s->logical_sector_size_bits; 1149059e2fbbSJeff Cody 1150059e2fbbSJeff Cody /* The file offset must be past the header section, so must be > 0 */ 1151059e2fbbSJeff Cody if (sinfo->file_offset == 0) { 1152059e2fbbSJeff Cody return; 1153059e2fbbSJeff Cody } 1154059e2fbbSJeff Cody 1155059e2fbbSJeff Cody /* block offset is the offset in vhdx logical sectors, in 1156059e2fbbSJeff Cody * the payload data block. Convert that to a byte offset 1157059e2fbbSJeff Cody * in the block, and add in the payload data block offset 1158059e2fbbSJeff Cody * in the file, in bytes, to get the final read address */ 1159059e2fbbSJeff Cody 1160059e2fbbSJeff Cody sinfo->file_offset += sinfo->block_offset; 1161059e2fbbSJeff Cody } 1162059e2fbbSJeff Cody 1163059e2fbbSJeff Cody 116497b00e28SPaolo Bonzini static int vhdx_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 116597b00e28SPaolo Bonzini { 116697b00e28SPaolo Bonzini BDRVVHDXState *s = bs->opaque; 116797b00e28SPaolo Bonzini 116897b00e28SPaolo Bonzini bdi->cluster_size = s->block_size; 116997b00e28SPaolo Bonzini 117097b00e28SPaolo Bonzini return 0; 117197b00e28SPaolo Bonzini } 117297b00e28SPaolo Bonzini 1173059e2fbbSJeff Cody 1174e8d4e5ffSJeff Cody static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num, 1175e8d4e5ffSJeff Cody int nb_sectors, QEMUIOVector *qiov) 1176e8d4e5ffSJeff Cody { 1177059e2fbbSJeff Cody BDRVVHDXState *s = bs->opaque; 1178059e2fbbSJeff Cody int ret = 0; 1179059e2fbbSJeff Cody VHDXSectorInfo sinfo; 1180059e2fbbSJeff Cody uint64_t bytes_done = 0; 1181059e2fbbSJeff Cody QEMUIOVector hd_qiov; 1182059e2fbbSJeff Cody 1183059e2fbbSJeff Cody qemu_iovec_init(&hd_qiov, qiov->niov); 1184059e2fbbSJeff Cody 1185059e2fbbSJeff Cody qemu_co_mutex_lock(&s->lock); 1186059e2fbbSJeff Cody 1187059e2fbbSJeff Cody while (nb_sectors > 0) { 1188059e2fbbSJeff Cody /* We are a differencing file, so we need to inspect the sector bitmap 1189059e2fbbSJeff Cody * to see if we have the data or not */ 1190059e2fbbSJeff Cody 
if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) { 1191059e2fbbSJeff Cody /* not supported yet */ 1192059e2fbbSJeff Cody ret = -ENOTSUP; 1193059e2fbbSJeff Cody goto exit; 1194059e2fbbSJeff Cody } else { 1195059e2fbbSJeff Cody vhdx_block_translate(s, sector_num, nb_sectors, &sinfo); 1196059e2fbbSJeff Cody 1197059e2fbbSJeff Cody qemu_iovec_reset(&hd_qiov); 1198059e2fbbSJeff Cody qemu_iovec_concat(&hd_qiov, qiov, bytes_done, sinfo.bytes_avail); 1199059e2fbbSJeff Cody 1200059e2fbbSJeff Cody /* check the payload block state */ 1201059e2fbbSJeff Cody switch (s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK) { 1202059e2fbbSJeff Cody case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */ 12030571df44SJeff Cody case PAYLOAD_BLOCK_UNDEFINED: 12040571df44SJeff Cody case PAYLOAD_BLOCK_UNMAPPED: 1205a9d1e9daSJeff Cody case PAYLOAD_BLOCK_UNMAPPED_v095: 1206059e2fbbSJeff Cody case PAYLOAD_BLOCK_ZERO: 1207059e2fbbSJeff Cody /* return zero */ 1208059e2fbbSJeff Cody qemu_iovec_memset(&hd_qiov, 0, 0, sinfo.bytes_avail); 1209059e2fbbSJeff Cody break; 1210d92aa883SJeff Cody case PAYLOAD_BLOCK_FULLY_PRESENT: 1211059e2fbbSJeff Cody qemu_co_mutex_unlock(&s->lock); 12123a7404b3SEric Blake ret = bdrv_co_preadv(bs->file, sinfo.file_offset, 12133a7404b3SEric Blake sinfo.sectors_avail * BDRV_SECTOR_SIZE, 12143a7404b3SEric Blake &hd_qiov, 0); 1215059e2fbbSJeff Cody qemu_co_mutex_lock(&s->lock); 1216059e2fbbSJeff Cody if (ret < 0) { 1217059e2fbbSJeff Cody goto exit; 1218059e2fbbSJeff Cody } 1219059e2fbbSJeff Cody break; 1220059e2fbbSJeff Cody case PAYLOAD_BLOCK_PARTIALLY_PRESENT: 1221059e2fbbSJeff Cody /* we don't yet support difference files, fall through 1222059e2fbbSJeff Cody * to error */ 1223059e2fbbSJeff Cody default: 1224059e2fbbSJeff Cody ret = -EIO; 1225059e2fbbSJeff Cody goto exit; 1226059e2fbbSJeff Cody break; 1227059e2fbbSJeff Cody } 1228059e2fbbSJeff Cody nb_sectors -= sinfo.sectors_avail; 1229059e2fbbSJeff Cody sector_num += sinfo.sectors_avail; 1230059e2fbbSJeff Cody bytes_done += 
sinfo.bytes_avail; 1231059e2fbbSJeff Cody } 1232059e2fbbSJeff Cody } 1233059e2fbbSJeff Cody ret = 0; 1234059e2fbbSJeff Cody exit: 1235059e2fbbSJeff Cody qemu_co_mutex_unlock(&s->lock); 1236059e2fbbSJeff Cody qemu_iovec_destroy(&hd_qiov); 1237059e2fbbSJeff Cody return ret; 1238e8d4e5ffSJeff Cody } 1239e8d4e5ffSJeff Cody 1240d92aa883SJeff Cody /* 1241d92aa883SJeff Cody * Allocate a new payload block at the end of the file. 1242d92aa883SJeff Cody * 1243dbc636e7SEric Blake * Allocation will happen at 1MB alignment inside the file. 1244dbc636e7SEric Blake * 1245dbc636e7SEric Blake * If @need_zero is set on entry but not cleared on return, then truncation 1246dbc636e7SEric Blake * could not guarantee that the new portion reads as zero, and the caller 1247dbc636e7SEric Blake * will take care of it instead. 1248d92aa883SJeff Cody * 1249d92aa883SJeff Cody * Returns the file offset start of the new payload block 1250d92aa883SJeff Cody */ 1251d92aa883SJeff Cody static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, 1252dbc636e7SEric Blake uint64_t *new_offset, bool *need_zero) 1253d92aa883SJeff Cody { 12543f910692SJeff Cody int64_t current_len; 12553f910692SJeff Cody 12563f910692SJeff Cody current_len = bdrv_getlength(bs->file->bs); 12573f910692SJeff Cody if (current_len < 0) { 12583f910692SJeff Cody return current_len; 12593f910692SJeff Cody } 12603f910692SJeff Cody 12613f910692SJeff Cody *new_offset = current_len; 1262e8d4e5ffSJeff Cody 1263d92aa883SJeff Cody /* per the spec, the address for a block is in units of 1MB */ 12640cb98af2SStefano Garzarella *new_offset = ROUND_UP(*new_offset, 1 * MiB); 126527539ac5SJeff Cody if (*new_offset > INT64_MAX) { 126627539ac5SJeff Cody return -EINVAL; 126727539ac5SJeff Cody } 1268d92aa883SJeff Cody 1269dbc636e7SEric Blake if (*need_zero) { 1270dbc636e7SEric Blake int ret; 1271dbc636e7SEric Blake 1272dbc636e7SEric Blake ret = bdrv_truncate(bs->file, *new_offset + s->block_size, false, 1273dbc636e7SEric Blake 
PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE, NULL); 1274dbc636e7SEric Blake if (ret != -ENOTSUP) { 1275dbc636e7SEric Blake *need_zero = false; 1276dbc636e7SEric Blake return ret; 1277dbc636e7SEric Blake } 1278dbc636e7SEric Blake } 1279dbc636e7SEric Blake 1280c80d8b06SMax Reitz return bdrv_truncate(bs->file, *new_offset + s->block_size, false, 12817b8e4857SKevin Wolf PREALLOC_MODE_OFF, 0, NULL); 1282d92aa883SJeff Cody } 1283d92aa883SJeff Cody 1284d92aa883SJeff Cody /* 1285d92aa883SJeff Cody * Update the BAT table entry with the new file offset, and the new entry 1286d92aa883SJeff Cody * state */ 1287d92aa883SJeff Cody static void vhdx_update_bat_table_entry(BlockDriverState *bs, BDRVVHDXState *s, 1288d92aa883SJeff Cody VHDXSectorInfo *sinfo, 1289d92aa883SJeff Cody uint64_t *bat_entry_le, 1290d92aa883SJeff Cody uint64_t *bat_offset, int state) 1291d92aa883SJeff Cody { 1292d92aa883SJeff Cody /* The BAT entry is a uint64, with 44 bits for the file offset in units of 1293d92aa883SJeff Cody * 1MB, and 3 bits for the block state. */ 1294cdf9634bSJeff Cody if ((state == PAYLOAD_BLOCK_ZERO) || 1295cdf9634bSJeff Cody (state == PAYLOAD_BLOCK_UNDEFINED) || 1296cdf9634bSJeff Cody (state == PAYLOAD_BLOCK_NOT_PRESENT) || 1297cdf9634bSJeff Cody (state == PAYLOAD_BLOCK_UNMAPPED)) { 1298cdf9634bSJeff Cody s->bat[sinfo->bat_idx] = 0; /* For PAYLOAD_BLOCK_ZERO, the 1299cdf9634bSJeff Cody FileOffsetMB field is denoted as 1300cdf9634bSJeff Cody 'reserved' in the v1.0 spec. 
If it is 1301cdf9634bSJeff Cody non-zero, MS Hyper-V will fail to read 1302cdf9634bSJeff Cody the disk image */ 1303cdf9634bSJeff Cody } else { 13040b7da092SJeff Cody s->bat[sinfo->bat_idx] = sinfo->file_offset; 1305cdf9634bSJeff Cody } 1306d92aa883SJeff Cody 1307d92aa883SJeff Cody s->bat[sinfo->bat_idx] |= state & VHDX_BAT_STATE_BIT_MASK; 1308d92aa883SJeff Cody 1309d92aa883SJeff Cody *bat_entry_le = cpu_to_le64(s->bat[sinfo->bat_idx]); 1310d92aa883SJeff Cody *bat_offset = s->bat_offset + sinfo->bat_idx * sizeof(VHDXBatEntry); 1311d92aa883SJeff Cody 1312d92aa883SJeff Cody } 1313e8d4e5ffSJeff Cody 1314c3906c5eSJeff Cody /* Per the spec, on the first write of guest-visible data to the file the 1315c3906c5eSJeff Cody * data write guid must be updated in the header */ 1316c3906c5eSJeff Cody int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s) 1317c3906c5eSJeff Cody { 1318c3906c5eSJeff Cody int ret = 0; 1319c3906c5eSJeff Cody if (s->first_visible_write) { 1320c3906c5eSJeff Cody s->first_visible_write = false; 1321c3906c5eSJeff Cody ret = vhdx_update_headers(bs, s, true, NULL); 1322c3906c5eSJeff Cody } 1323c3906c5eSJeff Cody return ret; 1324c3906c5eSJeff Cody } 1325c3906c5eSJeff Cody 1326e8d4e5ffSJeff Cody static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, 1327e18a58b4SEric Blake int nb_sectors, QEMUIOVector *qiov, 1328e18a58b4SEric Blake int flags) 1329e8d4e5ffSJeff Cody { 1330d92aa883SJeff Cody int ret = -ENOTSUP; 1331d92aa883SJeff Cody BDRVVHDXState *s = bs->opaque; 1332d92aa883SJeff Cody VHDXSectorInfo sinfo; 1333d92aa883SJeff Cody uint64_t bytes_done = 0; 1334d92aa883SJeff Cody uint64_t bat_entry = 0; 1335d92aa883SJeff Cody uint64_t bat_entry_offset = 0; 1336d92aa883SJeff Cody QEMUIOVector hd_qiov; 1337d92aa883SJeff Cody struct iovec iov1 = { 0 }; 1338d92aa883SJeff Cody struct iovec iov2 = { 0 }; 1339d92aa883SJeff Cody int sectors_to_write; 1340d92aa883SJeff Cody int bat_state; 1341d92aa883SJeff Cody uint64_t 
bat_prior_offset = 0; 1342d92aa883SJeff Cody bool bat_update = false; 1343d92aa883SJeff Cody 1344e18a58b4SEric Blake assert(!flags); 1345d92aa883SJeff Cody qemu_iovec_init(&hd_qiov, qiov->niov); 1346d92aa883SJeff Cody 1347d92aa883SJeff Cody qemu_co_mutex_lock(&s->lock); 1348d92aa883SJeff Cody 1349d92aa883SJeff Cody ret = vhdx_user_visible_write(bs, s); 1350d92aa883SJeff Cody if (ret < 0) { 1351d92aa883SJeff Cody goto exit; 1352d92aa883SJeff Cody } 1353d92aa883SJeff Cody 1354d92aa883SJeff Cody while (nb_sectors > 0) { 1355d92aa883SJeff Cody bool use_zero_buffers = false; 1356d92aa883SJeff Cody bat_update = false; 1357d92aa883SJeff Cody if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) { 1358d92aa883SJeff Cody /* not supported yet */ 1359d92aa883SJeff Cody ret = -ENOTSUP; 1360d92aa883SJeff Cody goto exit; 1361d92aa883SJeff Cody } else { 1362d92aa883SJeff Cody vhdx_block_translate(s, sector_num, nb_sectors, &sinfo); 1363d92aa883SJeff Cody sectors_to_write = sinfo.sectors_avail; 1364d92aa883SJeff Cody 1365d92aa883SJeff Cody qemu_iovec_reset(&hd_qiov); 1366d92aa883SJeff Cody /* check the payload block state */ 1367d92aa883SJeff Cody bat_state = s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK; 1368d92aa883SJeff Cody switch (bat_state) { 1369d92aa883SJeff Cody case PAYLOAD_BLOCK_ZERO: 1370d92aa883SJeff Cody /* in this case, we need to preserve zero writes for 1371d92aa883SJeff Cody * data that is not part of this write, so we must pad 1372d92aa883SJeff Cody * the rest of the buffer to zeroes */ 1373d92aa883SJeff Cody use_zero_buffers = true; 1374dbc636e7SEric Blake /* fall through */ 1375dbc636e7SEric Blake case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */ 1376dbc636e7SEric Blake case PAYLOAD_BLOCK_UNMAPPED: 1377dbc636e7SEric Blake case PAYLOAD_BLOCK_UNMAPPED_v095: 1378dbc636e7SEric Blake case PAYLOAD_BLOCK_UNDEFINED: 1379dbc636e7SEric Blake bat_prior_offset = sinfo.file_offset; 1380dbc636e7SEric Blake ret = vhdx_allocate_block(bs, s, &sinfo.file_offset, 
1381dbc636e7SEric Blake &use_zero_buffers); 1382dbc636e7SEric Blake if (ret < 0) { 1383dbc636e7SEric Blake goto exit; 1384dbc636e7SEric Blake } 1385dbc636e7SEric Blake /* 1386dbc636e7SEric Blake * once we support differencing files, this may also be 1387dbc636e7SEric Blake * partially present 1388dbc636e7SEric Blake */ 1389dbc636e7SEric Blake /* update block state to the newly specified state */ 1390dbc636e7SEric Blake vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry, 1391dbc636e7SEric Blake &bat_entry_offset, 1392dbc636e7SEric Blake PAYLOAD_BLOCK_FULLY_PRESENT); 1393dbc636e7SEric Blake bat_update = true; 1394dbc636e7SEric Blake /* 1395dbc636e7SEric Blake * Since we just allocated a block, file_offset is the 1396dbc636e7SEric Blake * beginning of the payload block. It needs to be the 1397dbc636e7SEric Blake * write address, which includes the offset into the 1398dbc636e7SEric Blake * block, unless the entire block needs to read as 1399dbc636e7SEric Blake * zeroes but truncation was not able to provide them, 1400dbc636e7SEric Blake * in which case we need to fill in the rest. 
1401dbc636e7SEric Blake */ 1402dbc636e7SEric Blake if (!use_zero_buffers) { 1403dbc636e7SEric Blake sinfo.file_offset += sinfo.block_offset; 1404dbc636e7SEric Blake } else { 1405d92aa883SJeff Cody /* zero fill the front, if any */ 1406d92aa883SJeff Cody if (sinfo.block_offset) { 1407d92aa883SJeff Cody iov1.iov_len = sinfo.block_offset; 1408d92aa883SJeff Cody iov1.iov_base = qemu_blockalign(bs, iov1.iov_len); 1409d92aa883SJeff Cody memset(iov1.iov_base, 0, iov1.iov_len); 1410d92aa883SJeff Cody qemu_iovec_concat_iov(&hd_qiov, &iov1, 1, 0, 1411d1a126c5SKevin Wolf iov1.iov_len); 1412d92aa883SJeff Cody sectors_to_write += iov1.iov_len >> BDRV_SECTOR_BITS; 1413d92aa883SJeff Cody } 1414d92aa883SJeff Cody 1415d92aa883SJeff Cody /* our actual data */ 1416d92aa883SJeff Cody qemu_iovec_concat(&hd_qiov, qiov, bytes_done, 1417d92aa883SJeff Cody sinfo.bytes_avail); 1418d92aa883SJeff Cody 1419d92aa883SJeff Cody /* zero fill the back, if any */ 1420d92aa883SJeff Cody if ((sinfo.bytes_avail - sinfo.block_offset) < 1421d92aa883SJeff Cody s->block_size) { 1422d92aa883SJeff Cody iov2.iov_len = s->block_size - 1423d92aa883SJeff Cody (sinfo.bytes_avail + sinfo.block_offset); 1424d92aa883SJeff Cody iov2.iov_base = qemu_blockalign(bs, iov2.iov_len); 1425d92aa883SJeff Cody memset(iov2.iov_base, 0, iov2.iov_len); 1426d92aa883SJeff Cody qemu_iovec_concat_iov(&hd_qiov, &iov2, 1, 0, 1427d1a126c5SKevin Wolf iov2.iov_len); 1428d92aa883SJeff Cody sectors_to_write += iov2.iov_len >> BDRV_SECTOR_BITS; 1429d92aa883SJeff Cody } 1430d92aa883SJeff Cody } 1431dbc636e7SEric Blake 1432d92aa883SJeff Cody /* fall through */ 1433d92aa883SJeff Cody case PAYLOAD_BLOCK_FULLY_PRESENT: 1434d92aa883SJeff Cody /* if the file offset address is in the header zone, 1435d92aa883SJeff Cody * there is a problem */ 14360cb98af2SStefano Garzarella if (sinfo.file_offset < (1 * MiB)) { 1437d92aa883SJeff Cody ret = -EFAULT; 1438d92aa883SJeff Cody goto error_bat_restore; 1439d92aa883SJeff Cody } 1440d92aa883SJeff Cody 
1441d92aa883SJeff Cody if (!use_zero_buffers) { 1442d92aa883SJeff Cody qemu_iovec_concat(&hd_qiov, qiov, bytes_done, 1443d92aa883SJeff Cody sinfo.bytes_avail); 1444d92aa883SJeff Cody } 1445d92aa883SJeff Cody /* block exists, so we can just overwrite it */ 1446d92aa883SJeff Cody qemu_co_mutex_unlock(&s->lock); 14473a7404b3SEric Blake ret = bdrv_co_pwritev(bs->file, sinfo.file_offset, 14483a7404b3SEric Blake sectors_to_write * BDRV_SECTOR_SIZE, 14493a7404b3SEric Blake &hd_qiov, 0); 1450d92aa883SJeff Cody qemu_co_mutex_lock(&s->lock); 1451d92aa883SJeff Cody if (ret < 0) { 1452d92aa883SJeff Cody goto error_bat_restore; 1453d92aa883SJeff Cody } 1454d92aa883SJeff Cody break; 1455d92aa883SJeff Cody case PAYLOAD_BLOCK_PARTIALLY_PRESENT: 1456d92aa883SJeff Cody /* we don't yet support difference files, fall through 1457d92aa883SJeff Cody * to error */ 1458d92aa883SJeff Cody default: 1459d92aa883SJeff Cody ret = -EIO; 1460d92aa883SJeff Cody goto exit; 1461d92aa883SJeff Cody break; 1462d92aa883SJeff Cody } 1463d92aa883SJeff Cody 1464d92aa883SJeff Cody if (bat_update) { 1465d92aa883SJeff Cody /* this will update the BAT entry into the log journal, and 1466d92aa883SJeff Cody * then flush the log journal out to disk */ 1467d92aa883SJeff Cody ret = vhdx_log_write_and_flush(bs, s, &bat_entry, 1468d92aa883SJeff Cody sizeof(VHDXBatEntry), 1469d92aa883SJeff Cody bat_entry_offset); 1470d92aa883SJeff Cody if (ret < 0) { 1471d92aa883SJeff Cody goto exit; 1472d92aa883SJeff Cody } 1473d92aa883SJeff Cody } 1474d92aa883SJeff Cody 1475d92aa883SJeff Cody nb_sectors -= sinfo.sectors_avail; 1476d92aa883SJeff Cody sector_num += sinfo.sectors_avail; 1477d92aa883SJeff Cody bytes_done += sinfo.bytes_avail; 1478d92aa883SJeff Cody 1479d92aa883SJeff Cody } 1480d92aa883SJeff Cody } 1481d92aa883SJeff Cody 1482d92aa883SJeff Cody goto exit; 1483d92aa883SJeff Cody 1484d92aa883SJeff Cody error_bat_restore: 1485d92aa883SJeff Cody if (bat_update) { 1486d92aa883SJeff Cody /* keep metadata in sync, and restore 
the bat entry state 1487d92aa883SJeff Cody * if error. */ 1488d92aa883SJeff Cody sinfo.file_offset = bat_prior_offset; 1489d92aa883SJeff Cody vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry, 1490d92aa883SJeff Cody &bat_entry_offset, bat_state); 1491d92aa883SJeff Cody } 1492d92aa883SJeff Cody exit: 1493d92aa883SJeff Cody qemu_vfree(iov1.iov_base); 1494d92aa883SJeff Cody qemu_vfree(iov2.iov_base); 1495d92aa883SJeff Cody qemu_co_mutex_unlock(&s->lock); 1496d92aa883SJeff Cody qemu_iovec_destroy(&hd_qiov); 1497d92aa883SJeff Cody return ret; 1498e8d4e5ffSJeff Cody } 1499e8d4e5ffSJeff Cody 1500e8d4e5ffSJeff Cody 15013412f7b1SJeff Cody 15023412f7b1SJeff Cody /* 15033412f7b1SJeff Cody * Create VHDX Headers 15043412f7b1SJeff Cody * 15053412f7b1SJeff Cody * There are 2 headers, and the highest sequence number will represent 15063412f7b1SJeff Cody * the active header 15073412f7b1SJeff Cody */ 1508db1e80eeSKevin Wolf static int vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size, 15093412f7b1SJeff Cody uint32_t log_size) 15103412f7b1SJeff Cody { 1511db1e80eeSKevin Wolf BlockDriverState *bs = blk_bs(blk); 1512cf2ab8fcSKevin Wolf BdrvChild *child; 15133412f7b1SJeff Cody int ret = 0; 15143412f7b1SJeff Cody VHDXHeader *hdr = NULL; 15153412f7b1SJeff Cody 15165839e53bSMarkus Armbruster hdr = g_new0(VHDXHeader, 1); 15173412f7b1SJeff Cody 15183412f7b1SJeff Cody hdr->signature = VHDX_HEADER_SIGNATURE; 15193412f7b1SJeff Cody hdr->sequence_number = g_random_int(); 15203412f7b1SJeff Cody hdr->log_version = 0; 15213412f7b1SJeff Cody hdr->version = 1; 15223412f7b1SJeff Cody hdr->log_length = log_size; 15233412f7b1SJeff Cody hdr->log_offset = VHDX_HEADER_SECTION_END; 15243412f7b1SJeff Cody vhdx_guid_generate(&hdr->file_write_guid); 15253412f7b1SJeff Cody vhdx_guid_generate(&hdr->data_write_guid); 15263412f7b1SJeff Cody 1527cf2ab8fcSKevin Wolf /* XXX Ugly way to get blk->root, but that's a feature, not a bug. 
This 1528cf2ab8fcSKevin Wolf * hack makes it obvious that vhdx_write_header() bypasses the BlockBackend 1529cf2ab8fcSKevin Wolf * here, which it really shouldn't be doing. */ 1530cf2ab8fcSKevin Wolf child = QLIST_FIRST(&bs->parents); 1531cf2ab8fcSKevin Wolf assert(!QLIST_NEXT(child, next_parent)); 1532cf2ab8fcSKevin Wolf 1533cf2ab8fcSKevin Wolf ret = vhdx_write_header(child, hdr, VHDX_HEADER1_OFFSET, false); 15343412f7b1SJeff Cody if (ret < 0) { 15353412f7b1SJeff Cody goto exit; 15363412f7b1SJeff Cody } 15373412f7b1SJeff Cody hdr->sequence_number++; 1538cf2ab8fcSKevin Wolf ret = vhdx_write_header(child, hdr, VHDX_HEADER2_OFFSET, false); 15393412f7b1SJeff Cody if (ret < 0) { 15403412f7b1SJeff Cody goto exit; 15413412f7b1SJeff Cody } 15423412f7b1SJeff Cody 15433412f7b1SJeff Cody exit: 15443412f7b1SJeff Cody g_free(hdr); 15453412f7b1SJeff Cody return ret; 15463412f7b1SJeff Cody } 15473412f7b1SJeff Cody 1548e91a8b2fSJeff Cody #define VHDX_METADATA_ENTRY_BUFFER_SIZE \ 1549e91a8b2fSJeff Cody (sizeof(VHDXFileParameters) +\ 1550e91a8b2fSJeff Cody sizeof(VHDXVirtualDiskSize) +\ 1551e91a8b2fSJeff Cody sizeof(VHDXPage83Data) +\ 1552e91a8b2fSJeff Cody sizeof(VHDXVirtualDiskLogicalSectorSize) +\ 1553e91a8b2fSJeff Cody sizeof(VHDXVirtualDiskPhysicalSectorSize)) 15543412f7b1SJeff Cody 15553412f7b1SJeff Cody /* 15563412f7b1SJeff Cody * Create the Metadata entries. 15573412f7b1SJeff Cody * 15583412f7b1SJeff Cody * For more details on the entries, see section 3.5 (pg 29) in the 15593412f7b1SJeff Cody * VHDX 1.00 specification. 
15603412f7b1SJeff Cody * 15613412f7b1SJeff Cody * We support 5 metadata entries (all required by spec): 15623412f7b1SJeff Cody * File Parameters, 15633412f7b1SJeff Cody * Virtual Disk Size, 15643412f7b1SJeff Cody * Page 83 Data, 15653412f7b1SJeff Cody * Logical Sector Size, 15663412f7b1SJeff Cody * Physical Sector Size 15673412f7b1SJeff Cody * 15683412f7b1SJeff Cody * The first 64KB of the Metadata section is reserved for the metadata 15693412f7b1SJeff Cody * header and entries; beyond that, the metadata items themselves reside. 15703412f7b1SJeff Cody */ 1571db1e80eeSKevin Wolf static int vhdx_create_new_metadata(BlockBackend *blk, 15723412f7b1SJeff Cody uint64_t image_size, 15733412f7b1SJeff Cody uint32_t block_size, 15743412f7b1SJeff Cody uint32_t sector_size, 15753412f7b1SJeff Cody uint64_t metadata_offset, 15763412f7b1SJeff Cody VHDXImageType type) 15773412f7b1SJeff Cody { 15783412f7b1SJeff Cody int ret = 0; 15793412f7b1SJeff Cody uint32_t offset = 0; 15803412f7b1SJeff Cody void *buffer = NULL; 15813412f7b1SJeff Cody void *entry_buffer; 1582a8f15a27SDaniel P. 
Berrange VHDXMetadataTableHeader *md_table; 15833412f7b1SJeff Cody VHDXMetadataTableEntry *md_table_entry; 15843412f7b1SJeff Cody 15853412f7b1SJeff Cody /* Metadata entries */ 15863412f7b1SJeff Cody VHDXFileParameters *mt_file_params; 15873412f7b1SJeff Cody VHDXVirtualDiskSize *mt_virtual_size; 15883412f7b1SJeff Cody VHDXPage83Data *mt_page83; 15893412f7b1SJeff Cody VHDXVirtualDiskLogicalSectorSize *mt_log_sector_size; 15903412f7b1SJeff Cody VHDXVirtualDiskPhysicalSectorSize *mt_phys_sector_size; 15913412f7b1SJeff Cody 1592e91a8b2fSJeff Cody entry_buffer = g_malloc0(VHDX_METADATA_ENTRY_BUFFER_SIZE); 15933412f7b1SJeff Cody 15943412f7b1SJeff Cody mt_file_params = entry_buffer; 15953412f7b1SJeff Cody offset += sizeof(VHDXFileParameters); 15963412f7b1SJeff Cody mt_virtual_size = entry_buffer + offset; 15973412f7b1SJeff Cody offset += sizeof(VHDXVirtualDiskSize); 15983412f7b1SJeff Cody mt_page83 = entry_buffer + offset; 15993412f7b1SJeff Cody offset += sizeof(VHDXPage83Data); 16003412f7b1SJeff Cody mt_log_sector_size = entry_buffer + offset; 16013412f7b1SJeff Cody offset += sizeof(VHDXVirtualDiskLogicalSectorSize); 16023412f7b1SJeff Cody mt_phys_sector_size = entry_buffer + offset; 16033412f7b1SJeff Cody 16043412f7b1SJeff Cody mt_file_params->block_size = cpu_to_le32(block_size); 16053412f7b1SJeff Cody if (type == VHDX_TYPE_FIXED) { 16063412f7b1SJeff Cody mt_file_params->data_bits |= VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED; 16071229e46dSPeter Maydell mt_file_params->data_bits = cpu_to_le32(mt_file_params->data_bits); 16083412f7b1SJeff Cody } 16093412f7b1SJeff Cody 16103412f7b1SJeff Cody vhdx_guid_generate(&mt_page83->page_83_data); 16113412f7b1SJeff Cody cpu_to_leguids(&mt_page83->page_83_data); 16123412f7b1SJeff Cody mt_virtual_size->virtual_disk_size = cpu_to_le64(image_size); 16133412f7b1SJeff Cody mt_log_sector_size->logical_sector_size = cpu_to_le32(sector_size); 16143412f7b1SJeff Cody mt_phys_sector_size->physical_sector_size = cpu_to_le32(sector_size); 16153412f7b1SJeff 
Cody 16163412f7b1SJeff Cody buffer = g_malloc0(VHDX_HEADER_BLOCK_SIZE); 16173412f7b1SJeff Cody md_table = buffer; 16183412f7b1SJeff Cody 16193412f7b1SJeff Cody md_table->signature = VHDX_METADATA_SIGNATURE; 16203412f7b1SJeff Cody md_table->entry_count = 5; 16213412f7b1SJeff Cody vhdx_metadata_header_le_export(md_table); 16223412f7b1SJeff Cody 16233412f7b1SJeff Cody 16243412f7b1SJeff Cody /* This will reference beyond the reserved table portion */ 16253412f7b1SJeff Cody offset = 64 * KiB; 16263412f7b1SJeff Cody 16273412f7b1SJeff Cody md_table_entry = buffer + sizeof(VHDXMetadataTableHeader); 16283412f7b1SJeff Cody 16293412f7b1SJeff Cody md_table_entry[0].item_id = file_param_guid; 16303412f7b1SJeff Cody md_table_entry[0].offset = offset; 16313412f7b1SJeff Cody md_table_entry[0].length = sizeof(VHDXFileParameters); 16323412f7b1SJeff Cody md_table_entry[0].data_bits |= VHDX_META_FLAGS_IS_REQUIRED; 16333412f7b1SJeff Cody offset += md_table_entry[0].length; 16343412f7b1SJeff Cody vhdx_metadata_entry_le_export(&md_table_entry[0]); 16353412f7b1SJeff Cody 16363412f7b1SJeff Cody md_table_entry[1].item_id = virtual_size_guid; 16373412f7b1SJeff Cody md_table_entry[1].offset = offset; 16383412f7b1SJeff Cody md_table_entry[1].length = sizeof(VHDXVirtualDiskSize); 16393412f7b1SJeff Cody md_table_entry[1].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | 16403412f7b1SJeff Cody VHDX_META_FLAGS_IS_VIRTUAL_DISK; 16413412f7b1SJeff Cody offset += md_table_entry[1].length; 16423412f7b1SJeff Cody vhdx_metadata_entry_le_export(&md_table_entry[1]); 16433412f7b1SJeff Cody 16443412f7b1SJeff Cody md_table_entry[2].item_id = page83_guid; 16453412f7b1SJeff Cody md_table_entry[2].offset = offset; 16463412f7b1SJeff Cody md_table_entry[2].length = sizeof(VHDXPage83Data); 16473412f7b1SJeff Cody md_table_entry[2].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | 16483412f7b1SJeff Cody VHDX_META_FLAGS_IS_VIRTUAL_DISK; 16493412f7b1SJeff Cody offset += md_table_entry[2].length; 16503412f7b1SJeff Cody 
vhdx_metadata_entry_le_export(&md_table_entry[2]); 16513412f7b1SJeff Cody 16523412f7b1SJeff Cody md_table_entry[3].item_id = logical_sector_guid; 16533412f7b1SJeff Cody md_table_entry[3].offset = offset; 16543412f7b1SJeff Cody md_table_entry[3].length = sizeof(VHDXVirtualDiskLogicalSectorSize); 16553412f7b1SJeff Cody md_table_entry[3].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | 16563412f7b1SJeff Cody VHDX_META_FLAGS_IS_VIRTUAL_DISK; 16573412f7b1SJeff Cody offset += md_table_entry[3].length; 16583412f7b1SJeff Cody vhdx_metadata_entry_le_export(&md_table_entry[3]); 16593412f7b1SJeff Cody 16603412f7b1SJeff Cody md_table_entry[4].item_id = phys_sector_guid; 16613412f7b1SJeff Cody md_table_entry[4].offset = offset; 16623412f7b1SJeff Cody md_table_entry[4].length = sizeof(VHDXVirtualDiskPhysicalSectorSize); 16633412f7b1SJeff Cody md_table_entry[4].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | 16643412f7b1SJeff Cody VHDX_META_FLAGS_IS_VIRTUAL_DISK; 16653412f7b1SJeff Cody vhdx_metadata_entry_le_export(&md_table_entry[4]); 16663412f7b1SJeff Cody 1667a9262f55SAlberto Faria ret = blk_pwrite(blk, metadata_offset, VHDX_HEADER_BLOCK_SIZE, buffer, 0); 16683412f7b1SJeff Cody if (ret < 0) { 16693412f7b1SJeff Cody goto exit; 16703412f7b1SJeff Cody } 16713412f7b1SJeff Cody 1672a9262f55SAlberto Faria ret = blk_pwrite(blk, metadata_offset + (64 * KiB), 1673a9262f55SAlberto Faria VHDX_METADATA_ENTRY_BUFFER_SIZE, entry_buffer, 0); 16743412f7b1SJeff Cody if (ret < 0) { 16753412f7b1SJeff Cody goto exit; 16763412f7b1SJeff Cody } 16773412f7b1SJeff Cody 16783412f7b1SJeff Cody 16793412f7b1SJeff Cody exit: 16803412f7b1SJeff Cody g_free(buffer); 16813412f7b1SJeff Cody g_free(entry_buffer); 16823412f7b1SJeff Cody return ret; 16833412f7b1SJeff Cody } 16843412f7b1SJeff Cody 16853412f7b1SJeff Cody /* This create the actual BAT itself. We currently only support 16863412f7b1SJeff Cody * 'Dynamic' and 'Fixed' image types. 
16873412f7b1SJeff Cody * 16883412f7b1SJeff Cody * Dynamic images: default state of the BAT is all zeroes. 16893412f7b1SJeff Cody * 16903412f7b1SJeff Cody * Fixed images: default state of the BAT is fully populated, with 16913412f7b1SJeff Cody * file offsets and state PAYLOAD_BLOCK_FULLY_PRESENT. 16923412f7b1SJeff Cody */ 1693db1e80eeSKevin Wolf static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s, 16943412f7b1SJeff Cody uint64_t image_size, VHDXImageType type, 16954f75b52aSJeff Cody bool use_zero_blocks, uint64_t file_offset, 169655b9392bSMax Reitz uint32_t length, Error **errp) 16973412f7b1SJeff Cody { 16983412f7b1SJeff Cody int ret = 0; 16993412f7b1SJeff Cody uint64_t data_file_offset; 17003412f7b1SJeff Cody uint64_t total_sectors = 0; 17013412f7b1SJeff Cody uint64_t sector_num = 0; 17023412f7b1SJeff Cody uint64_t unused; 17033412f7b1SJeff Cody int block_state; 17043412f7b1SJeff Cody VHDXSectorInfo sinfo; 17053412f7b1SJeff Cody 17063412f7b1SJeff Cody assert(s->bat == NULL); 17073412f7b1SJeff Cody 17083412f7b1SJeff Cody /* this gives a data start after BAT/bitmap entries, and well 17093412f7b1SJeff Cody * past any metadata entries (with a 4 MB buffer for future 17103412f7b1SJeff Cody * expansion */ 17114f75b52aSJeff Cody data_file_offset = file_offset + length + 5 * MiB; 17123412f7b1SJeff Cody total_sectors = image_size >> s->logical_sector_size_bits; 17133412f7b1SJeff Cody 17143412f7b1SJeff Cody if (type == VHDX_TYPE_DYNAMIC) { 17153412f7b1SJeff Cody /* All zeroes, so we can just extend the file - the end of the BAT 17163412f7b1SJeff Cody * is the furthest thing we have written yet */ 1717c80d8b06SMax Reitz ret = blk_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF, 17188c6242b6SKevin Wolf 0, errp); 17193412f7b1SJeff Cody if (ret < 0) { 17203412f7b1SJeff Cody goto exit; 17213412f7b1SJeff Cody } 17223412f7b1SJeff Cody } else if (type == VHDX_TYPE_FIXED) { 1723c80d8b06SMax Reitz ret = blk_truncate(blk, data_file_offset + image_size, false, 
17248c6242b6SKevin Wolf PREALLOC_MODE_OFF, 0, errp); 17253412f7b1SJeff Cody if (ret < 0) { 17263412f7b1SJeff Cody goto exit; 17273412f7b1SJeff Cody } 17283412f7b1SJeff Cody } else { 172955b9392bSMax Reitz error_setg(errp, "Unsupported image type"); 17303412f7b1SJeff Cody ret = -ENOTSUP; 17313412f7b1SJeff Cody goto exit; 17323412f7b1SJeff Cody } 17333412f7b1SJeff Cody 17343412f7b1SJeff Cody if (type == VHDX_TYPE_FIXED || 17353412f7b1SJeff Cody use_zero_blocks || 1736db1e80eeSKevin Wolf bdrv_has_zero_init(blk_bs(blk)) == 0) { 17373412f7b1SJeff Cody /* for a fixed file, the default BAT entry is not zero */ 1738a67e128aSKevin Wolf s->bat = g_try_malloc0(length); 1739a011898dSAdelina Tuvenie if (length && s->bat == NULL) { 174055b9392bSMax Reitz error_setg(errp, "Failed to allocate memory for the BAT"); 1741a67e128aSKevin Wolf ret = -ENOMEM; 1742a67e128aSKevin Wolf goto exit; 1743a67e128aSKevin Wolf } 17443412f7b1SJeff Cody block_state = type == VHDX_TYPE_FIXED ? PAYLOAD_BLOCK_FULLY_PRESENT : 17453412f7b1SJeff Cody PAYLOAD_BLOCK_NOT_PRESENT; 17463412f7b1SJeff Cody block_state = use_zero_blocks ? 
PAYLOAD_BLOCK_ZERO : block_state; 17473412f7b1SJeff Cody /* fill the BAT by emulating sector writes of sectors_per_block size */ 17483412f7b1SJeff Cody while (sector_num < total_sectors) { 17493412f7b1SJeff Cody vhdx_block_translate(s, sector_num, s->sectors_per_block, &sinfo); 17503412f7b1SJeff Cody sinfo.file_offset = data_file_offset + 17513412f7b1SJeff Cody (sector_num << s->logical_sector_size_bits); 17523412f7b1SJeff Cody sinfo.file_offset = ROUND_UP(sinfo.file_offset, MiB); 1753db1e80eeSKevin Wolf vhdx_update_bat_table_entry(blk_bs(blk), s, &sinfo, &unused, &unused, 17543412f7b1SJeff Cody block_state); 17551229e46dSPeter Maydell s->bat[sinfo.bat_idx] = cpu_to_le64(s->bat[sinfo.bat_idx]); 17563412f7b1SJeff Cody sector_num += s->sectors_per_block; 17573412f7b1SJeff Cody } 1758a9262f55SAlberto Faria ret = blk_pwrite(blk, file_offset, length, s->bat, 0); 17593412f7b1SJeff Cody if (ret < 0) { 176055b9392bSMax Reitz error_setg_errno(errp, -ret, "Failed to write the BAT"); 17613412f7b1SJeff Cody goto exit; 17623412f7b1SJeff Cody } 17633412f7b1SJeff Cody } 17643412f7b1SJeff Cody 17653412f7b1SJeff Cody 17663412f7b1SJeff Cody 17673412f7b1SJeff Cody exit: 17683412f7b1SJeff Cody g_free(s->bat); 17693412f7b1SJeff Cody return ret; 17703412f7b1SJeff Cody } 17713412f7b1SJeff Cody 17723412f7b1SJeff Cody /* Creates the region table header, and region table entries. 17733412f7b1SJeff Cody * There are 2 supported region table entries: BAT, and Metadata/ 17743412f7b1SJeff Cody * 17753412f7b1SJeff Cody * As the calculations for the BAT region table are also needed 17763412f7b1SJeff Cody * to create the BAT itself, we will also cause the BAT to be 17773412f7b1SJeff Cody * created. 
17783412f7b1SJeff Cody */ 1779db1e80eeSKevin Wolf static int vhdx_create_new_region_table(BlockBackend *blk, 17803412f7b1SJeff Cody uint64_t image_size, 17813412f7b1SJeff Cody uint32_t block_size, 17823412f7b1SJeff Cody uint32_t sector_size, 17833412f7b1SJeff Cody uint32_t log_size, 17843412f7b1SJeff Cody bool use_zero_blocks, 17853412f7b1SJeff Cody VHDXImageType type, 178655b9392bSMax Reitz uint64_t *metadata_offset, 178755b9392bSMax Reitz Error **errp) 17883412f7b1SJeff Cody { 17893412f7b1SJeff Cody int ret = 0; 17903412f7b1SJeff Cody uint32_t offset = 0; 17913412f7b1SJeff Cody void *buffer = NULL; 17924f75b52aSJeff Cody uint64_t bat_file_offset; 17934f75b52aSJeff Cody uint32_t bat_length; 17943412f7b1SJeff Cody BDRVVHDXState *s = NULL; 17953412f7b1SJeff Cody VHDXRegionTableHeader *region_table; 17963412f7b1SJeff Cody VHDXRegionTableEntry *rt_bat; 17973412f7b1SJeff Cody VHDXRegionTableEntry *rt_metadata; 17983412f7b1SJeff Cody 17993412f7b1SJeff Cody assert(metadata_offset != NULL); 18003412f7b1SJeff Cody 18013412f7b1SJeff Cody /* Populate enough of the BDRVVHDXState to be able to use the 18023412f7b1SJeff Cody * pre-existing BAT calculation, translation, and update functions */ 18035839e53bSMarkus Armbruster s = g_new0(BDRVVHDXState, 1); 18043412f7b1SJeff Cody 18053412f7b1SJeff Cody s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) * 18063412f7b1SJeff Cody (uint64_t) sector_size / (uint64_t) block_size; 18073412f7b1SJeff Cody 18083412f7b1SJeff Cody s->sectors_per_block = block_size / sector_size; 18093412f7b1SJeff Cody s->virtual_disk_size = image_size; 18103412f7b1SJeff Cody s->block_size = block_size; 18113412f7b1SJeff Cody s->logical_sector_size = sector_size; 18123412f7b1SJeff Cody 18133412f7b1SJeff Cody vhdx_set_shift_bits(s); 18143412f7b1SJeff Cody 18153412f7b1SJeff Cody vhdx_calc_bat_entries(s); 18163412f7b1SJeff Cody 18173412f7b1SJeff Cody /* At this point the VHDX state is populated enough for creation */ 18183412f7b1SJeff Cody 18193412f7b1SJeff Cody /* a 
single buffer is used so we can calculate the checksum over the 18203412f7b1SJeff Cody * entire 64KB block */ 18213412f7b1SJeff Cody buffer = g_malloc0(VHDX_HEADER_BLOCK_SIZE); 18223412f7b1SJeff Cody region_table = buffer; 18233412f7b1SJeff Cody offset += sizeof(VHDXRegionTableHeader); 18243412f7b1SJeff Cody rt_bat = buffer + offset; 18253412f7b1SJeff Cody offset += sizeof(VHDXRegionTableEntry); 18263412f7b1SJeff Cody rt_metadata = buffer + offset; 18273412f7b1SJeff Cody 18283412f7b1SJeff Cody region_table->signature = VHDX_REGION_SIGNATURE; 18293412f7b1SJeff Cody region_table->entry_count = 2; /* BAT and Metadata */ 18303412f7b1SJeff Cody 18313412f7b1SJeff Cody rt_bat->guid = bat_guid; 18323412f7b1SJeff Cody rt_bat->length = ROUND_UP(s->bat_entries * sizeof(VHDXBatEntry), MiB); 18333412f7b1SJeff Cody rt_bat->file_offset = ROUND_UP(VHDX_HEADER_SECTION_END + log_size, MiB); 18343412f7b1SJeff Cody s->bat_offset = rt_bat->file_offset; 18353412f7b1SJeff Cody 18363412f7b1SJeff Cody rt_metadata->guid = metadata_guid; 18373412f7b1SJeff Cody rt_metadata->file_offset = ROUND_UP(rt_bat->file_offset + rt_bat->length, 18383412f7b1SJeff Cody MiB); 18393412f7b1SJeff Cody rt_metadata->length = 1 * MiB; /* min size, and more than enough */ 18403412f7b1SJeff Cody *metadata_offset = rt_metadata->file_offset; 18413412f7b1SJeff Cody 18424f75b52aSJeff Cody bat_file_offset = rt_bat->file_offset; 18434f75b52aSJeff Cody bat_length = rt_bat->length; 18444f75b52aSJeff Cody 18454f75b52aSJeff Cody vhdx_region_header_le_export(region_table); 18464f75b52aSJeff Cody vhdx_region_entry_le_export(rt_bat); 18474f75b52aSJeff Cody vhdx_region_entry_le_export(rt_metadata); 18484f75b52aSJeff Cody 18493412f7b1SJeff Cody vhdx_update_checksum(buffer, VHDX_HEADER_BLOCK_SIZE, 18503412f7b1SJeff Cody offsetof(VHDXRegionTableHeader, checksum)); 18513412f7b1SJeff Cody 18523412f7b1SJeff Cody 18533412f7b1SJeff Cody /* The region table gives us the data we need to create the BAT, 18543412f7b1SJeff Cody * so do that 
now */ 1855db1e80eeSKevin Wolf ret = vhdx_create_bat(blk, s, image_size, type, use_zero_blocks, 185655b9392bSMax Reitz bat_file_offset, bat_length, errp); 18574f75b52aSJeff Cody if (ret < 0) { 18584f75b52aSJeff Cody goto exit; 18594f75b52aSJeff Cody } 18603412f7b1SJeff Cody 18613412f7b1SJeff Cody /* Now write out the region headers to disk */ 1862a9262f55SAlberto Faria ret = blk_pwrite(blk, VHDX_REGION_TABLE_OFFSET, VHDX_HEADER_BLOCK_SIZE, 1863a9262f55SAlberto Faria buffer, 0); 18643412f7b1SJeff Cody if (ret < 0) { 186555b9392bSMax Reitz error_setg_errno(errp, -ret, "Failed to write first region table"); 18663412f7b1SJeff Cody goto exit; 18673412f7b1SJeff Cody } 18683412f7b1SJeff Cody 1869a9262f55SAlberto Faria ret = blk_pwrite(blk, VHDX_REGION_TABLE2_OFFSET, VHDX_HEADER_BLOCK_SIZE, 1870a9262f55SAlberto Faria buffer, 0); 18713412f7b1SJeff Cody if (ret < 0) { 187255b9392bSMax Reitz error_setg_errno(errp, -ret, "Failed to write second region table"); 18733412f7b1SJeff Cody goto exit; 18743412f7b1SJeff Cody } 18753412f7b1SJeff Cody 18763412f7b1SJeff Cody exit: 18773412f7b1SJeff Cody g_free(s); 18783412f7b1SJeff Cody g_free(buffer); 18793412f7b1SJeff Cody return ret; 18803412f7b1SJeff Cody } 18813412f7b1SJeff Cody 18823412f7b1SJeff Cody /* We need to create the following elements: 18833412f7b1SJeff Cody * 18843412f7b1SJeff Cody * .-----------------------------------------------------------------. 18853412f7b1SJeff Cody * | (A) | (B) | (C) | (D) | (E) | 18863412f7b1SJeff Cody * | File ID | Header1 | Header 2 | Region Tbl 1 | Region Tbl 2 | 18873412f7b1SJeff Cody * | | | | | | 18883412f7b1SJeff Cody * .-----------------------------------------------------------------. 18893412f7b1SJeff Cody * 0 64KB 128KB 192KB 256KB 320KB 18903412f7b1SJeff Cody * 18913412f7b1SJeff Cody * 18923412f7b1SJeff Cody * .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------. 
18933412f7b1SJeff Cody * | (F) | (G) | (H) | | 18943412f7b1SJeff Cody * | Journal Log | BAT / Bitmap | Metadata | .... data ...... | 18953412f7b1SJeff Cody * | | | | | 18963412f7b1SJeff Cody * .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------. 18973412f7b1SJeff Cody * 1MB 18983412f7b1SJeff Cody */ 189909b68dabSKevin Wolf static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts, 1900efc75e2aSStefan Hajnoczi Error **errp) 19013412f7b1SJeff Cody { 190209b68dabSKevin Wolf BlockdevCreateOptionsVhdx *vhdx_opts; 190309b68dabSKevin Wolf BlockBackend *blk = NULL; 190409b68dabSKevin Wolf BlockDriverState *bs = NULL; 190509b68dabSKevin Wolf 19063412f7b1SJeff Cody int ret = 0; 190709b68dabSKevin Wolf uint64_t image_size; 190809b68dabSKevin Wolf uint32_t log_size; 190909b68dabSKevin Wolf uint32_t block_size; 19103412f7b1SJeff Cody uint64_t signature; 19113412f7b1SJeff Cody uint64_t metadata_offset; 19123412f7b1SJeff Cody bool use_zero_blocks = false; 19133412f7b1SJeff Cody 19143412f7b1SJeff Cody gunichar2 *creator = NULL; 19153412f7b1SJeff Cody glong creator_items; 19163412f7b1SJeff Cody VHDXImageType image_type; 19173412f7b1SJeff Cody 191809b68dabSKevin Wolf assert(opts->driver == BLOCKDEV_DRIVER_VHDX); 191909b68dabSKevin Wolf vhdx_opts = &opts->u.vhdx; 19203412f7b1SJeff Cody 192109b68dabSKevin Wolf /* Validate options and set default values */ 192209b68dabSKevin Wolf image_size = vhdx_opts->size; 19233412f7b1SJeff Cody if (image_size > VHDX_MAX_IMAGE_SIZE) { 19240fcc38e7SKevin Wolf error_setg(errp, "Image size too large; max of 64TB"); 192509b68dabSKevin Wolf return -EINVAL; 19263412f7b1SJeff Cody } 19273412f7b1SJeff Cody 192809b68dabSKevin Wolf if (!vhdx_opts->has_log_size) { 192909b68dabSKevin Wolf log_size = DEFAULT_LOG_SIZE; 19303412f7b1SJeff Cody } else { 19316f16f7c5SKevin Wolf if (vhdx_opts->log_size > UINT32_MAX) { 19326f16f7c5SKevin Wolf error_setg(errp, "Log size must be smaller than 4 GB"); 19336f16f7c5SKevin Wolf return -EINVAL; 
19346f16f7c5SKevin Wolf } 193509b68dabSKevin Wolf log_size = vhdx_opts->log_size; 193609b68dabSKevin Wolf } 193709b68dabSKevin Wolf if (log_size < MiB || (log_size % MiB) != 0) { 19380fcc38e7SKevin Wolf error_setg(errp, "Log size must be a multiple of 1 MB"); 193909b68dabSKevin Wolf return -EINVAL; 194009b68dabSKevin Wolf } 194109b68dabSKevin Wolf 194209b68dabSKevin Wolf if (!vhdx_opts->has_block_state_zero) { 194309b68dabSKevin Wolf use_zero_blocks = true; 194409b68dabSKevin Wolf } else { 194509b68dabSKevin Wolf use_zero_blocks = vhdx_opts->block_state_zero; 194609b68dabSKevin Wolf } 194709b68dabSKevin Wolf 194809b68dabSKevin Wolf if (!vhdx_opts->has_subformat) { 194909b68dabSKevin Wolf vhdx_opts->subformat = BLOCKDEV_VHDX_SUBFORMAT_DYNAMIC; 195009b68dabSKevin Wolf } 195109b68dabSKevin Wolf 195209b68dabSKevin Wolf switch (vhdx_opts->subformat) { 195309b68dabSKevin Wolf case BLOCKDEV_VHDX_SUBFORMAT_DYNAMIC: 195409b68dabSKevin Wolf image_type = VHDX_TYPE_DYNAMIC; 195509b68dabSKevin Wolf break; 195609b68dabSKevin Wolf case BLOCKDEV_VHDX_SUBFORMAT_FIXED: 195709b68dabSKevin Wolf image_type = VHDX_TYPE_FIXED; 195809b68dabSKevin Wolf break; 195909b68dabSKevin Wolf default: 196009b68dabSKevin Wolf g_assert_not_reached(); 19613412f7b1SJeff Cody } 19623412f7b1SJeff Cody 19633412f7b1SJeff Cody /* These are pretty arbitrary, and mainly designed to keep the BAT 19643412f7b1SJeff Cody * size reasonable to load into RAM */ 196509b68dabSKevin Wolf if (vhdx_opts->has_block_size) { 196609b68dabSKevin Wolf block_size = vhdx_opts->block_size; 196709b68dabSKevin Wolf } else { 19683412f7b1SJeff Cody if (image_size > 32 * TiB) { 19693412f7b1SJeff Cody block_size = 64 * MiB; 19703412f7b1SJeff Cody } else if (image_size > (uint64_t) 100 * GiB) { 19713412f7b1SJeff Cody block_size = 32 * MiB; 19723412f7b1SJeff Cody } else if (image_size > 1 * GiB) { 19733412f7b1SJeff Cody block_size = 16 * MiB; 19743412f7b1SJeff Cody } else { 19753412f7b1SJeff Cody block_size = 8 * MiB; 19763412f7b1SJeff 
Cody } 19773412f7b1SJeff Cody } 19783412f7b1SJeff Cody 197909b68dabSKevin Wolf if (block_size < MiB || (block_size % MiB) != 0) { 19800fcc38e7SKevin Wolf error_setg(errp, "Block size must be a multiple of 1 MB"); 198109b68dabSKevin Wolf return -EINVAL; 198209b68dabSKevin Wolf } 1983b412f494SKevin Wolf if (!is_power_of_2(block_size)) { 1984b412f494SKevin Wolf error_setg(errp, "Block size must be a power of two"); 1985b412f494SKevin Wolf return -EINVAL; 1986b412f494SKevin Wolf } 198709b68dabSKevin Wolf if (block_size > VHDX_BLOCK_SIZE_MAX) { 1988e9991e29SStefano Garzarella error_setg(errp, "Block size must not exceed %" PRId64, 1989e9991e29SStefano Garzarella VHDX_BLOCK_SIZE_MAX); 199009b68dabSKevin Wolf return -EINVAL; 199109b68dabSKevin Wolf } 19923412f7b1SJeff Cody 199309b68dabSKevin Wolf /* Create BlockBackend to write to the image */ 199409b68dabSKevin Wolf bs = bdrv_open_blockdev_ref(vhdx_opts->file, errp); 199509b68dabSKevin Wolf if (bs == NULL) { 199609b68dabSKevin Wolf return -EIO; 199709b68dabSKevin Wolf } 19983412f7b1SJeff Cody 1999a3aeeab5SEric Blake blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL, 2000a3aeeab5SEric Blake errp); 2001a3aeeab5SEric Blake if (!blk) { 2002a3aeeab5SEric Blake ret = -EPERM; 200309b68dabSKevin Wolf goto delete_and_exit; 20043412f7b1SJeff Cody } 200510bf03afSKevin Wolf blk_set_allow_write_beyond_eof(blk, true); 200610bf03afSKevin Wolf 20073412f7b1SJeff Cody /* Create (A) */ 20083412f7b1SJeff Cody 20093412f7b1SJeff Cody /* The creator field is optional, but may be useful for 20103412f7b1SJeff Cody * debugging / diagnostics */ 20113412f7b1SJeff Cody creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL, 20123412f7b1SJeff Cody &creator_items, NULL); 20133412f7b1SJeff Cody signature = cpu_to_le64(VHDX_FILE_SIGNATURE); 2014eb342749SAlberto Faria ret = blk_co_pwrite(blk, VHDX_FILE_ID_OFFSET, sizeof(signature), &signature, 20158341f00dSEric Blake 0); 20163412f7b1SJeff Cody if (ret < 0) { 201755b9392bSMax 
Reitz error_setg_errno(errp, -ret, "Failed to write file signature"); 20183412f7b1SJeff Cody goto delete_and_exit; 20193412f7b1SJeff Cody } 20203412f7b1SJeff Cody if (creator) { 2021eb342749SAlberto Faria ret = blk_co_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature), 2022a9262f55SAlberto Faria creator_items * sizeof(gunichar2), creator, 0); 20233412f7b1SJeff Cody if (ret < 0) { 202455b9392bSMax Reitz error_setg_errno(errp, -ret, "Failed to write creator field"); 20253412f7b1SJeff Cody goto delete_and_exit; 20263412f7b1SJeff Cody } 20273412f7b1SJeff Cody } 20283412f7b1SJeff Cody 20293412f7b1SJeff Cody 20303412f7b1SJeff Cody /* Creates (B),(C) */ 2031db1e80eeSKevin Wolf ret = vhdx_create_new_headers(blk, image_size, log_size); 20323412f7b1SJeff Cody if (ret < 0) { 203355b9392bSMax Reitz error_setg_errno(errp, -ret, "Failed to write image headers"); 20343412f7b1SJeff Cody goto delete_and_exit; 20353412f7b1SJeff Cody } 20363412f7b1SJeff Cody 20373412f7b1SJeff Cody /* Creates (D),(E),(G) explicitly. 
(F) created as by-product */ 2038db1e80eeSKevin Wolf ret = vhdx_create_new_region_table(blk, image_size, block_size, 512, 20393412f7b1SJeff Cody log_size, use_zero_blocks, image_type, 204055b9392bSMax Reitz &metadata_offset, errp); 20413412f7b1SJeff Cody if (ret < 0) { 20423412f7b1SJeff Cody goto delete_and_exit; 20433412f7b1SJeff Cody } 20443412f7b1SJeff Cody 20453412f7b1SJeff Cody /* Creates (H) */ 2046db1e80eeSKevin Wolf ret = vhdx_create_new_metadata(blk, image_size, block_size, 512, 20473412f7b1SJeff Cody metadata_offset, image_type); 20483412f7b1SJeff Cody if (ret < 0) { 204955b9392bSMax Reitz error_setg_errno(errp, -ret, "Failed to initialize metadata"); 20503412f7b1SJeff Cody goto delete_and_exit; 20513412f7b1SJeff Cody } 20523412f7b1SJeff Cody 20534a5f2779SKevin Wolf ret = 0; 20543412f7b1SJeff Cody delete_and_exit: 205510bf03afSKevin Wolf blk_unref(blk); 205609b68dabSKevin Wolf bdrv_unref(bs); 20573412f7b1SJeff Cody g_free(creator); 20583412f7b1SJeff Cody return ret; 20593412f7b1SJeff Cody } 20603412f7b1SJeff Cody 2061b92902dfSMaxim Levitsky static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv, 2062b92902dfSMaxim Levitsky const char *filename, 206309b68dabSKevin Wolf QemuOpts *opts, 206409b68dabSKevin Wolf Error **errp) 206509b68dabSKevin Wolf { 206609b68dabSKevin Wolf BlockdevCreateOptions *create_options = NULL; 206792adf9dbSMarkus Armbruster QDict *qdict; 206809b68dabSKevin Wolf Visitor *v; 206909b68dabSKevin Wolf BlockDriverState *bs = NULL; 207009b68dabSKevin Wolf int ret; 207109b68dabSKevin Wolf 207209b68dabSKevin Wolf static const QDictRenames opt_renames[] = { 207309b68dabSKevin Wolf { VHDX_BLOCK_OPT_LOG_SIZE, "log-size" }, 207409b68dabSKevin Wolf { VHDX_BLOCK_OPT_BLOCK_SIZE, "block-size" }, 207509b68dabSKevin Wolf { VHDX_BLOCK_OPT_ZERO, "block-state-zero" }, 207609b68dabSKevin Wolf { NULL, NULL }, 207709b68dabSKevin Wolf }; 207809b68dabSKevin Wolf 207909b68dabSKevin Wolf /* Parse options and convert legacy syntax */ 208009b68dabSKevin Wolf 
qdict = qemu_opts_to_qdict_filtered(opts, NULL, &vhdx_create_opts, true); 208109b68dabSKevin Wolf 208209b68dabSKevin Wolf if (!qdict_rename_keys(qdict, opt_renames, errp)) { 208309b68dabSKevin Wolf ret = -EINVAL; 208409b68dabSKevin Wolf goto fail; 208509b68dabSKevin Wolf } 208609b68dabSKevin Wolf 208709b68dabSKevin Wolf /* Create and open the file (protocol layer) */ 2088668f62ecSMarkus Armbruster ret = bdrv_create_file(filename, opts, errp); 208909b68dabSKevin Wolf if (ret < 0) { 209009b68dabSKevin Wolf goto fail; 209109b68dabSKevin Wolf } 209209b68dabSKevin Wolf 209309b68dabSKevin Wolf bs = bdrv_open(filename, NULL, NULL, 209409b68dabSKevin Wolf BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); 209509b68dabSKevin Wolf if (bs == NULL) { 209609b68dabSKevin Wolf ret = -EIO; 209709b68dabSKevin Wolf goto fail; 209809b68dabSKevin Wolf } 209909b68dabSKevin Wolf 210009b68dabSKevin Wolf /* Now get the QAPI type BlockdevCreateOptions */ 210109b68dabSKevin Wolf qdict_put_str(qdict, "driver", "vhdx"); 210209b68dabSKevin Wolf qdict_put_str(qdict, "file", bs->node_name); 210309b68dabSKevin Wolf 2104af91062eSMarkus Armbruster v = qobject_input_visitor_new_flat_confused(qdict, errp); 2105af91062eSMarkus Armbruster if (!v) { 210609b68dabSKevin Wolf ret = -EINVAL; 210709b68dabSKevin Wolf goto fail; 210809b68dabSKevin Wolf } 210909b68dabSKevin Wolf 2110b11a093cSMarkus Armbruster visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp); 211109b68dabSKevin Wolf visit_free(v); 2112b11a093cSMarkus Armbruster if (!create_options) { 211309b68dabSKevin Wolf ret = -EINVAL; 211409b68dabSKevin Wolf goto fail; 211509b68dabSKevin Wolf } 211609b68dabSKevin Wolf 211709b68dabSKevin Wolf /* Silently round up sizes: 211809b68dabSKevin Wolf * The image size is rounded to 512 bytes. Make the block and log size 211909b68dabSKevin Wolf * close to what was specified, but must be at least 1MB, and a multiple of 212009b68dabSKevin Wolf * 1 MB. Also respect VHDX_BLOCK_SIZE_MAX for block sizes. 
block_size = 0 212109b68dabSKevin Wolf * means auto, which is represented by a missing key in QAPI. */ 212209b68dabSKevin Wolf assert(create_options->driver == BLOCKDEV_DRIVER_VHDX); 212309b68dabSKevin Wolf create_options->u.vhdx.size = 212409b68dabSKevin Wolf ROUND_UP(create_options->u.vhdx.size, BDRV_SECTOR_SIZE); 212509b68dabSKevin Wolf 212609b68dabSKevin Wolf if (create_options->u.vhdx.has_log_size) { 212709b68dabSKevin Wolf create_options->u.vhdx.log_size = 212809b68dabSKevin Wolf ROUND_UP(create_options->u.vhdx.log_size, MiB); 212909b68dabSKevin Wolf } 213009b68dabSKevin Wolf if (create_options->u.vhdx.has_block_size) { 213109b68dabSKevin Wolf create_options->u.vhdx.block_size = 213209b68dabSKevin Wolf ROUND_UP(create_options->u.vhdx.block_size, MiB); 213309b68dabSKevin Wolf 213409b68dabSKevin Wolf if (create_options->u.vhdx.block_size == 0) { 213509b68dabSKevin Wolf create_options->u.vhdx.has_block_size = false; 213609b68dabSKevin Wolf } 213709b68dabSKevin Wolf if (create_options->u.vhdx.block_size > VHDX_BLOCK_SIZE_MAX) { 213809b68dabSKevin Wolf create_options->u.vhdx.block_size = VHDX_BLOCK_SIZE_MAX; 213909b68dabSKevin Wolf } 214009b68dabSKevin Wolf } 214109b68dabSKevin Wolf 214209b68dabSKevin Wolf /* Create the vhdx image (format layer) */ 214309b68dabSKevin Wolf ret = vhdx_co_create(create_options, errp); 214409b68dabSKevin Wolf 214509b68dabSKevin Wolf fail: 2146cb3e7f08SMarc-André Lureau qobject_unref(qdict); 214709b68dabSKevin Wolf bdrv_unref(bs); 214809b68dabSKevin Wolf qapi_free_BlockdevCreateOptions(create_options); 214909b68dabSKevin Wolf return ret; 215009b68dabSKevin Wolf } 215109b68dabSKevin Wolf 21527e30e6a6SJeff Cody /* If opened r/w, the VHDX driver will automatically replay the log, 21537e30e6a6SJeff Cody * if one is present, inside the vhdx_open() call. 
21547e30e6a6SJeff Cody * 21557e30e6a6SJeff Cody * If qemu-img check -r all is called, the image is automatically opened 21567e30e6a6SJeff Cody * r/w and any log has already been replayed, so there is nothing (currently) 21577e30e6a6SJeff Cody * for us to do here 21587e30e6a6SJeff Cody */ 21592fd61638SPaolo Bonzini static int coroutine_fn vhdx_co_check(BlockDriverState *bs, 21602fd61638SPaolo Bonzini BdrvCheckResult *result, 21617e30e6a6SJeff Cody BdrvCheckMode fix) 21627e30e6a6SJeff Cody { 21637e30e6a6SJeff Cody BDRVVHDXState *s = bs->opaque; 21647e30e6a6SJeff Cody 21657e30e6a6SJeff Cody if (s->log_replayed_on_open) { 21667e30e6a6SJeff Cody result->corruptions_fixed++; 21677e30e6a6SJeff Cody } 21686caaad46SPeter Lieven 21696caaad46SPeter Lieven vhdx_check_bat_entries(bs, &result->corruptions); 21706caaad46SPeter Lieven 21717e30e6a6SJeff Cody return 0; 21727e30e6a6SJeff Cody } 21737e30e6a6SJeff Cody 21749956688aSMax Reitz static int vhdx_has_zero_init(BlockDriverState *bs) 21759956688aSMax Reitz { 21769956688aSMax Reitz BDRVVHDXState *s = bs->opaque; 21779956688aSMax Reitz int state; 21789956688aSMax Reitz 21799956688aSMax Reitz /* 21809956688aSMax Reitz * Check the subformat: Fixed images have all BAT entries present, 21819956688aSMax Reitz * dynamic images have none (right after creation). It is 21829956688aSMax Reitz * therefore enough to check the first BAT entry. 
21839956688aSMax Reitz */ 21849956688aSMax Reitz if (!s->bat_entries) { 21859956688aSMax Reitz return 1; 21869956688aSMax Reitz } 21879956688aSMax Reitz 21889956688aSMax Reitz state = s->bat[0] & VHDX_BAT_STATE_BIT_MASK; 21899956688aSMax Reitz if (state == PAYLOAD_BLOCK_FULLY_PRESENT) { 21909956688aSMax Reitz /* Fixed subformat */ 21919956688aSMax Reitz return bdrv_has_zero_init(bs->file->bs); 21929956688aSMax Reitz } 21939956688aSMax Reitz 21949956688aSMax Reitz /* Dynamic subformat */ 21959956688aSMax Reitz return 1; 21969956688aSMax Reitz } 21979956688aSMax Reitz 21985366092cSChunyan Liu static QemuOptsList vhdx_create_opts = { 21995366092cSChunyan Liu .name = "vhdx-create-opts", 22005366092cSChunyan Liu .head = QTAILQ_HEAD_INITIALIZER(vhdx_create_opts.head), 22015366092cSChunyan Liu .desc = { 22023412f7b1SJeff Cody { 22033412f7b1SJeff Cody .name = BLOCK_OPT_SIZE, 22045366092cSChunyan Liu .type = QEMU_OPT_SIZE, 22053412f7b1SJeff Cody .help = "Virtual disk size; max of 64TB." 22063412f7b1SJeff Cody }, 22073412f7b1SJeff Cody { 22083412f7b1SJeff Cody .name = VHDX_BLOCK_OPT_LOG_SIZE, 22095366092cSChunyan Liu .type = QEMU_OPT_SIZE, 22105366092cSChunyan Liu .def_value_str = stringify(DEFAULT_LOG_SIZE), 22113412f7b1SJeff Cody .help = "Log size; min 1MB." 22123412f7b1SJeff Cody }, 22133412f7b1SJeff Cody { 22143412f7b1SJeff Cody .name = VHDX_BLOCK_OPT_BLOCK_SIZE, 22155366092cSChunyan Liu .type = QEMU_OPT_SIZE, 22165366092cSChunyan Liu .def_value_str = stringify(0), 221778ee6bd0SPhilippe Mathieu-Daudé .help = "Block Size; min 1MB, max 256MB. " 22183412f7b1SJeff Cody "0 means auto-calculate based on image size." 22193412f7b1SJeff Cody }, 22203412f7b1SJeff Cody { 22213412f7b1SJeff Cody .name = BLOCK_OPT_SUBFMT, 22225366092cSChunyan Liu .type = QEMU_OPT_STRING, 222378ee6bd0SPhilippe Mathieu-Daudé .help = "VHDX format type, can be either 'dynamic' or 'fixed'. " 22243412f7b1SJeff Cody "Default is 'dynamic'." 
22253412f7b1SJeff Cody }, 22263412f7b1SJeff Cody { 22273412f7b1SJeff Cody .name = VHDX_BLOCK_OPT_ZERO, 22285366092cSChunyan Liu .type = QEMU_OPT_BOOL, 222978ee6bd0SPhilippe Mathieu-Daudé .help = "Force use of payload blocks of type 'ZERO'. " 223078ee6bd0SPhilippe Mathieu-Daudé "Non-standard, but default. Do not set to 'off' when " 223130af51ceSJeff Cody "using 'qemu-img convert' with subformat=dynamic." 22323412f7b1SJeff Cody }, 22333412f7b1SJeff Cody { NULL } 22345366092cSChunyan Liu } 22353412f7b1SJeff Cody }; 22363412f7b1SJeff Cody 2237e8d4e5ffSJeff Cody static BlockDriver bdrv_vhdx = { 2238e8d4e5ffSJeff Cody .format_name = "vhdx", 2239e8d4e5ffSJeff Cody .instance_size = sizeof(BDRVVHDXState), 2240e8d4e5ffSJeff Cody .bdrv_probe = vhdx_probe, 2241e8d4e5ffSJeff Cody .bdrv_open = vhdx_open, 2242e8d4e5ffSJeff Cody .bdrv_close = vhdx_close, 2243e8d4e5ffSJeff Cody .bdrv_reopen_prepare = vhdx_reopen_prepare, 224469dca43dSMax Reitz .bdrv_child_perm = bdrv_default_perms, 2245e8d4e5ffSJeff Cody .bdrv_co_readv = vhdx_co_readv, 2246e8d4e5ffSJeff Cody .bdrv_co_writev = vhdx_co_writev, 224709b68dabSKevin Wolf .bdrv_co_create = vhdx_co_create, 2248efc75e2aSStefan Hajnoczi .bdrv_co_create_opts = vhdx_co_create_opts, 224997b00e28SPaolo Bonzini .bdrv_get_info = vhdx_get_info, 22502fd61638SPaolo Bonzini .bdrv_co_check = vhdx_co_check, 22519956688aSMax Reitz .bdrv_has_zero_init = vhdx_has_zero_init, 22523412f7b1SJeff Cody 2253d67066d8SMax Reitz .is_format = true, 22545366092cSChunyan Liu .create_opts = &vhdx_create_opts, 2255e8d4e5ffSJeff Cody }; 2256e8d4e5ffSJeff Cody 2257e8d4e5ffSJeff Cody static void bdrv_vhdx_init(void) 2258e8d4e5ffSJeff Cody { 2259e8d4e5ffSJeff Cody bdrv_register(&bdrv_vhdx); 2260e8d4e5ffSJeff Cody } 2261e8d4e5ffSJeff Cody 2262e8d4e5ffSJeff Cody block_init(bdrv_vhdx_init); 2263