10a43a1b5SJeff Cody /*
20a43a1b5SJeff Cody * Block driver for Hyper-V VHDX Images
30a43a1b5SJeff Cody *
40a43a1b5SJeff Cody * Copyright (c) 2013 Red Hat, Inc.,
50a43a1b5SJeff Cody *
60a43a1b5SJeff Cody * Authors:
70a43a1b5SJeff Cody * Jeff Cody <jcody@redhat.com>
80a43a1b5SJeff Cody *
90a43a1b5SJeff Cody * This is based on the "VHDX Format Specification v1.00", published 8/25/2012
100a43a1b5SJeff Cody * by Microsoft:
110a43a1b5SJeff Cody * https://www.microsoft.com/en-us/download/details.aspx?id=34750
120a43a1b5SJeff Cody *
130a43a1b5SJeff Cody * This file covers the functionality of the metadata log writing, parsing, and
140a43a1b5SJeff Cody * replay.
150a43a1b5SJeff Cody *
160a43a1b5SJeff Cody * This work is licensed under the terms of the GNU LGPL, version 2 or later.
170a43a1b5SJeff Cody * See the COPYING.LIB file in the top-level directory.
180a43a1b5SJeff Cody *
190a43a1b5SJeff Cody */
200b8fa32fSMarkus Armbruster
2180c71a24SPeter Maydell #include "qemu/osdep.h"
22da34e65cSMarkus Armbruster #include "qapi/error.h"
23e2c1c34fSMarkus Armbruster #include "block/block-io.h"
240a43a1b5SJeff Cody #include "block/block_int.h"
25d49b6836SMarkus Armbruster #include "qemu/error-report.h"
2658369e22SPaolo Bonzini #include "qemu/bswap.h"
275df022cfSPeter Maydell #include "qemu/memalign.h"
280d8c41daSMichael S. Tsirkin #include "vhdx.h"
290a43a1b5SJeff Cody
300a43a1b5SJeff Cody
310a43a1b5SJeff Cody typedef struct VHDXLogSequence {
320a43a1b5SJeff Cody bool valid;
330a43a1b5SJeff Cody uint32_t count;
340a43a1b5SJeff Cody VHDXLogEntries log;
350a43a1b5SJeff Cody VHDXLogEntryHeader hdr;
360a43a1b5SJeff Cody } VHDXLogSequence;
370a43a1b5SJeff Cody
380a43a1b5SJeff Cody typedef struct VHDXLogDescEntries {
390a43a1b5SJeff Cody VHDXLogEntryHeader hdr;
400a43a1b5SJeff Cody VHDXLogDescriptor desc[];
410a43a1b5SJeff Cody } VHDXLogDescEntries;
420a43a1b5SJeff Cody
430a43a1b5SJeff Cody static const MSGUID zero_guid = { 0 };
440a43a1b5SJeff Cody
/* The log located on the disk is a circular buffer containing
 * sectors of 4096 bytes each.
 *
 * It is assumed for the read/write functions below that the
 * circular buffer scheme keeps one sector open to indicate that
 * the buffer is full.  Given the validation methods used for each
 * sector, this scheme should be compatible with other schemes that
 * do not waste a sector.
 */
540a43a1b5SJeff Cody
550a43a1b5SJeff Cody
560a43a1b5SJeff Cody /* Allow peeking at the hdr entry at the beginning of the current
570a43a1b5SJeff Cody * read index, without advancing the read index */
5865ff757dSKevin Wolf static int GRAPH_RDLOCK
vhdx_log_peek_hdr(BlockDriverState * bs,VHDXLogEntries * log,VHDXLogEntryHeader * hdr)5965ff757dSKevin Wolf vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
600a43a1b5SJeff Cody VHDXLogEntryHeader *hdr)
610a43a1b5SJeff Cody {
620a43a1b5SJeff Cody int ret = 0;
630a43a1b5SJeff Cody uint64_t offset;
640a43a1b5SJeff Cody uint32_t read;
650a43a1b5SJeff Cody
660a43a1b5SJeff Cody assert(hdr != NULL);
670a43a1b5SJeff Cody
680a43a1b5SJeff Cody /* peek is only supported on sector boundaries */
690a43a1b5SJeff Cody if (log->read % VHDX_LOG_SECTOR_SIZE) {
700a43a1b5SJeff Cody ret = -EFAULT;
710a43a1b5SJeff Cody goto exit;
720a43a1b5SJeff Cody }
730a43a1b5SJeff Cody
740a43a1b5SJeff Cody read = log->read;
750a43a1b5SJeff Cody /* we are guaranteed that a) log sectors are 4096 bytes,
760a43a1b5SJeff Cody * and b) the log length is a multiple of 1MB. So, there
770a43a1b5SJeff Cody * is always a round number of sectors in the buffer */
780a43a1b5SJeff Cody if ((read + sizeof(VHDXLogEntryHeader)) > log->length) {
790a43a1b5SJeff Cody read = 0;
800a43a1b5SJeff Cody }
810a43a1b5SJeff Cody
820a43a1b5SJeff Cody if (read == log->write) {
830a43a1b5SJeff Cody ret = -EINVAL;
840a43a1b5SJeff Cody goto exit;
850a43a1b5SJeff Cody }
860a43a1b5SJeff Cody
870a43a1b5SJeff Cody offset = log->offset + read;
880a43a1b5SJeff Cody
8932cc71deSAlberto Faria ret = bdrv_pread(bs->file, offset, sizeof(VHDXLogEntryHeader), hdr, 0);
900a43a1b5SJeff Cody if (ret < 0) {
910a43a1b5SJeff Cody goto exit;
920a43a1b5SJeff Cody }
934f75b52aSJeff Cody vhdx_log_entry_hdr_le_import(hdr);
940a43a1b5SJeff Cody
950a43a1b5SJeff Cody exit:
960a43a1b5SJeff Cody return ret;
970a43a1b5SJeff Cody }
980a43a1b5SJeff Cody
990a43a1b5SJeff Cody /* Index increment for log, based on sector boundaries */
vhdx_log_inc_idx(uint32_t idx,uint64_t length)1000a43a1b5SJeff Cody static int vhdx_log_inc_idx(uint32_t idx, uint64_t length)
1010a43a1b5SJeff Cody {
1020a43a1b5SJeff Cody idx += VHDX_LOG_SECTOR_SIZE;
1030a43a1b5SJeff Cody /* we are guaranteed that a) log sectors are 4096 bytes,
1040a43a1b5SJeff Cody * and b) the log length is a multiple of 1MB. So, there
1050a43a1b5SJeff Cody * is always a round number of sectors in the buffer */
1060a43a1b5SJeff Cody return idx >= length ? 0 : idx;
1070a43a1b5SJeff Cody }
1080a43a1b5SJeff Cody
1090a43a1b5SJeff Cody
1100a43a1b5SJeff Cody /* Reset the log to empty */
vhdx_log_reset(BlockDriverState * bs,BDRVVHDXState * s)11165ff757dSKevin Wolf static void GRAPH_RDLOCK vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s)
1120a43a1b5SJeff Cody {
1130a43a1b5SJeff Cody MSGUID guid = { 0 };
1140a43a1b5SJeff Cody s->log.read = s->log.write = 0;
1150a43a1b5SJeff Cody /* a log guid of 0 indicates an empty log to any parser of v0
1160a43a1b5SJeff Cody * VHDX logs */
1170a43a1b5SJeff Cody vhdx_update_headers(bs, s, false, &guid);
1180a43a1b5SJeff Cody }
1190a43a1b5SJeff Cody
1200a43a1b5SJeff Cody /* Reads num_sectors from the log (all log sectors are 4096 bytes),
1210a43a1b5SJeff Cody * into buffer 'buffer'. Upon return, *sectors_read will contain
1220a43a1b5SJeff Cody * the number of sectors successfully read.
1230a43a1b5SJeff Cody *
1240a43a1b5SJeff Cody * It is assumed that 'buffer' is already allocated, and of sufficient
1250a43a1b5SJeff Cody * size (i.e. >= 4096*num_sectors).
1260a43a1b5SJeff Cody *
1270a43a1b5SJeff Cody * If 'peek' is true, then the tail (read) pointer for the circular buffer is
1280a43a1b5SJeff Cody * not modified.
1290a43a1b5SJeff Cody *
1300a43a1b5SJeff Cody * 0 is returned on success, -errno otherwise. */
13165ff757dSKevin Wolf static int GRAPH_RDLOCK
vhdx_log_read_sectors(BlockDriverState * bs,VHDXLogEntries * log,uint32_t * sectors_read,void * buffer,uint32_t num_sectors,bool peek)13265ff757dSKevin Wolf vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
1330a43a1b5SJeff Cody uint32_t *sectors_read, void *buffer,
1340a43a1b5SJeff Cody uint32_t num_sectors, bool peek)
1350a43a1b5SJeff Cody {
1360a43a1b5SJeff Cody int ret = 0;
1370a43a1b5SJeff Cody uint64_t offset;
1380a43a1b5SJeff Cody uint32_t read;
1390a43a1b5SJeff Cody
1400a43a1b5SJeff Cody read = log->read;
1410a43a1b5SJeff Cody
1420a43a1b5SJeff Cody *sectors_read = 0;
1430a43a1b5SJeff Cody while (num_sectors) {
1440a43a1b5SJeff Cody if (read == log->write) {
1450a43a1b5SJeff Cody /* empty */
1460a43a1b5SJeff Cody break;
1470a43a1b5SJeff Cody }
1480a43a1b5SJeff Cody offset = log->offset + read;
1490a43a1b5SJeff Cody
15032cc71deSAlberto Faria ret = bdrv_pread(bs->file, offset, VHDX_LOG_SECTOR_SIZE, buffer, 0);
1510a43a1b5SJeff Cody if (ret < 0) {
1520a43a1b5SJeff Cody goto exit;
1530a43a1b5SJeff Cody }
1540a43a1b5SJeff Cody read = vhdx_log_inc_idx(read, log->length);
1550a43a1b5SJeff Cody
1560a43a1b5SJeff Cody *sectors_read = *sectors_read + 1;
1570a43a1b5SJeff Cody num_sectors--;
1580a43a1b5SJeff Cody }
1590a43a1b5SJeff Cody
1600a43a1b5SJeff Cody exit:
1610a43a1b5SJeff Cody if (!peek) {
1620a43a1b5SJeff Cody log->read = read;
1630a43a1b5SJeff Cody }
1640a43a1b5SJeff Cody return ret;
1650a43a1b5SJeff Cody }
1660a43a1b5SJeff Cody
1678adc5233SJeff Cody /* Writes num_sectors to the log (all log sectors are 4096 bytes),
1688adc5233SJeff Cody * from buffer 'buffer'. Upon return, *sectors_written will contain
1698adc5233SJeff Cody * the number of sectors successfully written.
1708adc5233SJeff Cody *
1718adc5233SJeff Cody * It is assumed that 'buffer' is at least 4096*num_sectors large.
1728adc5233SJeff Cody *
1738adc5233SJeff Cody * 0 is returned on success, -errno otherwise */
174f6b08994SPaolo Bonzini static int coroutine_fn GRAPH_RDLOCK
vhdx_log_write_sectors(BlockDriverState * bs,VHDXLogEntries * log,uint32_t * sectors_written,void * buffer,uint32_t num_sectors)175f6b08994SPaolo Bonzini vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
1768adc5233SJeff Cody uint32_t *sectors_written, void *buffer,
1778adc5233SJeff Cody uint32_t num_sectors)
1788adc5233SJeff Cody {
1798adc5233SJeff Cody int ret = 0;
1808adc5233SJeff Cody uint64_t offset;
1818adc5233SJeff Cody uint32_t write;
1828adc5233SJeff Cody void *buffer_tmp;
1838adc5233SJeff Cody BDRVVHDXState *s = bs->opaque;
1848adc5233SJeff Cody
1858adc5233SJeff Cody ret = vhdx_user_visible_write(bs, s);
1868adc5233SJeff Cody if (ret < 0) {
1878adc5233SJeff Cody goto exit;
1888adc5233SJeff Cody }
1898adc5233SJeff Cody
1908adc5233SJeff Cody write = log->write;
1918adc5233SJeff Cody
1928adc5233SJeff Cody buffer_tmp = buffer;
1938adc5233SJeff Cody while (num_sectors) {
1948adc5233SJeff Cody
1958adc5233SJeff Cody offset = log->offset + write;
1968adc5233SJeff Cody write = vhdx_log_inc_idx(write, log->length);
1978adc5233SJeff Cody if (write == log->read) {
1988adc5233SJeff Cody /* full */
1998adc5233SJeff Cody break;
2008adc5233SJeff Cody }
201f6b08994SPaolo Bonzini ret = bdrv_co_pwrite(bs->file, offset, VHDX_LOG_SECTOR_SIZE, buffer_tmp, 0);
2028adc5233SJeff Cody if (ret < 0) {
2038adc5233SJeff Cody goto exit;
2048adc5233SJeff Cody }
2058adc5233SJeff Cody buffer_tmp += VHDX_LOG_SECTOR_SIZE;
2068adc5233SJeff Cody
2078adc5233SJeff Cody log->write = write;
2088adc5233SJeff Cody *sectors_written = *sectors_written + 1;
2098adc5233SJeff Cody num_sectors--;
2108adc5233SJeff Cody }
2118adc5233SJeff Cody
2128adc5233SJeff Cody exit:
2138adc5233SJeff Cody return ret;
2148adc5233SJeff Cody }
2158adc5233SJeff Cody
2168adc5233SJeff Cody
2170a43a1b5SJeff Cody /* Validates a log entry header */
vhdx_log_hdr_is_valid(VHDXLogEntries * log,VHDXLogEntryHeader * hdr,BDRVVHDXState * s)2180a43a1b5SJeff Cody static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
2190a43a1b5SJeff Cody BDRVVHDXState *s)
2200a43a1b5SJeff Cody {
2210a43a1b5SJeff Cody int valid = false;
2220a43a1b5SJeff Cody
2234f75b52aSJeff Cody if (hdr->signature != VHDX_LOG_SIGNATURE) {
2240a43a1b5SJeff Cody goto exit;
2250a43a1b5SJeff Cody }
2260a43a1b5SJeff Cody
2270a43a1b5SJeff Cody /* if the individual entry length is larger than the whole log
2280a43a1b5SJeff Cody * buffer, that is obviously invalid */
2290a43a1b5SJeff Cody if (log->length < hdr->entry_length) {
2300a43a1b5SJeff Cody goto exit;
2310a43a1b5SJeff Cody }
2320a43a1b5SJeff Cody
2330a43a1b5SJeff Cody /* length of entire entry must be in units of 4KB (log sector size) */
2340a43a1b5SJeff Cody if (hdr->entry_length % (VHDX_LOG_SECTOR_SIZE)) {
2350a43a1b5SJeff Cody goto exit;
2360a43a1b5SJeff Cody }
2370a43a1b5SJeff Cody
2380a43a1b5SJeff Cody /* per spec, sequence # must be > 0 */
2390a43a1b5SJeff Cody if (hdr->sequence_number == 0) {
2400a43a1b5SJeff Cody goto exit;
2410a43a1b5SJeff Cody }
2420a43a1b5SJeff Cody
2430a43a1b5SJeff Cody /* log entries are only valid if they match the file-wide log guid
2440a43a1b5SJeff Cody * found in the active header */
2450a43a1b5SJeff Cody if (!guid_eq(hdr->log_guid, s->headers[s->curr_header]->log_guid)) {
2460a43a1b5SJeff Cody goto exit;
2470a43a1b5SJeff Cody }
2480a43a1b5SJeff Cody
2490a43a1b5SJeff Cody if (hdr->descriptor_count * sizeof(VHDXLogDescriptor) > hdr->entry_length) {
2500a43a1b5SJeff Cody goto exit;
2510a43a1b5SJeff Cody }
2520a43a1b5SJeff Cody
2530a43a1b5SJeff Cody valid = true;
2540a43a1b5SJeff Cody
2550a43a1b5SJeff Cody exit:
2560a43a1b5SJeff Cody return valid;
2570a43a1b5SJeff Cody }
2580a43a1b5SJeff Cody
2590a43a1b5SJeff Cody /*
2600a43a1b5SJeff Cody * Given a log header, this will validate that the descriptors and the
2610a43a1b5SJeff Cody * corresponding data sectors (if applicable)
2620a43a1b5SJeff Cody *
2630a43a1b5SJeff Cody * Validation consists of:
2640a43a1b5SJeff Cody * 1. Making sure the sequence numbers matches the entry header
2650a43a1b5SJeff Cody * 2. Verifying a valid signature ('zero' or 'desc' for descriptors)
2660a43a1b5SJeff Cody * 3. File offset field is a multiple of 4KB
2670a43a1b5SJeff Cody * 4. If a data descriptor, the corresponding data sector
2680a43a1b5SJeff Cody * has its signature ('data') and matching sequence number
2690a43a1b5SJeff Cody *
2700a43a1b5SJeff Cody * @desc: the data buffer containing the descriptor
2710a43a1b5SJeff Cody * @hdr: the log entry header
2720a43a1b5SJeff Cody *
2730a43a1b5SJeff Cody * Returns true if valid
2740a43a1b5SJeff Cody */
vhdx_log_desc_is_valid(VHDXLogDescriptor * desc,VHDXLogEntryHeader * hdr)2750a43a1b5SJeff Cody static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc,
2760a43a1b5SJeff Cody VHDXLogEntryHeader *hdr)
2770a43a1b5SJeff Cody {
2780a43a1b5SJeff Cody bool ret = false;
2790a43a1b5SJeff Cody
2800a43a1b5SJeff Cody if (desc->sequence_number != hdr->sequence_number) {
2810a43a1b5SJeff Cody goto exit;
2820a43a1b5SJeff Cody }
2830a43a1b5SJeff Cody if (desc->file_offset % VHDX_LOG_SECTOR_SIZE) {
2840a43a1b5SJeff Cody goto exit;
2850a43a1b5SJeff Cody }
2860a43a1b5SJeff Cody
2874f75b52aSJeff Cody if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
2880a43a1b5SJeff Cody if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) {
2890a43a1b5SJeff Cody /* valid */
2900a43a1b5SJeff Cody ret = true;
2910a43a1b5SJeff Cody }
2924f75b52aSJeff Cody } else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
2930a43a1b5SJeff Cody /* valid */
2940a43a1b5SJeff Cody ret = true;
2950a43a1b5SJeff Cody }
2960a43a1b5SJeff Cody
2970a43a1b5SJeff Cody exit:
2980a43a1b5SJeff Cody return ret;
2990a43a1b5SJeff Cody }
3000a43a1b5SJeff Cody
3010a43a1b5SJeff Cody
/* Prior to sector data for a log entry, there is the header
 * and the descriptors referenced in the header:
 *
 * [] = 4KB sector
 *
 * [ hdr, desc ][   desc   ][ ... ][ data ][ ... ]
 *
 * The first sector in a log entry has a 64 byte header, and
 * up to 126 32-byte descriptors.  If more descriptors than
 * 126 are required, then subsequent sectors can have up to 128
 * descriptors.  Each sector is 4KB.  Data follows the descriptor
 * sectors.
 *
 * Returns the number of sectors needed to hold the header plus desc_cnt
 * descriptors.
 *
 * This never returns 0, for any desc_cnt: the arithmetic is done in 64
 * bits so that counts close to UINT32_MAX cannot wrap around to a tiny
 * (or zero) sector count.  The largest possible result, 2^25 + 1, fits
 * in an int.
 */
static int vhdx_compute_desc_sectors(uint32_t desc_cnt)
{
    enum {
        HDR_SLOTS = 2,          /* 64-byte header == two 32-byte slots */
        SLOTS_PER_SECTOR = 128  /* 4096 / 32 */
    };
    const uint64_t slots = (uint64_t)desc_cnt + HDR_SLOTS;

    /* round up to whole sectors */
    return (int)((slots + SLOTS_PER_SECTOR - 1) / SLOTS_PER_SECTOR);
}
3320a43a1b5SJeff Cody
3330a43a1b5SJeff Cody
3340a43a1b5SJeff Cody /* Reads the log header, and subsequent descriptors (if any). This
3350a43a1b5SJeff Cody * will allocate all the space for buffer, which must be NULL when
3360a43a1b5SJeff Cody * passed into this function. Each descriptor will also be validated,
3370a43a1b5SJeff Cody * and error returned if any are invalid. */
33865ff757dSKevin Wolf static int GRAPH_RDLOCK
vhdx_log_read_desc(BlockDriverState * bs,BDRVVHDXState * s,VHDXLogEntries * log,VHDXLogDescEntries ** buffer,bool convert_endian)33965ff757dSKevin Wolf vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s, VHDXLogEntries *log,
34065ff757dSKevin Wolf VHDXLogDescEntries **buffer, bool convert_endian)
3410a43a1b5SJeff Cody {
3420a43a1b5SJeff Cody int ret = 0;
3430a43a1b5SJeff Cody uint32_t desc_sectors;
3440a43a1b5SJeff Cody uint32_t sectors_read;
3450a43a1b5SJeff Cody VHDXLogEntryHeader hdr;
3460a43a1b5SJeff Cody VHDXLogDescEntries *desc_entries = NULL;
3474f75b52aSJeff Cody VHDXLogDescriptor desc;
3480a43a1b5SJeff Cody int i;
3490a43a1b5SJeff Cody
3500a43a1b5SJeff Cody assert(*buffer == NULL);
3510a43a1b5SJeff Cody
3520a43a1b5SJeff Cody ret = vhdx_log_peek_hdr(bs, log, &hdr);
3530a43a1b5SJeff Cody if (ret < 0) {
3540a43a1b5SJeff Cody goto exit;
3550a43a1b5SJeff Cody }
3564f75b52aSJeff Cody
3570a43a1b5SJeff Cody if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
3580a43a1b5SJeff Cody ret = -EINVAL;
3590a43a1b5SJeff Cody goto exit;
3600a43a1b5SJeff Cody }
3610a43a1b5SJeff Cody
3620a43a1b5SJeff Cody desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
3639a4f4c31SKevin Wolf desc_entries = qemu_try_blockalign(bs->file->bs,
364a67e128aSKevin Wolf desc_sectors * VHDX_LOG_SECTOR_SIZE);
365a67e128aSKevin Wolf if (desc_entries == NULL) {
366a67e128aSKevin Wolf ret = -ENOMEM;
367a67e128aSKevin Wolf goto exit;
368a67e128aSKevin Wolf }
3690a43a1b5SJeff Cody
3700a43a1b5SJeff Cody ret = vhdx_log_read_sectors(bs, log, §ors_read, desc_entries,
3710a43a1b5SJeff Cody desc_sectors, false);
3720a43a1b5SJeff Cody if (ret < 0) {
3730a43a1b5SJeff Cody goto free_and_exit;
3740a43a1b5SJeff Cody }
3750a43a1b5SJeff Cody if (sectors_read != desc_sectors) {
3760a43a1b5SJeff Cody ret = -EINVAL;
3770a43a1b5SJeff Cody goto free_and_exit;
3780a43a1b5SJeff Cody }
3790a43a1b5SJeff Cody
3800a43a1b5SJeff Cody /* put in proper endianness, and validate each desc */
3810a43a1b5SJeff Cody for (i = 0; i < hdr.descriptor_count; i++) {
3824f75b52aSJeff Cody desc = desc_entries->desc[i];
3834f75b52aSJeff Cody vhdx_log_desc_le_import(&desc);
3844f75b52aSJeff Cody if (convert_endian) {
3854f75b52aSJeff Cody desc_entries->desc[i] = desc;
3864f75b52aSJeff Cody }
3874f75b52aSJeff Cody if (vhdx_log_desc_is_valid(&desc, &hdr) == false) {
3880a43a1b5SJeff Cody ret = -EINVAL;
3890a43a1b5SJeff Cody goto free_and_exit;
3900a43a1b5SJeff Cody }
3910a43a1b5SJeff Cody }
3924f75b52aSJeff Cody if (convert_endian) {
3934f75b52aSJeff Cody desc_entries->hdr = hdr;
3944f75b52aSJeff Cody }
3950a43a1b5SJeff Cody
3960a43a1b5SJeff Cody *buffer = desc_entries;
3970a43a1b5SJeff Cody goto exit;
3980a43a1b5SJeff Cody
3990a43a1b5SJeff Cody free_and_exit:
4000a43a1b5SJeff Cody qemu_vfree(desc_entries);
4010a43a1b5SJeff Cody exit:
4020a43a1b5SJeff Cody return ret;
4030a43a1b5SJeff Cody }
4040a43a1b5SJeff Cody
4050a43a1b5SJeff Cody
4060a43a1b5SJeff Cody /* Flushes the descriptor described by desc to the VHDX image file.
4070a43a1b5SJeff Cody * If the descriptor is a data descriptor, than 'data' must be non-NULL,
4080a43a1b5SJeff Cody * and >= 4096 bytes (VHDX_LOG_SECTOR_SIZE), containing the data to be
4090a43a1b5SJeff Cody * written.
4100a43a1b5SJeff Cody *
4110a43a1b5SJeff Cody * Verification is performed to make sure the sequence numbers of a data
4120a43a1b5SJeff Cody * descriptor match the sequence number in the desc.
4130a43a1b5SJeff Cody *
4140a43a1b5SJeff Cody * For a zero descriptor, it may describe multiple sectors to fill with zeroes.
4150a43a1b5SJeff Cody * In this case, it should be noted that zeroes are written to disk, and the
4160a43a1b5SJeff Cody * image file is not extended as a sparse file. */
41765ff757dSKevin Wolf static int GRAPH_RDLOCK
vhdx_log_flush_desc(BlockDriverState * bs,VHDXLogDescriptor * desc,VHDXLogDataSector * data)41865ff757dSKevin Wolf vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
4190a43a1b5SJeff Cody VHDXLogDataSector *data)
4200a43a1b5SJeff Cody {
4210a43a1b5SJeff Cody int ret = 0;
4220a43a1b5SJeff Cody uint64_t seq, file_offset;
4230a43a1b5SJeff Cody uint32_t offset = 0;
4240a43a1b5SJeff Cody void *buffer = NULL;
4250a43a1b5SJeff Cody uint64_t count = 1;
4260a43a1b5SJeff Cody int i;
4270a43a1b5SJeff Cody
4280a43a1b5SJeff Cody buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
4290a43a1b5SJeff Cody
4304f75b52aSJeff Cody if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
4310a43a1b5SJeff Cody /* data sector */
4320a43a1b5SJeff Cody if (data == NULL) {
4330a43a1b5SJeff Cody ret = -EFAULT;
4340a43a1b5SJeff Cody goto exit;
4350a43a1b5SJeff Cody }
4360a43a1b5SJeff Cody
4370a43a1b5SJeff Cody /* The sequence number of the data sector must match that
4380a43a1b5SJeff Cody * in the descriptor */
4390a43a1b5SJeff Cody seq = data->sequence_high;
4400a43a1b5SJeff Cody seq <<= 32;
4410a43a1b5SJeff Cody seq |= data->sequence_low & 0xffffffff;
4420a43a1b5SJeff Cody
4430a43a1b5SJeff Cody if (seq != desc->sequence_number) {
4440a43a1b5SJeff Cody ret = -EINVAL;
4450a43a1b5SJeff Cody goto exit;
4460a43a1b5SJeff Cody }
4470a43a1b5SJeff Cody
4480a43a1b5SJeff Cody /* Each data sector is in total 4096 bytes, however the first
4490a43a1b5SJeff Cody * 8 bytes, and last 4 bytes, are located in the descriptor */
4500a43a1b5SJeff Cody memcpy(buffer, &desc->leading_bytes, 8);
4510a43a1b5SJeff Cody offset += 8;
4520a43a1b5SJeff Cody
4530a43a1b5SJeff Cody memcpy(buffer+offset, data->data, 4084);
4540a43a1b5SJeff Cody offset += 4084;
4550a43a1b5SJeff Cody
4560a43a1b5SJeff Cody memcpy(buffer+offset, &desc->trailing_bytes, 4);
4570a43a1b5SJeff Cody
4584f75b52aSJeff Cody } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
4590a43a1b5SJeff Cody /* write 'count' sectors of sector */
4600a43a1b5SJeff Cody memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
4610a43a1b5SJeff Cody count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
462349592e0SJeff Cody } else {
463349592e0SJeff Cody error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32,
464349592e0SJeff Cody desc->signature);
465349592e0SJeff Cody ret = -EINVAL;
466349592e0SJeff Cody goto exit;
4670a43a1b5SJeff Cody }
4680a43a1b5SJeff Cody
4690a43a1b5SJeff Cody file_offset = desc->file_offset;
4700a43a1b5SJeff Cody
4710a43a1b5SJeff Cody /* count is only > 1 if we are writing zeroes */
4720a43a1b5SJeff Cody for (i = 0; i < count; i++) {
47332cc71deSAlberto Faria ret = bdrv_pwrite_sync(bs->file, file_offset, VHDX_LOG_SECTOR_SIZE,
47432cc71deSAlberto Faria buffer, 0);
4750a43a1b5SJeff Cody if (ret < 0) {
4760a43a1b5SJeff Cody goto exit;
4770a43a1b5SJeff Cody }
4780a43a1b5SJeff Cody file_offset += VHDX_LOG_SECTOR_SIZE;
4790a43a1b5SJeff Cody }
4800a43a1b5SJeff Cody
4810a43a1b5SJeff Cody exit:
4820a43a1b5SJeff Cody qemu_vfree(buffer);
4830a43a1b5SJeff Cody return ret;
4840a43a1b5SJeff Cody }
4850a43a1b5SJeff Cody
4860a43a1b5SJeff Cody /* Flush the entire log (as described by 'logs') to the VHDX image
4870a43a1b5SJeff Cody * file, and then set the log to 'empty' status once complete.
4880a43a1b5SJeff Cody *
4890a43a1b5SJeff Cody * The log entries should be validate prior to flushing */
49065ff757dSKevin Wolf static int GRAPH_RDLOCK
vhdx_log_flush(BlockDriverState * bs,BDRVVHDXState * s,VHDXLogSequence * logs)49165ff757dSKevin Wolf vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, VHDXLogSequence *logs)
4920a43a1b5SJeff Cody {
4930a43a1b5SJeff Cody int ret = 0;
4940a43a1b5SJeff Cody int i;
4950a43a1b5SJeff Cody uint32_t cnt, sectors_read;
4960a43a1b5SJeff Cody uint64_t new_file_size;
4970a43a1b5SJeff Cody void *data = NULL;
4983f910692SJeff Cody int64_t file_length;
4990a43a1b5SJeff Cody VHDXLogDescEntries *desc_entries = NULL;
5000a43a1b5SJeff Cody VHDXLogEntryHeader hdr_tmp = { 0 };
5010a43a1b5SJeff Cody
5020a43a1b5SJeff Cody cnt = logs->count;
5030a43a1b5SJeff Cody
5040a43a1b5SJeff Cody data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
5050a43a1b5SJeff Cody
5060a43a1b5SJeff Cody ret = vhdx_user_visible_write(bs, s);
5070a43a1b5SJeff Cody if (ret < 0) {
5080a43a1b5SJeff Cody goto exit;
5090a43a1b5SJeff Cody }
5100a43a1b5SJeff Cody
5110a43a1b5SJeff Cody /* each iteration represents one log sequence, which may span multiple
5120a43a1b5SJeff Cody * sectors */
5130a43a1b5SJeff Cody while (cnt--) {
5140a43a1b5SJeff Cody ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp);
5150a43a1b5SJeff Cody if (ret < 0) {
5160a43a1b5SJeff Cody goto exit;
5170a43a1b5SJeff Cody }
5183f910692SJeff Cody file_length = bdrv_getlength(bs->file->bs);
5193f910692SJeff Cody if (file_length < 0) {
5203f910692SJeff Cody ret = file_length;
5213f910692SJeff Cody goto exit;
5223f910692SJeff Cody }
5230a43a1b5SJeff Cody /* if the log shows a FlushedFileOffset larger than our current file
5240a43a1b5SJeff Cody * size, then that means the file has been truncated / corrupted, and
5250a43a1b5SJeff Cody * we must refused to open it / use it */
5263f910692SJeff Cody if (hdr_tmp.flushed_file_offset > file_length) {
5270a43a1b5SJeff Cody ret = -EINVAL;
5280a43a1b5SJeff Cody goto exit;
5290a43a1b5SJeff Cody }
5300a43a1b5SJeff Cody
5314f75b52aSJeff Cody ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true);
5320a43a1b5SJeff Cody if (ret < 0) {
5330a43a1b5SJeff Cody goto exit;
5340a43a1b5SJeff Cody }
5350a43a1b5SJeff Cody
5360a43a1b5SJeff Cody for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
5374f75b52aSJeff Cody if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) {
5380a43a1b5SJeff Cody /* data sector, so read a sector to flush */
5390a43a1b5SJeff Cody ret = vhdx_log_read_sectors(bs, &logs->log, §ors_read,
5400a43a1b5SJeff Cody data, 1, false);
5410a43a1b5SJeff Cody if (ret < 0) {
5420a43a1b5SJeff Cody goto exit;
5430a43a1b5SJeff Cody }
5440a43a1b5SJeff Cody if (sectors_read != 1) {
5450a43a1b5SJeff Cody ret = -EINVAL;
5460a43a1b5SJeff Cody goto exit;
5470a43a1b5SJeff Cody }
5484f75b52aSJeff Cody vhdx_log_data_le_import(data);
5490a43a1b5SJeff Cody }
5500a43a1b5SJeff Cody
5510a43a1b5SJeff Cody ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
5520a43a1b5SJeff Cody if (ret < 0) {
5530a43a1b5SJeff Cody goto exit;
5540a43a1b5SJeff Cody }
5550a43a1b5SJeff Cody }
5563f910692SJeff Cody if (file_length < desc_entries->hdr.last_file_offset) {
5570a43a1b5SJeff Cody new_file_size = desc_entries->hdr.last_file_offset;
5580cb98af2SStefano Garzarella if (new_file_size % (1 * MiB)) {
5590a43a1b5SJeff Cody /* round up to nearest 1MB boundary */
56027539ac5SJeff Cody new_file_size = QEMU_ALIGN_UP(new_file_size, MiB);
56127539ac5SJeff Cody if (new_file_size > INT64_MAX) {
56227539ac5SJeff Cody ret = -EINVAL;
56327539ac5SJeff Cody goto exit;
56427539ac5SJeff Cody }
565c80d8b06SMax Reitz ret = bdrv_truncate(bs->file, new_file_size, false,
5667b8e4857SKevin Wolf PREALLOC_MODE_OFF, 0, NULL);
56795d72983SJeff Cody if (ret < 0) {
56895d72983SJeff Cody goto exit;
56995d72983SJeff Cody }
5700a43a1b5SJeff Cody }
5710a43a1b5SJeff Cody }
5720a43a1b5SJeff Cody qemu_vfree(desc_entries);
5730a43a1b5SJeff Cody desc_entries = NULL;
5740a43a1b5SJeff Cody }
5750a43a1b5SJeff Cody
576c6572fa0SJeff Cody ret = bdrv_flush(bs);
577c6572fa0SJeff Cody if (ret < 0) {
578c6572fa0SJeff Cody goto exit;
579c6572fa0SJeff Cody }
5800a43a1b5SJeff Cody /* once the log is fully flushed, indicate that we have an empty log
5810a43a1b5SJeff Cody * now. This also sets the log guid to 0, to indicate an empty log */
5820a43a1b5SJeff Cody vhdx_log_reset(bs, s);
5830a43a1b5SJeff Cody
5840a43a1b5SJeff Cody exit:
5850a43a1b5SJeff Cody qemu_vfree(data);
5860a43a1b5SJeff Cody qemu_vfree(desc_entries);
5870a43a1b5SJeff Cody return ret;
5880a43a1b5SJeff Cody }
5890a43a1b5SJeff Cody
59065ff757dSKevin Wolf static int GRAPH_RDLOCK
vhdx_validate_log_entry(BlockDriverState * bs,BDRVVHDXState * s,VHDXLogEntries * log,uint64_t seq,bool * valid,VHDXLogEntryHeader * entry)59165ff757dSKevin Wolf vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
5920a43a1b5SJeff Cody VHDXLogEntries *log, uint64_t seq,
5930a43a1b5SJeff Cody bool *valid, VHDXLogEntryHeader *entry)
5940a43a1b5SJeff Cody {
5950a43a1b5SJeff Cody int ret = 0;
5960a43a1b5SJeff Cody VHDXLogEntryHeader hdr;
5970a43a1b5SJeff Cody void *buffer = NULL;
5980a43a1b5SJeff Cody uint32_t i, desc_sectors, total_sectors, crc;
5990a43a1b5SJeff Cody uint32_t sectors_read = 0;
6000a43a1b5SJeff Cody VHDXLogDescEntries *desc_buffer = NULL;
6010a43a1b5SJeff Cody
6020a43a1b5SJeff Cody *valid = false;
6030a43a1b5SJeff Cody
6040a43a1b5SJeff Cody ret = vhdx_log_peek_hdr(bs, log, &hdr);
6050a43a1b5SJeff Cody if (ret < 0) {
6060a43a1b5SJeff Cody goto inc_and_exit;
6070a43a1b5SJeff Cody }
6080a43a1b5SJeff Cody
6090a43a1b5SJeff Cody if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
6100a43a1b5SJeff Cody goto inc_and_exit;
6110a43a1b5SJeff Cody }
6120a43a1b5SJeff Cody
6130a43a1b5SJeff Cody if (seq > 0) {
6140a43a1b5SJeff Cody if (hdr.sequence_number != seq + 1) {
6150a43a1b5SJeff Cody goto inc_and_exit;
6160a43a1b5SJeff Cody }
6170a43a1b5SJeff Cody }
6180a43a1b5SJeff Cody
6190a43a1b5SJeff Cody desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
6200a43a1b5SJeff Cody
6214f75b52aSJeff Cody /* Read all log sectors, and calculate log checksum */
6220a43a1b5SJeff Cody
6230a43a1b5SJeff Cody total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;
6240a43a1b5SJeff Cody
6250a43a1b5SJeff Cody
626dc6fb73dSDeepak Kathayat /* read_desc() will increment the read idx */
6274f75b52aSJeff Cody ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false);
6280a43a1b5SJeff Cody if (ret < 0) {
6290a43a1b5SJeff Cody goto free_and_exit;
6300a43a1b5SJeff Cody }
6310a43a1b5SJeff Cody
6320a43a1b5SJeff Cody crc = vhdx_checksum_calc(0xffffffff, (void *)desc_buffer,
6330a43a1b5SJeff Cody desc_sectors * VHDX_LOG_SECTOR_SIZE, 4);
6340a43a1b5SJeff Cody crc ^= 0xffffffff;
6350a43a1b5SJeff Cody
6360a43a1b5SJeff Cody buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
6370a43a1b5SJeff Cody if (total_sectors > desc_sectors) {
6380a43a1b5SJeff Cody for (i = 0; i < total_sectors - desc_sectors; i++) {
6390a43a1b5SJeff Cody sectors_read = 0;
6400a43a1b5SJeff Cody ret = vhdx_log_read_sectors(bs, log, §ors_read, buffer,
6410a43a1b5SJeff Cody 1, false);
6420a43a1b5SJeff Cody if (ret < 0 || sectors_read != 1) {
6430a43a1b5SJeff Cody goto free_and_exit;
6440a43a1b5SJeff Cody }
6450a43a1b5SJeff Cody crc = vhdx_checksum_calc(crc, buffer, VHDX_LOG_SECTOR_SIZE, -1);
6460a43a1b5SJeff Cody crc ^= 0xffffffff;
6470a43a1b5SJeff Cody }
6480a43a1b5SJeff Cody }
6490a43a1b5SJeff Cody crc ^= 0xffffffff;
6504f75b52aSJeff Cody if (crc != hdr.checksum) {
6510a43a1b5SJeff Cody goto free_and_exit;
6520a43a1b5SJeff Cody }
6530a43a1b5SJeff Cody
6540a43a1b5SJeff Cody *valid = true;
6550a43a1b5SJeff Cody *entry = hdr;
6560a43a1b5SJeff Cody goto free_and_exit;
6570a43a1b5SJeff Cody
6580a43a1b5SJeff Cody inc_and_exit:
6590a43a1b5SJeff Cody log->read = vhdx_log_inc_idx(log->read, log->length);
6600a43a1b5SJeff Cody
6610a43a1b5SJeff Cody free_and_exit:
6620a43a1b5SJeff Cody qemu_vfree(buffer);
6630a43a1b5SJeff Cody qemu_vfree(desc_buffer);
6640a43a1b5SJeff Cody return ret;
6650a43a1b5SJeff Cody }
6660a43a1b5SJeff Cody
6670a43a1b5SJeff Cody /* Search through the log circular buffer, and find the valid, active
6680a43a1b5SJeff Cody * log sequence, if any exists
6690a43a1b5SJeff Cody * */
67065ff757dSKevin Wolf static int GRAPH_RDLOCK
vhdx_log_search(BlockDriverState * bs,BDRVVHDXState * s,VHDXLogSequence * logs)67165ff757dSKevin Wolf vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s, VHDXLogSequence *logs)
6720a43a1b5SJeff Cody {
6730a43a1b5SJeff Cody int ret = 0;
6740a43a1b5SJeff Cody uint32_t tail;
6750a43a1b5SJeff Cody bool seq_valid = false;
6760a43a1b5SJeff Cody VHDXLogSequence candidate = { 0 };
6770a43a1b5SJeff Cody VHDXLogEntryHeader hdr = { 0 };
6780a43a1b5SJeff Cody VHDXLogEntries curr_log;
6790a43a1b5SJeff Cody
6800a43a1b5SJeff Cody memcpy(&curr_log, &s->log, sizeof(VHDXLogEntries));
6810a43a1b5SJeff Cody curr_log.write = curr_log.length; /* assume log is full */
6820a43a1b5SJeff Cody curr_log.read = 0;
6830a43a1b5SJeff Cody
6840a43a1b5SJeff Cody
6850a43a1b5SJeff Cody /* now we will go through the whole log sector by sector, until
6860a43a1b5SJeff Cody * we find a valid, active log sequence, or reach the end of the
6870a43a1b5SJeff Cody * log buffer */
6880a43a1b5SJeff Cody for (;;) {
6890a43a1b5SJeff Cody uint64_t curr_seq = 0;
6900a43a1b5SJeff Cody VHDXLogSequence current = { 0 };
6910a43a1b5SJeff Cody
6920a43a1b5SJeff Cody tail = curr_log.read;
6930a43a1b5SJeff Cody
6940a43a1b5SJeff Cody ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
6950a43a1b5SJeff Cody &seq_valid, &hdr);
6960a43a1b5SJeff Cody if (ret < 0) {
6970a43a1b5SJeff Cody goto exit;
6980a43a1b5SJeff Cody }
6990a43a1b5SJeff Cody
7000a43a1b5SJeff Cody if (seq_valid) {
7010a43a1b5SJeff Cody current.valid = true;
7020a43a1b5SJeff Cody current.log = curr_log;
7030a43a1b5SJeff Cody current.log.read = tail;
7040a43a1b5SJeff Cody current.log.write = curr_log.read;
7050a43a1b5SJeff Cody current.count = 1;
7060a43a1b5SJeff Cody current.hdr = hdr;
7070a43a1b5SJeff Cody
7080a43a1b5SJeff Cody
7090a43a1b5SJeff Cody for (;;) {
7100a43a1b5SJeff Cody ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
7110a43a1b5SJeff Cody &seq_valid, &hdr);
7120a43a1b5SJeff Cody if (ret < 0) {
7130a43a1b5SJeff Cody goto exit;
7140a43a1b5SJeff Cody }
7150a43a1b5SJeff Cody if (seq_valid == false) {
7160a43a1b5SJeff Cody break;
7170a43a1b5SJeff Cody }
7180a43a1b5SJeff Cody current.log.write = curr_log.read;
7190a43a1b5SJeff Cody current.count++;
7200a43a1b5SJeff Cody
7210a43a1b5SJeff Cody curr_seq = hdr.sequence_number;
7220a43a1b5SJeff Cody }
7230a43a1b5SJeff Cody }
7240a43a1b5SJeff Cody
7250a43a1b5SJeff Cody if (current.valid) {
7260a43a1b5SJeff Cody if (candidate.valid == false ||
7270a43a1b5SJeff Cody current.hdr.sequence_number > candidate.hdr.sequence_number) {
7280a43a1b5SJeff Cody candidate = current;
7290a43a1b5SJeff Cody }
7300a43a1b5SJeff Cody }
7310a43a1b5SJeff Cody
7320a43a1b5SJeff Cody if (curr_log.read < tail) {
7330a43a1b5SJeff Cody break;
7340a43a1b5SJeff Cody }
7350a43a1b5SJeff Cody }
7360a43a1b5SJeff Cody
7370a43a1b5SJeff Cody *logs = candidate;
7380a43a1b5SJeff Cody
7390a43a1b5SJeff Cody if (candidate.valid) {
7400a43a1b5SJeff Cody /* this is the next sequence number, for writes */
7410a43a1b5SJeff Cody s->log.sequence = candidate.hdr.sequence_number + 1;
7420a43a1b5SJeff Cody }
7430a43a1b5SJeff Cody
7440a43a1b5SJeff Cody
7450a43a1b5SJeff Cody exit:
7460a43a1b5SJeff Cody return ret;
7470a43a1b5SJeff Cody }
7480a43a1b5SJeff Cody
7490a43a1b5SJeff Cody /* Parse the replay log. Per the VHDX spec, if the log is present
7500a43a1b5SJeff Cody * it must be replayed prior to opening the file, even read-only.
7510a43a1b5SJeff Cody *
7520a43a1b5SJeff Cody * If read-only, we must replay the log in RAM (or refuse to open
7530a43a1b5SJeff Cody * a dirty VHDX file read-only) */
vhdx_parse_log(BlockDriverState * bs,BDRVVHDXState * s,bool * flushed,Error ** errp)7547e30e6a6SJeff Cody int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
7557e30e6a6SJeff Cody Error **errp)
7560a43a1b5SJeff Cody {
7570a43a1b5SJeff Cody int ret = 0;
7580a43a1b5SJeff Cody VHDXHeader *hdr;
7590a43a1b5SJeff Cody VHDXLogSequence logs = { 0 };
7600a43a1b5SJeff Cody
7610a43a1b5SJeff Cody hdr = s->headers[s->curr_header];
7620a43a1b5SJeff Cody
7630a43a1b5SJeff Cody *flushed = false;
7640a43a1b5SJeff Cody
7650a43a1b5SJeff Cody /* s->log.hdr is freed in vhdx_close() */
7660a43a1b5SJeff Cody if (s->log.hdr == NULL) {
7670a43a1b5SJeff Cody s->log.hdr = qemu_blockalign(bs, sizeof(VHDXLogEntryHeader));
7680a43a1b5SJeff Cody }
7690a43a1b5SJeff Cody
7700a43a1b5SJeff Cody s->log.offset = hdr->log_offset;
7710a43a1b5SJeff Cody s->log.length = hdr->log_length;
7720a43a1b5SJeff Cody
7730a43a1b5SJeff Cody if (s->log.offset < VHDX_LOG_MIN_SIZE ||
7740a43a1b5SJeff Cody s->log.offset % VHDX_LOG_MIN_SIZE) {
7750a43a1b5SJeff Cody ret = -EINVAL;
7760a43a1b5SJeff Cody goto exit;
7770a43a1b5SJeff Cody }
7780a43a1b5SJeff Cody
7790a43a1b5SJeff Cody /* per spec, only log version of 0 is supported */
7800a43a1b5SJeff Cody if (hdr->log_version != 0) {
7810a43a1b5SJeff Cody ret = -EINVAL;
7820a43a1b5SJeff Cody goto exit;
7830a43a1b5SJeff Cody }
7840a43a1b5SJeff Cody
7850a43a1b5SJeff Cody /* If either the log guid, or log length is zero,
7860a43a1b5SJeff Cody * then a replay log is not present */
7870a43a1b5SJeff Cody if (guid_eq(hdr->log_guid, zero_guid)) {
7880a43a1b5SJeff Cody goto exit;
7890a43a1b5SJeff Cody }
7900a43a1b5SJeff Cody
7910a43a1b5SJeff Cody if (hdr->log_length == 0) {
7920a43a1b5SJeff Cody goto exit;
7930a43a1b5SJeff Cody }
7940a43a1b5SJeff Cody
7950a43a1b5SJeff Cody if (hdr->log_length % VHDX_LOG_MIN_SIZE) {
7960a43a1b5SJeff Cody ret = -EINVAL;
7970a43a1b5SJeff Cody goto exit;
7980a43a1b5SJeff Cody }
7990a43a1b5SJeff Cody
8000a43a1b5SJeff Cody
8010a43a1b5SJeff Cody /* The log is present, we need to find if and where there is an active
8020a43a1b5SJeff Cody * sequence of valid entries present in the log. */
8030a43a1b5SJeff Cody
8040a43a1b5SJeff Cody ret = vhdx_log_search(bs, s, &logs);
8050a43a1b5SJeff Cody if (ret < 0) {
8060a43a1b5SJeff Cody goto exit;
8070a43a1b5SJeff Cody }
8080a43a1b5SJeff Cody
8090a43a1b5SJeff Cody if (logs.valid) {
810307261b2SVladimir Sementsov-Ogievskiy if (bdrv_is_read_only(bs)) {
811f30c66baSMax Reitz bdrv_refresh_filename(bs);
8127e30e6a6SJeff Cody ret = -EPERM;
813bf89e874SMarkus Armbruster error_setg(errp,
8147e30e6a6SJeff Cody "VHDX image file '%s' opened read-only, but "
815bf89e874SMarkus Armbruster "contains a log that needs to be replayed",
816bf89e874SMarkus Armbruster bs->filename);
817bf89e874SMarkus Armbruster error_append_hint(errp, "To replay the log, run:\n"
818bf89e874SMarkus Armbruster "qemu-img check -r all '%s'\n",
819bf89e874SMarkus Armbruster bs->filename);
8207e30e6a6SJeff Cody goto exit;
8217e30e6a6SJeff Cody }
8220a43a1b5SJeff Cody /* now flush the log */
8230a43a1b5SJeff Cody ret = vhdx_log_flush(bs, s, &logs);
8240a43a1b5SJeff Cody if (ret < 0) {
8250a43a1b5SJeff Cody goto exit;
8260a43a1b5SJeff Cody }
8270a43a1b5SJeff Cody *flushed = true;
8280a43a1b5SJeff Cody }
8290a43a1b5SJeff Cody
8300a43a1b5SJeff Cody
8310a43a1b5SJeff Cody exit:
8320a43a1b5SJeff Cody return ret;
8330a43a1b5SJeff Cody }
8340a43a1b5SJeff Cody
8350a43a1b5SJeff Cody
8368adc5233SJeff Cody
/*
 * Split one raw log-sector-sized buffer into its on-disk log form.
 *
 * The first 8 bytes of data go to desc->leading_bytes, the following 4084
 * bytes to sector->data, and the last 4 bytes to desc->trailing_bytes
 * (8 + 4084 + 4 = 4096, 1 log sector).  The data sector is stamped with
 * the data signature and with seq split into high and low 32-bit halves.
 * Finally both structures are run through their *_le_export() helpers.
 */
static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
                                      VHDXLogDataSector *sector, void *data,
                                      uint64_t seq)
{
    enum {
        LEADING_LEN = 8,
        PAYLOAD_LEN = 4084,
        TRAILING_LEN = 4,
    };
    const uint8_t *raw = data;

    /* head and tail of the raw sector live in the descriptor */
    memcpy(&desc->leading_bytes, raw, LEADING_LEN);
    memcpy(&desc->trailing_bytes, raw + LEADING_LEN + PAYLOAD_LEN,
           TRAILING_LEN);
    desc->leading_bytes = cpu_to_le64(desc->leading_bytes);
    desc->trailing_bytes = cpu_to_le32(desc->trailing_bytes);

    /* the middle part is carried by the data sector itself */
    memcpy(sector->data, raw + LEADING_LEN, PAYLOAD_LEN);

    sector->data_signature = VHDX_LOG_DATA_SIGNATURE;
    sector->sequence_high = (uint32_t)(seq >> 32);
    sector->sequence_low = (uint32_t)(seq & 0xffffffff);

    vhdx_log_desc_le_export(desc);
    vhdx_log_data_le_export(sector);
}
8588adc5233SJeff Cody
8598adc5233SJeff Cody
860f6b08994SPaolo Bonzini static int coroutine_fn GRAPH_RDLOCK
vhdx_log_write(BlockDriverState * bs,BDRVVHDXState * s,void * data,uint32_t length,uint64_t offset)861f6b08994SPaolo Bonzini vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
8628adc5233SJeff Cody void *data, uint32_t length, uint64_t offset)
8638adc5233SJeff Cody {
8648adc5233SJeff Cody int ret = 0;
8658adc5233SJeff Cody void *buffer = NULL;
8668adc5233SJeff Cody void *merged_sector = NULL;
8678adc5233SJeff Cody void *data_tmp, *sector_write;
8688adc5233SJeff Cody unsigned int i;
8698adc5233SJeff Cody int sector_offset;
8708adc5233SJeff Cody uint32_t desc_sectors, sectors, total_length;
8718adc5233SJeff Cody uint32_t sectors_written = 0;
8728adc5233SJeff Cody uint32_t aligned_length;
8738adc5233SJeff Cody uint32_t leading_length = 0;
8748adc5233SJeff Cody uint32_t trailing_length = 0;
8758adc5233SJeff Cody uint32_t partial_sectors = 0;
8768adc5233SJeff Cody uint32_t bytes_written = 0;
8778adc5233SJeff Cody uint64_t file_offset;
8783f910692SJeff Cody int64_t file_length;
8798adc5233SJeff Cody VHDXHeader *header;
8808adc5233SJeff Cody VHDXLogEntryHeader new_hdr;
8818adc5233SJeff Cody VHDXLogDescriptor *new_desc = NULL;
8828adc5233SJeff Cody VHDXLogDataSector *data_sector = NULL;
8838adc5233SJeff Cody MSGUID new_guid = { 0 };
8848adc5233SJeff Cody
8858adc5233SJeff Cody header = s->headers[s->curr_header];
8868adc5233SJeff Cody
8878adc5233SJeff Cody /* need to have offset read data, and be on 4096 byte boundary */
8888adc5233SJeff Cody
8898adc5233SJeff Cody if (length > header->log_length) {
8908adc5233SJeff Cody /* no log present. we could create a log here instead of failing */
8918adc5233SJeff Cody ret = -EINVAL;
8928adc5233SJeff Cody goto exit;
8938adc5233SJeff Cody }
8948adc5233SJeff Cody
8958adc5233SJeff Cody if (guid_eq(header->log_guid, zero_guid)) {
8968adc5233SJeff Cody vhdx_guid_generate(&new_guid);
8978adc5233SJeff Cody vhdx_update_headers(bs, s, false, &new_guid);
8988adc5233SJeff Cody } else {
8998adc5233SJeff Cody /* currently, we require that the log be flushed after
9008adc5233SJeff Cody * every write. */
9018adc5233SJeff Cody ret = -ENOTSUP;
9028adc5233SJeff Cody goto exit;
9038adc5233SJeff Cody }
9048adc5233SJeff Cody
9058adc5233SJeff Cody /* 0 is an invalid sequence number, but may also represent the first
9068adc5233SJeff Cody * log write (or a wrapped seq) */
9078adc5233SJeff Cody if (s->log.sequence == 0) {
9088adc5233SJeff Cody s->log.sequence = 1;
9098adc5233SJeff Cody }
9108adc5233SJeff Cody
9118adc5233SJeff Cody sector_offset = offset % VHDX_LOG_SECTOR_SIZE;
912cf7a09c1SMarc-André Lureau file_offset = QEMU_ALIGN_DOWN(offset, VHDX_LOG_SECTOR_SIZE);
9138adc5233SJeff Cody
9148adc5233SJeff Cody aligned_length = length;
9158adc5233SJeff Cody
9168adc5233SJeff Cody /* add in the unaligned head and tail bytes */
9178adc5233SJeff Cody if (sector_offset) {
9188adc5233SJeff Cody leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset);
9198adc5233SJeff Cody leading_length = leading_length > length ? length : leading_length;
9208adc5233SJeff Cody aligned_length -= leading_length;
9218adc5233SJeff Cody partial_sectors++;
9228adc5233SJeff Cody }
9238adc5233SJeff Cody
9248adc5233SJeff Cody sectors = aligned_length / VHDX_LOG_SECTOR_SIZE;
9258adc5233SJeff Cody trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE);
9268adc5233SJeff Cody if (trailing_length) {
9278adc5233SJeff Cody partial_sectors++;
9288adc5233SJeff Cody }
9298adc5233SJeff Cody
9308adc5233SJeff Cody sectors += partial_sectors;
9318adc5233SJeff Cody
932f6b08994SPaolo Bonzini file_length = bdrv_co_getlength(bs->file->bs);
9333f910692SJeff Cody if (file_length < 0) {
9343f910692SJeff Cody ret = file_length;
9353f910692SJeff Cody goto exit;
9363f910692SJeff Cody }
9373f910692SJeff Cody
9388adc5233SJeff Cody /* sectors is now how many sectors the data itself takes, not
9398adc5233SJeff Cody * including the header and descriptor metadata */
9408adc5233SJeff Cody
9418adc5233SJeff Cody new_hdr = (VHDXLogEntryHeader) {
9428adc5233SJeff Cody .signature = VHDX_LOG_SIGNATURE,
9438adc5233SJeff Cody .tail = s->log.tail,
9448adc5233SJeff Cody .sequence_number = s->log.sequence,
9458adc5233SJeff Cody .descriptor_count = sectors,
9468adc5233SJeff Cody .reserved = 0,
9473f910692SJeff Cody .flushed_file_offset = file_length,
9483f910692SJeff Cody .last_file_offset = file_length,
9493f910692SJeff Cody .log_guid = header->log_guid,
9508adc5233SJeff Cody };
9518adc5233SJeff Cody
9528adc5233SJeff Cody
9538adc5233SJeff Cody desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count);
9548adc5233SJeff Cody
9558adc5233SJeff Cody total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE;
9568adc5233SJeff Cody new_hdr.entry_length = total_length;
9578adc5233SJeff Cody
9588adc5233SJeff Cody vhdx_log_entry_hdr_le_export(&new_hdr);
9598adc5233SJeff Cody
9608adc5233SJeff Cody buffer = qemu_blockalign(bs, total_length);
9618adc5233SJeff Cody memcpy(buffer, &new_hdr, sizeof(new_hdr));
9628adc5233SJeff Cody
963d4df3dbcSMarkus Armbruster new_desc = buffer + sizeof(new_hdr);
9648adc5233SJeff Cody data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE);
9658adc5233SJeff Cody data_tmp = data;
9668adc5233SJeff Cody
9678adc5233SJeff Cody /* All log sectors are 4KB, so for any partial sectors we must
9688adc5233SJeff Cody * merge the data with preexisting data from the final file
9698adc5233SJeff Cody * destination */
9708adc5233SJeff Cody merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
9718adc5233SJeff Cody
9728adc5233SJeff Cody for (i = 0; i < sectors; i++) {
9738adc5233SJeff Cody new_desc->signature = VHDX_LOG_DESC_SIGNATURE;
9748adc5233SJeff Cody new_desc->sequence_number = s->log.sequence;
9758adc5233SJeff Cody new_desc->file_offset = file_offset;
9768adc5233SJeff Cody
9778adc5233SJeff Cody if (i == 0 && leading_length) {
9788adc5233SJeff Cody /* partial sector at the front of the buffer */
979f6b08994SPaolo Bonzini ret = bdrv_co_pread(bs->file, file_offset, VHDX_LOG_SECTOR_SIZE,
98032cc71deSAlberto Faria merged_sector, 0);
9818adc5233SJeff Cody if (ret < 0) {
9828adc5233SJeff Cody goto exit;
9838adc5233SJeff Cody }
9848adc5233SJeff Cody memcpy(merged_sector + sector_offset, data_tmp, leading_length);
9858adc5233SJeff Cody bytes_written = leading_length;
9868adc5233SJeff Cody sector_write = merged_sector;
9878adc5233SJeff Cody } else if (i == sectors - 1 && trailing_length) {
9888adc5233SJeff Cody /* partial sector at the end of the buffer */
989f6b08994SPaolo Bonzini ret = bdrv_co_pread(bs->file, file_offset + trailing_length,
99032cc71deSAlberto Faria VHDX_LOG_SECTOR_SIZE - trailing_length,
99132cc71deSAlberto Faria merged_sector + trailing_length, 0);
9928adc5233SJeff Cody if (ret < 0) {
9938adc5233SJeff Cody goto exit;
9948adc5233SJeff Cody }
9958adc5233SJeff Cody memcpy(merged_sector, data_tmp, trailing_length);
9968adc5233SJeff Cody bytes_written = trailing_length;
9978adc5233SJeff Cody sector_write = merged_sector;
9988adc5233SJeff Cody } else {
9998adc5233SJeff Cody bytes_written = VHDX_LOG_SECTOR_SIZE;
10008adc5233SJeff Cody sector_write = data_tmp;
10018adc5233SJeff Cody }
10028adc5233SJeff Cody
10038adc5233SJeff Cody /* populate the raw sector data into the proper structures,
10048adc5233SJeff Cody * as well as update the descriptor, and convert to proper
10058adc5233SJeff Cody * endianness */
10068adc5233SJeff Cody vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write,
10078adc5233SJeff Cody s->log.sequence);
10088adc5233SJeff Cody
10098adc5233SJeff Cody data_tmp += bytes_written;
10108adc5233SJeff Cody data_sector++;
10118adc5233SJeff Cody new_desc++;
10128adc5233SJeff Cody file_offset += VHDX_LOG_SECTOR_SIZE;
10138adc5233SJeff Cody }
10148adc5233SJeff Cody
10158adc5233SJeff Cody /* checksum covers entire entry, from the log header through the
10168adc5233SJeff Cody * last data sector */
10178adc5233SJeff Cody vhdx_update_checksum(buffer, total_length,
10188adc5233SJeff Cody offsetof(VHDXLogEntryHeader, checksum));
10198adc5233SJeff Cody
10208adc5233SJeff Cody /* now write to the log */
1021f50159faSMarkus Armbruster ret = vhdx_log_write_sectors(bs, &s->log, §ors_written, buffer,
10228adc5233SJeff Cody desc_sectors + sectors);
10238adc5233SJeff Cody if (ret < 0) {
10248adc5233SJeff Cody goto exit;
10258adc5233SJeff Cody }
10268adc5233SJeff Cody
10278adc5233SJeff Cody if (sectors_written != desc_sectors + sectors) {
10288adc5233SJeff Cody /* instead of failing, we could flush the log here */
10298adc5233SJeff Cody ret = -EINVAL;
10308adc5233SJeff Cody goto exit;
10318adc5233SJeff Cody }
10328adc5233SJeff Cody
10338adc5233SJeff Cody s->log.sequence++;
10348adc5233SJeff Cody /* write new tail */
10358adc5233SJeff Cody s->log.tail = s->log.write;
10368adc5233SJeff Cody
10378adc5233SJeff Cody exit:
10388adc5233SJeff Cody qemu_vfree(buffer);
10398adc5233SJeff Cody qemu_vfree(merged_sector);
10408adc5233SJeff Cody return ret;
10418adc5233SJeff Cody }
10428adc5233SJeff Cody
10438adc5233SJeff Cody /* Perform a log write, and then immediately flush the entire log */
1044f6b08994SPaolo Bonzini int coroutine_fn
vhdx_log_write_and_flush(BlockDriverState * bs,BDRVVHDXState * s,void * data,uint32_t length,uint64_t offset)1045f6b08994SPaolo Bonzini vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
10468adc5233SJeff Cody void *data, uint32_t length, uint64_t offset)
10478adc5233SJeff Cody {
10488adc5233SJeff Cody int ret = 0;
10498adc5233SJeff Cody VHDXLogSequence logs = { .valid = true,
10508adc5233SJeff Cody .count = 1,
10518adc5233SJeff Cody .hdr = { 0 } };
10528adc5233SJeff Cody
10538adc5233SJeff Cody
10548adc5233SJeff Cody /* Make sure data written (new and/or changed blocks) is stable
10558adc5233SJeff Cody * on disk, before creating log entry */
1056f6b08994SPaolo Bonzini ret = bdrv_co_flush(bs);
1057c6572fa0SJeff Cody if (ret < 0) {
1058c6572fa0SJeff Cody goto exit;
1059c6572fa0SJeff Cody }
1060c6572fa0SJeff Cody
10618adc5233SJeff Cody ret = vhdx_log_write(bs, s, data, length, offset);
10628adc5233SJeff Cody if (ret < 0) {
10638adc5233SJeff Cody goto exit;
10648adc5233SJeff Cody }
10658adc5233SJeff Cody logs.log = s->log;
10668adc5233SJeff Cody
10678adc5233SJeff Cody /* Make sure log is stable on disk */
1068f6b08994SPaolo Bonzini ret = bdrv_co_flush(bs);
1069c6572fa0SJeff Cody if (ret < 0) {
1070c6572fa0SJeff Cody goto exit;
1071c6572fa0SJeff Cody }
1072c6572fa0SJeff Cody
10738adc5233SJeff Cody ret = vhdx_log_flush(bs, s, &logs);
10748adc5233SJeff Cody if (ret < 0) {
10758adc5233SJeff Cody goto exit;
10768adc5233SJeff Cody }
10778adc5233SJeff Cody
10788adc5233SJeff Cody s->log = logs.log;
10798adc5233SJeff Cody
10808adc5233SJeff Cody exit:
10818adc5233SJeff Cody return ret;
10828adc5233SJeff Cody }
10838adc5233SJeff Cody
1084