xref: /qemu/block/vhdx-log.c (revision 65ff757d)
/*
 * Block driver for Hyper-V VHDX Images
 *
 * Copyright (c) 2013 Red Hat, Inc.,
 *
 * Authors:
 *  Jeff Cody <jcody@redhat.com>
 *
 *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
 *  by Microsoft:
 *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
 *
 * This file covers the functionality of the metadata log writing, parsing, and
 * replay.
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
200b8fa32fSMarkus Armbruster 
2180c71a24SPeter Maydell #include "qemu/osdep.h"
22da34e65cSMarkus Armbruster #include "qapi/error.h"
23e2c1c34fSMarkus Armbruster #include "block/block-io.h"
240a43a1b5SJeff Cody #include "block/block_int.h"
25d49b6836SMarkus Armbruster #include "qemu/error-report.h"
2658369e22SPaolo Bonzini #include "qemu/bswap.h"
275df022cfSPeter Maydell #include "qemu/memalign.h"
280d8c41daSMichael S. Tsirkin #include "vhdx.h"
290a43a1b5SJeff Cody 
300a43a1b5SJeff Cody 
310a43a1b5SJeff Cody typedef struct VHDXLogSequence {
320a43a1b5SJeff Cody     bool valid;
330a43a1b5SJeff Cody     uint32_t count;
340a43a1b5SJeff Cody     VHDXLogEntries log;
350a43a1b5SJeff Cody     VHDXLogEntryHeader hdr;
360a43a1b5SJeff Cody } VHDXLogSequence;
370a43a1b5SJeff Cody 
380a43a1b5SJeff Cody typedef struct VHDXLogDescEntries {
390a43a1b5SJeff Cody     VHDXLogEntryHeader hdr;
400a43a1b5SJeff Cody     VHDXLogDescriptor desc[];
410a43a1b5SJeff Cody } VHDXLogDescEntries;
420a43a1b5SJeff Cody 
430a43a1b5SJeff Cody static const MSGUID zero_guid = { 0 };
440a43a1b5SJeff Cody 
/* The log located on the disk is a circular buffer containing
 * sectors of 4096 bytes each.
 *
 * It is assumed for the read/write functions below that the
 * circular buffer scheme uses a 'one sector open' to indicate
 * the buffer is full.  Given the validation methods used for each
 * sector, this method should be compatible with other methods that
 * do not waste a sector.
 */
540a43a1b5SJeff Cody 
550a43a1b5SJeff Cody 
560a43a1b5SJeff Cody /* Allow peeking at the hdr entry at the beginning of the current
570a43a1b5SJeff Cody  * read index, without advancing the read index */
5865ff757dSKevin Wolf static int GRAPH_RDLOCK
vhdx_log_peek_hdr(BlockDriverState * bs,VHDXLogEntries * log,VHDXLogEntryHeader * hdr)5965ff757dSKevin Wolf vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
600a43a1b5SJeff Cody                   VHDXLogEntryHeader *hdr)
610a43a1b5SJeff Cody {
620a43a1b5SJeff Cody     int ret = 0;
630a43a1b5SJeff Cody     uint64_t offset;
640a43a1b5SJeff Cody     uint32_t read;
650a43a1b5SJeff Cody 
660a43a1b5SJeff Cody     assert(hdr != NULL);
670a43a1b5SJeff Cody 
680a43a1b5SJeff Cody     /* peek is only supported on sector boundaries */
690a43a1b5SJeff Cody     if (log->read % VHDX_LOG_SECTOR_SIZE) {
700a43a1b5SJeff Cody         ret = -EFAULT;
710a43a1b5SJeff Cody         goto exit;
720a43a1b5SJeff Cody     }
730a43a1b5SJeff Cody 
740a43a1b5SJeff Cody     read = log->read;
750a43a1b5SJeff Cody     /* we are guaranteed that a) log sectors are 4096 bytes,
760a43a1b5SJeff Cody      * and b) the log length is a multiple of 1MB. So, there
770a43a1b5SJeff Cody      * is always a round number of sectors in the buffer */
780a43a1b5SJeff Cody     if ((read + sizeof(VHDXLogEntryHeader)) > log->length) {
790a43a1b5SJeff Cody         read = 0;
800a43a1b5SJeff Cody     }
810a43a1b5SJeff Cody 
820a43a1b5SJeff Cody     if (read == log->write) {
830a43a1b5SJeff Cody         ret = -EINVAL;
840a43a1b5SJeff Cody         goto exit;
850a43a1b5SJeff Cody     }
860a43a1b5SJeff Cody 
870a43a1b5SJeff Cody     offset = log->offset + read;
880a43a1b5SJeff Cody 
8932cc71deSAlberto Faria     ret = bdrv_pread(bs->file, offset, sizeof(VHDXLogEntryHeader), hdr, 0);
900a43a1b5SJeff Cody     if (ret < 0) {
910a43a1b5SJeff Cody         goto exit;
920a43a1b5SJeff Cody     }
934f75b52aSJeff Cody     vhdx_log_entry_hdr_le_import(hdr);
940a43a1b5SJeff Cody 
950a43a1b5SJeff Cody exit:
960a43a1b5SJeff Cody     return ret;
970a43a1b5SJeff Cody }
980a43a1b5SJeff Cody 
990a43a1b5SJeff Cody /* Index increment for log, based on sector boundaries */
vhdx_log_inc_idx(uint32_t idx,uint64_t length)1000a43a1b5SJeff Cody static int vhdx_log_inc_idx(uint32_t idx, uint64_t length)
1010a43a1b5SJeff Cody {
1020a43a1b5SJeff Cody     idx += VHDX_LOG_SECTOR_SIZE;
1030a43a1b5SJeff Cody     /* we are guaranteed that a) log sectors are 4096 bytes,
1040a43a1b5SJeff Cody      * and b) the log length is a multiple of 1MB. So, there
1050a43a1b5SJeff Cody      * is always a round number of sectors in the buffer */
1060a43a1b5SJeff Cody     return idx >= length ? 0 : idx;
1070a43a1b5SJeff Cody }
1080a43a1b5SJeff Cody 
1090a43a1b5SJeff Cody 
1100a43a1b5SJeff Cody /* Reset the log to empty */
vhdx_log_reset(BlockDriverState * bs,BDRVVHDXState * s)11165ff757dSKevin Wolf static void GRAPH_RDLOCK vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s)
1120a43a1b5SJeff Cody {
1130a43a1b5SJeff Cody     MSGUID guid = { 0 };
1140a43a1b5SJeff Cody     s->log.read = s->log.write = 0;
1150a43a1b5SJeff Cody     /* a log guid of 0 indicates an empty log to any parser of v0
1160a43a1b5SJeff Cody      * VHDX logs */
1170a43a1b5SJeff Cody     vhdx_update_headers(bs, s, false, &guid);
1180a43a1b5SJeff Cody }
1190a43a1b5SJeff Cody 
1200a43a1b5SJeff Cody /* Reads num_sectors from the log (all log sectors are 4096 bytes),
1210a43a1b5SJeff Cody  * into buffer 'buffer'.  Upon return, *sectors_read will contain
1220a43a1b5SJeff Cody  * the number of sectors successfully read.
1230a43a1b5SJeff Cody  *
1240a43a1b5SJeff Cody  * It is assumed that 'buffer' is already allocated, and of sufficient
1250a43a1b5SJeff Cody  * size (i.e. >= 4096*num_sectors).
1260a43a1b5SJeff Cody  *
1270a43a1b5SJeff Cody  * If 'peek' is true, then the tail (read) pointer for the circular buffer is
1280a43a1b5SJeff Cody  * not modified.
1290a43a1b5SJeff Cody  *
1300a43a1b5SJeff Cody  * 0 is returned on success, -errno otherwise.  */
13165ff757dSKevin Wolf static int GRAPH_RDLOCK
vhdx_log_read_sectors(BlockDriverState * bs,VHDXLogEntries * log,uint32_t * sectors_read,void * buffer,uint32_t num_sectors,bool peek)13265ff757dSKevin Wolf vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
1330a43a1b5SJeff Cody                       uint32_t *sectors_read, void *buffer,
1340a43a1b5SJeff Cody                       uint32_t num_sectors, bool peek)
1350a43a1b5SJeff Cody {
1360a43a1b5SJeff Cody     int ret = 0;
1370a43a1b5SJeff Cody     uint64_t offset;
1380a43a1b5SJeff Cody     uint32_t read;
1390a43a1b5SJeff Cody 
1400a43a1b5SJeff Cody     read = log->read;
1410a43a1b5SJeff Cody 
1420a43a1b5SJeff Cody     *sectors_read = 0;
1430a43a1b5SJeff Cody     while (num_sectors) {
1440a43a1b5SJeff Cody         if (read == log->write) {
1450a43a1b5SJeff Cody             /* empty */
1460a43a1b5SJeff Cody             break;
1470a43a1b5SJeff Cody         }
1480a43a1b5SJeff Cody         offset = log->offset + read;
1490a43a1b5SJeff Cody 
15032cc71deSAlberto Faria         ret = bdrv_pread(bs->file, offset, VHDX_LOG_SECTOR_SIZE, buffer, 0);
1510a43a1b5SJeff Cody         if (ret < 0) {
1520a43a1b5SJeff Cody             goto exit;
1530a43a1b5SJeff Cody         }
1540a43a1b5SJeff Cody         read = vhdx_log_inc_idx(read, log->length);
1550a43a1b5SJeff Cody 
1560a43a1b5SJeff Cody         *sectors_read = *sectors_read + 1;
1570a43a1b5SJeff Cody         num_sectors--;
1580a43a1b5SJeff Cody     }
1590a43a1b5SJeff Cody 
1600a43a1b5SJeff Cody exit:
1610a43a1b5SJeff Cody     if (!peek) {
1620a43a1b5SJeff Cody         log->read = read;
1630a43a1b5SJeff Cody     }
1640a43a1b5SJeff Cody     return ret;
1650a43a1b5SJeff Cody }
1660a43a1b5SJeff Cody 
1678adc5233SJeff Cody /* Writes num_sectors to the log (all log sectors are 4096 bytes),
1688adc5233SJeff Cody  * from buffer 'buffer'.  Upon return, *sectors_written will contain
1698adc5233SJeff Cody  * the number of sectors successfully written.
1708adc5233SJeff Cody  *
1718adc5233SJeff Cody  * It is assumed that 'buffer' is at least 4096*num_sectors large.
1728adc5233SJeff Cody  *
1738adc5233SJeff Cody  * 0 is returned on success, -errno otherwise */
174f6b08994SPaolo Bonzini static int coroutine_fn GRAPH_RDLOCK
vhdx_log_write_sectors(BlockDriverState * bs,VHDXLogEntries * log,uint32_t * sectors_written,void * buffer,uint32_t num_sectors)175f6b08994SPaolo Bonzini vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
1768adc5233SJeff Cody                        uint32_t *sectors_written, void *buffer,
1778adc5233SJeff Cody                        uint32_t num_sectors)
1788adc5233SJeff Cody {
1798adc5233SJeff Cody     int ret = 0;
1808adc5233SJeff Cody     uint64_t offset;
1818adc5233SJeff Cody     uint32_t write;
1828adc5233SJeff Cody     void *buffer_tmp;
1838adc5233SJeff Cody     BDRVVHDXState *s = bs->opaque;
1848adc5233SJeff Cody 
1858adc5233SJeff Cody     ret = vhdx_user_visible_write(bs, s);
1868adc5233SJeff Cody     if (ret < 0) {
1878adc5233SJeff Cody         goto exit;
1888adc5233SJeff Cody     }
1898adc5233SJeff Cody 
1908adc5233SJeff Cody     write = log->write;
1918adc5233SJeff Cody 
1928adc5233SJeff Cody     buffer_tmp = buffer;
1938adc5233SJeff Cody     while (num_sectors) {
1948adc5233SJeff Cody 
1958adc5233SJeff Cody         offset = log->offset + write;
1968adc5233SJeff Cody         write = vhdx_log_inc_idx(write, log->length);
1978adc5233SJeff Cody         if (write == log->read) {
1988adc5233SJeff Cody             /* full */
1998adc5233SJeff Cody             break;
2008adc5233SJeff Cody         }
201f6b08994SPaolo Bonzini         ret = bdrv_co_pwrite(bs->file, offset, VHDX_LOG_SECTOR_SIZE, buffer_tmp, 0);
2028adc5233SJeff Cody         if (ret < 0) {
2038adc5233SJeff Cody             goto exit;
2048adc5233SJeff Cody         }
2058adc5233SJeff Cody         buffer_tmp += VHDX_LOG_SECTOR_SIZE;
2068adc5233SJeff Cody 
2078adc5233SJeff Cody         log->write = write;
2088adc5233SJeff Cody         *sectors_written = *sectors_written + 1;
2098adc5233SJeff Cody         num_sectors--;
2108adc5233SJeff Cody     }
2118adc5233SJeff Cody 
2128adc5233SJeff Cody exit:
2138adc5233SJeff Cody     return ret;
2148adc5233SJeff Cody }
2158adc5233SJeff Cody 
2168adc5233SJeff Cody 
2170a43a1b5SJeff Cody /* Validates a log entry header */
vhdx_log_hdr_is_valid(VHDXLogEntries * log,VHDXLogEntryHeader * hdr,BDRVVHDXState * s)2180a43a1b5SJeff Cody static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
2190a43a1b5SJeff Cody                                   BDRVVHDXState *s)
2200a43a1b5SJeff Cody {
2210a43a1b5SJeff Cody     int valid = false;
2220a43a1b5SJeff Cody 
2234f75b52aSJeff Cody     if (hdr->signature != VHDX_LOG_SIGNATURE) {
2240a43a1b5SJeff Cody         goto exit;
2250a43a1b5SJeff Cody     }
2260a43a1b5SJeff Cody 
2270a43a1b5SJeff Cody     /* if the individual entry length is larger than the whole log
2280a43a1b5SJeff Cody      * buffer, that is obviously invalid */
2290a43a1b5SJeff Cody     if (log->length < hdr->entry_length) {
2300a43a1b5SJeff Cody         goto exit;
2310a43a1b5SJeff Cody     }
2320a43a1b5SJeff Cody 
2330a43a1b5SJeff Cody     /* length of entire entry must be in units of 4KB (log sector size) */
2340a43a1b5SJeff Cody     if (hdr->entry_length % (VHDX_LOG_SECTOR_SIZE)) {
2350a43a1b5SJeff Cody         goto exit;
2360a43a1b5SJeff Cody     }
2370a43a1b5SJeff Cody 
2380a43a1b5SJeff Cody     /* per spec, sequence # must be > 0 */
2390a43a1b5SJeff Cody     if (hdr->sequence_number == 0) {
2400a43a1b5SJeff Cody         goto exit;
2410a43a1b5SJeff Cody     }
2420a43a1b5SJeff Cody 
2430a43a1b5SJeff Cody     /* log entries are only valid if they match the file-wide log guid
2440a43a1b5SJeff Cody      * found in the active header */
2450a43a1b5SJeff Cody     if (!guid_eq(hdr->log_guid, s->headers[s->curr_header]->log_guid)) {
2460a43a1b5SJeff Cody         goto exit;
2470a43a1b5SJeff Cody     }
2480a43a1b5SJeff Cody 
2490a43a1b5SJeff Cody     if (hdr->descriptor_count * sizeof(VHDXLogDescriptor) > hdr->entry_length) {
2500a43a1b5SJeff Cody         goto exit;
2510a43a1b5SJeff Cody     }
2520a43a1b5SJeff Cody 
2530a43a1b5SJeff Cody     valid = true;
2540a43a1b5SJeff Cody 
2550a43a1b5SJeff Cody exit:
2560a43a1b5SJeff Cody     return valid;
2570a43a1b5SJeff Cody }
2580a43a1b5SJeff Cody 
2590a43a1b5SJeff Cody /*
2600a43a1b5SJeff Cody  * Given a log header, this will validate that the descriptors and the
2610a43a1b5SJeff Cody  * corresponding data sectors (if applicable)
2620a43a1b5SJeff Cody  *
2630a43a1b5SJeff Cody  * Validation consists of:
2640a43a1b5SJeff Cody  *      1. Making sure the sequence numbers matches the entry header
2650a43a1b5SJeff Cody  *      2. Verifying a valid signature ('zero' or 'desc' for descriptors)
2660a43a1b5SJeff Cody  *      3. File offset field is a multiple of 4KB
2670a43a1b5SJeff Cody  *      4. If a data descriptor, the corresponding data sector
2680a43a1b5SJeff Cody  *         has its signature ('data') and matching sequence number
2690a43a1b5SJeff Cody  *
2700a43a1b5SJeff Cody  * @desc: the data buffer containing the descriptor
2710a43a1b5SJeff Cody  * @hdr:  the log entry header
2720a43a1b5SJeff Cody  *
2730a43a1b5SJeff Cody  * Returns true if valid
2740a43a1b5SJeff Cody  */
vhdx_log_desc_is_valid(VHDXLogDescriptor * desc,VHDXLogEntryHeader * hdr)2750a43a1b5SJeff Cody static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc,
2760a43a1b5SJeff Cody                                    VHDXLogEntryHeader *hdr)
2770a43a1b5SJeff Cody {
2780a43a1b5SJeff Cody     bool ret = false;
2790a43a1b5SJeff Cody 
2800a43a1b5SJeff Cody     if (desc->sequence_number != hdr->sequence_number) {
2810a43a1b5SJeff Cody         goto exit;
2820a43a1b5SJeff Cody     }
2830a43a1b5SJeff Cody     if (desc->file_offset % VHDX_LOG_SECTOR_SIZE) {
2840a43a1b5SJeff Cody         goto exit;
2850a43a1b5SJeff Cody     }
2860a43a1b5SJeff Cody 
2874f75b52aSJeff Cody     if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
2880a43a1b5SJeff Cody         if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) {
2890a43a1b5SJeff Cody             /* valid */
2900a43a1b5SJeff Cody             ret = true;
2910a43a1b5SJeff Cody         }
2924f75b52aSJeff Cody     } else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
2930a43a1b5SJeff Cody             /* valid */
2940a43a1b5SJeff Cody             ret = true;
2950a43a1b5SJeff Cody     }
2960a43a1b5SJeff Cody 
2970a43a1b5SJeff Cody exit:
2980a43a1b5SJeff Cody     return ret;
2990a43a1b5SJeff Cody }
3000a43a1b5SJeff Cody 
3010a43a1b5SJeff Cody 
/* Prior to sector data for a log entry, there is the header
 * and the descriptors referenced in the header:
 *
 * [] = 4KB sector
 *
 * [ hdr, desc ][   desc   ][ ... ][ data ][ ... ]
 *
 * The first sector in a log entry has a 64 byte header, and
 * up to 126 32-byte descriptors.  If more descriptors than
 * 126 are required, then subsequent sectors can have up to 128
 * descriptors.  Each sector is 4KB.  Data follows the descriptor
 * sectors.
 *
 * This will return the number of sectors needed to encompass
 * the passed number of descriptors in desc_cnt.
 *
 * This will never return 0, even if desc_cnt is 0.  The computation is
 * carried out in 64 bits so that a desc_cnt close to UINT32_MAX (the value
 * originates from the image file) cannot wrap around and yield 0 or 1.
 */
static int vhdx_compute_desc_sectors(uint32_t desc_cnt)
{
    /* the 64-byte header takes the room of two 32-byte descriptors */
    uint64_t slots = (uint64_t)desc_cnt + 2;

    /* 128 slots per 4KB sector, rounded up; at most 33554433, fits an int */
    return (int)((slots + 127) / 128);
}
3320a43a1b5SJeff Cody 
3330a43a1b5SJeff Cody 
3340a43a1b5SJeff Cody /* Reads the log header, and subsequent descriptors (if any).  This
3350a43a1b5SJeff Cody  * will allocate all the space for buffer, which must be NULL when
3360a43a1b5SJeff Cody  * passed into this function. Each descriptor will also be validated,
3370a43a1b5SJeff Cody  * and error returned if any are invalid. */
33865ff757dSKevin Wolf static int GRAPH_RDLOCK
vhdx_log_read_desc(BlockDriverState * bs,BDRVVHDXState * s,VHDXLogEntries * log,VHDXLogDescEntries ** buffer,bool convert_endian)33965ff757dSKevin Wolf vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s, VHDXLogEntries *log,
34065ff757dSKevin Wolf                    VHDXLogDescEntries **buffer, bool convert_endian)
3410a43a1b5SJeff Cody {
3420a43a1b5SJeff Cody     int ret = 0;
3430a43a1b5SJeff Cody     uint32_t desc_sectors;
3440a43a1b5SJeff Cody     uint32_t sectors_read;
3450a43a1b5SJeff Cody     VHDXLogEntryHeader hdr;
3460a43a1b5SJeff Cody     VHDXLogDescEntries *desc_entries = NULL;
3474f75b52aSJeff Cody     VHDXLogDescriptor desc;
3480a43a1b5SJeff Cody     int i;
3490a43a1b5SJeff Cody 
3500a43a1b5SJeff Cody     assert(*buffer == NULL);
3510a43a1b5SJeff Cody 
3520a43a1b5SJeff Cody     ret = vhdx_log_peek_hdr(bs, log, &hdr);
3530a43a1b5SJeff Cody     if (ret < 0) {
3540a43a1b5SJeff Cody         goto exit;
3550a43a1b5SJeff Cody     }
3564f75b52aSJeff Cody 
3570a43a1b5SJeff Cody     if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
3580a43a1b5SJeff Cody         ret = -EINVAL;
3590a43a1b5SJeff Cody         goto exit;
3600a43a1b5SJeff Cody     }
3610a43a1b5SJeff Cody 
3620a43a1b5SJeff Cody     desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
3639a4f4c31SKevin Wolf     desc_entries = qemu_try_blockalign(bs->file->bs,
364a67e128aSKevin Wolf                                        desc_sectors * VHDX_LOG_SECTOR_SIZE);
365a67e128aSKevin Wolf     if (desc_entries == NULL) {
366a67e128aSKevin Wolf         ret = -ENOMEM;
367a67e128aSKevin Wolf         goto exit;
368a67e128aSKevin Wolf     }
3690a43a1b5SJeff Cody 
3700a43a1b5SJeff Cody     ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries,
3710a43a1b5SJeff Cody                                 desc_sectors, false);
3720a43a1b5SJeff Cody     if (ret < 0) {
3730a43a1b5SJeff Cody         goto free_and_exit;
3740a43a1b5SJeff Cody     }
3750a43a1b5SJeff Cody     if (sectors_read != desc_sectors) {
3760a43a1b5SJeff Cody         ret = -EINVAL;
3770a43a1b5SJeff Cody         goto free_and_exit;
3780a43a1b5SJeff Cody     }
3790a43a1b5SJeff Cody 
3800a43a1b5SJeff Cody     /* put in proper endianness, and validate each desc */
3810a43a1b5SJeff Cody     for (i = 0; i < hdr.descriptor_count; i++) {
3824f75b52aSJeff Cody         desc = desc_entries->desc[i];
3834f75b52aSJeff Cody         vhdx_log_desc_le_import(&desc);
3844f75b52aSJeff Cody         if (convert_endian) {
3854f75b52aSJeff Cody             desc_entries->desc[i] = desc;
3864f75b52aSJeff Cody         }
3874f75b52aSJeff Cody         if (vhdx_log_desc_is_valid(&desc, &hdr) == false) {
3880a43a1b5SJeff Cody             ret = -EINVAL;
3890a43a1b5SJeff Cody             goto free_and_exit;
3900a43a1b5SJeff Cody         }
3910a43a1b5SJeff Cody     }
3924f75b52aSJeff Cody     if (convert_endian) {
3934f75b52aSJeff Cody         desc_entries->hdr = hdr;
3944f75b52aSJeff Cody     }
3950a43a1b5SJeff Cody 
3960a43a1b5SJeff Cody     *buffer = desc_entries;
3970a43a1b5SJeff Cody     goto exit;
3980a43a1b5SJeff Cody 
3990a43a1b5SJeff Cody free_and_exit:
4000a43a1b5SJeff Cody     qemu_vfree(desc_entries);
4010a43a1b5SJeff Cody exit:
4020a43a1b5SJeff Cody     return ret;
4030a43a1b5SJeff Cody }
4040a43a1b5SJeff Cody 
4050a43a1b5SJeff Cody 
4060a43a1b5SJeff Cody /* Flushes the descriptor described by desc to the VHDX image file.
4070a43a1b5SJeff Cody  * If the descriptor is a data descriptor, than 'data' must be non-NULL,
4080a43a1b5SJeff Cody  * and >= 4096 bytes (VHDX_LOG_SECTOR_SIZE), containing the data to be
4090a43a1b5SJeff Cody  * written.
4100a43a1b5SJeff Cody  *
4110a43a1b5SJeff Cody  * Verification is performed to make sure the sequence numbers of a data
4120a43a1b5SJeff Cody  * descriptor match the sequence number in the desc.
4130a43a1b5SJeff Cody  *
4140a43a1b5SJeff Cody  * For a zero descriptor, it may describe multiple sectors to fill with zeroes.
4150a43a1b5SJeff Cody  * In this case, it should be noted that zeroes are written to disk, and the
4160a43a1b5SJeff Cody  * image file is not extended as a sparse file.  */
41765ff757dSKevin Wolf static int GRAPH_RDLOCK
vhdx_log_flush_desc(BlockDriverState * bs,VHDXLogDescriptor * desc,VHDXLogDataSector * data)41865ff757dSKevin Wolf vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
4190a43a1b5SJeff Cody                     VHDXLogDataSector *data)
4200a43a1b5SJeff Cody {
4210a43a1b5SJeff Cody     int ret = 0;
4220a43a1b5SJeff Cody     uint64_t seq, file_offset;
4230a43a1b5SJeff Cody     uint32_t offset = 0;
4240a43a1b5SJeff Cody     void *buffer = NULL;
4250a43a1b5SJeff Cody     uint64_t count = 1;
4260a43a1b5SJeff Cody     int i;
4270a43a1b5SJeff Cody 
4280a43a1b5SJeff Cody     buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
4290a43a1b5SJeff Cody 
4304f75b52aSJeff Cody     if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
4310a43a1b5SJeff Cody         /* data sector */
4320a43a1b5SJeff Cody         if (data == NULL) {
4330a43a1b5SJeff Cody             ret = -EFAULT;
4340a43a1b5SJeff Cody             goto exit;
4350a43a1b5SJeff Cody         }
4360a43a1b5SJeff Cody 
4370a43a1b5SJeff Cody         /* The sequence number of the data sector must match that
4380a43a1b5SJeff Cody          * in the descriptor */
4390a43a1b5SJeff Cody         seq = data->sequence_high;
4400a43a1b5SJeff Cody         seq <<= 32;
4410a43a1b5SJeff Cody         seq |= data->sequence_low & 0xffffffff;
4420a43a1b5SJeff Cody 
4430a43a1b5SJeff Cody         if (seq != desc->sequence_number) {
4440a43a1b5SJeff Cody             ret = -EINVAL;
4450a43a1b5SJeff Cody             goto exit;
4460a43a1b5SJeff Cody         }
4470a43a1b5SJeff Cody 
4480a43a1b5SJeff Cody         /* Each data sector is in total 4096 bytes, however the first
4490a43a1b5SJeff Cody          * 8 bytes, and last 4 bytes, are located in the descriptor */
4500a43a1b5SJeff Cody         memcpy(buffer, &desc->leading_bytes, 8);
4510a43a1b5SJeff Cody         offset += 8;
4520a43a1b5SJeff Cody 
4530a43a1b5SJeff Cody         memcpy(buffer+offset, data->data, 4084);
4540a43a1b5SJeff Cody         offset += 4084;
4550a43a1b5SJeff Cody 
4560a43a1b5SJeff Cody         memcpy(buffer+offset, &desc->trailing_bytes, 4);
4570a43a1b5SJeff Cody 
4584f75b52aSJeff Cody     } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
4590a43a1b5SJeff Cody         /* write 'count' sectors of sector */
4600a43a1b5SJeff Cody         memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
4610a43a1b5SJeff Cody         count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
462349592e0SJeff Cody     } else {
463349592e0SJeff Cody         error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32,
464349592e0SJeff Cody                       desc->signature);
465349592e0SJeff Cody         ret = -EINVAL;
466349592e0SJeff Cody         goto exit;
4670a43a1b5SJeff Cody     }
4680a43a1b5SJeff Cody 
4690a43a1b5SJeff Cody     file_offset = desc->file_offset;
4700a43a1b5SJeff Cody 
4710a43a1b5SJeff Cody     /* count is only > 1 if we are writing zeroes */
4720a43a1b5SJeff Cody     for (i = 0; i < count; i++) {
47332cc71deSAlberto Faria         ret = bdrv_pwrite_sync(bs->file, file_offset, VHDX_LOG_SECTOR_SIZE,
47432cc71deSAlberto Faria                                buffer, 0);
4750a43a1b5SJeff Cody         if (ret < 0) {
4760a43a1b5SJeff Cody             goto exit;
4770a43a1b5SJeff Cody         }
4780a43a1b5SJeff Cody         file_offset += VHDX_LOG_SECTOR_SIZE;
4790a43a1b5SJeff Cody     }
4800a43a1b5SJeff Cody 
4810a43a1b5SJeff Cody exit:
4820a43a1b5SJeff Cody     qemu_vfree(buffer);
4830a43a1b5SJeff Cody     return ret;
4840a43a1b5SJeff Cody }
4850a43a1b5SJeff Cody 
4860a43a1b5SJeff Cody /* Flush the entire log (as described by 'logs') to the VHDX image
4870a43a1b5SJeff Cody  * file, and then set the log to 'empty' status once complete.
4880a43a1b5SJeff Cody  *
4890a43a1b5SJeff Cody  * The log entries should be validate prior to flushing */
49065ff757dSKevin Wolf static int GRAPH_RDLOCK
vhdx_log_flush(BlockDriverState * bs,BDRVVHDXState * s,VHDXLogSequence * logs)49165ff757dSKevin Wolf vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, VHDXLogSequence *logs)
4920a43a1b5SJeff Cody {
4930a43a1b5SJeff Cody     int ret = 0;
4940a43a1b5SJeff Cody     int i;
4950a43a1b5SJeff Cody     uint32_t cnt, sectors_read;
4960a43a1b5SJeff Cody     uint64_t new_file_size;
4970a43a1b5SJeff Cody     void *data = NULL;
4983f910692SJeff Cody     int64_t file_length;
4990a43a1b5SJeff Cody     VHDXLogDescEntries *desc_entries = NULL;
5000a43a1b5SJeff Cody     VHDXLogEntryHeader hdr_tmp = { 0 };
5010a43a1b5SJeff Cody 
5020a43a1b5SJeff Cody     cnt = logs->count;
5030a43a1b5SJeff Cody 
5040a43a1b5SJeff Cody     data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
5050a43a1b5SJeff Cody 
5060a43a1b5SJeff Cody     ret = vhdx_user_visible_write(bs, s);
5070a43a1b5SJeff Cody     if (ret < 0) {
5080a43a1b5SJeff Cody         goto exit;
5090a43a1b5SJeff Cody     }
5100a43a1b5SJeff Cody 
5110a43a1b5SJeff Cody     /* each iteration represents one log sequence, which may span multiple
5120a43a1b5SJeff Cody      * sectors */
5130a43a1b5SJeff Cody     while (cnt--) {
5140a43a1b5SJeff Cody         ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp);
5150a43a1b5SJeff Cody         if (ret < 0) {
5160a43a1b5SJeff Cody             goto exit;
5170a43a1b5SJeff Cody         }
5183f910692SJeff Cody         file_length = bdrv_getlength(bs->file->bs);
5193f910692SJeff Cody         if (file_length < 0) {
5203f910692SJeff Cody             ret = file_length;
5213f910692SJeff Cody             goto exit;
5223f910692SJeff Cody         }
5230a43a1b5SJeff Cody         /* if the log shows a FlushedFileOffset larger than our current file
5240a43a1b5SJeff Cody          * size, then that means the file has been truncated / corrupted, and
5250a43a1b5SJeff Cody          * we must refused to open it / use it */
5263f910692SJeff Cody         if (hdr_tmp.flushed_file_offset > file_length) {
5270a43a1b5SJeff Cody             ret = -EINVAL;
5280a43a1b5SJeff Cody             goto exit;
5290a43a1b5SJeff Cody         }
5300a43a1b5SJeff Cody 
5314f75b52aSJeff Cody         ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true);
5320a43a1b5SJeff Cody         if (ret < 0) {
5330a43a1b5SJeff Cody             goto exit;
5340a43a1b5SJeff Cody         }
5350a43a1b5SJeff Cody 
5360a43a1b5SJeff Cody         for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
5374f75b52aSJeff Cody             if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) {
5380a43a1b5SJeff Cody                 /* data sector, so read a sector to flush */
5390a43a1b5SJeff Cody                 ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read,
5400a43a1b5SJeff Cody                                             data, 1, false);
5410a43a1b5SJeff Cody                 if (ret < 0) {
5420a43a1b5SJeff Cody                     goto exit;
5430a43a1b5SJeff Cody                 }
5440a43a1b5SJeff Cody                 if (sectors_read != 1) {
5450a43a1b5SJeff Cody                     ret = -EINVAL;
5460a43a1b5SJeff Cody                     goto exit;
5470a43a1b5SJeff Cody                 }
5484f75b52aSJeff Cody                 vhdx_log_data_le_import(data);
5490a43a1b5SJeff Cody             }
5500a43a1b5SJeff Cody 
5510a43a1b5SJeff Cody             ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
5520a43a1b5SJeff Cody             if (ret < 0) {
5530a43a1b5SJeff Cody                 goto exit;
5540a43a1b5SJeff Cody             }
5550a43a1b5SJeff Cody         }
5563f910692SJeff Cody         if (file_length < desc_entries->hdr.last_file_offset) {
5570a43a1b5SJeff Cody             new_file_size = desc_entries->hdr.last_file_offset;
5580cb98af2SStefano Garzarella             if (new_file_size % (1 * MiB)) {
5590a43a1b5SJeff Cody                 /* round up to nearest 1MB boundary */
56027539ac5SJeff Cody                 new_file_size = QEMU_ALIGN_UP(new_file_size, MiB);
56127539ac5SJeff Cody                 if (new_file_size > INT64_MAX) {
56227539ac5SJeff Cody                     ret = -EINVAL;
56327539ac5SJeff Cody                     goto exit;
56427539ac5SJeff Cody                 }
565c80d8b06SMax Reitz                 ret = bdrv_truncate(bs->file, new_file_size, false,
5667b8e4857SKevin Wolf                                     PREALLOC_MODE_OFF, 0, NULL);
56795d72983SJeff Cody                 if (ret < 0) {
56895d72983SJeff Cody                     goto exit;
56995d72983SJeff Cody                 }
5700a43a1b5SJeff Cody             }
5710a43a1b5SJeff Cody         }
5720a43a1b5SJeff Cody         qemu_vfree(desc_entries);
5730a43a1b5SJeff Cody         desc_entries = NULL;
5740a43a1b5SJeff Cody     }
5750a43a1b5SJeff Cody 
576c6572fa0SJeff Cody     ret = bdrv_flush(bs);
577c6572fa0SJeff Cody     if (ret < 0) {
578c6572fa0SJeff Cody         goto exit;
579c6572fa0SJeff Cody     }
5800a43a1b5SJeff Cody     /* once the log is fully flushed, indicate that we have an empty log
5810a43a1b5SJeff Cody      * now.  This also sets the log guid to 0, to indicate an empty log */
5820a43a1b5SJeff Cody     vhdx_log_reset(bs, s);
5830a43a1b5SJeff Cody 
5840a43a1b5SJeff Cody exit:
5850a43a1b5SJeff Cody     qemu_vfree(data);
5860a43a1b5SJeff Cody     qemu_vfree(desc_entries);
5870a43a1b5SJeff Cody     return ret;
5880a43a1b5SJeff Cody }
5890a43a1b5SJeff Cody 
59065ff757dSKevin Wolf static int GRAPH_RDLOCK
vhdx_validate_log_entry(BlockDriverState * bs,BDRVVHDXState * s,VHDXLogEntries * log,uint64_t seq,bool * valid,VHDXLogEntryHeader * entry)59165ff757dSKevin Wolf vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
5920a43a1b5SJeff Cody                         VHDXLogEntries *log, uint64_t seq,
5930a43a1b5SJeff Cody                         bool *valid, VHDXLogEntryHeader *entry)
5940a43a1b5SJeff Cody {
5950a43a1b5SJeff Cody     int ret = 0;
5960a43a1b5SJeff Cody     VHDXLogEntryHeader hdr;
5970a43a1b5SJeff Cody     void *buffer = NULL;
5980a43a1b5SJeff Cody     uint32_t i, desc_sectors, total_sectors, crc;
5990a43a1b5SJeff Cody     uint32_t sectors_read = 0;
6000a43a1b5SJeff Cody     VHDXLogDescEntries *desc_buffer = NULL;
6010a43a1b5SJeff Cody 
6020a43a1b5SJeff Cody     *valid = false;
6030a43a1b5SJeff Cody 
6040a43a1b5SJeff Cody     ret = vhdx_log_peek_hdr(bs, log, &hdr);
6050a43a1b5SJeff Cody     if (ret < 0) {
6060a43a1b5SJeff Cody         goto inc_and_exit;
6070a43a1b5SJeff Cody     }
6080a43a1b5SJeff Cody 
6090a43a1b5SJeff Cody     if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
6100a43a1b5SJeff Cody         goto inc_and_exit;
6110a43a1b5SJeff Cody     }
6120a43a1b5SJeff Cody 
6130a43a1b5SJeff Cody     if (seq > 0) {
6140a43a1b5SJeff Cody         if (hdr.sequence_number != seq + 1) {
6150a43a1b5SJeff Cody             goto inc_and_exit;
6160a43a1b5SJeff Cody         }
6170a43a1b5SJeff Cody     }
6180a43a1b5SJeff Cody 
6190a43a1b5SJeff Cody     desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
6200a43a1b5SJeff Cody 
6214f75b52aSJeff Cody     /* Read all log sectors, and calculate log checksum */
6220a43a1b5SJeff Cody 
6230a43a1b5SJeff Cody     total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;
6240a43a1b5SJeff Cody 
6250a43a1b5SJeff Cody 
626dc6fb73dSDeepak Kathayat     /* read_desc() will increment the read idx */
6274f75b52aSJeff Cody     ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false);
6280a43a1b5SJeff Cody     if (ret < 0) {
6290a43a1b5SJeff Cody         goto free_and_exit;
6300a43a1b5SJeff Cody     }
6310a43a1b5SJeff Cody 
6320a43a1b5SJeff Cody     crc = vhdx_checksum_calc(0xffffffff, (void *)desc_buffer,
6330a43a1b5SJeff Cody                             desc_sectors * VHDX_LOG_SECTOR_SIZE, 4);
6340a43a1b5SJeff Cody     crc ^= 0xffffffff;
6350a43a1b5SJeff Cody 
6360a43a1b5SJeff Cody     buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
6370a43a1b5SJeff Cody     if (total_sectors > desc_sectors) {
6380a43a1b5SJeff Cody         for (i = 0; i < total_sectors - desc_sectors; i++) {
6390a43a1b5SJeff Cody             sectors_read = 0;
6400a43a1b5SJeff Cody             ret = vhdx_log_read_sectors(bs, log, &sectors_read, buffer,
6410a43a1b5SJeff Cody                                         1, false);
6420a43a1b5SJeff Cody             if (ret < 0 || sectors_read != 1) {
6430a43a1b5SJeff Cody                 goto free_and_exit;
6440a43a1b5SJeff Cody             }
6450a43a1b5SJeff Cody             crc = vhdx_checksum_calc(crc, buffer, VHDX_LOG_SECTOR_SIZE, -1);
6460a43a1b5SJeff Cody             crc ^= 0xffffffff;
6470a43a1b5SJeff Cody         }
6480a43a1b5SJeff Cody     }
6490a43a1b5SJeff Cody     crc ^= 0xffffffff;
6504f75b52aSJeff Cody     if (crc != hdr.checksum) {
6510a43a1b5SJeff Cody         goto free_and_exit;
6520a43a1b5SJeff Cody     }
6530a43a1b5SJeff Cody 
6540a43a1b5SJeff Cody     *valid = true;
6550a43a1b5SJeff Cody     *entry = hdr;
6560a43a1b5SJeff Cody     goto free_and_exit;
6570a43a1b5SJeff Cody 
6580a43a1b5SJeff Cody inc_and_exit:
6590a43a1b5SJeff Cody     log->read = vhdx_log_inc_idx(log->read, log->length);
6600a43a1b5SJeff Cody 
6610a43a1b5SJeff Cody free_and_exit:
6620a43a1b5SJeff Cody     qemu_vfree(buffer);
6630a43a1b5SJeff Cody     qemu_vfree(desc_buffer);
6640a43a1b5SJeff Cody     return ret;
6650a43a1b5SJeff Cody }
6660a43a1b5SJeff Cody 
6670a43a1b5SJeff Cody /* Search through the log circular buffer, and find the valid, active
6680a43a1b5SJeff Cody  * log sequence, if any exists
6690a43a1b5SJeff Cody  * */
67065ff757dSKevin Wolf static int GRAPH_RDLOCK
vhdx_log_search(BlockDriverState * bs,BDRVVHDXState * s,VHDXLogSequence * logs)67165ff757dSKevin Wolf vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s, VHDXLogSequence *logs)
6720a43a1b5SJeff Cody {
6730a43a1b5SJeff Cody     int ret = 0;
6740a43a1b5SJeff Cody     uint32_t tail;
6750a43a1b5SJeff Cody     bool seq_valid = false;
6760a43a1b5SJeff Cody     VHDXLogSequence candidate = { 0 };
6770a43a1b5SJeff Cody     VHDXLogEntryHeader hdr = { 0 };
6780a43a1b5SJeff Cody     VHDXLogEntries curr_log;
6790a43a1b5SJeff Cody 
6800a43a1b5SJeff Cody     memcpy(&curr_log, &s->log, sizeof(VHDXLogEntries));
6810a43a1b5SJeff Cody     curr_log.write = curr_log.length;   /* assume log is full */
6820a43a1b5SJeff Cody     curr_log.read = 0;
6830a43a1b5SJeff Cody 
6840a43a1b5SJeff Cody 
6850a43a1b5SJeff Cody     /* now we will go through the whole log sector by sector, until
6860a43a1b5SJeff Cody      * we find a valid, active log sequence, or reach the end of the
6870a43a1b5SJeff Cody      * log buffer */
6880a43a1b5SJeff Cody     for (;;) {
6890a43a1b5SJeff Cody         uint64_t curr_seq = 0;
6900a43a1b5SJeff Cody         VHDXLogSequence current = { 0 };
6910a43a1b5SJeff Cody 
6920a43a1b5SJeff Cody         tail = curr_log.read;
6930a43a1b5SJeff Cody 
6940a43a1b5SJeff Cody         ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
6950a43a1b5SJeff Cody                                       &seq_valid, &hdr);
6960a43a1b5SJeff Cody         if (ret < 0) {
6970a43a1b5SJeff Cody             goto exit;
6980a43a1b5SJeff Cody         }
6990a43a1b5SJeff Cody 
7000a43a1b5SJeff Cody         if (seq_valid) {
7010a43a1b5SJeff Cody             current.valid     = true;
7020a43a1b5SJeff Cody             current.log       = curr_log;
7030a43a1b5SJeff Cody             current.log.read  = tail;
7040a43a1b5SJeff Cody             current.log.write = curr_log.read;
7050a43a1b5SJeff Cody             current.count     = 1;
7060a43a1b5SJeff Cody             current.hdr       = hdr;
7070a43a1b5SJeff Cody 
7080a43a1b5SJeff Cody 
7090a43a1b5SJeff Cody             for (;;) {
7100a43a1b5SJeff Cody                 ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
7110a43a1b5SJeff Cody                                               &seq_valid, &hdr);
7120a43a1b5SJeff Cody                 if (ret < 0) {
7130a43a1b5SJeff Cody                     goto exit;
7140a43a1b5SJeff Cody                 }
7150a43a1b5SJeff Cody                 if (seq_valid == false) {
7160a43a1b5SJeff Cody                     break;
7170a43a1b5SJeff Cody                 }
7180a43a1b5SJeff Cody                 current.log.write = curr_log.read;
7190a43a1b5SJeff Cody                 current.count++;
7200a43a1b5SJeff Cody 
7210a43a1b5SJeff Cody                 curr_seq = hdr.sequence_number;
7220a43a1b5SJeff Cody             }
7230a43a1b5SJeff Cody         }
7240a43a1b5SJeff Cody 
7250a43a1b5SJeff Cody         if (current.valid) {
7260a43a1b5SJeff Cody             if (candidate.valid == false ||
7270a43a1b5SJeff Cody                 current.hdr.sequence_number > candidate.hdr.sequence_number) {
7280a43a1b5SJeff Cody                 candidate = current;
7290a43a1b5SJeff Cody             }
7300a43a1b5SJeff Cody         }
7310a43a1b5SJeff Cody 
7320a43a1b5SJeff Cody         if (curr_log.read < tail) {
7330a43a1b5SJeff Cody             break;
7340a43a1b5SJeff Cody         }
7350a43a1b5SJeff Cody     }
7360a43a1b5SJeff Cody 
7370a43a1b5SJeff Cody     *logs = candidate;
7380a43a1b5SJeff Cody 
7390a43a1b5SJeff Cody     if (candidate.valid) {
7400a43a1b5SJeff Cody         /* this is the next sequence number, for writes */
7410a43a1b5SJeff Cody         s->log.sequence = candidate.hdr.sequence_number + 1;
7420a43a1b5SJeff Cody     }
7430a43a1b5SJeff Cody 
7440a43a1b5SJeff Cody 
7450a43a1b5SJeff Cody exit:
7460a43a1b5SJeff Cody     return ret;
7470a43a1b5SJeff Cody }
7480a43a1b5SJeff Cody 
7490a43a1b5SJeff Cody /* Parse the replay log.  Per the VHDX spec, if the log is present
7500a43a1b5SJeff Cody  * it must be replayed prior to opening the file, even read-only.
7510a43a1b5SJeff Cody  *
7520a43a1b5SJeff Cody  * If read-only, we must replay the log in RAM (or refuse to open
7530a43a1b5SJeff Cody  * a dirty VHDX file read-only) */
vhdx_parse_log(BlockDriverState * bs,BDRVVHDXState * s,bool * flushed,Error ** errp)7547e30e6a6SJeff Cody int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
7557e30e6a6SJeff Cody                    Error **errp)
7560a43a1b5SJeff Cody {
7570a43a1b5SJeff Cody     int ret = 0;
7580a43a1b5SJeff Cody     VHDXHeader *hdr;
7590a43a1b5SJeff Cody     VHDXLogSequence logs = { 0 };
7600a43a1b5SJeff Cody 
7610a43a1b5SJeff Cody     hdr = s->headers[s->curr_header];
7620a43a1b5SJeff Cody 
7630a43a1b5SJeff Cody     *flushed = false;
7640a43a1b5SJeff Cody 
7650a43a1b5SJeff Cody     /* s->log.hdr is freed in vhdx_close() */
7660a43a1b5SJeff Cody     if (s->log.hdr == NULL) {
7670a43a1b5SJeff Cody         s->log.hdr = qemu_blockalign(bs, sizeof(VHDXLogEntryHeader));
7680a43a1b5SJeff Cody     }
7690a43a1b5SJeff Cody 
7700a43a1b5SJeff Cody     s->log.offset = hdr->log_offset;
7710a43a1b5SJeff Cody     s->log.length = hdr->log_length;
7720a43a1b5SJeff Cody 
7730a43a1b5SJeff Cody     if (s->log.offset < VHDX_LOG_MIN_SIZE ||
7740a43a1b5SJeff Cody         s->log.offset % VHDX_LOG_MIN_SIZE) {
7750a43a1b5SJeff Cody         ret = -EINVAL;
7760a43a1b5SJeff Cody         goto exit;
7770a43a1b5SJeff Cody     }
7780a43a1b5SJeff Cody 
7790a43a1b5SJeff Cody     /* per spec, only log version of 0 is supported */
7800a43a1b5SJeff Cody     if (hdr->log_version != 0) {
7810a43a1b5SJeff Cody         ret = -EINVAL;
7820a43a1b5SJeff Cody         goto exit;
7830a43a1b5SJeff Cody     }
7840a43a1b5SJeff Cody 
7850a43a1b5SJeff Cody     /* If either the log guid, or log length is zero,
7860a43a1b5SJeff Cody      * then a replay log is not present */
7870a43a1b5SJeff Cody     if (guid_eq(hdr->log_guid, zero_guid)) {
7880a43a1b5SJeff Cody         goto exit;
7890a43a1b5SJeff Cody     }
7900a43a1b5SJeff Cody 
7910a43a1b5SJeff Cody     if (hdr->log_length == 0) {
7920a43a1b5SJeff Cody         goto exit;
7930a43a1b5SJeff Cody     }
7940a43a1b5SJeff Cody 
7950a43a1b5SJeff Cody     if (hdr->log_length % VHDX_LOG_MIN_SIZE) {
7960a43a1b5SJeff Cody         ret = -EINVAL;
7970a43a1b5SJeff Cody         goto exit;
7980a43a1b5SJeff Cody     }
7990a43a1b5SJeff Cody 
8000a43a1b5SJeff Cody 
8010a43a1b5SJeff Cody     /* The log is present, we need to find if and where there is an active
8020a43a1b5SJeff Cody      * sequence of valid entries present in the log.  */
8030a43a1b5SJeff Cody 
8040a43a1b5SJeff Cody     ret = vhdx_log_search(bs, s, &logs);
8050a43a1b5SJeff Cody     if (ret < 0) {
8060a43a1b5SJeff Cody         goto exit;
8070a43a1b5SJeff Cody     }
8080a43a1b5SJeff Cody 
8090a43a1b5SJeff Cody     if (logs.valid) {
810307261b2SVladimir Sementsov-Ogievskiy         if (bdrv_is_read_only(bs)) {
811f30c66baSMax Reitz             bdrv_refresh_filename(bs);
8127e30e6a6SJeff Cody             ret = -EPERM;
813bf89e874SMarkus Armbruster             error_setg(errp,
8147e30e6a6SJeff Cody                        "VHDX image file '%s' opened read-only, but "
815bf89e874SMarkus Armbruster                        "contains a log that needs to be replayed",
816bf89e874SMarkus Armbruster                        bs->filename);
817bf89e874SMarkus Armbruster             error_append_hint(errp,  "To replay the log, run:\n"
818bf89e874SMarkus Armbruster                               "qemu-img check -r all '%s'\n",
819bf89e874SMarkus Armbruster                               bs->filename);
8207e30e6a6SJeff Cody             goto exit;
8217e30e6a6SJeff Cody         }
8220a43a1b5SJeff Cody         /* now flush the log */
8230a43a1b5SJeff Cody         ret = vhdx_log_flush(bs, s, &logs);
8240a43a1b5SJeff Cody         if (ret < 0) {
8250a43a1b5SJeff Cody             goto exit;
8260a43a1b5SJeff Cody         }
8270a43a1b5SJeff Cody         *flushed = true;
8280a43a1b5SJeff Cody     }
8290a43a1b5SJeff Cody 
8300a43a1b5SJeff Cody 
8310a43a1b5SJeff Cody exit:
8320a43a1b5SJeff Cody     return ret;
8330a43a1b5SJeff Cody }
8340a43a1b5SJeff Cody 
8350a43a1b5SJeff Cody 
8368adc5233SJeff Cody 
/*
 * Split one raw 4096-byte sector at @data between a log descriptor and a
 * log data sector, then convert both structures with their le_export
 * helpers.
 *
 * Layout of the raw sector (8 + 4084 + 4 = 4096, 1 log sector):
 *   bytes    0..7     -> desc->leading_bytes
 *   bytes    8..4091  -> sector->data
 *   bytes 4092..4095  -> desc->trailing_bytes
 *
 * @seq is stored in the data sector split into its high and low 32 bits,
 * and the data sector signature is filled in.
 */
static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
                                      VHDXLogDataSector *sector, void *data,
                                      uint64_t seq)
{
    enum {
        LEAD_LEN  = 8,
        BODY_LEN  = 4084,
        TRAIL_LEN = 4,
    };
    const uint8_t *raw = data;

    memcpy(&desc->leading_bytes, raw, LEAD_LEN);
    desc->leading_bytes = cpu_to_le64(desc->leading_bytes);

    memcpy(sector->data, raw + LEAD_LEN, BODY_LEN);

    memcpy(&desc->trailing_bytes, raw + LEAD_LEN + BODY_LEN, TRAIL_LEN);
    desc->trailing_bytes = cpu_to_le32(desc->trailing_bytes);

    sector->data_signature = VHDX_LOG_DATA_SIGNATURE;
    sector->sequence_high  = (uint32_t) (seq >> 32);
    sector->sequence_low   = (uint32_t) (seq & 0xffffffff);

    vhdx_log_desc_le_export(desc);
    vhdx_log_data_le_export(sector);
}
8588adc5233SJeff Cody 
8598adc5233SJeff Cody 
860f6b08994SPaolo Bonzini static int coroutine_fn GRAPH_RDLOCK
vhdx_log_write(BlockDriverState * bs,BDRVVHDXState * s,void * data,uint32_t length,uint64_t offset)861f6b08994SPaolo Bonzini vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
8628adc5233SJeff Cody                void *data, uint32_t length, uint64_t offset)
8638adc5233SJeff Cody {
8648adc5233SJeff Cody     int ret = 0;
8658adc5233SJeff Cody     void *buffer = NULL;
8668adc5233SJeff Cody     void *merged_sector = NULL;
8678adc5233SJeff Cody     void *data_tmp, *sector_write;
8688adc5233SJeff Cody     unsigned int i;
8698adc5233SJeff Cody     int sector_offset;
8708adc5233SJeff Cody     uint32_t desc_sectors, sectors, total_length;
8718adc5233SJeff Cody     uint32_t sectors_written = 0;
8728adc5233SJeff Cody     uint32_t aligned_length;
8738adc5233SJeff Cody     uint32_t leading_length = 0;
8748adc5233SJeff Cody     uint32_t trailing_length = 0;
8758adc5233SJeff Cody     uint32_t partial_sectors = 0;
8768adc5233SJeff Cody     uint32_t bytes_written = 0;
8778adc5233SJeff Cody     uint64_t file_offset;
8783f910692SJeff Cody     int64_t file_length;
8798adc5233SJeff Cody     VHDXHeader *header;
8808adc5233SJeff Cody     VHDXLogEntryHeader new_hdr;
8818adc5233SJeff Cody     VHDXLogDescriptor *new_desc = NULL;
8828adc5233SJeff Cody     VHDXLogDataSector *data_sector = NULL;
8838adc5233SJeff Cody     MSGUID new_guid = { 0 };
8848adc5233SJeff Cody 
8858adc5233SJeff Cody     header = s->headers[s->curr_header];
8868adc5233SJeff Cody 
8878adc5233SJeff Cody     /* need to have offset read data, and be on 4096 byte boundary */
8888adc5233SJeff Cody 
8898adc5233SJeff Cody     if (length > header->log_length) {
8908adc5233SJeff Cody         /* no log present.  we could create a log here instead of failing */
8918adc5233SJeff Cody         ret = -EINVAL;
8928adc5233SJeff Cody         goto exit;
8938adc5233SJeff Cody     }
8948adc5233SJeff Cody 
8958adc5233SJeff Cody     if (guid_eq(header->log_guid, zero_guid)) {
8968adc5233SJeff Cody         vhdx_guid_generate(&new_guid);
8978adc5233SJeff Cody         vhdx_update_headers(bs, s, false, &new_guid);
8988adc5233SJeff Cody     } else {
8998adc5233SJeff Cody         /* currently, we require that the log be flushed after
9008adc5233SJeff Cody          * every write. */
9018adc5233SJeff Cody         ret = -ENOTSUP;
9028adc5233SJeff Cody         goto exit;
9038adc5233SJeff Cody     }
9048adc5233SJeff Cody 
9058adc5233SJeff Cody     /* 0 is an invalid sequence number, but may also represent the first
9068adc5233SJeff Cody      * log write (or a wrapped seq) */
9078adc5233SJeff Cody     if (s->log.sequence == 0) {
9088adc5233SJeff Cody         s->log.sequence = 1;
9098adc5233SJeff Cody     }
9108adc5233SJeff Cody 
9118adc5233SJeff Cody     sector_offset = offset % VHDX_LOG_SECTOR_SIZE;
912cf7a09c1SMarc-André Lureau     file_offset = QEMU_ALIGN_DOWN(offset, VHDX_LOG_SECTOR_SIZE);
9138adc5233SJeff Cody 
9148adc5233SJeff Cody     aligned_length = length;
9158adc5233SJeff Cody 
9168adc5233SJeff Cody     /* add in the unaligned head and tail bytes */
9178adc5233SJeff Cody     if (sector_offset) {
9188adc5233SJeff Cody         leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset);
9198adc5233SJeff Cody         leading_length = leading_length > length ? length : leading_length;
9208adc5233SJeff Cody         aligned_length -= leading_length;
9218adc5233SJeff Cody         partial_sectors++;
9228adc5233SJeff Cody     }
9238adc5233SJeff Cody 
9248adc5233SJeff Cody     sectors = aligned_length / VHDX_LOG_SECTOR_SIZE;
9258adc5233SJeff Cody     trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE);
9268adc5233SJeff Cody     if (trailing_length) {
9278adc5233SJeff Cody         partial_sectors++;
9288adc5233SJeff Cody     }
9298adc5233SJeff Cody 
9308adc5233SJeff Cody     sectors += partial_sectors;
9318adc5233SJeff Cody 
932f6b08994SPaolo Bonzini     file_length = bdrv_co_getlength(bs->file->bs);
9333f910692SJeff Cody     if (file_length < 0) {
9343f910692SJeff Cody         ret = file_length;
9353f910692SJeff Cody         goto exit;
9363f910692SJeff Cody     }
9373f910692SJeff Cody 
9388adc5233SJeff Cody     /* sectors is now how many sectors the data itself takes, not
9398adc5233SJeff Cody      * including the header and descriptor metadata */
9408adc5233SJeff Cody 
9418adc5233SJeff Cody     new_hdr = (VHDXLogEntryHeader) {
9428adc5233SJeff Cody                 .signature           = VHDX_LOG_SIGNATURE,
9438adc5233SJeff Cody                 .tail                = s->log.tail,
9448adc5233SJeff Cody                 .sequence_number     = s->log.sequence,
9458adc5233SJeff Cody                 .descriptor_count    = sectors,
9468adc5233SJeff Cody                 .reserved            = 0,
9473f910692SJeff Cody                 .flushed_file_offset = file_length,
9483f910692SJeff Cody                 .last_file_offset    = file_length,
9493f910692SJeff Cody                 .log_guid            = header->log_guid,
9508adc5233SJeff Cody               };
9518adc5233SJeff Cody 
9528adc5233SJeff Cody 
9538adc5233SJeff Cody     desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count);
9548adc5233SJeff Cody 
9558adc5233SJeff Cody     total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE;
9568adc5233SJeff Cody     new_hdr.entry_length = total_length;
9578adc5233SJeff Cody 
9588adc5233SJeff Cody     vhdx_log_entry_hdr_le_export(&new_hdr);
9598adc5233SJeff Cody 
9608adc5233SJeff Cody     buffer = qemu_blockalign(bs, total_length);
9618adc5233SJeff Cody     memcpy(buffer, &new_hdr, sizeof(new_hdr));
9628adc5233SJeff Cody 
963d4df3dbcSMarkus Armbruster     new_desc = buffer + sizeof(new_hdr);
9648adc5233SJeff Cody     data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE);
9658adc5233SJeff Cody     data_tmp = data;
9668adc5233SJeff Cody 
9678adc5233SJeff Cody     /* All log sectors are 4KB, so for any partial sectors we must
9688adc5233SJeff Cody      * merge the data with preexisting data from the final file
9698adc5233SJeff Cody      * destination */
9708adc5233SJeff Cody     merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
9718adc5233SJeff Cody 
9728adc5233SJeff Cody     for (i = 0; i < sectors; i++) {
9738adc5233SJeff Cody         new_desc->signature       = VHDX_LOG_DESC_SIGNATURE;
9748adc5233SJeff Cody         new_desc->sequence_number = s->log.sequence;
9758adc5233SJeff Cody         new_desc->file_offset     = file_offset;
9768adc5233SJeff Cody 
9778adc5233SJeff Cody         if (i == 0 && leading_length) {
9788adc5233SJeff Cody             /* partial sector at the front of the buffer */
979f6b08994SPaolo Bonzini             ret = bdrv_co_pread(bs->file, file_offset, VHDX_LOG_SECTOR_SIZE,
98032cc71deSAlberto Faria                                 merged_sector, 0);
9818adc5233SJeff Cody             if (ret < 0) {
9828adc5233SJeff Cody                 goto exit;
9838adc5233SJeff Cody             }
9848adc5233SJeff Cody             memcpy(merged_sector + sector_offset, data_tmp, leading_length);
9858adc5233SJeff Cody             bytes_written = leading_length;
9868adc5233SJeff Cody             sector_write = merged_sector;
9878adc5233SJeff Cody         } else if (i == sectors - 1 && trailing_length) {
9888adc5233SJeff Cody             /* partial sector at the end of the buffer */
989f6b08994SPaolo Bonzini             ret = bdrv_co_pread(bs->file, file_offset + trailing_length,
99032cc71deSAlberto Faria                                 VHDX_LOG_SECTOR_SIZE - trailing_length,
99132cc71deSAlberto Faria                                 merged_sector + trailing_length, 0);
9928adc5233SJeff Cody             if (ret < 0) {
9938adc5233SJeff Cody                 goto exit;
9948adc5233SJeff Cody             }
9958adc5233SJeff Cody             memcpy(merged_sector, data_tmp, trailing_length);
9968adc5233SJeff Cody             bytes_written = trailing_length;
9978adc5233SJeff Cody             sector_write = merged_sector;
9988adc5233SJeff Cody         } else {
9998adc5233SJeff Cody             bytes_written = VHDX_LOG_SECTOR_SIZE;
10008adc5233SJeff Cody             sector_write = data_tmp;
10018adc5233SJeff Cody         }
10028adc5233SJeff Cody 
10038adc5233SJeff Cody         /* populate the raw sector data into the proper structures,
10048adc5233SJeff Cody          * as well as update the descriptor, and convert to proper
10058adc5233SJeff Cody          * endianness */
10068adc5233SJeff Cody         vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write,
10078adc5233SJeff Cody                                   s->log.sequence);
10088adc5233SJeff Cody 
10098adc5233SJeff Cody         data_tmp += bytes_written;
10108adc5233SJeff Cody         data_sector++;
10118adc5233SJeff Cody         new_desc++;
10128adc5233SJeff Cody         file_offset += VHDX_LOG_SECTOR_SIZE;
10138adc5233SJeff Cody     }
10148adc5233SJeff Cody 
10158adc5233SJeff Cody     /* checksum covers entire entry, from the log header through the
10168adc5233SJeff Cody      * last data sector */
10178adc5233SJeff Cody     vhdx_update_checksum(buffer, total_length,
10188adc5233SJeff Cody                          offsetof(VHDXLogEntryHeader, checksum));
10198adc5233SJeff Cody 
10208adc5233SJeff Cody     /* now write to the log */
1021f50159faSMarkus Armbruster     ret = vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
10228adc5233SJeff Cody                                  desc_sectors + sectors);
10238adc5233SJeff Cody     if (ret < 0) {
10248adc5233SJeff Cody         goto exit;
10258adc5233SJeff Cody     }
10268adc5233SJeff Cody 
10278adc5233SJeff Cody     if (sectors_written != desc_sectors + sectors) {
10288adc5233SJeff Cody         /* instead of failing, we could flush the log here */
10298adc5233SJeff Cody         ret = -EINVAL;
10308adc5233SJeff Cody         goto exit;
10318adc5233SJeff Cody     }
10328adc5233SJeff Cody 
10338adc5233SJeff Cody     s->log.sequence++;
10348adc5233SJeff Cody     /* write new tail */
10358adc5233SJeff Cody     s->log.tail = s->log.write;
10368adc5233SJeff Cody 
10378adc5233SJeff Cody exit:
10388adc5233SJeff Cody     qemu_vfree(buffer);
10398adc5233SJeff Cody     qemu_vfree(merged_sector);
10408adc5233SJeff Cody     return ret;
10418adc5233SJeff Cody }
10428adc5233SJeff Cody 
10438adc5233SJeff Cody /* Perform a log write, and then immediately flush the entire log */
1044f6b08994SPaolo Bonzini int coroutine_fn
vhdx_log_write_and_flush(BlockDriverState * bs,BDRVVHDXState * s,void * data,uint32_t length,uint64_t offset)1045f6b08994SPaolo Bonzini vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
10468adc5233SJeff Cody                          void *data, uint32_t length, uint64_t offset)
10478adc5233SJeff Cody {
10488adc5233SJeff Cody     int ret = 0;
10498adc5233SJeff Cody     VHDXLogSequence logs = { .valid = true,
10508adc5233SJeff Cody                              .count = 1,
10518adc5233SJeff Cody                              .hdr = { 0 } };
10528adc5233SJeff Cody 
10538adc5233SJeff Cody 
10548adc5233SJeff Cody     /* Make sure data written (new and/or changed blocks) is stable
10558adc5233SJeff Cody      * on disk, before creating log entry */
1056f6b08994SPaolo Bonzini     ret = bdrv_co_flush(bs);
1057c6572fa0SJeff Cody     if (ret < 0) {
1058c6572fa0SJeff Cody         goto exit;
1059c6572fa0SJeff Cody     }
1060c6572fa0SJeff Cody 
10618adc5233SJeff Cody     ret = vhdx_log_write(bs, s, data, length, offset);
10628adc5233SJeff Cody     if (ret < 0) {
10638adc5233SJeff Cody         goto exit;
10648adc5233SJeff Cody     }
10658adc5233SJeff Cody     logs.log = s->log;
10668adc5233SJeff Cody 
10678adc5233SJeff Cody     /* Make sure log is stable on disk */
1068f6b08994SPaolo Bonzini     ret = bdrv_co_flush(bs);
1069c6572fa0SJeff Cody     if (ret < 0) {
1070c6572fa0SJeff Cody         goto exit;
1071c6572fa0SJeff Cody     }
1072c6572fa0SJeff Cody 
10738adc5233SJeff Cody     ret = vhdx_log_flush(bs, s, &logs);
10748adc5233SJeff Cody     if (ret < 0) {
10758adc5233SJeff Cody         goto exit;
10768adc5233SJeff Cody     }
10778adc5233SJeff Cody 
10788adc5233SJeff Cody     s->log = logs.log;
10798adc5233SJeff Cody 
10808adc5233SJeff Cody exit:
10818adc5233SJeff Cody     return ret;
10828adc5233SJeff Cody }
10838adc5233SJeff Cody 
1084