1019d6b8fSAnthony Liguori /* 2019d6b8fSAnthony Liguori * Block driver for the QCOW format 3019d6b8fSAnthony Liguori * 4019d6b8fSAnthony Liguori * Copyright (c) 2004-2006 Fabrice Bellard 5019d6b8fSAnthony Liguori * 6019d6b8fSAnthony Liguori * Permission is hereby granted, free of charge, to any person obtaining a copy 7019d6b8fSAnthony Liguori * of this software and associated documentation files (the "Software"), to deal 8019d6b8fSAnthony Liguori * in the Software without restriction, including without limitation the rights 9019d6b8fSAnthony Liguori * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10019d6b8fSAnthony Liguori * copies of the Software, and to permit persons to whom the Software is 11019d6b8fSAnthony Liguori * furnished to do so, subject to the following conditions: 12019d6b8fSAnthony Liguori * 13019d6b8fSAnthony Liguori * The above copyright notice and this permission notice shall be included in 14019d6b8fSAnthony Liguori * all copies or substantial portions of the Software. 15019d6b8fSAnthony Liguori * 16019d6b8fSAnthony Liguori * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17019d6b8fSAnthony Liguori * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18019d6b8fSAnthony Liguori * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19019d6b8fSAnthony Liguori * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20019d6b8fSAnthony Liguori * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21019d6b8fSAnthony Liguori * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22019d6b8fSAnthony Liguori * THE SOFTWARE. 23019d6b8fSAnthony Liguori */ 24019d6b8fSAnthony Liguori #include "qemu-common.h" 25737e150eSPaolo Bonzini #include "block/block_int.h" 261de7afc9SPaolo Bonzini #include "qemu/module.h" 27019d6b8fSAnthony Liguori #include <zlib.h> 28753d9b82SAurelien Jarno #include "qemu/aes.h" 29caf71f86SPaolo Bonzini #include "migration/migration.h" 30019d6b8fSAnthony Liguori 31019d6b8fSAnthony Liguori /**************************************************************/ 32019d6b8fSAnthony Liguori /* QEMU COW block driver with compression and encryption support */ 33019d6b8fSAnthony Liguori 34019d6b8fSAnthony Liguori #define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) 35019d6b8fSAnthony Liguori #define QCOW_VERSION 1 36019d6b8fSAnthony Liguori 37019d6b8fSAnthony Liguori #define QCOW_CRYPT_NONE 0 38019d6b8fSAnthony Liguori #define QCOW_CRYPT_AES 1 39019d6b8fSAnthony Liguori 40019d6b8fSAnthony Liguori #define QCOW_OFLAG_COMPRESSED (1LL << 63) 41019d6b8fSAnthony Liguori 42019d6b8fSAnthony Liguori typedef struct QCowHeader { 43019d6b8fSAnthony Liguori uint32_t magic; 44019d6b8fSAnthony Liguori uint32_t version; 45019d6b8fSAnthony Liguori uint64_t backing_file_offset; 46019d6b8fSAnthony Liguori uint32_t backing_file_size; 47019d6b8fSAnthony Liguori uint32_t mtime; 48019d6b8fSAnthony Liguori uint64_t size; /* in bytes */ 49019d6b8fSAnthony Liguori uint8_t cluster_bits; 50019d6b8fSAnthony Liguori uint8_t l2_bits; 51019d6b8fSAnthony Liguori uint32_t crypt_method; 52019d6b8fSAnthony Liguori uint64_t l1_table_offset; 53019d6b8fSAnthony Liguori } QCowHeader; 54019d6b8fSAnthony Liguori 55019d6b8fSAnthony Liguori #define L2_CACHE_SIZE 16 56019d6b8fSAnthony Liguori 57019d6b8fSAnthony Liguori typedef struct BDRVQcowState { 58019d6b8fSAnthony Liguori int cluster_bits; 59019d6b8fSAnthony Liguori int cluster_size; 60019d6b8fSAnthony Liguori int cluster_sectors; 61019d6b8fSAnthony Liguori int l2_bits; 62019d6b8fSAnthony Liguori int l2_size; 63019d6b8fSAnthony Liguori int l1_size; 64019d6b8fSAnthony Liguori uint64_t cluster_offset_mask; 65019d6b8fSAnthony Liguori uint64_t l1_table_offset; 66019d6b8fSAnthony Liguori uint64_t *l1_table; 67019d6b8fSAnthony Liguori uint64_t *l2_cache; 68019d6b8fSAnthony Liguori uint64_t l2_cache_offsets[L2_CACHE_SIZE]; 69019d6b8fSAnthony Liguori uint32_t l2_cache_counts[L2_CACHE_SIZE]; 70019d6b8fSAnthony Liguori uint8_t *cluster_cache; 71019d6b8fSAnthony Liguori uint8_t *cluster_data; 72019d6b8fSAnthony Liguori uint64_t cluster_cache_offset; 73019d6b8fSAnthony Liguori uint32_t crypt_method; /* current crypt method, 0 if no key yet */ 74019d6b8fSAnthony Liguori uint32_t crypt_method_header; 75019d6b8fSAnthony Liguori AES_KEY aes_encrypt_key; 76019d6b8fSAnthony Liguori AES_KEY aes_decrypt_key; 7752b8eb60SKevin Wolf CoMutex lock; 78fd9f102cSKevin Wolf Error *migration_blocker; 79019d6b8fSAnthony Liguori } BDRVQcowState; 80019d6b8fSAnthony Liguori 8166f82ceeSKevin Wolf static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); 82019d6b8fSAnthony Liguori 83019d6b8fSAnthony Liguori static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename) 84019d6b8fSAnthony Liguori { 85019d6b8fSAnthony Liguori const QCowHeader *cow_header = (const void *)buf; 86019d6b8fSAnthony Liguori 87019d6b8fSAnthony Liguori if (buf_size >= sizeof(QCowHeader) && 88019d6b8fSAnthony Liguori be32_to_cpu(cow_header->magic) == QCOW_MAGIC && 89019d6b8fSAnthony Liguori be32_to_cpu(cow_header->version) == QCOW_VERSION) 90019d6b8fSAnthony Liguori return 100; 91019d6b8fSAnthony Liguori else 92019d6b8fSAnthony Liguori return 0; 93019d6b8fSAnthony Liguori } 94019d6b8fSAnthony Liguori 95015a1036SMax Reitz static int qcow_open(BlockDriverState *bs, QDict *options, int flags, 96015a1036SMax Reitz Error **errp) 97019d6b8fSAnthony Liguori { 98019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 9984b0ec02SLi Zhi Hui int len, i, shift, ret; 100019d6b8fSAnthony Liguori QCowHeader header; 101019d6b8fSAnthony Liguori 10284b0ec02SLi Zhi Hui ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); 10384b0ec02SLi Zhi Hui if (ret < 0) { 104019d6b8fSAnthony Liguori goto fail; 10584b0ec02SLi Zhi Hui } 106019d6b8fSAnthony Liguori be32_to_cpus(&header.magic); 107019d6b8fSAnthony Liguori be32_to_cpus(&header.version); 108019d6b8fSAnthony Liguori be64_to_cpus(&header.backing_file_offset); 109019d6b8fSAnthony Liguori be32_to_cpus(&header.backing_file_size); 110019d6b8fSAnthony Liguori be32_to_cpus(&header.mtime); 111019d6b8fSAnthony Liguori be64_to_cpus(&header.size); 112019d6b8fSAnthony Liguori be32_to_cpus(&header.crypt_method); 113019d6b8fSAnthony Liguori be64_to_cpus(&header.l1_table_offset); 114019d6b8fSAnthony Liguori 11584b0ec02SLi Zhi Hui if (header.magic != QCOW_MAGIC) { 11615bac0d5SStefan Weil ret = -EMEDIUMTYPE; 117019d6b8fSAnthony Liguori goto fail; 11884b0ec02SLi Zhi Hui } 11984b0ec02SLi Zhi Hui if (header.version != QCOW_VERSION) { 12084b0ec02SLi Zhi Hui char version[64]; 12184b0ec02SLi Zhi Hui snprintf(version, sizeof(version), "QCOW version %d", header.version); 122*b6d5066dSPaolo Bonzini error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, 12384b0ec02SLi Zhi Hui bs->device_name, "qcow", version); 12484b0ec02SLi Zhi Hui ret = -ENOTSUP; 125019d6b8fSAnthony Liguori goto fail; 12684b0ec02SLi Zhi Hui } 12784b0ec02SLi Zhi Hui 12884b0ec02SLi Zhi Hui if (header.size <= 1 || header.cluster_bits < 9) { 129*b6d5066dSPaolo Bonzini error_setg(errp, "invalid value in qcow header"); 13084b0ec02SLi Zhi Hui ret = -EINVAL; 131019d6b8fSAnthony Liguori goto fail; 13284b0ec02SLi Zhi Hui } 13384b0ec02SLi Zhi Hui if (header.crypt_method > QCOW_CRYPT_AES) { 134*b6d5066dSPaolo Bonzini error_setg(errp, "invalid encryption method in qcow header"); 13584b0ec02SLi Zhi Hui ret = -EINVAL; 13684b0ec02SLi Zhi Hui goto fail; 13784b0ec02SLi Zhi Hui } 138019d6b8fSAnthony Liguori s->crypt_method_header = header.crypt_method; 13984b0ec02SLi Zhi Hui if (s->crypt_method_header) { 140019d6b8fSAnthony Liguori bs->encrypted = 1; 14184b0ec02SLi Zhi Hui } 142019d6b8fSAnthony Liguori s->cluster_bits = header.cluster_bits; 143019d6b8fSAnthony Liguori s->cluster_size = 1 << s->cluster_bits; 144019d6b8fSAnthony Liguori s->cluster_sectors = 1 << (s->cluster_bits - 9); 145019d6b8fSAnthony Liguori s->l2_bits = header.l2_bits; 146019d6b8fSAnthony Liguori s->l2_size = 1 << s->l2_bits; 147019d6b8fSAnthony Liguori bs->total_sectors = header.size / 512; 148019d6b8fSAnthony Liguori s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1; 149019d6b8fSAnthony Liguori 150019d6b8fSAnthony Liguori /* read the level 1 table */ 151019d6b8fSAnthony Liguori shift = s->cluster_bits + s->l2_bits; 152019d6b8fSAnthony Liguori s->l1_size = (header.size + (1LL << shift) - 1) >> shift; 153019d6b8fSAnthony Liguori 154019d6b8fSAnthony Liguori s->l1_table_offset = header.l1_table_offset; 1557267c094SAnthony Liguori s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t)); 15684b0ec02SLi Zhi Hui 15784b0ec02SLi Zhi Hui ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, 15884b0ec02SLi Zhi Hui s->l1_size * sizeof(uint64_t)); 15984b0ec02SLi Zhi Hui if (ret < 0) { 160019d6b8fSAnthony Liguori goto fail; 16184b0ec02SLi Zhi Hui } 16284b0ec02SLi Zhi Hui 163019d6b8fSAnthony Liguori for(i = 0;i < s->l1_size; i++) { 164019d6b8fSAnthony Liguori be64_to_cpus(&s->l1_table[i]); 165019d6b8fSAnthony Liguori } 166019d6b8fSAnthony Liguori /* alloc L2 cache */ 1677267c094SAnthony Liguori s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); 1687267c094SAnthony Liguori s->cluster_cache = g_malloc(s->cluster_size); 1697267c094SAnthony Liguori s->cluster_data = g_malloc(s->cluster_size); 170019d6b8fSAnthony Liguori s->cluster_cache_offset = -1; 171019d6b8fSAnthony Liguori 172019d6b8fSAnthony Liguori /* read the backing file name */ 173019d6b8fSAnthony Liguori if (header.backing_file_offset != 0) { 174019d6b8fSAnthony Liguori len = header.backing_file_size; 17584b0ec02SLi Zhi Hui if (len > 1023) { 176019d6b8fSAnthony Liguori len = 1023; 17784b0ec02SLi Zhi Hui } 17884b0ec02SLi Zhi Hui ret = bdrv_pread(bs->file, header.backing_file_offset, 17984b0ec02SLi Zhi Hui bs->backing_file, len); 18084b0ec02SLi Zhi Hui if (ret < 0) { 181019d6b8fSAnthony Liguori goto fail; 18284b0ec02SLi Zhi Hui } 183019d6b8fSAnthony Liguori bs->backing_file[len] = '\0'; 184019d6b8fSAnthony Liguori } 185de33b1f3SScott Wood 186fd9f102cSKevin Wolf /* Disable migration when qcow images are used */ 187fd9f102cSKevin Wolf error_set(&s->migration_blocker, 188fd9f102cSKevin Wolf QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED, 189fd9f102cSKevin Wolf "qcow", bs->device_name, "live migration"); 190fd9f102cSKevin Wolf migrate_add_blocker(s->migration_blocker); 191fd9f102cSKevin Wolf 192de33b1f3SScott Wood qemu_co_mutex_init(&s->lock); 193019d6b8fSAnthony Liguori return 0; 194019d6b8fSAnthony Liguori 195019d6b8fSAnthony Liguori fail: 1967267c094SAnthony Liguori g_free(s->l1_table); 1977267c094SAnthony Liguori g_free(s->l2_cache); 1987267c094SAnthony Liguori g_free(s->cluster_cache); 1997267c094SAnthony Liguori g_free(s->cluster_data); 20084b0ec02SLi Zhi Hui return ret; 201019d6b8fSAnthony Liguori } 202019d6b8fSAnthony Liguori 203d177692eSJeff Cody 204d177692eSJeff Cody /* We have nothing to do for QCOW reopen, stubs just return 205d177692eSJeff Cody * success */ 206d177692eSJeff Cody static int qcow_reopen_prepare(BDRVReopenState *state, 207d177692eSJeff Cody BlockReopenQueue *queue, Error **errp) 208d177692eSJeff Cody { 209d177692eSJeff Cody return 0; 210d177692eSJeff Cody } 211d177692eSJeff Cody 212019d6b8fSAnthony Liguori static int qcow_set_key(BlockDriverState *bs, const char *key) 213019d6b8fSAnthony Liguori { 214019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 215019d6b8fSAnthony Liguori uint8_t keybuf[16]; 216019d6b8fSAnthony Liguori int len, i; 217019d6b8fSAnthony Liguori 218019d6b8fSAnthony Liguori memset(keybuf, 0, 16); 219019d6b8fSAnthony Liguori len = strlen(key); 220019d6b8fSAnthony Liguori if (len > 16) 221019d6b8fSAnthony Liguori len = 16; 222019d6b8fSAnthony Liguori /* XXX: we could compress the chars to 7 bits to increase 223019d6b8fSAnthony Liguori entropy */ 224019d6b8fSAnthony Liguori for(i = 0;i < len;i++) { 225019d6b8fSAnthony Liguori keybuf[i] = key[i]; 226019d6b8fSAnthony Liguori } 227019d6b8fSAnthony Liguori s->crypt_method = s->crypt_method_header; 228019d6b8fSAnthony Liguori 229019d6b8fSAnthony Liguori if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0) 230019d6b8fSAnthony Liguori return -1; 231019d6b8fSAnthony Liguori if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) 232019d6b8fSAnthony Liguori return -1; 233019d6b8fSAnthony Liguori return 0; 234019d6b8fSAnthony Liguori } 235019d6b8fSAnthony Liguori 236019d6b8fSAnthony Liguori /* The crypt function is compatible with the linux cryptoloop 237019d6b8fSAnthony Liguori algorithm for < 4 GB images. NOTE: out_buf == in_buf is 238019d6b8fSAnthony Liguori supported */ 239019d6b8fSAnthony Liguori static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num, 240019d6b8fSAnthony Liguori uint8_t *out_buf, const uint8_t *in_buf, 241019d6b8fSAnthony Liguori int nb_sectors, int enc, 242019d6b8fSAnthony Liguori const AES_KEY *key) 243019d6b8fSAnthony Liguori { 244019d6b8fSAnthony Liguori union { 245019d6b8fSAnthony Liguori uint64_t ll[2]; 246019d6b8fSAnthony Liguori uint8_t b[16]; 247019d6b8fSAnthony Liguori } ivec; 248019d6b8fSAnthony Liguori int i; 249019d6b8fSAnthony Liguori 250019d6b8fSAnthony Liguori for(i = 0; i < nb_sectors; i++) { 251019d6b8fSAnthony Liguori ivec.ll[0] = cpu_to_le64(sector_num); 252019d6b8fSAnthony Liguori ivec.ll[1] = 0; 253019d6b8fSAnthony Liguori AES_cbc_encrypt(in_buf, out_buf, 512, key, 254019d6b8fSAnthony Liguori ivec.b, enc); 255019d6b8fSAnthony Liguori sector_num++; 256019d6b8fSAnthony Liguori in_buf += 512; 257019d6b8fSAnthony Liguori out_buf += 512; 258019d6b8fSAnthony Liguori } 259019d6b8fSAnthony Liguori } 260019d6b8fSAnthony Liguori 261019d6b8fSAnthony Liguori /* 'allocate' is: 262019d6b8fSAnthony Liguori * 263019d6b8fSAnthony Liguori * 0 to not allocate. 264019d6b8fSAnthony Liguori * 265019d6b8fSAnthony Liguori * 1 to allocate a normal cluster (for sector indexes 'n_start' to 266019d6b8fSAnthony Liguori * 'n_end') 267019d6b8fSAnthony Liguori * 268019d6b8fSAnthony Liguori * 2 to allocate a compressed cluster of size 269019d6b8fSAnthony Liguori * 'compressed_size'. 'compressed_size' must be > 0 and < 270019d6b8fSAnthony Liguori * cluster_size 271019d6b8fSAnthony Liguori * 272019d6b8fSAnthony Liguori * return 0 if not allocated. 273019d6b8fSAnthony Liguori */ 274019d6b8fSAnthony Liguori static uint64_t get_cluster_offset(BlockDriverState *bs, 275019d6b8fSAnthony Liguori uint64_t offset, int allocate, 276019d6b8fSAnthony Liguori int compressed_size, 277019d6b8fSAnthony Liguori int n_start, int n_end) 278019d6b8fSAnthony Liguori { 279019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 280019d6b8fSAnthony Liguori int min_index, i, j, l1_index, l2_index; 281019d6b8fSAnthony Liguori uint64_t l2_offset, *l2_table, cluster_offset, tmp; 282019d6b8fSAnthony Liguori uint32_t min_count; 283019d6b8fSAnthony Liguori int new_l2_table; 284019d6b8fSAnthony Liguori 285019d6b8fSAnthony Liguori l1_index = offset >> (s->l2_bits + s->cluster_bits); 286019d6b8fSAnthony Liguori l2_offset = s->l1_table[l1_index]; 287019d6b8fSAnthony Liguori new_l2_table = 0; 288019d6b8fSAnthony Liguori if (!l2_offset) { 289019d6b8fSAnthony Liguori if (!allocate) 290019d6b8fSAnthony Liguori return 0; 291019d6b8fSAnthony Liguori /* allocate a new l2 entry */ 29266f82ceeSKevin Wolf l2_offset = bdrv_getlength(bs->file); 293019d6b8fSAnthony Liguori /* round to cluster size */ 294019d6b8fSAnthony Liguori l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); 295019d6b8fSAnthony Liguori /* update the L1 entry */ 296019d6b8fSAnthony Liguori s->l1_table[l1_index] = l2_offset; 297019d6b8fSAnthony Liguori tmp = cpu_to_be64(l2_offset); 2985e5557d9SKevin Wolf if (bdrv_pwrite_sync(bs->file, 2995e5557d9SKevin Wolf s->l1_table_offset + l1_index * sizeof(tmp), 3005e5557d9SKevin Wolf &tmp, sizeof(tmp)) < 0) 301019d6b8fSAnthony Liguori return 0; 302019d6b8fSAnthony Liguori new_l2_table = 1; 303019d6b8fSAnthony Liguori } 304019d6b8fSAnthony Liguori for(i = 0; i < L2_CACHE_SIZE; i++) { 305019d6b8fSAnthony Liguori if (l2_offset == s->l2_cache_offsets[i]) { 306019d6b8fSAnthony Liguori /* increment the hit count */ 307019d6b8fSAnthony Liguori if (++s->l2_cache_counts[i] == 0xffffffff) { 308019d6b8fSAnthony Liguori for(j = 0; j < L2_CACHE_SIZE; j++) { 309019d6b8fSAnthony Liguori s->l2_cache_counts[j] >>= 1; 310019d6b8fSAnthony Liguori } 311019d6b8fSAnthony Liguori } 312019d6b8fSAnthony Liguori l2_table = s->l2_cache + (i << s->l2_bits); 313019d6b8fSAnthony Liguori goto found; 314019d6b8fSAnthony Liguori } 315019d6b8fSAnthony Liguori } 316019d6b8fSAnthony Liguori /* not found: load a new entry in the least used one */ 317019d6b8fSAnthony Liguori min_index = 0; 318019d6b8fSAnthony Liguori min_count = 0xffffffff; 319019d6b8fSAnthony Liguori for(i = 0; i < L2_CACHE_SIZE; i++) { 320019d6b8fSAnthony Liguori if (s->l2_cache_counts[i] < min_count) { 321019d6b8fSAnthony Liguori min_count = s->l2_cache_counts[i]; 322019d6b8fSAnthony Liguori min_index = i; 323019d6b8fSAnthony Liguori } 324019d6b8fSAnthony Liguori } 325019d6b8fSAnthony Liguori l2_table = s->l2_cache + (min_index << s->l2_bits); 326019d6b8fSAnthony Liguori if (new_l2_table) { 327019d6b8fSAnthony Liguori memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); 3285e5557d9SKevin Wolf if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table, 3295e5557d9SKevin Wolf s->l2_size * sizeof(uint64_t)) < 0) 330019d6b8fSAnthony Liguori return 0; 331019d6b8fSAnthony Liguori } else { 33266f82ceeSKevin Wolf if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != 333019d6b8fSAnthony Liguori s->l2_size * sizeof(uint64_t)) 334019d6b8fSAnthony Liguori return 0; 335019d6b8fSAnthony Liguori } 336019d6b8fSAnthony Liguori s->l2_cache_offsets[min_index] = l2_offset; 337019d6b8fSAnthony Liguori s->l2_cache_counts[min_index] = 1; 338019d6b8fSAnthony Liguori found: 339019d6b8fSAnthony Liguori l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); 340019d6b8fSAnthony Liguori cluster_offset = be64_to_cpu(l2_table[l2_index]); 341019d6b8fSAnthony Liguori if (!cluster_offset || 342019d6b8fSAnthony Liguori ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) { 343019d6b8fSAnthony Liguori if (!allocate) 344019d6b8fSAnthony Liguori return 0; 345019d6b8fSAnthony Liguori /* allocate a new cluster */ 346019d6b8fSAnthony Liguori if ((cluster_offset & QCOW_OFLAG_COMPRESSED) && 347019d6b8fSAnthony Liguori (n_end - n_start) < s->cluster_sectors) { 348019d6b8fSAnthony Liguori /* if the cluster is already compressed, we must 349019d6b8fSAnthony Liguori decompress it in the case it is not completely 350019d6b8fSAnthony Liguori overwritten */ 35166f82ceeSKevin Wolf if (decompress_cluster(bs, cluster_offset) < 0) 352019d6b8fSAnthony Liguori return 0; 35366f82ceeSKevin Wolf cluster_offset = bdrv_getlength(bs->file); 354019d6b8fSAnthony Liguori cluster_offset = (cluster_offset + s->cluster_size - 1) & 355019d6b8fSAnthony Liguori ~(s->cluster_size - 1); 356019d6b8fSAnthony Liguori /* write the cluster content */ 35766f82ceeSKevin Wolf if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) != 358019d6b8fSAnthony Liguori s->cluster_size) 359019d6b8fSAnthony Liguori return -1; 360019d6b8fSAnthony Liguori } else { 36166f82ceeSKevin Wolf cluster_offset = bdrv_getlength(bs->file); 362019d6b8fSAnthony Liguori if (allocate == 1) { 363019d6b8fSAnthony Liguori /* round to cluster size */ 364019d6b8fSAnthony Liguori cluster_offset = (cluster_offset + s->cluster_size - 1) & 365019d6b8fSAnthony Liguori ~(s->cluster_size - 1); 36666f82ceeSKevin Wolf bdrv_truncate(bs->file, cluster_offset + s->cluster_size); 367019d6b8fSAnthony Liguori /* if encrypted, we must initialize the cluster 368019d6b8fSAnthony Liguori content which won't be written */ 369019d6b8fSAnthony Liguori if (s->crypt_method && 370019d6b8fSAnthony Liguori (n_end - n_start) < s->cluster_sectors) { 371019d6b8fSAnthony Liguori uint64_t start_sect; 372019d6b8fSAnthony Liguori start_sect = (offset & ~(s->cluster_size - 1)) >> 9; 373019d6b8fSAnthony Liguori memset(s->cluster_data + 512, 0x00, 512); 374019d6b8fSAnthony Liguori for(i = 0; i < s->cluster_sectors; i++) { 375019d6b8fSAnthony Liguori if (i < n_start || i >= n_end) { 376019d6b8fSAnthony Liguori encrypt_sectors(s, start_sect + i, 377019d6b8fSAnthony Liguori s->cluster_data, 378019d6b8fSAnthony Liguori s->cluster_data + 512, 1, 1, 379019d6b8fSAnthony Liguori &s->aes_encrypt_key); 38066f82ceeSKevin Wolf if (bdrv_pwrite(bs->file, cluster_offset + i * 512, 381019d6b8fSAnthony Liguori s->cluster_data, 512) != 512) 382019d6b8fSAnthony Liguori return -1; 383019d6b8fSAnthony Liguori } 384019d6b8fSAnthony Liguori } 385019d6b8fSAnthony Liguori } 386019d6b8fSAnthony Liguori } else if (allocate == 2) { 387019d6b8fSAnthony Liguori cluster_offset |= QCOW_OFLAG_COMPRESSED | 388019d6b8fSAnthony Liguori (uint64_t)compressed_size << (63 - s->cluster_bits); 389019d6b8fSAnthony Liguori } 390019d6b8fSAnthony Liguori } 391019d6b8fSAnthony Liguori /* update L2 table */ 392019d6b8fSAnthony Liguori tmp = cpu_to_be64(cluster_offset); 393019d6b8fSAnthony Liguori l2_table[l2_index] = tmp; 3945e5557d9SKevin Wolf if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp), 3955e5557d9SKevin Wolf &tmp, sizeof(tmp)) < 0) 396019d6b8fSAnthony Liguori return 0; 397019d6b8fSAnthony Liguori } 398019d6b8fSAnthony Liguori return cluster_offset; 399019d6b8fSAnthony Liguori } 400019d6b8fSAnthony Liguori 401b6b8a333SPaolo Bonzini static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs, 402f8a2e5e3SStefan Hajnoczi int64_t sector_num, int nb_sectors, int *pnum) 403019d6b8fSAnthony Liguori { 404019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 405019d6b8fSAnthony Liguori int index_in_cluster, n; 406019d6b8fSAnthony Liguori uint64_t cluster_offset; 407019d6b8fSAnthony Liguori 408f8a2e5e3SStefan Hajnoczi qemu_co_mutex_lock(&s->lock); 409019d6b8fSAnthony Liguori cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); 410f8a2e5e3SStefan Hajnoczi qemu_co_mutex_unlock(&s->lock); 411019d6b8fSAnthony Liguori index_in_cluster = sector_num & (s->cluster_sectors - 1); 412019d6b8fSAnthony Liguori n = s->cluster_sectors - index_in_cluster; 413019d6b8fSAnthony Liguori if (n > nb_sectors) 414019d6b8fSAnthony Liguori n = nb_sectors; 415019d6b8fSAnthony Liguori *pnum = n; 4164bc74be9SPaolo Bonzini if (!cluster_offset) { 4174bc74be9SPaolo Bonzini return 0; 4184bc74be9SPaolo Bonzini } 4194bc74be9SPaolo Bonzini if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypt_method) { 4204bc74be9SPaolo Bonzini return BDRV_BLOCK_DATA; 4214bc74be9SPaolo Bonzini } 4224bc74be9SPaolo Bonzini cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); 4234bc74be9SPaolo Bonzini return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset; 424019d6b8fSAnthony Liguori } 425019d6b8fSAnthony Liguori 426019d6b8fSAnthony Liguori static int decompress_buffer(uint8_t *out_buf, int out_buf_size, 427019d6b8fSAnthony Liguori const uint8_t *buf, int buf_size) 428019d6b8fSAnthony Liguori { 429019d6b8fSAnthony Liguori z_stream strm1, *strm = &strm1; 430019d6b8fSAnthony Liguori int ret, out_len; 431019d6b8fSAnthony Liguori 432019d6b8fSAnthony Liguori memset(strm, 0, sizeof(*strm)); 433019d6b8fSAnthony Liguori 434019d6b8fSAnthony Liguori strm->next_in = (uint8_t *)buf; 435019d6b8fSAnthony Liguori strm->avail_in = buf_size; 436019d6b8fSAnthony Liguori strm->next_out = out_buf; 437019d6b8fSAnthony Liguori strm->avail_out = out_buf_size; 438019d6b8fSAnthony Liguori 439019d6b8fSAnthony Liguori ret = inflateInit2(strm, -12); 440019d6b8fSAnthony Liguori if (ret != Z_OK) 441019d6b8fSAnthony Liguori return -1; 442019d6b8fSAnthony Liguori ret = inflate(strm, Z_FINISH); 443019d6b8fSAnthony Liguori out_len = strm->next_out - out_buf; 444019d6b8fSAnthony Liguori if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || 445019d6b8fSAnthony Liguori out_len != out_buf_size) { 446019d6b8fSAnthony Liguori inflateEnd(strm); 447019d6b8fSAnthony Liguori return -1; 448019d6b8fSAnthony Liguori } 449019d6b8fSAnthony Liguori inflateEnd(strm); 450019d6b8fSAnthony Liguori return 0; 451019d6b8fSAnthony Liguori } 452019d6b8fSAnthony Liguori 45366f82ceeSKevin Wolf static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) 454019d6b8fSAnthony Liguori { 45566f82ceeSKevin Wolf BDRVQcowState *s = bs->opaque; 456019d6b8fSAnthony Liguori int ret, csize; 457019d6b8fSAnthony Liguori uint64_t coffset; 458019d6b8fSAnthony Liguori 459019d6b8fSAnthony Liguori coffset = cluster_offset & s->cluster_offset_mask; 460019d6b8fSAnthony Liguori if (s->cluster_cache_offset != coffset) { 461019d6b8fSAnthony Liguori csize = cluster_offset >> (63 - s->cluster_bits); 462019d6b8fSAnthony Liguori csize &= (s->cluster_size - 1); 46366f82ceeSKevin Wolf ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize); 464019d6b8fSAnthony Liguori if (ret != csize) 465019d6b8fSAnthony Liguori return -1; 466019d6b8fSAnthony Liguori if (decompress_buffer(s->cluster_cache, s->cluster_size, 467019d6b8fSAnthony Liguori s->cluster_data, csize) < 0) { 468019d6b8fSAnthony Liguori return -1; 469019d6b8fSAnthony Liguori } 470019d6b8fSAnthony Liguori s->cluster_cache_offset = coffset; 471019d6b8fSAnthony Liguori } 472019d6b8fSAnthony Liguori return 0; 473019d6b8fSAnthony Liguori } 474019d6b8fSAnthony Liguori 475a968168cSDong Xu Wang static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, 47627deebe8SFrediano Ziglio int nb_sectors, QEMUIOVector *qiov) 477ad53089bSChristoph Hellwig { 478019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 479019d6b8fSAnthony Liguori int index_in_cluster; 48027deebe8SFrediano Ziglio int ret = 0, n; 48143ca85b5SFrediano Ziglio uint64_t cluster_offset; 482430bbaaaSFrediano Ziglio struct iovec hd_iov; 483430bbaaaSFrediano Ziglio QEMUIOVector hd_qiov; 48427deebe8SFrediano Ziglio uint8_t *buf; 48527deebe8SFrediano Ziglio void *orig_buf; 486019d6b8fSAnthony Liguori 48727deebe8SFrediano Ziglio if (qiov->niov > 1) { 48827deebe8SFrediano Ziglio buf = orig_buf = qemu_blockalign(bs, qiov->size); 48927deebe8SFrediano Ziglio } else { 49027deebe8SFrediano Ziglio orig_buf = NULL; 49127deebe8SFrediano Ziglio buf = (uint8_t *)qiov->iov->iov_base; 492019d6b8fSAnthony Liguori } 493019d6b8fSAnthony Liguori 49427deebe8SFrediano Ziglio qemu_co_mutex_lock(&s->lock); 49527deebe8SFrediano Ziglio 49627deebe8SFrediano Ziglio while (nb_sectors != 0) { 49743ca85b5SFrediano Ziglio /* prepare next request */ 49827deebe8SFrediano Ziglio cluster_offset = get_cluster_offset(bs, sector_num << 9, 499019d6b8fSAnthony Liguori 0, 0, 0, 0); 50027deebe8SFrediano Ziglio index_in_cluster = sector_num & (s->cluster_sectors - 1); 501430bbaaaSFrediano Ziglio n = s->cluster_sectors - index_in_cluster; 50227deebe8SFrediano Ziglio if (n > nb_sectors) { 50327deebe8SFrediano Ziglio n = nb_sectors; 504430bbaaaSFrediano Ziglio } 505019d6b8fSAnthony Liguori 506430bbaaaSFrediano Ziglio if (!cluster_offset) { 507019d6b8fSAnthony Liguori if (bs->backing_hd) { 508019d6b8fSAnthony Liguori /* read from the base image */ 50927deebe8SFrediano Ziglio hd_iov.iov_base = (void *)buf; 510430bbaaaSFrediano Ziglio hd_iov.iov_len = n * 512; 511430bbaaaSFrediano Ziglio qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); 51252b8eb60SKevin Wolf qemu_co_mutex_unlock(&s->lock); 51327deebe8SFrediano Ziglio ret = bdrv_co_readv(bs->backing_hd, sector_num, 514430bbaaaSFrediano Ziglio n, &hd_qiov); 51552b8eb60SKevin Wolf qemu_co_mutex_lock(&s->lock); 51652b8eb60SKevin Wolf if (ret < 0) { 51727deebe8SFrediano Ziglio goto fail; 5185614c188SStefan Weil } 519019d6b8fSAnthony Liguori } else { 520019d6b8fSAnthony Liguori /* Note: in this case, no need to wait */ 52127deebe8SFrediano Ziglio memset(buf, 0, 512 * n); 522019d6b8fSAnthony Liguori } 523430bbaaaSFrediano Ziglio } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { 524019d6b8fSAnthony Liguori /* add AIO support for compressed blocks ? */ 525430bbaaaSFrediano Ziglio if (decompress_cluster(bs, cluster_offset) < 0) { 52627deebe8SFrediano Ziglio goto fail; 5275614c188SStefan Weil } 52827deebe8SFrediano Ziglio memcpy(buf, 529430bbaaaSFrediano Ziglio s->cluster_cache + index_in_cluster * 512, 512 * n); 530019d6b8fSAnthony Liguori } else { 531430bbaaaSFrediano Ziglio if ((cluster_offset & 511) != 0) { 53227deebe8SFrediano Ziglio goto fail; 533019d6b8fSAnthony Liguori } 53427deebe8SFrediano Ziglio hd_iov.iov_base = (void *)buf; 535430bbaaaSFrediano Ziglio hd_iov.iov_len = n * 512; 536430bbaaaSFrediano Ziglio qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); 53752b8eb60SKevin Wolf qemu_co_mutex_unlock(&s->lock); 53852b8eb60SKevin Wolf ret = bdrv_co_readv(bs->file, 539430bbaaaSFrediano Ziglio (cluster_offset >> 9) + index_in_cluster, 540430bbaaaSFrediano Ziglio n, &hd_qiov); 54152b8eb60SKevin Wolf qemu_co_mutex_lock(&s->lock); 54252b8eb60SKevin Wolf if (ret < 0) { 54327deebe8SFrediano Ziglio break; 544019d6b8fSAnthony Liguori } 54543ca85b5SFrediano Ziglio if (s->crypt_method) { 54627deebe8SFrediano Ziglio encrypt_sectors(s, sector_num, buf, buf, 54743ca85b5SFrediano Ziglio n, 0, 54843ca85b5SFrediano Ziglio &s->aes_decrypt_key); 54943ca85b5SFrediano Ziglio } 55043ca85b5SFrediano Ziglio } 55127deebe8SFrediano Ziglio ret = 0; 55243ca85b5SFrediano Ziglio 55327deebe8SFrediano Ziglio nb_sectors -= n; 55427deebe8SFrediano Ziglio sector_num += n; 55527deebe8SFrediano Ziglio buf += n * 512; 55652b8eb60SKevin Wolf } 557019d6b8fSAnthony Liguori 55827deebe8SFrediano Ziglio done: 55952b8eb60SKevin Wolf qemu_co_mutex_unlock(&s->lock); 56052b8eb60SKevin Wolf 56127deebe8SFrediano Ziglio if (qiov->niov > 1) { 56203396148SMichael Tokarev qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size); 56327deebe8SFrediano Ziglio qemu_vfree(orig_buf); 564019d6b8fSAnthony Liguori } 56552b8eb60SKevin Wolf 56652b8eb60SKevin Wolf return ret; 56727deebe8SFrediano Ziglio 56827deebe8SFrediano Ziglio fail: 56927deebe8SFrediano Ziglio ret = -EIO; 57027deebe8SFrediano Ziglio goto done; 571019d6b8fSAnthony Liguori } 572019d6b8fSAnthony Liguori 573a968168cSDong Xu Wang static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, 57427deebe8SFrediano Ziglio int nb_sectors, QEMUIOVector *qiov) 575019d6b8fSAnthony Liguori { 576019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 577019d6b8fSAnthony Liguori int index_in_cluster; 578019d6b8fSAnthony Liguori uint64_t cluster_offset; 579019d6b8fSAnthony Liguori const uint8_t *src_buf; 58027deebe8SFrediano Ziglio int ret = 0, n; 581430bbaaaSFrediano Ziglio uint8_t *cluster_data = NULL; 582430bbaaaSFrediano Ziglio struct iovec hd_iov; 583430bbaaaSFrediano Ziglio QEMUIOVector hd_qiov; 58427deebe8SFrediano Ziglio uint8_t *buf; 58527deebe8SFrediano Ziglio void *orig_buf; 586019d6b8fSAnthony Liguori 58727deebe8SFrediano Ziglio s->cluster_cache_offset = -1; /* disable compressed cache */ 58827deebe8SFrediano Ziglio 58927deebe8SFrediano Ziglio if (qiov->niov > 1) { 59027deebe8SFrediano Ziglio buf = orig_buf = qemu_blockalign(bs, qiov->size); 591d5e6b161SMichael Tokarev qemu_iovec_to_buf(qiov, 0, buf, qiov->size); 59227deebe8SFrediano Ziglio } else { 59327deebe8SFrediano Ziglio orig_buf = NULL; 59427deebe8SFrediano Ziglio buf = (uint8_t *)qiov->iov->iov_base; 595019d6b8fSAnthony Liguori } 596019d6b8fSAnthony Liguori 59727deebe8SFrediano Ziglio qemu_co_mutex_lock(&s->lock); 59827deebe8SFrediano Ziglio 59927deebe8SFrediano Ziglio while (nb_sectors != 0) { 60027deebe8SFrediano Ziglio 60127deebe8SFrediano Ziglio index_in_cluster = sector_num & (s->cluster_sectors - 1); 602430bbaaaSFrediano Ziglio n = s->cluster_sectors - index_in_cluster; 60327deebe8SFrediano Ziglio if (n > nb_sectors) { 60427deebe8SFrediano Ziglio n = nb_sectors; 605430bbaaaSFrediano Ziglio } 60627deebe8SFrediano Ziglio cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0, 607019d6b8fSAnthony Liguori index_in_cluster, 608430bbaaaSFrediano Ziglio index_in_cluster + n); 609019d6b8fSAnthony Liguori if (!cluster_offset || (cluster_offset & 511) != 0) { 61027deebe8SFrediano Ziglio ret = -EIO; 61127deebe8SFrediano Ziglio break; 612019d6b8fSAnthony Liguori } 613019d6b8fSAnthony Liguori if (s->crypt_method) { 614430bbaaaSFrediano Ziglio if (!cluster_data) { 615430bbaaaSFrediano Ziglio cluster_data = g_malloc0(s->cluster_size); 616019d6b8fSAnthony Liguori } 61727deebe8SFrediano Ziglio encrypt_sectors(s, sector_num, cluster_data, buf, 618430bbaaaSFrediano Ziglio n, 1, &s->aes_encrypt_key); 619430bbaaaSFrediano Ziglio src_buf = cluster_data; 620019d6b8fSAnthony Liguori } else { 62127deebe8SFrediano Ziglio src_buf = buf; 622019d6b8fSAnthony Liguori } 623019d6b8fSAnthony Liguori 624430bbaaaSFrediano Ziglio hd_iov.iov_base = (void *)src_buf; 625430bbaaaSFrediano Ziglio hd_iov.iov_len = n * 512; 626430bbaaaSFrediano Ziglio qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); 62752b8eb60SKevin Wolf qemu_co_mutex_unlock(&s->lock); 62852b8eb60SKevin Wolf ret = bdrv_co_writev(bs->file, 629019d6b8fSAnthony Liguori (cluster_offset >> 9) + index_in_cluster, 630430bbaaaSFrediano Ziglio n, &hd_qiov); 63152b8eb60SKevin Wolf qemu_co_mutex_lock(&s->lock); 63252b8eb60SKevin Wolf if (ret < 0) { 63327deebe8SFrediano Ziglio break; 6345614c188SStefan Weil } 63527deebe8SFrediano Ziglio ret = 0; 63643ca85b5SFrediano Ziglio 63727deebe8SFrediano Ziglio nb_sectors -= n; 63827deebe8SFrediano Ziglio sector_num += n; 63927deebe8SFrediano Ziglio buf += n * 512; 640019d6b8fSAnthony Liguori } 64152b8eb60SKevin Wolf qemu_co_mutex_unlock(&s->lock); 642019d6b8fSAnthony Liguori 64327deebe8SFrediano Ziglio if (qiov->niov > 1) { 64427deebe8SFrediano Ziglio qemu_vfree(orig_buf); 645b11a24deSKevin Wolf } 646add8d262SStefan Weil g_free(cluster_data); 647b11a24deSKevin Wolf 64852b8eb60SKevin Wolf return ret; 649019d6b8fSAnthony Liguori } 650019d6b8fSAnthony Liguori 651019d6b8fSAnthony Liguori static void qcow_close(BlockDriverState *bs) 652019d6b8fSAnthony Liguori { 653019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 654fd9f102cSKevin Wolf 6557267c094SAnthony Liguori g_free(s->l1_table); 6567267c094SAnthony Liguori g_free(s->l2_cache); 6577267c094SAnthony Liguori g_free(s->cluster_cache); 6587267c094SAnthony Liguori g_free(s->cluster_data); 659fd9f102cSKevin Wolf 660fd9f102cSKevin Wolf migrate_del_blocker(s->migration_blocker); 661fd9f102cSKevin Wolf error_free(s->migration_blocker); 662019d6b8fSAnthony Liguori } 663019d6b8fSAnthony Liguori 664d5124c00SMax Reitz static int qcow_create(const char *filename, QEMUOptionParameter *options, 665d5124c00SMax Reitz Error **errp) 666019d6b8fSAnthony Liguori { 6672b16c9ffSLi Zhi Hui int header_size, backing_filename_len, l1_size, shift, i; 668019d6b8fSAnthony Liguori QCowHeader header; 6692b16c9ffSLi Zhi Hui uint8_t *tmp; 6700e7e1989SKevin Wolf int64_t total_size = 0; 6710e7e1989SKevin Wolf const char *backing_file = NULL; 6720e7e1989SKevin Wolf int flags = 0; 67334b5d2c6SMax Reitz Error *local_err = NULL; 6743e1a8134SKirill A. Shutemov int ret; 6752b16c9ffSLi Zhi Hui BlockDriverState *qcow_bs; 6760e7e1989SKevin Wolf 6770e7e1989SKevin Wolf /* Read out options */ 6780e7e1989SKevin Wolf while (options && options->name) { 6790e7e1989SKevin Wolf if (!strcmp(options->name, BLOCK_OPT_SIZE)) { 6800e7e1989SKevin Wolf total_size = options->value.n / 512; 6810e7e1989SKevin Wolf } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { 6820e7e1989SKevin Wolf backing_file = options->value.s; 6830e7e1989SKevin Wolf } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) { 6840e7e1989SKevin Wolf flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0; 6850e7e1989SKevin Wolf } 6860e7e1989SKevin Wolf options++; 6870e7e1989SKevin Wolf } 688019d6b8fSAnthony Liguori 689cc84d90fSMax Reitz ret = bdrv_create_file(filename, options, &local_err); 6902b16c9ffSLi Zhi Hui if (ret < 0) { 691*b6d5066dSPaolo Bonzini error_propagate(errp, local_err); 6922b16c9ffSLi Zhi Hui return ret; 6932b16c9ffSLi Zhi Hui } 6942b16c9ffSLi Zhi Hui 6952e40134bSMax Reitz qcow_bs = NULL; 6962e40134bSMax Reitz ret = bdrv_open(&qcow_bs, filename, NULL, NULL, 6972e40134bSMax Reitz BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err); 6982b16c9ffSLi Zhi Hui if (ret < 0) { 699*b6d5066dSPaolo Bonzini error_propagate(errp, local_err); 7002b16c9ffSLi Zhi Hui return ret; 7012b16c9ffSLi Zhi Hui } 7022b16c9ffSLi Zhi Hui 7032b16c9ffSLi Zhi Hui ret = bdrv_truncate(qcow_bs, 0); 7042b16c9ffSLi Zhi Hui if (ret < 0) { 7052b16c9ffSLi Zhi Hui goto exit; 7062b16c9ffSLi Zhi Hui } 7072b16c9ffSLi Zhi Hui 708019d6b8fSAnthony Liguori memset(&header, 0, sizeof(header)); 709019d6b8fSAnthony Liguori header.magic = cpu_to_be32(QCOW_MAGIC); 710019d6b8fSAnthony Liguori header.version = cpu_to_be32(QCOW_VERSION); 711019d6b8fSAnthony Liguori header.size = cpu_to_be64(total_size * 512); 712019d6b8fSAnthony Liguori header_size = sizeof(header); 713019d6b8fSAnthony Liguori backing_filename_len = 0; 714019d6b8fSAnthony Liguori if (backing_file) { 715019d6b8fSAnthony Liguori if (strcmp(backing_file, "fat:")) { 716019d6b8fSAnthony Liguori header.backing_file_offset = cpu_to_be64(header_size); 717019d6b8fSAnthony Liguori backing_filename_len = strlen(backing_file); 718019d6b8fSAnthony Liguori header.backing_file_size = cpu_to_be32(backing_filename_len); 719019d6b8fSAnthony Liguori header_size += backing_filename_len; 720019d6b8fSAnthony Liguori } else { 721019d6b8fSAnthony Liguori /* special backing file for vvfat */ 722019d6b8fSAnthony Liguori backing_file = NULL; 723019d6b8fSAnthony Liguori } 724019d6b8fSAnthony Liguori header.cluster_bits = 9; /* 512 byte cluster to avoid copying 725019d6b8fSAnthony Liguori unmodifyed sectors */ 726019d6b8fSAnthony Liguori header.l2_bits = 12; /* 32 KB L2 tables */ 727019d6b8fSAnthony Liguori } else { 728019d6b8fSAnthony Liguori header.cluster_bits = 12; /* 4 KB clusters */ 729019d6b8fSAnthony Liguori header.l2_bits = 9; /* 4 KB L2 tables */ 730019d6b8fSAnthony Liguori } 731019d6b8fSAnthony Liguori header_size = (header_size + 7) & ~7; 732019d6b8fSAnthony Liguori shift = header.cluster_bits + header.l2_bits; 733019d6b8fSAnthony Liguori l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift; 734019d6b8fSAnthony Liguori 735019d6b8fSAnthony Liguori header.l1_table_offset = cpu_to_be64(header_size); 736019d6b8fSAnthony Liguori if (flags & BLOCK_FLAG_ENCRYPT) { 737019d6b8fSAnthony Liguori header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); 738019d6b8fSAnthony Liguori } else { 739019d6b8fSAnthony Liguori header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); 740019d6b8fSAnthony Liguori } 741019d6b8fSAnthony Liguori 742019d6b8fSAnthony Liguori /* write all the data */ 7432b16c9ffSLi Zhi Hui ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header)); 7443e1a8134SKirill A. Shutemov if (ret != sizeof(header)) { 7453e1a8134SKirill A. Shutemov goto exit; 7463e1a8134SKirill A. Shutemov } 7473e1a8134SKirill A. Shutemov 748019d6b8fSAnthony Liguori if (backing_file) { 7492b16c9ffSLi Zhi Hui ret = bdrv_pwrite(qcow_bs, sizeof(header), 7502b16c9ffSLi Zhi Hui backing_file, backing_filename_len); 7513e1a8134SKirill A. Shutemov if (ret != backing_filename_len) { 7523e1a8134SKirill A. Shutemov goto exit; 753019d6b8fSAnthony Liguori } 7543e1a8134SKirill A. Shutemov } 7553e1a8134SKirill A. Shutemov 7562b16c9ffSLi Zhi Hui tmp = g_malloc0(BDRV_SECTOR_SIZE); 7572b16c9ffSLi Zhi Hui for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/ 7582b16c9ffSLi Zhi Hui BDRV_SECTOR_SIZE); i++) { 7592b16c9ffSLi Zhi Hui ret = bdrv_pwrite(qcow_bs, header_size + 7602b16c9ffSLi Zhi Hui BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE); 7612b16c9ffSLi Zhi Hui if (ret != BDRV_SECTOR_SIZE) { 7622b16c9ffSLi Zhi Hui g_free(tmp); 7632b16c9ffSLi Zhi Hui goto exit; 7642b16c9ffSLi Zhi Hui } 7652b16c9ffSLi Zhi Hui } 7662b16c9ffSLi Zhi Hui 7672b16c9ffSLi Zhi Hui g_free(tmp); 7683e1a8134SKirill A. Shutemov ret = 0; 7693e1a8134SKirill A. Shutemov exit: 7704f6fd349SFam Zheng bdrv_unref(qcow_bs); 7713e1a8134SKirill A. Shutemov return ret; 772019d6b8fSAnthony Liguori } 773019d6b8fSAnthony Liguori 774019d6b8fSAnthony Liguori static int qcow_make_empty(BlockDriverState *bs) 775019d6b8fSAnthony Liguori { 776019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 777019d6b8fSAnthony Liguori uint32_t l1_length = s->l1_size * sizeof(uint64_t); 778019d6b8fSAnthony Liguori int ret; 779019d6b8fSAnthony Liguori 780019d6b8fSAnthony Liguori memset(s->l1_table, 0, l1_length); 7815e5557d9SKevin Wolf if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table, 7825e5557d9SKevin Wolf l1_length) < 0) 783019d6b8fSAnthony Liguori return -1; 78466f82ceeSKevin Wolf ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length); 785019d6b8fSAnthony Liguori if (ret < 0) 786019d6b8fSAnthony Liguori return ret; 787019d6b8fSAnthony Liguori 788019d6b8fSAnthony Liguori memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); 789019d6b8fSAnthony Liguori memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t)); 790019d6b8fSAnthony Liguori memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t)); 791019d6b8fSAnthony Liguori 792019d6b8fSAnthony Liguori return 0; 793019d6b8fSAnthony Liguori } 794019d6b8fSAnthony Liguori 795019d6b8fSAnthony Liguori /* XXX: put compressed sectors first, then all the cluster aligned 796019d6b8fSAnthony Liguori tables to avoid losing bytes in alignment */ 797019d6b8fSAnthony Liguori static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, 798019d6b8fSAnthony Liguori const uint8_t *buf, int nb_sectors) 799019d6b8fSAnthony Liguori { 800019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 801019d6b8fSAnthony Liguori z_stream strm; 802019d6b8fSAnthony Liguori int ret, out_len; 803019d6b8fSAnthony Liguori uint8_t *out_buf; 804019d6b8fSAnthony Liguori uint64_t cluster_offset; 805019d6b8fSAnthony Liguori 80616b3c5cdSStefan Hajnoczi if (nb_sectors != s->cluster_sectors) { 80716b3c5cdSStefan Hajnoczi ret = -EINVAL; 80816b3c5cdSStefan Hajnoczi 80916b3c5cdSStefan Hajnoczi /* Zero-pad last write if image size is not cluster aligned */ 81016b3c5cdSStefan Hajnoczi if (sector_num + nb_sectors == bs->total_sectors && 81116b3c5cdSStefan Hajnoczi nb_sectors < s->cluster_sectors) { 81216b3c5cdSStefan Hajnoczi uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size); 81316b3c5cdSStefan Hajnoczi memset(pad_buf, 0, s->cluster_size); 81416b3c5cdSStefan Hajnoczi memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE); 81516b3c5cdSStefan Hajnoczi ret = qcow_write_compressed(bs, sector_num, 81616b3c5cdSStefan Hajnoczi pad_buf, s->cluster_sectors); 81716b3c5cdSStefan Hajnoczi qemu_vfree(pad_buf); 81816b3c5cdSStefan Hajnoczi } 81916b3c5cdSStefan Hajnoczi return ret; 82016b3c5cdSStefan Hajnoczi } 821019d6b8fSAnthony Liguori 8227267c094SAnthony Liguori out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128); 823019d6b8fSAnthony Liguori 824019d6b8fSAnthony Liguori /* best compression, small window, no zlib header */ 825019d6b8fSAnthony Liguori memset(&strm, 0, sizeof(strm)); 826019d6b8fSAnthony Liguori ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, 827019d6b8fSAnthony Liguori Z_DEFLATED, -12, 828019d6b8fSAnthony Liguori 9, Z_DEFAULT_STRATEGY); 829019d6b8fSAnthony Liguori if (ret != 0) { 83064ebe71aSKevin Wolf ret = -EINVAL; 83164ebe71aSKevin Wolf goto fail; 832019d6b8fSAnthony Liguori } 833019d6b8fSAnthony Liguori 834019d6b8fSAnthony Liguori strm.avail_in = s->cluster_size; 835019d6b8fSAnthony Liguori strm.next_in = (uint8_t *)buf; 836019d6b8fSAnthony Liguori strm.avail_out = s->cluster_size; 837019d6b8fSAnthony Liguori strm.next_out = out_buf; 838019d6b8fSAnthony Liguori 839019d6b8fSAnthony Liguori ret = deflate(&strm, Z_FINISH); 840019d6b8fSAnthony Liguori if (ret != Z_STREAM_END && ret != Z_OK) { 841019d6b8fSAnthony Liguori deflateEnd(&strm); 84264ebe71aSKevin Wolf ret = -EINVAL; 84364ebe71aSKevin Wolf goto fail; 844019d6b8fSAnthony Liguori } 845019d6b8fSAnthony Liguori out_len = strm.next_out - out_buf; 846019d6b8fSAnthony Liguori 847019d6b8fSAnthony Liguori deflateEnd(&strm); 848019d6b8fSAnthony Liguori 849019d6b8fSAnthony Liguori if (ret != Z_STREAM_END || out_len >= s->cluster_size) { 850019d6b8fSAnthony Liguori /* could not compress: write normal cluster */ 85164ebe71aSKevin Wolf ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors); 85264ebe71aSKevin Wolf if (ret < 0) { 85364ebe71aSKevin Wolf goto fail; 85464ebe71aSKevin Wolf } 855019d6b8fSAnthony Liguori } else { 856019d6b8fSAnthony Liguori cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, 857019d6b8fSAnthony Liguori out_len, 0, 0); 85864ebe71aSKevin Wolf if (cluster_offset == 0) { 85964ebe71aSKevin Wolf ret = -EIO; 86064ebe71aSKevin Wolf goto fail; 86164ebe71aSKevin Wolf } 86264ebe71aSKevin Wolf 863019d6b8fSAnthony Liguori cluster_offset &= s->cluster_offset_mask; 86464ebe71aSKevin Wolf ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len); 86564ebe71aSKevin Wolf if (ret < 0) { 86664ebe71aSKevin Wolf goto fail; 867019d6b8fSAnthony Liguori } 868019d6b8fSAnthony Liguori } 869019d6b8fSAnthony Liguori 87064ebe71aSKevin Wolf ret = 0; 87164ebe71aSKevin Wolf fail: 8727267c094SAnthony Liguori g_free(out_buf); 87364ebe71aSKevin Wolf return ret; 874019d6b8fSAnthony Liguori } 875019d6b8fSAnthony Liguori 876019d6b8fSAnthony Liguori static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 877019d6b8fSAnthony Liguori { 878019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 879019d6b8fSAnthony Liguori bdi->cluster_size = s->cluster_size; 880019d6b8fSAnthony Liguori return 0; 881019d6b8fSAnthony Liguori } 882019d6b8fSAnthony Liguori 8830e7e1989SKevin Wolf 8840e7e1989SKevin Wolf static QEMUOptionParameter qcow_create_options[] = { 885db08adf5SKevin Wolf { 886db08adf5SKevin Wolf .name = BLOCK_OPT_SIZE, 887db08adf5SKevin Wolf .type = OPT_SIZE, 888db08adf5SKevin Wolf .help = "Virtual disk size" 889db08adf5SKevin Wolf }, 890db08adf5SKevin Wolf { 891db08adf5SKevin Wolf .name = BLOCK_OPT_BACKING_FILE, 892db08adf5SKevin Wolf .type = OPT_STRING, 893db08adf5SKevin Wolf .help = "File name of a base image" 894db08adf5SKevin Wolf }, 895db08adf5SKevin Wolf { 896db08adf5SKevin Wolf .name = BLOCK_OPT_ENCRYPT, 897db08adf5SKevin Wolf .type = OPT_FLAG, 898db08adf5SKevin Wolf .help = "Encrypt the image" 899db08adf5SKevin Wolf }, 9000e7e1989SKevin Wolf { NULL } 9010e7e1989SKevin Wolf }; 9020e7e1989SKevin Wolf 903019d6b8fSAnthony Liguori static BlockDriver bdrv_qcow = { 904019d6b8fSAnthony Liguori .format_name = "qcow", 905019d6b8fSAnthony Liguori .instance_size = sizeof(BDRVQcowState), 906019d6b8fSAnthony Liguori .bdrv_probe = qcow_probe, 907019d6b8fSAnthony Liguori .bdrv_open = qcow_open, 908019d6b8fSAnthony Liguori .bdrv_close = qcow_close, 909d177692eSJeff Cody .bdrv_reopen_prepare = qcow_reopen_prepare, 910019d6b8fSAnthony Liguori .bdrv_create = qcow_create, 9113ac21627SPeter Lieven .bdrv_has_zero_init = bdrv_has_zero_init_1, 912c68b89acSKevin Wolf 91352b8eb60SKevin Wolf .bdrv_co_readv = qcow_co_readv, 91452b8eb60SKevin Wolf .bdrv_co_writev = qcow_co_writev, 915b6b8a333SPaolo Bonzini .bdrv_co_get_block_status = qcow_co_get_block_status, 916c68b89acSKevin Wolf 917c68b89acSKevin Wolf .bdrv_set_key = qcow_set_key, 918c68b89acSKevin Wolf .bdrv_make_empty = qcow_make_empty, 919019d6b8fSAnthony Liguori .bdrv_write_compressed = qcow_write_compressed, 920019d6b8fSAnthony Liguori .bdrv_get_info = qcow_get_info, 9210e7e1989SKevin Wolf 9220e7e1989SKevin Wolf .create_options = qcow_create_options, 923019d6b8fSAnthony Liguori }; 924019d6b8fSAnthony Liguori 925019d6b8fSAnthony Liguori static void bdrv_qcow_init(void) 926019d6b8fSAnthony Liguori { 927019d6b8fSAnthony Liguori bdrv_register(&bdrv_qcow); 928019d6b8fSAnthony Liguori } 929019d6b8fSAnthony Liguori 930019d6b8fSAnthony Liguori block_init(bdrv_qcow_init); 931