/*
 * Block driver for the QCOW version 2 format
 *
 * Copyright (c) 2004-2006 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
2345aba42fSKevin Wolf */ 2445aba42fSKevin Wolf 2580c71a24SPeter Maydell #include "qemu/osdep.h" 2645aba42fSKevin Wolf #include <zlib.h> 2745aba42fSKevin Wolf 28*da34e65cSMarkus Armbruster #include "qapi/error.h" 2945aba42fSKevin Wolf #include "qemu-common.h" 30737e150eSPaolo Bonzini #include "block/block_int.h" 3145aba42fSKevin Wolf #include "block/qcow2.h" 323cce16f4SKevin Wolf #include "trace.h" 3345aba42fSKevin Wolf 342cf7cfa1SKevin Wolf int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, 352cf7cfa1SKevin Wolf bool exact_size) 3645aba42fSKevin Wolf { 37ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 382cf7cfa1SKevin Wolf int new_l1_size2, ret, i; 3945aba42fSKevin Wolf uint64_t *new_l1_table; 40fda74f82SMax Reitz int64_t old_l1_table_offset, old_l1_size; 412cf7cfa1SKevin Wolf int64_t new_l1_table_offset, new_l1_size; 4245aba42fSKevin Wolf uint8_t data[12]; 4345aba42fSKevin Wolf 4472893756SStefan Hajnoczi if (min_size <= s->l1_size) 4545aba42fSKevin Wolf return 0; 4672893756SStefan Hajnoczi 47b93f9950SMax Reitz /* Do a sanity check on min_size before trying to calculate new_l1_size 48b93f9950SMax Reitz * (this prevents overflows during the while loop for the calculation of 49b93f9950SMax Reitz * new_l1_size) */ 50b93f9950SMax Reitz if (min_size > INT_MAX / sizeof(uint64_t)) { 51b93f9950SMax Reitz return -EFBIG; 52b93f9950SMax Reitz } 53b93f9950SMax Reitz 5472893756SStefan Hajnoczi if (exact_size) { 5572893756SStefan Hajnoczi new_l1_size = min_size; 5672893756SStefan Hajnoczi } else { 5772893756SStefan Hajnoczi /* Bump size up to reduce the number of times we have to grow */ 5872893756SStefan Hajnoczi new_l1_size = s->l1_size; 59d191d12dSStefan Weil if (new_l1_size == 0) { 60d191d12dSStefan Weil new_l1_size = 1; 61d191d12dSStefan Weil } 6245aba42fSKevin Wolf while (min_size > new_l1_size) { 6345aba42fSKevin Wolf new_l1_size = (new_l1_size * 3 + 1) / 2; 6445aba42fSKevin Wolf } 6572893756SStefan Hajnoczi } 6672893756SStefan Hajnoczi 67cab60de9SKevin 
Wolf if (new_l1_size > INT_MAX / sizeof(uint64_t)) { 682cf7cfa1SKevin Wolf return -EFBIG; 692cf7cfa1SKevin Wolf } 702cf7cfa1SKevin Wolf 7145aba42fSKevin Wolf #ifdef DEBUG_ALLOC2 722cf7cfa1SKevin Wolf fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n", 732cf7cfa1SKevin Wolf s->l1_size, new_l1_size); 7445aba42fSKevin Wolf #endif 7545aba42fSKevin Wolf 7645aba42fSKevin Wolf new_l1_size2 = sizeof(uint64_t) * new_l1_size; 779a4f4c31SKevin Wolf new_l1_table = qemu_try_blockalign(bs->file->bs, 78de82815dSKevin Wolf align_offset(new_l1_size2, 512)); 79de82815dSKevin Wolf if (new_l1_table == NULL) { 80de82815dSKevin Wolf return -ENOMEM; 81de82815dSKevin Wolf } 82de82815dSKevin Wolf memset(new_l1_table, 0, align_offset(new_l1_size2, 512)); 83de82815dSKevin Wolf 8445aba42fSKevin Wolf memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t)); 8545aba42fSKevin Wolf 8645aba42fSKevin Wolf /* write new table (align to cluster) */ 8766f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE); 88ed6ccf0fSKevin Wolf new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2); 895d757b56SKevin Wolf if (new_l1_table_offset < 0) { 90de82815dSKevin Wolf qemu_vfree(new_l1_table); 915d757b56SKevin Wolf return new_l1_table_offset; 925d757b56SKevin Wolf } 9329c1a730SKevin Wolf 9429c1a730SKevin Wolf ret = qcow2_cache_flush(bs, s->refcount_block_cache); 9529c1a730SKevin Wolf if (ret < 0) { 9680fa3341SKevin Wolf goto fail; 9729c1a730SKevin Wolf } 9845aba42fSKevin Wolf 99cf93980eSMax Reitz /* the L1 position has not yet been updated, so these clusters must 100cf93980eSMax Reitz * indeed be completely free */ 101231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset, 102231bb267SMax Reitz new_l1_size2); 103cf93980eSMax Reitz if (ret < 0) { 104cf93980eSMax Reitz goto fail; 105cf93980eSMax Reitz } 106cf93980eSMax Reitz 10766f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE); 10845aba42fSKevin Wolf for(i = 0; i < s->l1_size; i++) 
10945aba42fSKevin Wolf new_l1_table[i] = cpu_to_be64(new_l1_table[i]); 1109a4f4c31SKevin Wolf ret = bdrv_pwrite_sync(bs->file->bs, new_l1_table_offset, 1119a4f4c31SKevin Wolf new_l1_table, new_l1_size2); 1128b3b7206SKevin Wolf if (ret < 0) 11345aba42fSKevin Wolf goto fail; 11445aba42fSKevin Wolf for(i = 0; i < s->l1_size; i++) 11545aba42fSKevin Wolf new_l1_table[i] = be64_to_cpu(new_l1_table[i]); 11645aba42fSKevin Wolf 11745aba42fSKevin Wolf /* set new table */ 11866f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE); 11945aba42fSKevin Wolf cpu_to_be32w((uint32_t*)data, new_l1_size); 120e4ef9f46SPeter Maydell stq_be_p(data + 4, new_l1_table_offset); 1219a4f4c31SKevin Wolf ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_size), 1229a4f4c31SKevin Wolf data, sizeof(data)); 1238b3b7206SKevin Wolf if (ret < 0) { 12445aba42fSKevin Wolf goto fail; 125fb8fa77cSKevin Wolf } 126de82815dSKevin Wolf qemu_vfree(s->l1_table); 127fda74f82SMax Reitz old_l1_table_offset = s->l1_table_offset; 12845aba42fSKevin Wolf s->l1_table_offset = new_l1_table_offset; 12945aba42fSKevin Wolf s->l1_table = new_l1_table; 130fda74f82SMax Reitz old_l1_size = s->l1_size; 13145aba42fSKevin Wolf s->l1_size = new_l1_size; 132fda74f82SMax Reitz qcow2_free_clusters(bs, old_l1_table_offset, old_l1_size * sizeof(uint64_t), 133fda74f82SMax Reitz QCOW2_DISCARD_OTHER); 13445aba42fSKevin Wolf return 0; 13545aba42fSKevin Wolf fail: 136de82815dSKevin Wolf qemu_vfree(new_l1_table); 1376cfcb9b8SKevin Wolf qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2, 1386cfcb9b8SKevin Wolf QCOW2_DISCARD_OTHER); 1398b3b7206SKevin Wolf return ret; 14045aba42fSKevin Wolf } 14145aba42fSKevin Wolf 14245aba42fSKevin Wolf /* 14345aba42fSKevin Wolf * l2_load 14445aba42fSKevin Wolf * 14545aba42fSKevin Wolf * Loads a L2 table into memory. If the table is in the cache, the cache 14645aba42fSKevin Wolf * is used; otherwise the L2 table is loaded from the image file. 
14745aba42fSKevin Wolf * 14845aba42fSKevin Wolf * Returns a pointer to the L2 table on success, or NULL if the read from 14945aba42fSKevin Wolf * the image file failed. 15045aba42fSKevin Wolf */ 15145aba42fSKevin Wolf 15255c17e98SKevin Wolf static int l2_load(BlockDriverState *bs, uint64_t l2_offset, 15355c17e98SKevin Wolf uint64_t **l2_table) 15445aba42fSKevin Wolf { 155ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 15655c17e98SKevin Wolf int ret; 15745aba42fSKevin Wolf 15829c1a730SKevin Wolf ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table); 15945aba42fSKevin Wolf 16055c17e98SKevin Wolf return ret; 16155c17e98SKevin Wolf } 16255c17e98SKevin Wolf 16345aba42fSKevin Wolf /* 1646583e3c7SKevin Wolf * Writes one sector of the L1 table to the disk (can't update single entries 1656583e3c7SKevin Wolf * and we really don't want bdrv_pread to perform a read-modify-write) 1666583e3c7SKevin Wolf */ 1676583e3c7SKevin Wolf #define L1_ENTRIES_PER_SECTOR (512 / 8) 168e23e400eSMax Reitz int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index) 1696583e3c7SKevin Wolf { 170ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 171a1391444SMax Reitz uint64_t buf[L1_ENTRIES_PER_SECTOR] = { 0 }; 1726583e3c7SKevin Wolf int l1_start_index; 173f7defcb6SKevin Wolf int i, ret; 1746583e3c7SKevin Wolf 1756583e3c7SKevin Wolf l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1); 176a1391444SMax Reitz for (i = 0; i < L1_ENTRIES_PER_SECTOR && l1_start_index + i < s->l1_size; 177a1391444SMax Reitz i++) 178a1391444SMax Reitz { 1796583e3c7SKevin Wolf buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]); 1806583e3c7SKevin Wolf } 1816583e3c7SKevin Wolf 182231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1, 183cf93980eSMax Reitz s->l1_table_offset + 8 * l1_start_index, sizeof(buf)); 184cf93980eSMax Reitz if (ret < 0) { 185cf93980eSMax Reitz return ret; 186cf93980eSMax Reitz } 187cf93980eSMax Reitz 18866f82ceeSKevin Wolf 
BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 1899a4f4c31SKevin Wolf ret = bdrv_pwrite_sync(bs->file->bs, 1909a4f4c31SKevin Wolf s->l1_table_offset + 8 * l1_start_index, 191f7defcb6SKevin Wolf buf, sizeof(buf)); 192f7defcb6SKevin Wolf if (ret < 0) { 193f7defcb6SKevin Wolf return ret; 1946583e3c7SKevin Wolf } 1956583e3c7SKevin Wolf 1966583e3c7SKevin Wolf return 0; 1976583e3c7SKevin Wolf } 1986583e3c7SKevin Wolf 1996583e3c7SKevin Wolf /* 20045aba42fSKevin Wolf * l2_allocate 20145aba42fSKevin Wolf * 20245aba42fSKevin Wolf * Allocate a new l2 entry in the file. If l1_index points to an already 20345aba42fSKevin Wolf * used entry in the L2 table (i.e. we are doing a copy on write for the L2 20445aba42fSKevin Wolf * table) copy the contents of the old L2 table into the newly allocated one. 20545aba42fSKevin Wolf * Otherwise the new table is initialized with zeros. 20645aba42fSKevin Wolf * 20745aba42fSKevin Wolf */ 20845aba42fSKevin Wolf 209c46e1167SKevin Wolf static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) 21045aba42fSKevin Wolf { 211ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 2126583e3c7SKevin Wolf uint64_t old_l2_offset; 2138585afd8SMax Reitz uint64_t *l2_table = NULL; 214f4f0d391SKevin Wolf int64_t l2_offset; 215c46e1167SKevin Wolf int ret; 21645aba42fSKevin Wolf 21745aba42fSKevin Wolf old_l2_offset = s->l1_table[l1_index]; 21845aba42fSKevin Wolf 2193cce16f4SKevin Wolf trace_qcow2_l2_allocate(bs, l1_index); 2203cce16f4SKevin Wolf 22145aba42fSKevin Wolf /* allocate a new l2 entry */ 22245aba42fSKevin Wolf 223ed6ccf0fSKevin Wolf l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t)); 2245d757b56SKevin Wolf if (l2_offset < 0) { 225be0b742eSMax Reitz ret = l2_offset; 226be0b742eSMax Reitz goto fail; 2275d757b56SKevin Wolf } 22829c1a730SKevin Wolf 22929c1a730SKevin Wolf ret = qcow2_cache_flush(bs, s->refcount_block_cache); 23029c1a730SKevin Wolf if (ret < 0) { 23129c1a730SKevin Wolf goto fail; 23229c1a730SKevin Wolf } 
23345aba42fSKevin Wolf 23445aba42fSKevin Wolf /* allocate a new entry in the l2 cache */ 23545aba42fSKevin Wolf 2363cce16f4SKevin Wolf trace_qcow2_l2_allocate_get_empty(bs, l1_index); 23729c1a730SKevin Wolf ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table); 23829c1a730SKevin Wolf if (ret < 0) { 239be0b742eSMax Reitz goto fail; 24029c1a730SKevin Wolf } 24129c1a730SKevin Wolf 24229c1a730SKevin Wolf l2_table = *table; 24345aba42fSKevin Wolf 2448e37f681SKevin Wolf if ((old_l2_offset & L1E_OFFSET_MASK) == 0) { 24545aba42fSKevin Wolf /* if there was no old l2 table, clear the new table */ 24645aba42fSKevin Wolf memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); 24745aba42fSKevin Wolf } else { 24829c1a730SKevin Wolf uint64_t* old_table; 24929c1a730SKevin Wolf 25045aba42fSKevin Wolf /* if there was an old l2 table, read it from the disk */ 25166f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ); 2528e37f681SKevin Wolf ret = qcow2_cache_get(bs, s->l2_table_cache, 2538e37f681SKevin Wolf old_l2_offset & L1E_OFFSET_MASK, 25429c1a730SKevin Wolf (void**) &old_table); 25529c1a730SKevin Wolf if (ret < 0) { 25629c1a730SKevin Wolf goto fail; 25729c1a730SKevin Wolf } 25829c1a730SKevin Wolf 25929c1a730SKevin Wolf memcpy(l2_table, old_table, s->cluster_size); 26029c1a730SKevin Wolf 261a3f1afb4SAlberto Garcia qcow2_cache_put(bs, s->l2_table_cache, (void **) &old_table); 26245aba42fSKevin Wolf } 26329c1a730SKevin Wolf 26445aba42fSKevin Wolf /* write the l2 table to the file */ 26566f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); 26629c1a730SKevin Wolf 2673cce16f4SKevin Wolf trace_qcow2_l2_allocate_write_l2(bs, l1_index); 26872e80b89SAlberto Garcia qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); 26929c1a730SKevin Wolf ret = qcow2_cache_flush(bs, s->l2_table_cache); 270c46e1167SKevin Wolf if (ret < 0) { 271175e1152SKevin Wolf goto fail; 272175e1152SKevin Wolf } 273175e1152SKevin Wolf 274175e1152SKevin Wolf /* 
update the L1 entry */ 2753cce16f4SKevin Wolf trace_qcow2_l2_allocate_write_l1(bs, l1_index); 276175e1152SKevin Wolf s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; 277e23e400eSMax Reitz ret = qcow2_write_l1_entry(bs, l1_index); 278175e1152SKevin Wolf if (ret < 0) { 279175e1152SKevin Wolf goto fail; 280c46e1167SKevin Wolf } 28145aba42fSKevin Wolf 282c46e1167SKevin Wolf *table = l2_table; 2833cce16f4SKevin Wolf trace_qcow2_l2_allocate_done(bs, l1_index, 0); 284c46e1167SKevin Wolf return 0; 285175e1152SKevin Wolf 286175e1152SKevin Wolf fail: 2873cce16f4SKevin Wolf trace_qcow2_l2_allocate_done(bs, l1_index, ret); 2888585afd8SMax Reitz if (l2_table != NULL) { 28929c1a730SKevin Wolf qcow2_cache_put(bs, s->l2_table_cache, (void**) table); 2908585afd8SMax Reitz } 29168dba0bfSKevin Wolf s->l1_table[l1_index] = old_l2_offset; 292e3b21ef9SMax Reitz if (l2_offset > 0) { 293e3b21ef9SMax Reitz qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t), 294e3b21ef9SMax Reitz QCOW2_DISCARD_ALWAYS); 295e3b21ef9SMax Reitz } 296175e1152SKevin Wolf return ret; 29745aba42fSKevin Wolf } 29845aba42fSKevin Wolf 2992bfcc4a0SKevin Wolf /* 3002bfcc4a0SKevin Wolf * Checks how many clusters in a given L2 table are contiguous in the image 3012bfcc4a0SKevin Wolf * file. As soon as one of the flags in the bitmask stop_flags changes compared 3022bfcc4a0SKevin Wolf * to the first cluster, the search is stopped and the cluster is not counted 3032bfcc4a0SKevin Wolf * as contiguous. 
(This allows it, for example, to stop at the first compressed 3042bfcc4a0SKevin Wolf * cluster which may require a different handling) 3052bfcc4a0SKevin Wolf */ 306b6d36defSMax Reitz static int count_contiguous_clusters(int nb_clusters, int cluster_size, 30761653008SKevin Wolf uint64_t *l2_table, uint64_t stop_flags) 30845aba42fSKevin Wolf { 30945aba42fSKevin Wolf int i; 31078a52ad5SPeter Lieven uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED; 31115684a47SMax Reitz uint64_t first_entry = be64_to_cpu(l2_table[0]); 31215684a47SMax Reitz uint64_t offset = first_entry & mask; 31345aba42fSKevin Wolf 31445aba42fSKevin Wolf if (!offset) 31545aba42fSKevin Wolf return 0; 31645aba42fSKevin Wolf 317a99dfb45SKevin Wolf assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL); 31815684a47SMax Reitz 31961653008SKevin Wolf for (i = 0; i < nb_clusters; i++) { 3202bfcc4a0SKevin Wolf uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask; 3212bfcc4a0SKevin Wolf if (offset + (uint64_t) i * cluster_size != l2_entry) { 32245aba42fSKevin Wolf break; 3232bfcc4a0SKevin Wolf } 3242bfcc4a0SKevin Wolf } 32545aba42fSKevin Wolf 32661653008SKevin Wolf return i; 32745aba42fSKevin Wolf } 32845aba42fSKevin Wolf 329a99dfb45SKevin Wolf static int count_contiguous_clusters_by_type(int nb_clusters, 330a99dfb45SKevin Wolf uint64_t *l2_table, 331a99dfb45SKevin Wolf int wanted_type) 33245aba42fSKevin Wolf { 3332bfcc4a0SKevin Wolf int i; 33445aba42fSKevin Wolf 3352bfcc4a0SKevin Wolf for (i = 0; i < nb_clusters; i++) { 3362bfcc4a0SKevin Wolf int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i])); 3372bfcc4a0SKevin Wolf 338a99dfb45SKevin Wolf if (type != wanted_type) { 3392bfcc4a0SKevin Wolf break; 3402bfcc4a0SKevin Wolf } 3412bfcc4a0SKevin Wolf } 34245aba42fSKevin Wolf 34345aba42fSKevin Wolf return i; 34445aba42fSKevin Wolf } 34545aba42fSKevin Wolf 34645aba42fSKevin Wolf /* The crypt function is compatible with the linux cryptoloop 34745aba42fSKevin Wolf algorithm for < 4 
GB images. NOTE: out_buf == in_buf is 34845aba42fSKevin Wolf supported */ 349ff99129aSKevin Wolf int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, 35045aba42fSKevin Wolf uint8_t *out_buf, const uint8_t *in_buf, 351f6fa64f6SDaniel P. Berrange int nb_sectors, bool enc, 352f6fa64f6SDaniel P. Berrange Error **errp) 35345aba42fSKevin Wolf { 35445aba42fSKevin Wolf union { 35545aba42fSKevin Wolf uint64_t ll[2]; 35645aba42fSKevin Wolf uint8_t b[16]; 35745aba42fSKevin Wolf } ivec; 35845aba42fSKevin Wolf int i; 359f6fa64f6SDaniel P. Berrange int ret; 36045aba42fSKevin Wolf 36145aba42fSKevin Wolf for(i = 0; i < nb_sectors; i++) { 36245aba42fSKevin Wolf ivec.ll[0] = cpu_to_le64(sector_num); 36345aba42fSKevin Wolf ivec.ll[1] = 0; 364f6fa64f6SDaniel P. Berrange if (qcrypto_cipher_setiv(s->cipher, 365f6fa64f6SDaniel P. Berrange ivec.b, G_N_ELEMENTS(ivec.b), 366f6fa64f6SDaniel P. Berrange errp) < 0) { 367f6fa64f6SDaniel P. Berrange return -1; 368f6fa64f6SDaniel P. Berrange } 369f6fa64f6SDaniel P. Berrange if (enc) { 370f6fa64f6SDaniel P. Berrange ret = qcrypto_cipher_encrypt(s->cipher, 371f6fa64f6SDaniel P. Berrange in_buf, 372f6fa64f6SDaniel P. Berrange out_buf, 373f6fa64f6SDaniel P. Berrange 512, 374f6fa64f6SDaniel P. Berrange errp); 375f6fa64f6SDaniel P. Berrange } else { 376f6fa64f6SDaniel P. Berrange ret = qcrypto_cipher_decrypt(s->cipher, 377f6fa64f6SDaniel P. Berrange in_buf, 378f6fa64f6SDaniel P. Berrange out_buf, 379f6fa64f6SDaniel P. Berrange 512, 380f6fa64f6SDaniel P. Berrange errp); 381f6fa64f6SDaniel P. Berrange } 382f6fa64f6SDaniel P. Berrange if (ret < 0) { 383f6fa64f6SDaniel P. Berrange return -1; 384f6fa64f6SDaniel P. Berrange } 38545aba42fSKevin Wolf sector_num++; 38645aba42fSKevin Wolf in_buf += 512; 38745aba42fSKevin Wolf out_buf += 512; 38845aba42fSKevin Wolf } 389f6fa64f6SDaniel P. 
Berrange return 0; 39045aba42fSKevin Wolf } 39145aba42fSKevin Wolf 392aef4acb6SStefan Hajnoczi static int coroutine_fn copy_sectors(BlockDriverState *bs, 393aef4acb6SStefan Hajnoczi uint64_t start_sect, 394aef4acb6SStefan Hajnoczi uint64_t cluster_offset, 395aef4acb6SStefan Hajnoczi int n_start, int n_end) 39645aba42fSKevin Wolf { 397ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 398aef4acb6SStefan Hajnoczi QEMUIOVector qiov; 399aef4acb6SStefan Hajnoczi struct iovec iov; 40045aba42fSKevin Wolf int n, ret; 4011b9f1491SKevin Wolf 40245aba42fSKevin Wolf n = n_end - n_start; 4031b9f1491SKevin Wolf if (n <= 0) { 40445aba42fSKevin Wolf return 0; 4051b9f1491SKevin Wolf } 4061b9f1491SKevin Wolf 407aef4acb6SStefan Hajnoczi iov.iov_len = n * BDRV_SECTOR_SIZE; 408de82815dSKevin Wolf iov.iov_base = qemu_try_blockalign(bs, iov.iov_len); 409de82815dSKevin Wolf if (iov.iov_base == NULL) { 410de82815dSKevin Wolf return -ENOMEM; 411de82815dSKevin Wolf } 412aef4acb6SStefan Hajnoczi 413aef4acb6SStefan Hajnoczi qemu_iovec_init_external(&qiov, &iov, 1); 4141b9f1491SKevin Wolf 41566f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_COW_READ); 416aef4acb6SStefan Hajnoczi 417dba28555SMax Reitz if (!bs->drv) { 418bd604369SKevin Wolf ret = -ENOMEDIUM; 419bd604369SKevin Wolf goto out; 420dba28555SMax Reitz } 421dba28555SMax Reitz 422aef4acb6SStefan Hajnoczi /* Call .bdrv_co_readv() directly instead of using the public block-layer 423aef4acb6SStefan Hajnoczi * interface. This avoids double I/O throttling and request tracking, 424aef4acb6SStefan Hajnoczi * which can lead to deadlock when block layer copy-on-read is enabled. 425aef4acb6SStefan Hajnoczi */ 426aef4acb6SStefan Hajnoczi ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov); 4271b9f1491SKevin Wolf if (ret < 0) { 4281b9f1491SKevin Wolf goto out; 4291b9f1491SKevin Wolf } 4301b9f1491SKevin Wolf 4318336aafaSDaniel P. Berrange if (bs->encrypted) { 432f6fa64f6SDaniel P. Berrange Error *err = NULL; 433f6fa64f6SDaniel P. 
Berrange assert(s->cipher); 434f6fa64f6SDaniel P. Berrange if (qcow2_encrypt_sectors(s, start_sect + n_start, 435f6fa64f6SDaniel P. Berrange iov.iov_base, iov.iov_base, n, 436f6fa64f6SDaniel P. Berrange true, &err) < 0) { 437f6fa64f6SDaniel P. Berrange ret = -EIO; 438f6fa64f6SDaniel P. Berrange error_free(err); 439f6fa64f6SDaniel P. Berrange goto out; 440f6fa64f6SDaniel P. Berrange } 44145aba42fSKevin Wolf } 4421b9f1491SKevin Wolf 443231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, 0, 444cf93980eSMax Reitz cluster_offset + n_start * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE); 445cf93980eSMax Reitz if (ret < 0) { 446cf93980eSMax Reitz goto out; 447cf93980eSMax Reitz } 448cf93980eSMax Reitz 44966f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE); 4509a4f4c31SKevin Wolf ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + n_start, n, 4519a4f4c31SKevin Wolf &qiov); 4521b9f1491SKevin Wolf if (ret < 0) { 4531b9f1491SKevin Wolf goto out; 4541b9f1491SKevin Wolf } 4551b9f1491SKevin Wolf 4561b9f1491SKevin Wolf ret = 0; 4571b9f1491SKevin Wolf out: 458aef4acb6SStefan Hajnoczi qemu_vfree(iov.iov_base); 45945aba42fSKevin Wolf return ret; 46045aba42fSKevin Wolf } 46145aba42fSKevin Wolf 46245aba42fSKevin Wolf 46345aba42fSKevin Wolf /* 46445aba42fSKevin Wolf * get_cluster_offset 46545aba42fSKevin Wolf * 4661c46efaaSKevin Wolf * For a given offset of the disk image, find the cluster offset in 4671c46efaaSKevin Wolf * qcow2 file. The offset is stored in *cluster_offset. 46845aba42fSKevin Wolf * 469d57237f2SDevin Nakamura * on entry, *num is the number of contiguous sectors we'd like to 47045aba42fSKevin Wolf * access following offset. 47145aba42fSKevin Wolf * 472d57237f2SDevin Nakamura * on exit, *num is the number of contiguous sectors we can read. 47345aba42fSKevin Wolf * 47468d000a3SKevin Wolf * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error 47568d000a3SKevin Wolf * cases. 
47645aba42fSKevin Wolf */ 4771c46efaaSKevin Wolf int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, 4781c46efaaSKevin Wolf int *num, uint64_t *cluster_offset) 47945aba42fSKevin Wolf { 480ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 4812cf7cfa1SKevin Wolf unsigned int l2_index; 4822cf7cfa1SKevin Wolf uint64_t l1_index, l2_offset, *l2_table; 48345aba42fSKevin Wolf int l1_bits, c; 48480ee15a6SKevin Wolf unsigned int index_in_cluster, nb_clusters; 48580ee15a6SKevin Wolf uint64_t nb_available, nb_needed; 48655c17e98SKevin Wolf int ret; 48745aba42fSKevin Wolf 48845aba42fSKevin Wolf index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1); 48945aba42fSKevin Wolf nb_needed = *num + index_in_cluster; 49045aba42fSKevin Wolf 49145aba42fSKevin Wolf l1_bits = s->l2_bits + s->cluster_bits; 49245aba42fSKevin Wolf 49345aba42fSKevin Wolf /* compute how many bytes there are between the offset and 49445aba42fSKevin Wolf * the end of the l1 entry 49545aba42fSKevin Wolf */ 49645aba42fSKevin Wolf 49780ee15a6SKevin Wolf nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1)); 49845aba42fSKevin Wolf 49945aba42fSKevin Wolf /* compute the number of available sectors */ 50045aba42fSKevin Wolf 50145aba42fSKevin Wolf nb_available = (nb_available >> 9) + index_in_cluster; 50245aba42fSKevin Wolf 50345aba42fSKevin Wolf if (nb_needed > nb_available) { 50445aba42fSKevin Wolf nb_needed = nb_available; 50545aba42fSKevin Wolf } 506b6d36defSMax Reitz assert(nb_needed <= INT_MAX); 50745aba42fSKevin Wolf 5081c46efaaSKevin Wolf *cluster_offset = 0; 50945aba42fSKevin Wolf 510b6af0975SDaniel P. 
Berrange /* seek to the l2 offset in the l1 table */ 51145aba42fSKevin Wolf 51245aba42fSKevin Wolf l1_index = offset >> l1_bits; 51368d000a3SKevin Wolf if (l1_index >= s->l1_size) { 51468d000a3SKevin Wolf ret = QCOW2_CLUSTER_UNALLOCATED; 51545aba42fSKevin Wolf goto out; 51668d000a3SKevin Wolf } 51745aba42fSKevin Wolf 51868d000a3SKevin Wolf l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; 51968d000a3SKevin Wolf if (!l2_offset) { 52068d000a3SKevin Wolf ret = QCOW2_CLUSTER_UNALLOCATED; 52145aba42fSKevin Wolf goto out; 52268d000a3SKevin Wolf } 52345aba42fSKevin Wolf 524a97c67eeSMax Reitz if (offset_into_cluster(s, l2_offset)) { 525a97c67eeSMax Reitz qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64 526a97c67eeSMax Reitz " unaligned (L1 index: %#" PRIx64 ")", 527a97c67eeSMax Reitz l2_offset, l1_index); 528a97c67eeSMax Reitz return -EIO; 529a97c67eeSMax Reitz } 530a97c67eeSMax Reitz 53145aba42fSKevin Wolf /* load the l2 table in memory */ 53245aba42fSKevin Wolf 53355c17e98SKevin Wolf ret = l2_load(bs, l2_offset, &l2_table); 53455c17e98SKevin Wolf if (ret < 0) { 53555c17e98SKevin Wolf return ret; 5361c46efaaSKevin Wolf } 53745aba42fSKevin Wolf 53845aba42fSKevin Wolf /* find the cluster offset for the given disk offset */ 53945aba42fSKevin Wolf 54045aba42fSKevin Wolf l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); 5411c46efaaSKevin Wolf *cluster_offset = be64_to_cpu(l2_table[l2_index]); 542b6d36defSMax Reitz 543b6d36defSMax Reitz /* nb_needed <= INT_MAX, thus nb_clusters <= INT_MAX, too */ 54445aba42fSKevin Wolf nb_clusters = size_to_clusters(s, nb_needed << 9); 54545aba42fSKevin Wolf 54668d000a3SKevin Wolf ret = qcow2_get_cluster_type(*cluster_offset); 54768d000a3SKevin Wolf switch (ret) { 54868d000a3SKevin Wolf case QCOW2_CLUSTER_COMPRESSED: 54968d000a3SKevin Wolf /* Compressed clusters can only be processed one by one */ 55068d000a3SKevin Wolf c = 1; 55168d000a3SKevin Wolf *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK; 
55268d000a3SKevin Wolf break; 5536377af48SKevin Wolf case QCOW2_CLUSTER_ZERO: 554381b487dSPaolo Bonzini if (s->qcow_version < 3) { 555a97c67eeSMax Reitz qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found" 556a97c67eeSMax Reitz " in pre-v3 image (L2 offset: %#" PRIx64 557a97c67eeSMax Reitz ", L2 index: %#x)", l2_offset, l2_index); 558a97c67eeSMax Reitz ret = -EIO; 559a97c67eeSMax Reitz goto fail; 560381b487dSPaolo Bonzini } 561a99dfb45SKevin Wolf c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index], 562a99dfb45SKevin Wolf QCOW2_CLUSTER_ZERO); 5636377af48SKevin Wolf *cluster_offset = 0; 5646377af48SKevin Wolf break; 56568d000a3SKevin Wolf case QCOW2_CLUSTER_UNALLOCATED: 56645aba42fSKevin Wolf /* how many empty clusters ? */ 567a99dfb45SKevin Wolf c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index], 568a99dfb45SKevin Wolf QCOW2_CLUSTER_UNALLOCATED); 56968d000a3SKevin Wolf *cluster_offset = 0; 57068d000a3SKevin Wolf break; 57168d000a3SKevin Wolf case QCOW2_CLUSTER_NORMAL: 57245aba42fSKevin Wolf /* how many allocated clusters ? 
*/ 57345aba42fSKevin Wolf c = count_contiguous_clusters(nb_clusters, s->cluster_size, 57461653008SKevin Wolf &l2_table[l2_index], QCOW_OFLAG_ZERO); 57568d000a3SKevin Wolf *cluster_offset &= L2E_OFFSET_MASK; 576a97c67eeSMax Reitz if (offset_into_cluster(s, *cluster_offset)) { 577a97c67eeSMax Reitz qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset %#" 578a97c67eeSMax Reitz PRIx64 " unaligned (L2 offset: %#" PRIx64 579a97c67eeSMax Reitz ", L2 index: %#x)", *cluster_offset, 580a97c67eeSMax Reitz l2_offset, l2_index); 581a97c67eeSMax Reitz ret = -EIO; 582a97c67eeSMax Reitz goto fail; 583a97c67eeSMax Reitz } 58468d000a3SKevin Wolf break; 5851417d7e4SKevin Wolf default: 5861417d7e4SKevin Wolf abort(); 58745aba42fSKevin Wolf } 58845aba42fSKevin Wolf 58929c1a730SKevin Wolf qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); 59029c1a730SKevin Wolf 59145aba42fSKevin Wolf nb_available = (c * s->cluster_sectors); 59268d000a3SKevin Wolf 59345aba42fSKevin Wolf out: 59445aba42fSKevin Wolf if (nb_available > nb_needed) 59545aba42fSKevin Wolf nb_available = nb_needed; 59645aba42fSKevin Wolf 59745aba42fSKevin Wolf *num = nb_available - index_in_cluster; 59845aba42fSKevin Wolf 59968d000a3SKevin Wolf return ret; 600a97c67eeSMax Reitz 601a97c67eeSMax Reitz fail: 602a97c67eeSMax Reitz qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table); 603a97c67eeSMax Reitz return ret; 60445aba42fSKevin Wolf } 60545aba42fSKevin Wolf 60645aba42fSKevin Wolf /* 60745aba42fSKevin Wolf * get_cluster_table 60845aba42fSKevin Wolf * 60945aba42fSKevin Wolf * for a given disk offset, load (and allocate if needed) 61045aba42fSKevin Wolf * the l2 table. 61145aba42fSKevin Wolf * 61245aba42fSKevin Wolf * the l2 table offset in the qcow2 file and the cluster index 61345aba42fSKevin Wolf * in the l2 table are given to the caller. 
61445aba42fSKevin Wolf * 6151e3e8f1aSKevin Wolf * Returns 0 on success, -errno in failure case 61645aba42fSKevin Wolf */ 61745aba42fSKevin Wolf static int get_cluster_table(BlockDriverState *bs, uint64_t offset, 61845aba42fSKevin Wolf uint64_t **new_l2_table, 61945aba42fSKevin Wolf int *new_l2_index) 62045aba42fSKevin Wolf { 621ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 6222cf7cfa1SKevin Wolf unsigned int l2_index; 6232cf7cfa1SKevin Wolf uint64_t l1_index, l2_offset; 624c46e1167SKevin Wolf uint64_t *l2_table = NULL; 62580ee15a6SKevin Wolf int ret; 62645aba42fSKevin Wolf 627b6af0975SDaniel P. Berrange /* seek to the l2 offset in the l1 table */ 62845aba42fSKevin Wolf 62945aba42fSKevin Wolf l1_index = offset >> (s->l2_bits + s->cluster_bits); 63045aba42fSKevin Wolf if (l1_index >= s->l1_size) { 63172893756SStefan Hajnoczi ret = qcow2_grow_l1_table(bs, l1_index + 1, false); 6321e3e8f1aSKevin Wolf if (ret < 0) { 6331e3e8f1aSKevin Wolf return ret; 6341e3e8f1aSKevin Wolf } 63545aba42fSKevin Wolf } 6368e37f681SKevin Wolf 6372cf7cfa1SKevin Wolf assert(l1_index < s->l1_size); 6388e37f681SKevin Wolf l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; 639a97c67eeSMax Reitz if (offset_into_cluster(s, l2_offset)) { 640a97c67eeSMax Reitz qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64 641a97c67eeSMax Reitz " unaligned (L1 index: %#" PRIx64 ")", 642a97c67eeSMax Reitz l2_offset, l1_index); 643a97c67eeSMax Reitz return -EIO; 644a97c67eeSMax Reitz } 64545aba42fSKevin Wolf 64645aba42fSKevin Wolf /* seek the l2 table of the given l2 offset */ 64745aba42fSKevin Wolf 6488e37f681SKevin Wolf if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) { 64945aba42fSKevin Wolf /* load the l2 table in memory */ 65055c17e98SKevin Wolf ret = l2_load(bs, l2_offset, &l2_table); 65155c17e98SKevin Wolf if (ret < 0) { 65255c17e98SKevin Wolf return ret; 6531e3e8f1aSKevin Wolf } 65445aba42fSKevin Wolf } else { 65516fde5f2SKevin Wolf /* First allocate a new L2 table (and do COW if 
needed) */ 656c46e1167SKevin Wolf ret = l2_allocate(bs, l1_index, &l2_table); 657c46e1167SKevin Wolf if (ret < 0) { 658c46e1167SKevin Wolf return ret; 6591e3e8f1aSKevin Wolf } 66016fde5f2SKevin Wolf 66116fde5f2SKevin Wolf /* Then decrease the refcount of the old table */ 66216fde5f2SKevin Wolf if (l2_offset) { 6636cfcb9b8SKevin Wolf qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t), 6646cfcb9b8SKevin Wolf QCOW2_DISCARD_OTHER); 66516fde5f2SKevin Wolf } 66645aba42fSKevin Wolf } 66745aba42fSKevin Wolf 66845aba42fSKevin Wolf /* find the cluster offset for the given disk offset */ 66945aba42fSKevin Wolf 67045aba42fSKevin Wolf l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); 67145aba42fSKevin Wolf 67245aba42fSKevin Wolf *new_l2_table = l2_table; 67345aba42fSKevin Wolf *new_l2_index = l2_index; 67445aba42fSKevin Wolf 6751e3e8f1aSKevin Wolf return 0; 67645aba42fSKevin Wolf } 67745aba42fSKevin Wolf 67845aba42fSKevin Wolf /* 67945aba42fSKevin Wolf * alloc_compressed_cluster_offset 68045aba42fSKevin Wolf * 68145aba42fSKevin Wolf * For a given offset of the disk image, return cluster offset in 68245aba42fSKevin Wolf * qcow2 file. 68345aba42fSKevin Wolf * 68445aba42fSKevin Wolf * If the offset is not found, allocate a new compressed cluster. 68545aba42fSKevin Wolf * 68645aba42fSKevin Wolf * Return the cluster offset if successful, 68745aba42fSKevin Wolf * Return 0, otherwise. 
68845aba42fSKevin Wolf * 68945aba42fSKevin Wolf */ 69045aba42fSKevin Wolf 691ed6ccf0fSKevin Wolf uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, 69245aba42fSKevin Wolf uint64_t offset, 69345aba42fSKevin Wolf int compressed_size) 69445aba42fSKevin Wolf { 695ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 69645aba42fSKevin Wolf int l2_index, ret; 6973948d1d4SKevin Wolf uint64_t *l2_table; 698f4f0d391SKevin Wolf int64_t cluster_offset; 69945aba42fSKevin Wolf int nb_csectors; 70045aba42fSKevin Wolf 7013948d1d4SKevin Wolf ret = get_cluster_table(bs, offset, &l2_table, &l2_index); 7021e3e8f1aSKevin Wolf if (ret < 0) { 70345aba42fSKevin Wolf return 0; 7041e3e8f1aSKevin Wolf } 70545aba42fSKevin Wolf 706b0b6862eSKevin Wolf /* Compression can't overwrite anything. Fail if the cluster was already 707b0b6862eSKevin Wolf * allocated. */ 70845aba42fSKevin Wolf cluster_offset = be64_to_cpu(l2_table[l2_index]); 709b0b6862eSKevin Wolf if (cluster_offset & L2E_OFFSET_MASK) { 7108f1efd00SKevin Wolf qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); 7118f1efd00SKevin Wolf return 0; 7128f1efd00SKevin Wolf } 71345aba42fSKevin Wolf 714ed6ccf0fSKevin Wolf cluster_offset = qcow2_alloc_bytes(bs, compressed_size); 7155d757b56SKevin Wolf if (cluster_offset < 0) { 71629c1a730SKevin Wolf qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); 7175d757b56SKevin Wolf return 0; 7185d757b56SKevin Wolf } 7195d757b56SKevin Wolf 72045aba42fSKevin Wolf nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) - 72145aba42fSKevin Wolf (cluster_offset >> 9); 72245aba42fSKevin Wolf 72345aba42fSKevin Wolf cluster_offset |= QCOW_OFLAG_COMPRESSED | 72445aba42fSKevin Wolf ((uint64_t)nb_csectors << s->csize_shift); 72545aba42fSKevin Wolf 72645aba42fSKevin Wolf /* update L2 table */ 72745aba42fSKevin Wolf 72845aba42fSKevin Wolf /* compressed clusters never have the copied flag */ 72945aba42fSKevin Wolf 73066f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, 
BLKDBG_L2_UPDATE_COMPRESSED); 73172e80b89SAlberto Garcia qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); 73245aba42fSKevin Wolf l2_table[l2_index] = cpu_to_be64(cluster_offset); 733a3f1afb4SAlberto Garcia qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); 73445aba42fSKevin Wolf 73545aba42fSKevin Wolf return cluster_offset; 73645aba42fSKevin Wolf } 73745aba42fSKevin Wolf 738593fb83cSKevin Wolf static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r) 739593fb83cSKevin Wolf { 740ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 741593fb83cSKevin Wolf int ret; 742593fb83cSKevin Wolf 743593fb83cSKevin Wolf if (r->nb_sectors == 0) { 744593fb83cSKevin Wolf return 0; 745593fb83cSKevin Wolf } 746593fb83cSKevin Wolf 747593fb83cSKevin Wolf qemu_co_mutex_unlock(&s->lock); 748593fb83cSKevin Wolf ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset, 749593fb83cSKevin Wolf r->offset / BDRV_SECTOR_SIZE, 750593fb83cSKevin Wolf r->offset / BDRV_SECTOR_SIZE + r->nb_sectors); 751593fb83cSKevin Wolf qemu_co_mutex_lock(&s->lock); 752593fb83cSKevin Wolf 753593fb83cSKevin Wolf if (ret < 0) { 754593fb83cSKevin Wolf return ret; 755593fb83cSKevin Wolf } 756593fb83cSKevin Wolf 757593fb83cSKevin Wolf /* 758593fb83cSKevin Wolf * Before we update the L2 table to actually point to the new cluster, we 759593fb83cSKevin Wolf * need to be sure that the refcounts have been increased and COW was 760593fb83cSKevin Wolf * handled. 
761593fb83cSKevin Wolf */ 762593fb83cSKevin Wolf qcow2_cache_depends_on_flush(s->l2_table_cache); 763593fb83cSKevin Wolf 764593fb83cSKevin Wolf return 0; 765593fb83cSKevin Wolf } 766593fb83cSKevin Wolf 767148da7eaSKevin Wolf int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) 76845aba42fSKevin Wolf { 769ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 77045aba42fSKevin Wolf int i, j = 0, l2_index, ret; 771593fb83cSKevin Wolf uint64_t *old_cluster, *l2_table; 772250196f1SKevin Wolf uint64_t cluster_offset = m->alloc_offset; 77345aba42fSKevin Wolf 7743cce16f4SKevin Wolf trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); 775f50f88b9SKevin Wolf assert(m->nb_clusters > 0); 77645aba42fSKevin Wolf 7775839e53bSMarkus Armbruster old_cluster = g_try_new(uint64_t, m->nb_clusters); 778de82815dSKevin Wolf if (old_cluster == NULL) { 779de82815dSKevin Wolf ret = -ENOMEM; 780de82815dSKevin Wolf goto err; 781de82815dSKevin Wolf } 78245aba42fSKevin Wolf 78345aba42fSKevin Wolf /* copy content of unmodified sectors */ 784593fb83cSKevin Wolf ret = perform_cow(bs, m, &m->cow_start); 785593fb83cSKevin Wolf if (ret < 0) { 78645aba42fSKevin Wolf goto err; 78745aba42fSKevin Wolf } 78845aba42fSKevin Wolf 789593fb83cSKevin Wolf ret = perform_cow(bs, m, &m->cow_end); 790593fb83cSKevin Wolf if (ret < 0) { 79145aba42fSKevin Wolf goto err; 79245aba42fSKevin Wolf } 79345aba42fSKevin Wolf 794593fb83cSKevin Wolf /* Update L2 table. 
*/ 79574c4510aSKevin Wolf if (s->use_lazy_refcounts) { 796280d3735SKevin Wolf qcow2_mark_dirty(bs); 797280d3735SKevin Wolf } 798bfe8043eSStefan Hajnoczi if (qcow2_need_accurate_refcounts(s)) { 799bfe8043eSStefan Hajnoczi qcow2_cache_set_dependency(bs, s->l2_table_cache, 800bfe8043eSStefan Hajnoczi s->refcount_block_cache); 801bfe8043eSStefan Hajnoczi } 802280d3735SKevin Wolf 8033948d1d4SKevin Wolf ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index); 8041e3e8f1aSKevin Wolf if (ret < 0) { 80545aba42fSKevin Wolf goto err; 8061e3e8f1aSKevin Wolf } 80772e80b89SAlberto Garcia qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); 80845aba42fSKevin Wolf 809c01dbccbSMax Reitz assert(l2_index + m->nb_clusters <= s->l2_size); 81045aba42fSKevin Wolf for (i = 0; i < m->nb_clusters; i++) { 81145aba42fSKevin Wolf /* if two concurrent writes happen to the same unallocated cluster 81245aba42fSKevin Wolf * each write allocates separate cluster and writes data concurrently. 81345aba42fSKevin Wolf * The first one to complete updates l2 table with pointer to its 81445aba42fSKevin Wolf * cluster the second one has to do RMW (which is done above by 81545aba42fSKevin Wolf * copy_sectors()), update l2 table with its cluster pointer and free 81645aba42fSKevin Wolf * old cluster. This is what this loop does */ 81745aba42fSKevin Wolf if(l2_table[l2_index + i] != 0) 81845aba42fSKevin Wolf old_cluster[j++] = l2_table[l2_index + i]; 81945aba42fSKevin Wolf 82045aba42fSKevin Wolf l2_table[l2_index + i] = cpu_to_be64((cluster_offset + 82145aba42fSKevin Wolf (i << s->cluster_bits)) | QCOW_OFLAG_COPIED); 82245aba42fSKevin Wolf } 82345aba42fSKevin Wolf 8249f8e668eSKevin Wolf 825a3f1afb4SAlberto Garcia qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); 82645aba42fSKevin Wolf 8277ec5e6a4SKevin Wolf /* 8287ec5e6a4SKevin Wolf * If this was a COW, we need to decrease the refcount of the old cluster. 
8296cfcb9b8SKevin Wolf * 8306cfcb9b8SKevin Wolf * Don't discard clusters that reach a refcount of 0 (e.g. compressed 8316cfcb9b8SKevin Wolf * clusters), the next write will reuse them anyway. 8327ec5e6a4SKevin Wolf */ 8337ec5e6a4SKevin Wolf if (j != 0) { 8347ec5e6a4SKevin Wolf for (i = 0; i < j; i++) { 8356cfcb9b8SKevin Wolf qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1, 8366cfcb9b8SKevin Wolf QCOW2_DISCARD_NEVER); 8377ec5e6a4SKevin Wolf } 8387ec5e6a4SKevin Wolf } 83945aba42fSKevin Wolf 84045aba42fSKevin Wolf ret = 0; 84145aba42fSKevin Wolf err: 8427267c094SAnthony Liguori g_free(old_cluster); 84345aba42fSKevin Wolf return ret; 84445aba42fSKevin Wolf } 84545aba42fSKevin Wolf 84645aba42fSKevin Wolf /* 847bf319eceSKevin Wolf * Returns the number of contiguous clusters that can be used for an allocating 848bf319eceSKevin Wolf * write, but require COW to be performed (this includes yet unallocated space, 849bf319eceSKevin Wolf * which must copy from the backing file) 850bf319eceSKevin Wolf */ 851ff99129aSKevin Wolf static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters, 852bf319eceSKevin Wolf uint64_t *l2_table, int l2_index) 853bf319eceSKevin Wolf { 854143550a8SKevin Wolf int i; 855bf319eceSKevin Wolf 856143550a8SKevin Wolf for (i = 0; i < nb_clusters; i++) { 857143550a8SKevin Wolf uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]); 858143550a8SKevin Wolf int cluster_type = qcow2_get_cluster_type(l2_entry); 859143550a8SKevin Wolf 860143550a8SKevin Wolf switch(cluster_type) { 861143550a8SKevin Wolf case QCOW2_CLUSTER_NORMAL: 862143550a8SKevin Wolf if (l2_entry & QCOW_OFLAG_COPIED) { 863143550a8SKevin Wolf goto out; 864143550a8SKevin Wolf } 865bf319eceSKevin Wolf break; 866143550a8SKevin Wolf case QCOW2_CLUSTER_UNALLOCATED: 867143550a8SKevin Wolf case QCOW2_CLUSTER_COMPRESSED: 8686377af48SKevin Wolf case QCOW2_CLUSTER_ZERO: 869143550a8SKevin Wolf break; 870143550a8SKevin Wolf default: 871143550a8SKevin Wolf abort(); 872143550a8SKevin Wolf } 
873bf319eceSKevin Wolf } 874bf319eceSKevin Wolf 875143550a8SKevin Wolf out: 876bf319eceSKevin Wolf assert(i <= nb_clusters); 877bf319eceSKevin Wolf return i; 878bf319eceSKevin Wolf } 879bf319eceSKevin Wolf 880bf319eceSKevin Wolf /* 881250196f1SKevin Wolf * Check if there already is an AIO write request in flight which allocates 882250196f1SKevin Wolf * the same cluster. In this case we need to wait until the previous 883250196f1SKevin Wolf * request has completed and updated the L2 table accordingly. 88465eb2e35SKevin Wolf * 88565eb2e35SKevin Wolf * Returns: 88665eb2e35SKevin Wolf * 0 if there was no dependency. *cur_bytes indicates the number of 88765eb2e35SKevin Wolf * bytes from guest_offset that can be read before the next 88865eb2e35SKevin Wolf * dependency must be processed (or the request is complete) 88965eb2e35SKevin Wolf * 89065eb2e35SKevin Wolf * -EAGAIN if we had to wait for another request, previously gathered 89165eb2e35SKevin Wolf * information on cluster allocation may be invalid now. The caller 89265eb2e35SKevin Wolf * must start over anyway, so consider *cur_bytes undefined. 
893250196f1SKevin Wolf */ 894226c3c26SKevin Wolf static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, 895ecdd5333SKevin Wolf uint64_t *cur_bytes, QCowL2Meta **m) 896226c3c26SKevin Wolf { 897ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 898226c3c26SKevin Wolf QCowL2Meta *old_alloc; 89965eb2e35SKevin Wolf uint64_t bytes = *cur_bytes; 900226c3c26SKevin Wolf 901250196f1SKevin Wolf QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) { 902250196f1SKevin Wolf 90365eb2e35SKevin Wolf uint64_t start = guest_offset; 90465eb2e35SKevin Wolf uint64_t end = start + bytes; 90565eb2e35SKevin Wolf uint64_t old_start = l2meta_cow_start(old_alloc); 90665eb2e35SKevin Wolf uint64_t old_end = l2meta_cow_end(old_alloc); 907250196f1SKevin Wolf 908d9d74f41SKevin Wolf if (end <= old_start || start >= old_end) { 909250196f1SKevin Wolf /* No intersection */ 910250196f1SKevin Wolf } else { 911250196f1SKevin Wolf if (start < old_start) { 912250196f1SKevin Wolf /* Stop at the start of a running allocation */ 91365eb2e35SKevin Wolf bytes = old_start - start; 914250196f1SKevin Wolf } else { 91565eb2e35SKevin Wolf bytes = 0; 916250196f1SKevin Wolf } 917250196f1SKevin Wolf 918ecdd5333SKevin Wolf /* Stop if already an l2meta exists. After yielding, it wouldn't 919ecdd5333SKevin Wolf * be valid any more, so we'd have to clean up the old L2Metas 920ecdd5333SKevin Wolf * and deal with requests depending on them before starting to 921ecdd5333SKevin Wolf * gather new ones. Not worth the trouble. */ 922ecdd5333SKevin Wolf if (bytes == 0 && *m) { 923ecdd5333SKevin Wolf *cur_bytes = 0; 924ecdd5333SKevin Wolf return 0; 925ecdd5333SKevin Wolf } 926ecdd5333SKevin Wolf 92765eb2e35SKevin Wolf if (bytes == 0) { 928250196f1SKevin Wolf /* Wait for the dependency to complete. We need to recheck 929250196f1SKevin Wolf * the free/allocated clusters when we continue. 
*/ 930250196f1SKevin Wolf qemu_co_mutex_unlock(&s->lock); 931250196f1SKevin Wolf qemu_co_queue_wait(&old_alloc->dependent_requests); 932250196f1SKevin Wolf qemu_co_mutex_lock(&s->lock); 933250196f1SKevin Wolf return -EAGAIN; 934250196f1SKevin Wolf } 935250196f1SKevin Wolf } 936250196f1SKevin Wolf } 937250196f1SKevin Wolf 93865eb2e35SKevin Wolf /* Make sure that existing clusters and new allocations are only used up to 93965eb2e35SKevin Wolf * the next dependency if we shortened the request above */ 94065eb2e35SKevin Wolf *cur_bytes = bytes; 941250196f1SKevin Wolf 942226c3c26SKevin Wolf return 0; 943226c3c26SKevin Wolf } 944226c3c26SKevin Wolf 945226c3c26SKevin Wolf /* 9460af729ecSKevin Wolf * Checks how many already allocated clusters that don't require a copy on 9470af729ecSKevin Wolf * write there are at the given guest_offset (up to *bytes). If 9480af729ecSKevin Wolf * *host_offset is not zero, only physically contiguous clusters beginning at 9490af729ecSKevin Wolf * this host offset are counted. 9500af729ecSKevin Wolf * 951411d62b0SKevin Wolf * Note that guest_offset may not be cluster aligned. In this case, the 952411d62b0SKevin Wolf * returned *host_offset points to exact byte referenced by guest_offset and 953411d62b0SKevin Wolf * therefore isn't cluster aligned as well. 9540af729ecSKevin Wolf * 9550af729ecSKevin Wolf * Returns: 9560af729ecSKevin Wolf * 0: if no allocated clusters are available at the given offset. 9570af729ecSKevin Wolf * *bytes is normally unchanged. It is set to 0 if the cluster 9580af729ecSKevin Wolf * is allocated and doesn't need COW, but doesn't have the right 9590af729ecSKevin Wolf * physical offset. 9600af729ecSKevin Wolf * 9610af729ecSKevin Wolf * 1: if allocated clusters that don't require a COW are available at 9620af729ecSKevin Wolf * the requested offset. *bytes may have decreased and describes 9630af729ecSKevin Wolf * the length of the area that can be written to. 
9640af729ecSKevin Wolf * 9650af729ecSKevin Wolf * -errno: in error cases 9660af729ecSKevin Wolf */ 9670af729ecSKevin Wolf static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, 968c53ede9fSKevin Wolf uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) 9690af729ecSKevin Wolf { 970ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 9710af729ecSKevin Wolf int l2_index; 9720af729ecSKevin Wolf uint64_t cluster_offset; 9730af729ecSKevin Wolf uint64_t *l2_table; 974b6d36defSMax Reitz uint64_t nb_clusters; 975c53ede9fSKevin Wolf unsigned int keep_clusters; 976a3f1afb4SAlberto Garcia int ret; 9770af729ecSKevin Wolf 9780af729ecSKevin Wolf trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset, 9790af729ecSKevin Wolf *bytes); 9800af729ecSKevin Wolf 981411d62b0SKevin Wolf assert(*host_offset == 0 || offset_into_cluster(s, guest_offset) 982411d62b0SKevin Wolf == offset_into_cluster(s, *host_offset)); 983411d62b0SKevin Wolf 984acb0467fSKevin Wolf /* 985acb0467fSKevin Wolf * Calculate the number of clusters to look for. We stop at L2 table 986acb0467fSKevin Wolf * boundaries to keep things simple. 
987acb0467fSKevin Wolf */ 988acb0467fSKevin Wolf nb_clusters = 989acb0467fSKevin Wolf size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); 990acb0467fSKevin Wolf 991acb0467fSKevin Wolf l2_index = offset_to_l2_index(s, guest_offset); 992acb0467fSKevin Wolf nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); 993b6d36defSMax Reitz assert(nb_clusters <= INT_MAX); 994acb0467fSKevin Wolf 9950af729ecSKevin Wolf /* Find L2 entry for the first involved cluster */ 9960af729ecSKevin Wolf ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index); 9970af729ecSKevin Wolf if (ret < 0) { 9980af729ecSKevin Wolf return ret; 9990af729ecSKevin Wolf } 10000af729ecSKevin Wolf 10010af729ecSKevin Wolf cluster_offset = be64_to_cpu(l2_table[l2_index]); 10020af729ecSKevin Wolf 10030af729ecSKevin Wolf /* Check how many clusters are already allocated and don't need COW */ 10040af729ecSKevin Wolf if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL 10050af729ecSKevin Wolf && (cluster_offset & QCOW_OFLAG_COPIED)) 10060af729ecSKevin Wolf { 1007e62daaf6SKevin Wolf /* If a specific host_offset is required, check it */ 1008e62daaf6SKevin Wolf bool offset_matches = 1009e62daaf6SKevin Wolf (cluster_offset & L2E_OFFSET_MASK) == *host_offset; 1010e62daaf6SKevin Wolf 1011a97c67eeSMax Reitz if (offset_into_cluster(s, cluster_offset & L2E_OFFSET_MASK)) { 1012a97c67eeSMax Reitz qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset " 1013a97c67eeSMax Reitz "%#llx unaligned (guest offset: %#" PRIx64 1014a97c67eeSMax Reitz ")", cluster_offset & L2E_OFFSET_MASK, 1015a97c67eeSMax Reitz guest_offset); 1016a97c67eeSMax Reitz ret = -EIO; 1017a97c67eeSMax Reitz goto out; 1018a97c67eeSMax Reitz } 1019a97c67eeSMax Reitz 1020e62daaf6SKevin Wolf if (*host_offset != 0 && !offset_matches) { 1021e62daaf6SKevin Wolf *bytes = 0; 1022e62daaf6SKevin Wolf ret = 0; 1023e62daaf6SKevin Wolf goto out; 1024e62daaf6SKevin Wolf } 1025e62daaf6SKevin Wolf 10260af729ecSKevin Wolf /* We keep 
all QCOW_OFLAG_COPIED clusters */ 1027c53ede9fSKevin Wolf keep_clusters = 1028acb0467fSKevin Wolf count_contiguous_clusters(nb_clusters, s->cluster_size, 102961653008SKevin Wolf &l2_table[l2_index], 10300af729ecSKevin Wolf QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO); 1031c53ede9fSKevin Wolf assert(keep_clusters <= nb_clusters); 1032c53ede9fSKevin Wolf 1033c53ede9fSKevin Wolf *bytes = MIN(*bytes, 1034c53ede9fSKevin Wolf keep_clusters * s->cluster_size 1035c53ede9fSKevin Wolf - offset_into_cluster(s, guest_offset)); 10360af729ecSKevin Wolf 10370af729ecSKevin Wolf ret = 1; 10380af729ecSKevin Wolf } else { 10390af729ecSKevin Wolf ret = 0; 10400af729ecSKevin Wolf } 10410af729ecSKevin Wolf 10420af729ecSKevin Wolf /* Cleanup */ 1043e62daaf6SKevin Wolf out: 1044a3f1afb4SAlberto Garcia qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); 10450af729ecSKevin Wolf 1046e62daaf6SKevin Wolf /* Only return a host offset if we actually made progress. Otherwise we 1047e62daaf6SKevin Wolf * would make requirements for handle_alloc() that it can't fulfill */ 1048a97c67eeSMax Reitz if (ret > 0) { 1049411d62b0SKevin Wolf *host_offset = (cluster_offset & L2E_OFFSET_MASK) 1050411d62b0SKevin Wolf + offset_into_cluster(s, guest_offset); 1051e62daaf6SKevin Wolf } 1052e62daaf6SKevin Wolf 10530af729ecSKevin Wolf return ret; 10540af729ecSKevin Wolf } 10550af729ecSKevin Wolf 10560af729ecSKevin Wolf /* 1057226c3c26SKevin Wolf * Allocates new clusters for the given guest_offset. 1058226c3c26SKevin Wolf * 1059226c3c26SKevin Wolf * At most *nb_clusters are allocated, and on return *nb_clusters is updated to 1060226c3c26SKevin Wolf * contain the number of clusters that have been allocated and are contiguous 1061226c3c26SKevin Wolf * in the image file. 1062226c3c26SKevin Wolf * 1063226c3c26SKevin Wolf * If *host_offset is non-zero, it specifies the offset in the image file at 1064226c3c26SKevin Wolf * which the new clusters must start. 
*nb_clusters can be 0 on return in this 1065226c3c26SKevin Wolf * case if the cluster at host_offset is already in use. If *host_offset is 1066226c3c26SKevin Wolf * zero, the clusters can be allocated anywhere in the image file. 1067226c3c26SKevin Wolf * 1068226c3c26SKevin Wolf * *host_offset is updated to contain the offset into the image file at which 1069226c3c26SKevin Wolf * the first allocated cluster starts. 1070226c3c26SKevin Wolf * 1071226c3c26SKevin Wolf * Return 0 on success and -errno in error cases. -EAGAIN means that the 1072226c3c26SKevin Wolf * function has been waiting for another request and the allocation must be 1073226c3c26SKevin Wolf * restarted, but the whole request should not be failed. 1074226c3c26SKevin Wolf */ 1075226c3c26SKevin Wolf static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, 1076b6d36defSMax Reitz uint64_t *host_offset, uint64_t *nb_clusters) 1077226c3c26SKevin Wolf { 1078ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1079226c3c26SKevin Wolf 1080226c3c26SKevin Wolf trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, 1081226c3c26SKevin Wolf *host_offset, *nb_clusters); 1082226c3c26SKevin Wolf 1083250196f1SKevin Wolf /* Allocate new clusters */ 1084250196f1SKevin Wolf trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); 1085250196f1SKevin Wolf if (*host_offset == 0) { 1086df021791SKevin Wolf int64_t cluster_offset = 1087df021791SKevin Wolf qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size); 1088250196f1SKevin Wolf if (cluster_offset < 0) { 1089250196f1SKevin Wolf return cluster_offset; 1090250196f1SKevin Wolf } 1091250196f1SKevin Wolf *host_offset = cluster_offset; 1092250196f1SKevin Wolf return 0; 1093df021791SKevin Wolf } else { 1094b6d36defSMax Reitz int64_t ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); 1095df021791SKevin Wolf if (ret < 0) { 1096df021791SKevin Wolf return ret; 1097df021791SKevin Wolf } 1098df021791SKevin Wolf *nb_clusters = ret; 
1099df021791SKevin Wolf return 0; 1100df021791SKevin Wolf } 1101250196f1SKevin Wolf } 1102250196f1SKevin Wolf 1103250196f1SKevin Wolf /* 110410f0ed8bSKevin Wolf * Allocates new clusters for an area that either is yet unallocated or needs a 110510f0ed8bSKevin Wolf * copy on write. If *host_offset is non-zero, clusters are only allocated if 110610f0ed8bSKevin Wolf * the new allocation can match the specified host offset. 110710f0ed8bSKevin Wolf * 1108411d62b0SKevin Wolf * Note that guest_offset may not be cluster aligned. In this case, the 1109411d62b0SKevin Wolf * returned *host_offset points to exact byte referenced by guest_offset and 1110411d62b0SKevin Wolf * therefore isn't cluster aligned as well. 111110f0ed8bSKevin Wolf * 111210f0ed8bSKevin Wolf * Returns: 111310f0ed8bSKevin Wolf * 0: if no clusters could be allocated. *bytes is set to 0, 111410f0ed8bSKevin Wolf * *host_offset is left unchanged. 111510f0ed8bSKevin Wolf * 111610f0ed8bSKevin Wolf * 1: if new clusters were allocated. *bytes may be decreased if the 111710f0ed8bSKevin Wolf * new allocation doesn't cover all of the requested area. 111810f0ed8bSKevin Wolf * *host_offset is updated to contain the host offset of the first 111910f0ed8bSKevin Wolf * newly allocated cluster. 
112010f0ed8bSKevin Wolf * 112110f0ed8bSKevin Wolf * -errno: in error cases 112210f0ed8bSKevin Wolf */ 112310f0ed8bSKevin Wolf static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, 1124c37f4cd7SKevin Wolf uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) 112510f0ed8bSKevin Wolf { 1126ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 112710f0ed8bSKevin Wolf int l2_index; 112810f0ed8bSKevin Wolf uint64_t *l2_table; 112910f0ed8bSKevin Wolf uint64_t entry; 1130b6d36defSMax Reitz uint64_t nb_clusters; 113110f0ed8bSKevin Wolf int ret; 113210f0ed8bSKevin Wolf 113310f0ed8bSKevin Wolf uint64_t alloc_cluster_offset; 113410f0ed8bSKevin Wolf 113510f0ed8bSKevin Wolf trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset, 113610f0ed8bSKevin Wolf *bytes); 113710f0ed8bSKevin Wolf assert(*bytes > 0); 113810f0ed8bSKevin Wolf 1139f5bc6350SKevin Wolf /* 1140f5bc6350SKevin Wolf * Calculate the number of clusters to look for. We stop at L2 table 1141f5bc6350SKevin Wolf * boundaries to keep things simple. 
1142f5bc6350SKevin Wolf */ 1143c37f4cd7SKevin Wolf nb_clusters = 1144c37f4cd7SKevin Wolf size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); 1145c37f4cd7SKevin Wolf 1146f5bc6350SKevin Wolf l2_index = offset_to_l2_index(s, guest_offset); 1147c37f4cd7SKevin Wolf nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); 1148b6d36defSMax Reitz assert(nb_clusters <= INT_MAX); 1149f5bc6350SKevin Wolf 115010f0ed8bSKevin Wolf /* Find L2 entry for the first involved cluster */ 115110f0ed8bSKevin Wolf ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index); 115210f0ed8bSKevin Wolf if (ret < 0) { 115310f0ed8bSKevin Wolf return ret; 115410f0ed8bSKevin Wolf } 115510f0ed8bSKevin Wolf 11563b8e2e26SKevin Wolf entry = be64_to_cpu(l2_table[l2_index]); 115710f0ed8bSKevin Wolf 115810f0ed8bSKevin Wolf /* For the moment, overwrite compressed clusters one by one */ 115910f0ed8bSKevin Wolf if (entry & QCOW_OFLAG_COMPRESSED) { 116010f0ed8bSKevin Wolf nb_clusters = 1; 116110f0ed8bSKevin Wolf } else { 11623b8e2e26SKevin Wolf nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index); 116310f0ed8bSKevin Wolf } 116410f0ed8bSKevin Wolf 1165ecdd5333SKevin Wolf /* This function is only called when there were no non-COW clusters, so if 1166ecdd5333SKevin Wolf * we can't find any unallocated or COW clusters either, something is 1167ecdd5333SKevin Wolf * wrong with our code. 
*/ 1168ecdd5333SKevin Wolf assert(nb_clusters > 0); 1169ecdd5333SKevin Wolf 1170a3f1afb4SAlberto Garcia qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); 117110f0ed8bSKevin Wolf 117210f0ed8bSKevin Wolf /* Allocate, if necessary at a given offset in the image file */ 1173411d62b0SKevin Wolf alloc_cluster_offset = start_of_cluster(s, *host_offset); 117483baa9a4SKevin Wolf ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset, 117510f0ed8bSKevin Wolf &nb_clusters); 117610f0ed8bSKevin Wolf if (ret < 0) { 117710f0ed8bSKevin Wolf goto fail; 117810f0ed8bSKevin Wolf } 117910f0ed8bSKevin Wolf 118083baa9a4SKevin Wolf /* Can't extend contiguous allocation */ 118183baa9a4SKevin Wolf if (nb_clusters == 0) { 118283baa9a4SKevin Wolf *bytes = 0; 118383baa9a4SKevin Wolf return 0; 118483baa9a4SKevin Wolf } 118583baa9a4SKevin Wolf 1186ff52aab2SMax Reitz /* !*host_offset would overwrite the image header and is reserved for "no 1187ff52aab2SMax Reitz * host offset preferred". If 0 was a valid host offset, it'd trigger the 1188ff52aab2SMax Reitz * following overlap check; do that now to avoid having an invalid value in 1189ff52aab2SMax Reitz * *host_offset. */ 1190ff52aab2SMax Reitz if (!alloc_cluster_offset) { 1191ff52aab2SMax Reitz ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset, 1192ff52aab2SMax Reitz nb_clusters * s->cluster_size); 1193ff52aab2SMax Reitz assert(ret < 0); 1194ff52aab2SMax Reitz goto fail; 1195ff52aab2SMax Reitz } 1196ff52aab2SMax Reitz 119710f0ed8bSKevin Wolf /* 119883baa9a4SKevin Wolf * Save info needed for meta data update. 119983baa9a4SKevin Wolf * 120010f0ed8bSKevin Wolf * requested_sectors: Number of sectors from the start of the first 120110f0ed8bSKevin Wolf * newly allocated cluster to the end of the (possibly shortened 120210f0ed8bSKevin Wolf * before) write request. 
120310f0ed8bSKevin Wolf * 120410f0ed8bSKevin Wolf * avail_sectors: Number of sectors from the start of the first 120510f0ed8bSKevin Wolf * newly allocated to the end of the last newly allocated cluster. 120610f0ed8bSKevin Wolf * 120710f0ed8bSKevin Wolf * nb_sectors: The number of sectors from the start of the first 120883baa9a4SKevin Wolf * newly allocated cluster to the end of the area that the write 120910f0ed8bSKevin Wolf * request actually writes to (excluding COW at the end) 121010f0ed8bSKevin Wolf */ 1211c37f4cd7SKevin Wolf int requested_sectors = 1212c37f4cd7SKevin Wolf (*bytes + offset_into_cluster(s, guest_offset)) 1213c37f4cd7SKevin Wolf >> BDRV_SECTOR_BITS; 121410f0ed8bSKevin Wolf int avail_sectors = nb_clusters 121510f0ed8bSKevin Wolf << (s->cluster_bits - BDRV_SECTOR_BITS); 1216c37f4cd7SKevin Wolf int alloc_n_start = offset_into_cluster(s, guest_offset) 1217c37f4cd7SKevin Wolf >> BDRV_SECTOR_BITS; 121810f0ed8bSKevin Wolf int nb_sectors = MIN(requested_sectors, avail_sectors); 121988c6588cSKevin Wolf QCowL2Meta *old_m = *m; 122010f0ed8bSKevin Wolf 122110f0ed8bSKevin Wolf *m = g_malloc0(sizeof(**m)); 122210f0ed8bSKevin Wolf 122310f0ed8bSKevin Wolf **m = (QCowL2Meta) { 122488c6588cSKevin Wolf .next = old_m, 122588c6588cSKevin Wolf 1226411d62b0SKevin Wolf .alloc_offset = alloc_cluster_offset, 122783baa9a4SKevin Wolf .offset = start_of_cluster(s, guest_offset), 122810f0ed8bSKevin Wolf .nb_clusters = nb_clusters, 122910f0ed8bSKevin Wolf .nb_available = nb_sectors, 123010f0ed8bSKevin Wolf 123110f0ed8bSKevin Wolf .cow_start = { 123210f0ed8bSKevin Wolf .offset = 0, 123310f0ed8bSKevin Wolf .nb_sectors = alloc_n_start, 123410f0ed8bSKevin Wolf }, 123510f0ed8bSKevin Wolf .cow_end = { 123610f0ed8bSKevin Wolf .offset = nb_sectors * BDRV_SECTOR_SIZE, 123710f0ed8bSKevin Wolf .nb_sectors = avail_sectors - nb_sectors, 123810f0ed8bSKevin Wolf }, 123910f0ed8bSKevin Wolf }; 124010f0ed8bSKevin Wolf qemu_co_queue_init(&(*m)->dependent_requests); 124110f0ed8bSKevin Wolf 
QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); 124210f0ed8bSKevin Wolf 1243411d62b0SKevin Wolf *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset); 1244c37f4cd7SKevin Wolf *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE) 1245c37f4cd7SKevin Wolf - offset_into_cluster(s, guest_offset)); 1246c37f4cd7SKevin Wolf assert(*bytes != 0); 124710f0ed8bSKevin Wolf 124810f0ed8bSKevin Wolf return 1; 124910f0ed8bSKevin Wolf 125010f0ed8bSKevin Wolf fail: 125110f0ed8bSKevin Wolf if (*m && (*m)->nb_clusters > 0) { 125210f0ed8bSKevin Wolf QLIST_REMOVE(*m, next_in_flight); 125310f0ed8bSKevin Wolf } 125410f0ed8bSKevin Wolf return ret; 125510f0ed8bSKevin Wolf } 125610f0ed8bSKevin Wolf 125710f0ed8bSKevin Wolf /* 125845aba42fSKevin Wolf * alloc_cluster_offset 125945aba42fSKevin Wolf * 1260250196f1SKevin Wolf * For a given offset on the virtual disk, find the cluster offset in qcow2 1261250196f1SKevin Wolf * file. If the offset is not found, allocate a new cluster. 126245aba42fSKevin Wolf * 1263250196f1SKevin Wolf * If the cluster was already allocated, m->nb_clusters is set to 0 and 1264a7912369SFrediano Ziglio * other fields in m are meaningless. 126545aba42fSKevin Wolf * 1266148da7eaSKevin Wolf * If the cluster is newly allocated, m->nb_clusters is set to the number of 126768d100e9SKevin Wolf * contiguous clusters that have been allocated. In this case, the other 126868d100e9SKevin Wolf * fields of m are valid and contain information about the first allocated 126968d100e9SKevin Wolf * cluster. 1270148da7eaSKevin Wolf * 127168d100e9SKevin Wolf * If the request conflicts with another write request in flight, the coroutine 127268d100e9SKevin Wolf * is queued and will be reentered when the dependency has completed. 
/*
 * alloc_cluster_offset
 *
 * For a given offset on the virtual disk, find the cluster offset in qcow2
 * file. If the offset is not found, allocate a new cluster.
 *
 * If the cluster was already allocated, m->nb_clusters is set to 0 and
 * other fields in m are meaningless.
 *
 * If the cluster is newly allocated, m->nb_clusters is set to the number of
 * contiguous clusters that have been allocated. In this case, the other
 * fields of m are valid and contain information about the first allocated
 * cluster.
 *
 * If the request conflicts with another write request in flight, the coroutine
 * is queued and will be reentered when the dependency has completed.
 *
 * Return 0 on success and -errno in error cases
 */
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
    int *num, uint64_t *host_offset, QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t start, remaining;
    uint64_t cluster_offset;
    uint64_t cur_bytes;
    int ret;

    trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *num);

    assert((offset & ~BDRV_SECTOR_MASK) == 0);

again:
    start = offset;
    remaining = (uint64_t)*num << BDRV_SECTOR_BITS;
    cluster_offset = 0;
    *host_offset = 0;
    cur_bytes = 0;
    *m = NULL;

    while (true) {

        if (!*host_offset) {
            *host_offset = start_of_cluster(s, cluster_offset);
        }

        assert(remaining >= cur_bytes);

        start += cur_bytes;
        remaining -= cur_bytes;
        cluster_offset += cur_bytes;

        if (remaining == 0) {
            break;
        }

        cur_bytes = remaining;

        /*
         * Now start gathering as many contiguous clusters as possible:
         *
         * 1. Check for overlaps with in-flight allocations
         *
         *      a) Overlap not in the first cluster -> shorten this request and
         *         let the caller handle the rest in its next loop iteration.
         *
         *      b) Real overlaps of two requests. Yield and restart the search
         *         for contiguous clusters (the situation could have changed
         *         while we were sleeping)
         *
         *      c) TODO: Request starts in the same cluster as the in-flight
         *         allocation ends. Shorten the COW of the in-fight allocation,
         *         set cluster_offset to write to the same cluster and set up
         *         the right synchronisation between the in-flight request and
         *         the new one.
         */
        ret = handle_dependencies(bs, start, &cur_bytes, m);
        if (ret == -EAGAIN) {
            /* Currently handle_dependencies() doesn't yield if we already had
             * an allocation. If it did, we would have to clean up the L2Meta
             * structs before starting over. */
            assert(*m == NULL);
            goto again;
        } else if (ret < 0) {
            return ret;
        } else if (cur_bytes == 0) {
            break;
        } else {
            /* handle_dependencies() may have decreased cur_bytes (shortened
             * the allocations below) so that the next dependency is processed
             * correctly during the next loop iteration. */
        }

        /*
         * 2. Count contiguous COPIED clusters.
         */
        ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            continue;
        } else if (cur_bytes == 0) {
            break;
        }

        /*
         * 3. If the request still hasn't completed, allocate new clusters,
         *    considering any cluster_offset of steps 1c or 2.
         */
        ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            continue;
        } else {
            assert(cur_bytes == 0);
            break;
        }
    }

    *num -= remaining >> BDRV_SECTOR_BITS;
    assert(*num > 0);
    assert(*host_offset != 0);

    return 0;
}

/* NOTE(review): decompress_buffer continues beyond this chunk; only the
 * visible head is reproduced here, unchanged in behavior. */
static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
                             const uint8_t *buf, int buf_size)
{
    z_stream strm1, *strm = &strm1;
    int ret, out_len;

    memset(strm, 0, sizeof(*strm));

    strm->next_in = (uint8_t *)buf;
    strm->avail_in = buf_size;
    strm->next_out = out_buf;
    strm->avail_out = out_buf_size;

    ret = inflateInit2(strm, -12);
    if
(ret != Z_OK) 139945aba42fSKevin Wolf return -1; 140045aba42fSKevin Wolf ret = inflate(strm, Z_FINISH); 140145aba42fSKevin Wolf out_len = strm->next_out - out_buf; 140245aba42fSKevin Wolf if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || 140345aba42fSKevin Wolf out_len != out_buf_size) { 140445aba42fSKevin Wolf inflateEnd(strm); 140545aba42fSKevin Wolf return -1; 140645aba42fSKevin Wolf } 140745aba42fSKevin Wolf inflateEnd(strm); 140845aba42fSKevin Wolf return 0; 140945aba42fSKevin Wolf } 141045aba42fSKevin Wolf 141166f82ceeSKevin Wolf int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) 141245aba42fSKevin Wolf { 1413ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 141445aba42fSKevin Wolf int ret, csize, nb_csectors, sector_offset; 141545aba42fSKevin Wolf uint64_t coffset; 141645aba42fSKevin Wolf 141745aba42fSKevin Wolf coffset = cluster_offset & s->cluster_offset_mask; 141845aba42fSKevin Wolf if (s->cluster_cache_offset != coffset) { 141945aba42fSKevin Wolf nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1; 142045aba42fSKevin Wolf sector_offset = coffset & 511; 142145aba42fSKevin Wolf csize = nb_csectors * 512 - sector_offset; 142266f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED); 14239a4f4c31SKevin Wolf ret = bdrv_read(bs->file->bs, coffset >> 9, s->cluster_data, 14249a4f4c31SKevin Wolf nb_csectors); 142545aba42fSKevin Wolf if (ret < 0) { 14268af36488SKevin Wolf return ret; 142745aba42fSKevin Wolf } 142845aba42fSKevin Wolf if (decompress_buffer(s->cluster_cache, s->cluster_size, 142945aba42fSKevin Wolf s->cluster_data + sector_offset, csize) < 0) { 14308af36488SKevin Wolf return -EIO; 143145aba42fSKevin Wolf } 143245aba42fSKevin Wolf s->cluster_cache_offset = coffset; 143345aba42fSKevin Wolf } 143445aba42fSKevin Wolf return 0; 143545aba42fSKevin Wolf } 14365ea929e3SKevin Wolf 14375ea929e3SKevin Wolf /* 14385ea929e3SKevin Wolf * This discards as many clusters of nb_clusters as possible at once (i.e. 
14395ea929e3SKevin Wolf * all clusters in the same L2 table) and returns the number of discarded 14405ea929e3SKevin Wolf * clusters. 14415ea929e3SKevin Wolf */ 14425ea929e3SKevin Wolf static int discard_single_l2(BlockDriverState *bs, uint64_t offset, 1443b6d36defSMax Reitz uint64_t nb_clusters, enum qcow2_discard_type type, 1444b6d36defSMax Reitz bool full_discard) 14455ea929e3SKevin Wolf { 1446ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 14473948d1d4SKevin Wolf uint64_t *l2_table; 14485ea929e3SKevin Wolf int l2_index; 14495ea929e3SKevin Wolf int ret; 14505ea929e3SKevin Wolf int i; 14515ea929e3SKevin Wolf 14523948d1d4SKevin Wolf ret = get_cluster_table(bs, offset, &l2_table, &l2_index); 14535ea929e3SKevin Wolf if (ret < 0) { 14545ea929e3SKevin Wolf return ret; 14555ea929e3SKevin Wolf } 14565ea929e3SKevin Wolf 14575ea929e3SKevin Wolf /* Limit nb_clusters to one L2 table */ 14585ea929e3SKevin Wolf nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); 1459b6d36defSMax Reitz assert(nb_clusters <= INT_MAX); 14605ea929e3SKevin Wolf 14615ea929e3SKevin Wolf for (i = 0; i < nb_clusters; i++) { 1462c883db0dSMax Reitz uint64_t old_l2_entry; 14635ea929e3SKevin Wolf 1464c883db0dSMax Reitz old_l2_entry = be64_to_cpu(l2_table[l2_index + i]); 1465a71835a0SKevin Wolf 1466a71835a0SKevin Wolf /* 1467808c4b6fSMax Reitz * If full_discard is false, make sure that a discarded area reads back 1468808c4b6fSMax Reitz * as zeroes for v3 images (we cannot do it for v2 without actually 1469808c4b6fSMax Reitz * writing a zero-filled buffer). We can skip the operation if the 1470808c4b6fSMax Reitz * cluster is already marked as zero, or if it's unallocated and we 1471808c4b6fSMax Reitz * don't have a backing file. 1472a71835a0SKevin Wolf * 1473a71835a0SKevin Wolf * TODO We might want to use bdrv_get_block_status(bs) here, but we're 1474a71835a0SKevin Wolf * holding s->lock, so that doesn't work today. 
1475808c4b6fSMax Reitz * 1476808c4b6fSMax Reitz * If full_discard is true, the sector should not read back as zeroes, 1477808c4b6fSMax Reitz * but rather fall through to the backing file. 1478a71835a0SKevin Wolf */ 1479c883db0dSMax Reitz switch (qcow2_get_cluster_type(old_l2_entry)) { 1480c883db0dSMax Reitz case QCOW2_CLUSTER_UNALLOCATED: 1481760e0063SKevin Wolf if (full_discard || !bs->backing) { 1482a71835a0SKevin Wolf continue; 1483a71835a0SKevin Wolf } 1484c883db0dSMax Reitz break; 1485a71835a0SKevin Wolf 1486c883db0dSMax Reitz case QCOW2_CLUSTER_ZERO: 1487808c4b6fSMax Reitz if (!full_discard) { 14885ea929e3SKevin Wolf continue; 1489808c4b6fSMax Reitz } 1490808c4b6fSMax Reitz break; 1491c883db0dSMax Reitz 1492c883db0dSMax Reitz case QCOW2_CLUSTER_NORMAL: 1493c883db0dSMax Reitz case QCOW2_CLUSTER_COMPRESSED: 1494c883db0dSMax Reitz break; 1495c883db0dSMax Reitz 1496c883db0dSMax Reitz default: 1497c883db0dSMax Reitz abort(); 14985ea929e3SKevin Wolf } 14995ea929e3SKevin Wolf 15005ea929e3SKevin Wolf /* First remove L2 entries */ 150172e80b89SAlberto Garcia qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); 1502808c4b6fSMax Reitz if (!full_discard && s->qcow_version >= 3) { 1503a71835a0SKevin Wolf l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); 1504a71835a0SKevin Wolf } else { 15055ea929e3SKevin Wolf l2_table[l2_index + i] = cpu_to_be64(0); 1506a71835a0SKevin Wolf } 15075ea929e3SKevin Wolf 15085ea929e3SKevin Wolf /* Then decrease the refcount */ 1509c883db0dSMax Reitz qcow2_free_any_clusters(bs, old_l2_entry, 1, type); 15105ea929e3SKevin Wolf } 15115ea929e3SKevin Wolf 1512a3f1afb4SAlberto Garcia qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); 15135ea929e3SKevin Wolf 15145ea929e3SKevin Wolf return nb_clusters; 15155ea929e3SKevin Wolf } 15165ea929e3SKevin Wolf 15175ea929e3SKevin Wolf int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, 1518808c4b6fSMax Reitz int nb_sectors, enum qcow2_discard_type type, bool full_discard) 
15195ea929e3SKevin Wolf { 1520ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 15215ea929e3SKevin Wolf uint64_t end_offset; 1522b6d36defSMax Reitz uint64_t nb_clusters; 15235ea929e3SKevin Wolf int ret; 15245ea929e3SKevin Wolf 15255ea929e3SKevin Wolf end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS); 15265ea929e3SKevin Wolf 15275ea929e3SKevin Wolf /* Round start up and end down */ 15285ea929e3SKevin Wolf offset = align_offset(offset, s->cluster_size); 1529ac95acdbSHu Tao end_offset = start_of_cluster(s, end_offset); 15305ea929e3SKevin Wolf 15315ea929e3SKevin Wolf if (offset > end_offset) { 15325ea929e3SKevin Wolf return 0; 15335ea929e3SKevin Wolf } 15345ea929e3SKevin Wolf 15355ea929e3SKevin Wolf nb_clusters = size_to_clusters(s, end_offset - offset); 15365ea929e3SKevin Wolf 15370b919faeSKevin Wolf s->cache_discards = true; 15380b919faeSKevin Wolf 15395ea929e3SKevin Wolf /* Each L2 table is handled by its own loop iteration */ 15405ea929e3SKevin Wolf while (nb_clusters > 0) { 1541808c4b6fSMax Reitz ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard); 15425ea929e3SKevin Wolf if (ret < 0) { 15430b919faeSKevin Wolf goto fail; 15445ea929e3SKevin Wolf } 15455ea929e3SKevin Wolf 15465ea929e3SKevin Wolf nb_clusters -= ret; 15475ea929e3SKevin Wolf offset += (ret * s->cluster_size); 15485ea929e3SKevin Wolf } 15495ea929e3SKevin Wolf 15500b919faeSKevin Wolf ret = 0; 15510b919faeSKevin Wolf fail: 15520b919faeSKevin Wolf s->cache_discards = false; 15530b919faeSKevin Wolf qcow2_process_discards(bs, ret); 15540b919faeSKevin Wolf 15550b919faeSKevin Wolf return ret; 15565ea929e3SKevin Wolf } 1557621f0589SKevin Wolf 1558621f0589SKevin Wolf /* 1559621f0589SKevin Wolf * This zeroes as many clusters of nb_clusters as possible at once (i.e. 1560621f0589SKevin Wolf * all clusters in the same L2 table) and returns the number of zeroed 1561621f0589SKevin Wolf * clusters. 
1562621f0589SKevin Wolf */ 1563621f0589SKevin Wolf static int zero_single_l2(BlockDriverState *bs, uint64_t offset, 1564b6d36defSMax Reitz uint64_t nb_clusters) 1565621f0589SKevin Wolf { 1566ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1567621f0589SKevin Wolf uint64_t *l2_table; 1568621f0589SKevin Wolf int l2_index; 1569621f0589SKevin Wolf int ret; 1570621f0589SKevin Wolf int i; 1571621f0589SKevin Wolf 1572621f0589SKevin Wolf ret = get_cluster_table(bs, offset, &l2_table, &l2_index); 1573621f0589SKevin Wolf if (ret < 0) { 1574621f0589SKevin Wolf return ret; 1575621f0589SKevin Wolf } 1576621f0589SKevin Wolf 1577621f0589SKevin Wolf /* Limit nb_clusters to one L2 table */ 1578621f0589SKevin Wolf nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); 1579b6d36defSMax Reitz assert(nb_clusters <= INT_MAX); 1580621f0589SKevin Wolf 1581621f0589SKevin Wolf for (i = 0; i < nb_clusters; i++) { 1582621f0589SKevin Wolf uint64_t old_offset; 1583621f0589SKevin Wolf 1584621f0589SKevin Wolf old_offset = be64_to_cpu(l2_table[l2_index + i]); 1585621f0589SKevin Wolf 1586621f0589SKevin Wolf /* Update L2 entries */ 158772e80b89SAlberto Garcia qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); 1588621f0589SKevin Wolf if (old_offset & QCOW_OFLAG_COMPRESSED) { 1589621f0589SKevin Wolf l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); 15906cfcb9b8SKevin Wolf qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); 1591621f0589SKevin Wolf } else { 1592621f0589SKevin Wolf l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO); 1593621f0589SKevin Wolf } 1594621f0589SKevin Wolf } 1595621f0589SKevin Wolf 1596a3f1afb4SAlberto Garcia qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); 1597621f0589SKevin Wolf 1598621f0589SKevin Wolf return nb_clusters; 1599621f0589SKevin Wolf } 1600621f0589SKevin Wolf 1601621f0589SKevin Wolf int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) 1602621f0589SKevin Wolf { 1603ff99129aSKevin Wolf 
BDRVQcow2State *s = bs->opaque; 1604b6d36defSMax Reitz uint64_t nb_clusters; 1605621f0589SKevin Wolf int ret; 1606621f0589SKevin Wolf 1607621f0589SKevin Wolf /* The zero flag is only supported by version 3 and newer */ 1608621f0589SKevin Wolf if (s->qcow_version < 3) { 1609621f0589SKevin Wolf return -ENOTSUP; 1610621f0589SKevin Wolf } 1611621f0589SKevin Wolf 1612621f0589SKevin Wolf /* Each L2 table is handled by its own loop iteration */ 1613621f0589SKevin Wolf nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS); 1614621f0589SKevin Wolf 16150b919faeSKevin Wolf s->cache_discards = true; 16160b919faeSKevin Wolf 1617621f0589SKevin Wolf while (nb_clusters > 0) { 1618621f0589SKevin Wolf ret = zero_single_l2(bs, offset, nb_clusters); 1619621f0589SKevin Wolf if (ret < 0) { 16200b919faeSKevin Wolf goto fail; 1621621f0589SKevin Wolf } 1622621f0589SKevin Wolf 1623621f0589SKevin Wolf nb_clusters -= ret; 1624621f0589SKevin Wolf offset += (ret * s->cluster_size); 1625621f0589SKevin Wolf } 1626621f0589SKevin Wolf 16270b919faeSKevin Wolf ret = 0; 16280b919faeSKevin Wolf fail: 16290b919faeSKevin Wolf s->cache_discards = false; 16300b919faeSKevin Wolf qcow2_process_discards(bs, ret); 16310b919faeSKevin Wolf 16320b919faeSKevin Wolf return ret; 1633621f0589SKevin Wolf } 163432b6444dSMax Reitz 163532b6444dSMax Reitz /* 163632b6444dSMax Reitz * Expands all zero clusters in a specific L1 table (or deallocates them, for 163732b6444dSMax Reitz * non-backed non-pre-allocated zero clusters). 163832b6444dSMax Reitz * 16394057a2b2SMax Reitz * l1_entries and *visited_l1_entries are used to keep track of progress for 16404057a2b2SMax Reitz * status_cb(). l1_entries contains the total number of L1 entries and 16414057a2b2SMax Reitz * *visited_l1_entries counts all visited L1 entries. 
 */
static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                                      int l1_size, int64_t *visited_l1_entries,
                                      int64_t l1_entries,
                                      BlockDriverAmendStatusCB *status_cb,
                                      void *cb_opaque)
{
    BDRVQcow2State *s = bs->opaque;
    /* The active L1 goes through the L2 cache; inactive (snapshot) L1 tables
     * are read from and written back to disk directly */
    bool is_active_l1 = (l1_table == s->l1_table);
    uint64_t *l2_table = NULL;
    int ret;
    int i, j;

    if (!is_active_l1) {
        /* inactive L2 tables require a buffer to be stored in when loading
         * them from disk */
        l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size);
        if (l2_table == NULL) {
            return -ENOMEM;
        }
    }

    for (i = 0; i < l1_size; i++) {
        uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
        bool l2_dirty = false;
        uint64_t l2_refcount;

        if (!l2_offset) {
            /* unallocated L1 entry: nothing to expand, just report progress */
            (*visited_l1_entries)++;
            if (status_cb) {
                status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
            }
            continue;
        }

        /* An unaligned L2 table offset indicates image corruption */
        if (offset_into_cluster(s, l2_offset)) {
            qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
                                    PRIx64 " unaligned (L1 index: %#x)",
                                    l2_offset, i);
            ret = -EIO;
            goto fail;
        }

        if (is_active_l1) {
            /* get active L2 tables from cache */
            ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
                    (void **)&l2_table);
        } else {
            /* load inactive L2 tables from disk */
            ret = bdrv_read(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
                    (void *)l2_table, s->cluster_sectors);
        }
        if (ret < 0) {
            goto fail;
        }

        /* Needed to decide whether the L2 table (and the COPIED flag) may be
         * updated in place or must be copied-on-write */
        ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
                                 &l2_refcount);
        if (ret < 0) {
            goto fail;
        }

        for (j = 0; j < s->l2_size; j++) {
            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
            int64_t offset = l2_entry & L2E_OFFSET_MASK;
            int cluster_type = qcow2_get_cluster_type(l2_entry);
            /* A zero cluster with a nonzero offset is a pre-allocated zero
             * cluster: the data cluster already exists on disk */
            bool preallocated = offset != 0;

            if (cluster_type != QCOW2_CLUSTER_ZERO) {
                continue;
            }

            if (!preallocated) {
                if (!bs->backing) {
                    /* not backed; therefore we can simply deallocate the
                     * cluster */
                    l2_table[j] = 0;
                    l2_dirty = true;
                    continue;
                }

                /* Backed image: a real zero-filled cluster must be allocated
                 * so the backing file no longer shows through */
                offset = qcow2_alloc_clusters(bs, s->cluster_size);
                if (offset < 0) {
                    ret = offset;
                    goto fail;
                }

                if (l2_refcount > 1) {
                    /* For shared L2 tables, set the refcount accordingly (it is
                     * already 1 and needs to be l2_refcount) */
                    ret = qcow2_update_cluster_refcount(bs,
                            offset >> s->cluster_bits,
                            refcount_diff(1, l2_refcount), false,
                            QCOW2_DISCARD_OTHER);
                    if (ret < 0) {
                        qcow2_free_clusters(bs, offset, s->cluster_size,
                                            QCOW2_DISCARD_OTHER);
                        goto fail;
                    }
                }
            }

            /* An unaligned data cluster offset indicates image corruption;
             * free the cluster again if we just allocated it ourselves */
            if (offset_into_cluster(s, offset)) {
                qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset "
                                        "%#" PRIx64 " unaligned (L2 offset: %#"
                                        PRIx64 ", L2 index: %#x)", offset,
                                        l2_offset, j);
                if (!preallocated) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
                                        QCOW2_DISCARD_ALWAYS);
                }
                ret = -EIO;
                goto fail;
            }

            ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
            if (ret < 0) {
                if (!preallocated) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
                                        QCOW2_DISCARD_ALWAYS);
                }
                goto fail;
            }

            /* Actually materialize the zeroes on disk */
            ret = bdrv_write_zeroes(bs->file->bs, offset / BDRV_SECTOR_SIZE,
                                    s->cluster_sectors, 0);
            if (ret < 0) {
                if (!preallocated) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
                                        QCOW2_DISCARD_ALWAYS);
                }
                goto fail;
            }

            /* COPIED may only be set if this L2 table is not shared */
            if (l2_refcount == 1) {
                l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
            } else {
                l2_table[j] = cpu_to_be64(offset);
            }
            l2_dirty = true;
        }

        if (is_active_l1) {
            if (l2_dirty) {
                qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
                qcow2_cache_depends_on_flush(s->l2_table_cache);
            }
            qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
        } else {
            if (l2_dirty) {
                ret = qcow2_pre_write_overlap_check(bs,
                        QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, l2_offset,
                        s->cluster_size);
                if (ret < 0) {
                    goto fail;
                }

                ret = bdrv_write(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
                                 (void *)l2_table, s->cluster_sectors);
                if (ret < 0) {
                    goto fail;
                }
            }
        }

        (*visited_l1_entries)++;
        if (status_cb) {
            status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
        }
    }

    ret = 0;

fail:
    /* l2_table is either a cache reference (active L1) or our own buffer
     * (inactive L1); release whichever we hold */
    if (l2_table) {
        if (!is_active_l1) {
            qemu_vfree(l2_table);
        } else {
            qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
        }
    }
    return ret;
}

/*
 * For backed images, expands all zero clusters on the image. For non-backed
 * images, deallocates all non-pre-allocated zero clusters (and claims the
 * allocation for pre-allocated ones). This is important for downgrading to a
 * qcow2 version which doesn't yet support metadata zero clusters.
 */
int qcow2_expand_zero_clusters(BlockDriverState *bs,
                               BlockDriverAmendStatusCB *status_cb,
                               void *cb_opaque)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t *l1_table = NULL;
    int64_t l1_entries = 0, visited_l1_entries = 0;
    int ret;
    int i, j;

    /* Total entry count is only needed for progress reporting */
    if (status_cb) {
        l1_entries = s->l1_size;
        for (i = 0; i < s->nb_snapshots; i++) {
            l1_entries += s->snapshots[i].l1_size;
        }
    }

    /* Active L1 table first */
    ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
                                     &visited_l1_entries, l1_entries,
                                     status_cb, cb_opaque);
    if (ret < 0) {
        goto fail;
    }

    /* Inactive L1 tables may point to active L2 tables - therefore it is
     * necessary to flush the L2 table cache before trying to access the L2
     * tables pointed to by inactive L1 entries (else we might try to expand
     * zero clusters that have already been expanded); furthermore, it is also
     * necessary to empty the L2 table cache, since it may contain tables which
     * are now going to be modified directly on disk, bypassing the cache.
     * qcow2_cache_empty() does both for us. */
    ret = qcow2_cache_empty(bs, s->l2_table_cache);
    if (ret < 0) {
        goto fail;
    }

    /* Then each snapshot's (inactive) L1 table, read from disk */
    for (i = 0; i < s->nb_snapshots; i++) {
        int l1_sectors = (s->snapshots[i].l1_size * sizeof(uint64_t) +
                BDRV_SECTOR_SIZE - 1) / BDRV_SECTOR_SIZE;

        l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);

        ret = bdrv_read(bs->file->bs,
                        s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE,
                        (void *)l1_table, l1_sectors);
        if (ret < 0) {
            goto fail;
        }

        /* On-disk L1 entries are big-endian; convert in place */
        for (j = 0; j < s->snapshots[i].l1_size; j++) {
            be64_to_cpus(&l1_table[j]);
        }

        ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size,
                                         &visited_l1_entries, l1_entries,
                                         status_cb, cb_opaque);
        if (ret < 0) {
            goto fail;
        }
    }

    ret = 0;

fail:
    g_free(l1_table);
    return ret;
}