/*
 * Block driver for the QCOW version 2 format
 *
 * Copyright (c) 2004-2006 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include <zlib.h>

#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "block/qcow2.h"
#include "qemu/bswap.h"
#include "trace.h"

int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                        bool exact_size)
{
    BDRVQcow2State *s = bs->opaque;
    int new_l1_size2, ret, i;
    uint64_t *new_l1_table;
    int64_t old_l1_table_offset, old_l1_size;
    int64_t new_l1_table_offset, new_l1_size;
    uint8_t data[12];

    if (min_size <= s->l1_size)
        return 0;

    /* Do a sanity check on min_size before trying to calculate new_l1_size
     * (this prevents overflows during the while loop for the calculation of
     * new_l1_size) */
    if (min_size > INT_MAX / sizeof(uint64_t)) {
        return -EFBIG;
    }

    if (exact_size) {
        new_l1_size = min_size;
    } else {
        /* Bump size up to reduce the number of times we have to grow */
        new_l1_size = s->l1_size;
        if (new_l1_size == 0) {
            new_l1_size = 1;
        }
        while (min_size > new_l1_size) {
            new_l1_size = (new_l1_size * 3 + 1) / 2;
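            /* grow by roughly 1.5x per iteration so the number of resizes
             * stays logarithmic in the final table size */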
        }
    }

    if (new_l1_size > INT_MAX / sizeof(uint64_t)) {
        return -EFBIG;
    }

#ifdef DEBUG_ALLOC2
    fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
            s->l1_size, new_l1_size);
#endif

    new_l1_size2 = sizeof(uint64_t) * new_l1_size;
    new_l1_table = qemu_try_blockalign(bs->file->bs,
                                       align_offset(new_l1_size2, 512));
    if (new_l1_table == NULL) {
        return -ENOMEM;
    }
    memset(new_l1_table, 0, align_offset(new_l1_size2, 512));

    memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));

    /* write new table (align to cluster) */
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
    new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
    if (new_l1_table_offset < 0) {
        qemu_vfree(new_l1_table);
        return new_l1_table_offset;
    }

    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* the L1 position has not yet been updated, so these clusters must
     * indeed be completely free */
    ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset,
                                        new_l1_size2);
    if (ret < 0) {
        goto fail;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
    for(i = 0; i < s->l1_size; i++)
        new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset,
                           new_l1_table, new_l1_size2);
    if (ret < 0)
        goto fail;
    for(i = 0; i < s->l1_size; i++)
        new_l1_table[i] = be64_to_cpu(new_l1_table[i]);

    /* set new table */
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
    cpu_to_be32w((uint32_t*)data, new_l1_size);
    stq_be_p(data + 4, new_l1_table_offset);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size),
                           data, sizeof(data));
    if (ret < 0) {
        goto fail;
    }
    qemu_vfree(s->l1_table);
    old_l1_table_offset = s->l1_table_offset;
    s->l1_table_offset = new_l1_table_offset;
    s->l1_table = new_l1_table;
    old_l1_size = s->l1_size;
    s->l1_size = new_l1_size;
    qcow2_free_clusters(bs, old_l1_table_offset, old_l1_size * sizeof(uint64_t),
                        QCOW2_DISCARD_OTHER);
    return 0;
 fail:
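    /* error path: release the new table buffer and the clusters allocated
     * for it; the old L1 table stays in place */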
    qemu_vfree(new_l1_table);
    qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
                        QCOW2_DISCARD_OTHER);
    return ret;
}

/*
 * l2_load
 *
 * Loads a L2 table into memory. If the table is in the cache, the cache
 * is used; otherwise the L2 table is loaded from the image file.
 *
 * Returns 0 on success and, via *l2_table, a pointer to the L2 table;
 * returns a negative errno value if the read from the image file failed.
 */

static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
                   uint64_t **l2_table)
{
    BDRVQcow2State *s = bs->opaque;

    return qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
                           (void **)l2_table);
}

/*
 * Writes one sector of the L1 table to the disk (can't update single entries
 * and we really don't want bdrv_pread to perform a read-modify-write)
 */
#define L1_ENTRIES_PER_SECTOR (512 / 8)
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t buf[L1_ENTRIES_PER_SECTOR] = { 0 };
    int l1_start_index;
    int i, ret;

    l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
    for (i = 0; i < L1_ENTRIES_PER_SECTOR && l1_start_index + i < s->l1_size;
         i++)
    {
        buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
    }

    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
            s->l1_table_offset + 8 * l1_start_index, sizeof(buf));
    if (ret < 0) {
        return ret;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
    ret = bdrv_pwrite_sync(bs->file,
                           s->l1_table_offset + 8 * l1_start_index,
                           buf, sizeof(buf));
    if (ret < 0) {
        return ret;
    }

    return 0;
}

/*
 * l2_allocate
 *
 * Allocate a new l2 entry in the file. If l1_index points to an already
 * used entry in the L2 table (i.e. we are doing a copy on write for the L2
 * table) copy the contents of the old L2 table into the newly allocated one.
 * Otherwise the new table is initialized with zeros.
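 *
 * Returns 0 on success (with *table pointing to the new L2 table) and
 * -errno on failure.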
 *
 */

static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t old_l2_offset;
    uint64_t *l2_table = NULL;
    int64_t l2_offset;
    int ret;

    old_l2_offset = s->l1_table[l1_index];

    trace_qcow2_l2_allocate(bs, l1_index);

    /* allocate a new l2 entry */

    l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
    if (l2_offset < 0) {
        ret = l2_offset;
        goto fail;
    }

    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* allocate a new entry in the l2 cache */

    trace_qcow2_l2_allocate_get_empty(bs, l1_index);
    ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
    if (ret < 0) {
        goto fail;
    }

    l2_table = *table;

    if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
        /* if there was no old l2 table, clear the new table */
        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
    } else {
        uint64_t* old_table;

        /* if there was an old l2 table, read it from the disk */
        BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
        ret = qcow2_cache_get(bs, s->l2_table_cache,
                              old_l2_offset & L1E_OFFSET_MASK,
                              (void**) &old_table);
        if (ret < 0) {
            goto fail;
        }

        memcpy(l2_table, old_table, s->cluster_size);

        qcow2_cache_put(bs, s->l2_table_cache, (void **) &old_table);
    }

    /* write the l2 table to the file */
    BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);

    trace_qcow2_l2_allocate_write_l2(bs, l1_index);
    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
    ret = qcow2_cache_flush(bs, s->l2_table_cache);
    if (ret < 0) {
        goto fail;
    }

    /* update the L1 entry */
    trace_qcow2_l2_allocate_write_l1(bs, l1_index);
    s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
    ret = qcow2_write_l1_entry(bs, l1_index);
    if (ret < 0) {
        goto fail;
    }

    *table = l2_table;
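    /* the cache reference to the new table is handed over to the caller */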
    trace_qcow2_l2_allocate_done(bs, l1_index, 0);
    return 0;

fail:
    trace_qcow2_l2_allocate_done(bs, l1_index, ret);
    if (l2_table != NULL) {
        qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
    }
    s->l1_table[l1_index] = old_l2_offset;
    if (l2_offset > 0) {
        qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
                            QCOW2_DISCARD_ALWAYS);
    }
    return ret;
}

/*
 * Checks how many clusters in a given L2 table are contiguous in the image
 * file. As soon as one of the flags in the bitmask stop_flags changes compared
 * to the first cluster, the search is stopped and the cluster is not counted
 * as contiguous. (This allows it, for example, to stop at the first compressed
 * cluster which may require a different handling)
 */
static int count_contiguous_clusters(int nb_clusters, int cluster_size,
        uint64_t *l2_table, uint64_t stop_flags)
{
    int i;
    uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
    uint64_t first_entry = be64_to_cpu(l2_table[0]);
    uint64_t offset = first_entry & mask;

    if (!offset)
        return 0;

    assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL);

    for (i = 0; i < nb_clusters; i++) {
        uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
        if (offset + (uint64_t) i * cluster_size != l2_entry) {
            break;
        }
    }

    return i;
}

static int count_contiguous_clusters_by_type(int nb_clusters,
                                             uint64_t *l2_table,
                                             int wanted_type)
{
    int i;

    for (i = 0; i < nb_clusters; i++) {
        int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));

        if (type != wanted_type) {
            break;
        }
    }

    return i;
}

/* The crypt function is compatible with the linux cryptoloop
   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
   supported */
int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
                          uint8_t *out_buf, const uint8_t *in_buf,
                          int nb_sectors, bool enc,
                          Error **errp)
{
    union {
        uint64_t ll[2];
        uint8_t b[16];
    } ivec;
    int i;
    int ret;

    for(i = 0; i < nb_sectors; i++) {
        ivec.ll[0] = cpu_to_le64(sector_num);
        ivec.ll[1] = 0;
        if (qcrypto_cipher_setiv(s->cipher,
                                 ivec.b, G_N_ELEMENTS(ivec.b),
                                 errp) < 0) {
            return -1;
        }
        if (enc) {
            ret = qcrypto_cipher_encrypt(s->cipher,
                                         in_buf,
                                         out_buf,
                                         512,
                                         errp);
        } else {
            ret = qcrypto_cipher_decrypt(s->cipher,
                                         in_buf,
                                         out_buf,
                                         512,
                                         errp);
        }
        if (ret < 0) {
            return -1;
        }
        sector_num++;
        in_buf += 512;
        out_buf += 512;
    }
    return 0;
}

static int coroutine_fn do_perform_cow(BlockDriverState *bs,
                                       uint64_t src_cluster_offset,
                                       uint64_t cluster_offset,
                                       int offset_in_cluster,
                                       int bytes)
{
    BDRVQcow2State *s = bs->opaque;
    QEMUIOVector qiov;
    struct iovec iov;
    int ret;

    iov.iov_len = bytes;
    iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
    if (iov.iov_base == NULL) {
        return -ENOMEM;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);

    BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);

    if (!bs->drv) {
        ret = -ENOMEDIUM;
        goto out;
    }

    /* Call .bdrv_co_preadv() directly instead of using the public block-layer
     * interface. This avoids double I/O throttling and request tracking,
     * which can lead to deadlock when block layer copy-on-read is enabled.
     */
    ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster,
                                  bytes, &qiov, 0);
    if (ret < 0) {
        goto out;
    }

    if (bs->encrypted) {
        Error *err = NULL;
        int64_t sector = (cluster_offset + offset_in_cluster)
                         >> BDRV_SECTOR_BITS;
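        /* encrypt the copied data in place before it is written below */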
        assert(s->cipher);
        assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
        assert((bytes & ~BDRV_SECTOR_MASK) == 0);
        if (qcow2_encrypt_sectors(s, sector, iov.iov_base, iov.iov_base,
                                  bytes >> BDRV_SECTOR_BITS, true, &err) < 0) {
            ret = -EIO;
            error_free(err);
            goto out;
        }
    }

    ret = qcow2_pre_write_overlap_check(bs, 0,
            cluster_offset + offset_in_cluster, bytes);
    if (ret < 0) {
        goto out;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
    ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster,
                          bytes, &qiov, 0);
    if (ret < 0) {
        goto out;
    }

    ret = 0;
out:
    qemu_vfree(iov.iov_base);
    return ret;
}


/*
 * get_cluster_offset
 *
 * For a given offset of the virtual disk, find the cluster type and offset in
 * the qcow2 file. The offset is stored in *cluster_offset.
 *
 * On entry, *bytes is the maximum number of contiguous bytes starting at
 * offset that we are interested in.
 *
 * On exit, *bytes is the number of bytes starting at offset that have the same
 * cluster type and (if applicable) are stored contiguously in the image file.
 * Compressed clusters are always returned one by one.
 *
 * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
 * cases.
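 *
 * On return, *cluster_offset is 0 for unallocated and zero clusters, the host
 * cluster offset for normal clusters, and the masked compressed-cluster
 * descriptor for compressed clusters.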
 */
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
                             unsigned int *bytes, uint64_t *cluster_offset)
{
    BDRVQcow2State *s = bs->opaque;
    unsigned int l2_index;
    uint64_t l1_index, l2_offset, *l2_table;
    int l1_bits, c;
    unsigned int offset_in_cluster, nb_clusters;
    uint64_t bytes_available, bytes_needed;
    int ret;

    offset_in_cluster = offset_into_cluster(s, offset);
    bytes_needed = (uint64_t) *bytes + offset_in_cluster;

    l1_bits = s->l2_bits + s->cluster_bits;

    /* compute how many bytes there are between the start of the cluster
     * containing offset and the end of the l1 entry */
    bytes_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1))
                    + offset_in_cluster;

    if (bytes_needed > bytes_available) {
        bytes_needed = bytes_available;
    }
    assert(bytes_needed <= INT_MAX);

    *cluster_offset = 0;

    /* seek to the l2 offset in the l1 table */

    l1_index = offset >> l1_bits;
    if (l1_index >= s->l1_size) {
        ret = QCOW2_CLUSTER_UNALLOCATED;
        goto out;
    }

    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
    if (!l2_offset) {
        ret = QCOW2_CLUSTER_UNALLOCATED;
        goto out;
    }

    if (offset_into_cluster(s, l2_offset)) {
        qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
                                " unaligned (L1 index: %#" PRIx64 ")",
                                l2_offset, l1_index);
        return -EIO;
    }

    /* load the l2 table in memory */

    ret = l2_load(bs, l2_offset, &l2_table);
    if (ret < 0) {
        return ret;
    }

    /* find the cluster offset for the given disk offset */

    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
    *cluster_offset = be64_to_cpu(l2_table[l2_index]);

    /* bytes_needed <= INT_MAX, thus nb_clusters <= INT_MAX, too */
    nb_clusters = size_to_clusters(s, bytes_needed);

    ret = qcow2_get_cluster_type(*cluster_offset);
    switch (ret) {
    case QCOW2_CLUSTER_COMPRESSED:
        /* Compressed clusters can only be processed one by one */
        c = 1;
        *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
        break;
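    /* zero clusters are only valid in version 3 or newer images */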
    case QCOW2_CLUSTER_ZERO:
        if (s->qcow_version < 3) {
            qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
                                    " in pre-v3 image (L2 offset: %#" PRIx64
                                    ", L2 index: %#x)", l2_offset, l2_index);
            ret = -EIO;
            goto fail;
        }
        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
                                              QCOW2_CLUSTER_ZERO);
        *cluster_offset = 0;
        break;
    case QCOW2_CLUSTER_UNALLOCATED:
        /* how many empty clusters ? */
        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
                                              QCOW2_CLUSTER_UNALLOCATED);
        *cluster_offset = 0;
        break;
    case QCOW2_CLUSTER_NORMAL:
        /* how many allocated clusters ? */
        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
                                      &l2_table[l2_index], QCOW_OFLAG_ZERO);
        *cluster_offset &= L2E_OFFSET_MASK;
        if (offset_into_cluster(s, *cluster_offset)) {
            qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset %#"
                                    PRIx64 " unaligned (L2 offset: %#" PRIx64
                                    ", L2 index: %#x)", *cluster_offset,
                                    l2_offset, l2_index);
            ret = -EIO;
            goto fail;
        }
        break;
    default:
        abort();
    }

    qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);

    bytes_available = (c * s->cluster_size);

out:
    if (bytes_available > bytes_needed) {
        bytes_available = bytes_needed;
    }

    *bytes = bytes_available - offset_in_cluster;

    return ret;

fail:
    qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
    return ret;
}

/*
 * get_cluster_table
 *
 * for a given disk offset, load (and allocate if needed)
 * the l2 table.
 *
 * the l2 table offset in the qcow2 file and the cluster index
 * in the l2 table are given to the caller.
 *
 * Returns 0 on success, -errno in failure case
 */
static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
                             uint64_t **new_l2_table,
                             int *new_l2_index)
{
    BDRVQcow2State *s = bs->opaque;
    unsigned int l2_index;
    uint64_t l1_index, l2_offset;
    uint64_t *l2_table = NULL;
    int ret;

    /* seek to the l2 offset in the l1 table */

    l1_index = offset >> (s->l2_bits + s->cluster_bits);
    if (l1_index >= s->l1_size) {
        ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
        if (ret < 0) {
            return ret;
        }
    }

    assert(l1_index < s->l1_size);
    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
    if (offset_into_cluster(s, l2_offset)) {
        qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
                                " unaligned (L1 index: %#" PRIx64 ")",
                                l2_offset, l1_index);
        return -EIO;
    }

    /* seek the l2 table of the given l2 offset */

    if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) {
        /* load the l2 table in memory */
        ret = l2_load(bs, l2_offset, &l2_table);
        if (ret < 0) {
            return ret;
        }
    } else {
        /* First allocate a new L2 table (and do COW if needed) */
        ret = l2_allocate(bs, l1_index, &l2_table);
        if (ret < 0) {
            return ret;
        }

        /* Then decrease the refcount of the old table */
        if (l2_offset) {
            qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
                                QCOW2_DISCARD_OTHER);
        }
    }

    /* find the cluster offset for the given disk offset */

    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);

    *new_l2_table = l2_table;
    *new_l2_index = l2_index;

    return 0;
}

/*
 * alloc_compressed_cluster_offset
 *
 * For a given offset of the disk image, return cluster offset in
 * qcow2 file.
 *
 * If the offset is not found, allocate a new compressed cluster.
 *
 * Return the cluster offset if successful,
 * Return 0, otherwise.
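 *
 * A return value of 0 covers both error cases and clusters that are already
 * allocated (compressed clusters can never overwrite existing data).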
 *
 */

uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                               uint64_t offset,
                                               int compressed_size)
{
    BDRVQcow2State *s = bs->opaque;
    int l2_index, ret;
    uint64_t *l2_table;
    int64_t cluster_offset;
    int nb_csectors;

    ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
    if (ret < 0) {
        return 0;
    }

    /* Compression can't overwrite anything. Fail if the cluster was already
     * allocated. */
    cluster_offset = be64_to_cpu(l2_table[l2_index]);
    if (cluster_offset & L2E_OFFSET_MASK) {
        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
        return 0;
    }

    cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
    if (cluster_offset < 0) {
        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
        return 0;
    }

    nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
                  (cluster_offset >> 9);

    cluster_offset |= QCOW_OFLAG_COMPRESSED |
                      ((uint64_t)nb_csectors << s->csize_shift);

    /* update L2 table */

    /* compressed clusters never have the copied flag */

    BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
    l2_table[l2_index] = cpu_to_be64(cluster_offset);
    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);

    return cluster_offset;
}

static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
{
    BDRVQcow2State *s = bs->opaque;
    int ret;

    if (r->nb_bytes == 0) {
        return 0;
    }

    qemu_co_mutex_unlock(&s->lock);
    ret = do_perform_cow(bs, m->offset, m->alloc_offset, r->offset, r->nb_bytes);
    qemu_co_mutex_lock(&s->lock);

    if (ret < 0) {
        return ret;
    }

    /*
     * Before we update the L2 table to actually point to the new cluster, we
     * need to be sure that the refcounts have been increased and COW was
     * handled.
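     * The flush dependency recorded below ensures that the updated L2 entries
     * are only written back once the image file has been flushed.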
     */
    qcow2_cache_depends_on_flush(s->l2_table_cache);

    return 0;
}

int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
{
    BDRVQcow2State *s = bs->opaque;
    int i, j = 0, l2_index, ret;
    uint64_t *old_cluster, *l2_table;
    uint64_t cluster_offset = m->alloc_offset;

    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
    assert(m->nb_clusters > 0);

    old_cluster = g_try_new(uint64_t, m->nb_clusters);
    if (old_cluster == NULL) {
        ret = -ENOMEM;
        goto err;
    }

    /* copy content of unmodified sectors */
    ret = perform_cow(bs, m, &m->cow_start);
    if (ret < 0) {
        goto err;
    }

    ret = perform_cow(bs, m, &m->cow_end);
    if (ret < 0) {
        goto err;
    }

    /* Update L2 table. */
    if (s->use_lazy_refcounts) {
        qcow2_mark_dirty(bs);
    }
    if (qcow2_need_accurate_refcounts(s)) {
        qcow2_cache_set_dependency(bs, s->l2_table_cache,
                                   s->refcount_block_cache);
    }

    ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
    if (ret < 0) {
        goto err;
    }
    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);

    assert(l2_index + m->nb_clusters <= s->l2_size);
    for (i = 0; i < m->nb_clusters; i++) {
        /* if two concurrent writes happen to the same unallocated cluster,
         * each write allocates a separate cluster and writes data concurrently.
         * The first one to complete updates the L2 table with a pointer to its
         * cluster; the second one has to do RMW (which is done above by
         * perform_cow()), update the L2 table with its cluster pointer and free
         * the old cluster. This is what this loop does */
        if (l2_table[l2_index + i] != 0) {
            old_cluster[j++] = l2_table[l2_index + i];
        }

        l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
                    (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
    }


    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);

    /*
     * If this was a COW, we need to decrease the refcount of the old cluster.
     *
     * Don't discard clusters that reach a refcount of 0 (e.g. compressed
     * clusters), the next write will reuse them anyway.
     */
    if (j != 0) {
        for (i = 0; i < j; i++) {
            qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
                                    QCOW2_DISCARD_NEVER);
        }
    }

    ret = 0;
err:
    g_free(old_cluster);
    return ret;
}

/*
 * Returns the number of contiguous clusters that can be used for an allocating
 * write, but require COW to be performed (this includes yet unallocated space,
 * which must be copied from the backing file)
 */
static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,
    uint64_t *l2_table, int l2_index)
{
    int i;

    for (i = 0; i < nb_clusters; i++) {
        uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
        int cluster_type = qcow2_get_cluster_type(l2_entry);

        switch(cluster_type) {
        case QCOW2_CLUSTER_NORMAL:
            if (l2_entry & QCOW_OFLAG_COPIED) {
                goto out;
            }
            break;
        case QCOW2_CLUSTER_UNALLOCATED:
        case QCOW2_CLUSTER_COMPRESSED:
        case QCOW2_CLUSTER_ZERO:
            break;
        default:
            abort();
        }
    }

out:
    assert(i <= nb_clusters);
    return i;
}

/*
 * Check if there already is an AIO write request in flight which allocates
 * the same cluster. In this case we need to wait until the previous
 * request has completed and updated the L2 table accordingly.
 *
 * Returns:
 *   0       if there was no dependency. *cur_bytes indicates the number of
 *           bytes from guest_offset that can be read before the next
 *           dependency must be processed (or the request is complete)
 *
 *   -EAGAIN if we had to wait for another request, previously gathered
 *           information on cluster allocation may be invalid now. The caller
 *           must start over anyway, so consider *cur_bytes undefined.
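 *
 * If the request only partially overlaps an in-flight allocation, *cur_bytes
 * is shortened so that the non-overlapping prefix can be handled first.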
 */
static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
    uint64_t *cur_bytes, QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    QCowL2Meta *old_alloc;
    uint64_t bytes = *cur_bytes;

    QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {

        uint64_t start = guest_offset;
        uint64_t end = start + bytes;
        uint64_t old_start = l2meta_cow_start(old_alloc);
        uint64_t old_end = l2meta_cow_end(old_alloc);

        if (end <= old_start || start >= old_end) {
            /* No intersection */
        } else {
            if (start < old_start) {
                /* Stop at the start of a running allocation */
                bytes = old_start - start;
            } else {
                bytes = 0;
            }

            /* Stop if already an l2meta exists. After yielding, it wouldn't
             * be valid any more, so we'd have to clean up the old L2Metas
             * and deal with requests depending on them before starting to
             * gather new ones. Not worth the trouble. */
            if (bytes == 0 && *m) {
                *cur_bytes = 0;
                return 0;
            }

            if (bytes == 0) {
                /* Wait for the dependency to complete. We need to recheck
                 * the free/allocated clusters when we continue. */
                qemu_co_mutex_unlock(&s->lock);
                qemu_co_queue_wait(&old_alloc->dependent_requests);
                qemu_co_mutex_lock(&s->lock);
                return -EAGAIN;
            }
        }
    }

    /* Make sure that existing clusters and new allocations are only used up to
     * the next dependency if we shortened the request above */
    *cur_bytes = bytes;

    return 0;
}

/*
 * Checks how many clusters that are already allocated and don't require a
 * copy on write there are at the given guest_offset (up to *bytes). If
 * *host_offset is not zero, only physically contiguous clusters beginning at
 * this host offset are counted.
 *
 * Note that guest_offset may not be cluster aligned. In this case, the
 * returned *host_offset points to the exact byte referenced by guest_offset
 * and therefore isn't cluster aligned either.
 *
 * Returns:
 *   0:     if no allocated clusters are available at the given offset.
 *          *bytes is normally unchanged. It is set to 0 if the cluster
 *          is allocated and doesn't need COW, but doesn't have the right
 *          physical offset.
 *
 *   1:     if allocated clusters that don't require a COW are available at
 *          the requested offset. *bytes may have decreased and describes
 *          the length of the area that can be written to.
 *
 *  -errno: in error cases
 */
static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
    uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    int l2_index;
    uint64_t cluster_offset;
    uint64_t *l2_table;
    uint64_t nb_clusters;
    unsigned int keep_clusters;
    int ret;

    trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
                              *bytes);

    assert(*host_offset == 0 || offset_into_cluster(s, guest_offset)
                                == offset_into_cluster(s, *host_offset));

    /*
     * Calculate the number of clusters to look for. We stop at L2 table
     * boundaries to keep things simple.
     */
    nb_clusters =
        size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);

    l2_index = offset_to_l2_index(s, guest_offset);
    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
    assert(nb_clusters <= INT_MAX);

    /* Find L2 entry for the first involved cluster */
    ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
    if (ret < 0) {
        return ret;
    }

    cluster_offset = be64_to_cpu(l2_table[l2_index]);

    /* Check how many clusters are already allocated and don't need COW */
    if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
        && (cluster_offset & QCOW_OFLAG_COPIED))
    {
        /* If a specific host_offset is required, check it */
        bool offset_matches =
            (cluster_offset & L2E_OFFSET_MASK) == *host_offset;

        if (offset_into_cluster(s, cluster_offset & L2E_OFFSET_MASK)) {
            qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset "
                                    "%#llx unaligned (guest offset: %#" PRIx64
                                    ")", cluster_offset & L2E_OFFSET_MASK,
                                    guest_offset);
            ret = -EIO;
            goto out;
        }

        if (*host_offset != 0 && !offset_matches) {
            *bytes = 0;
            ret = 0;
            goto out;
        }

        /* We keep all QCOW_OFLAG_COPIED clusters */
        keep_clusters =
            count_contiguous_clusters(nb_clusters, s->cluster_size,
                                      &l2_table[l2_index],
                                      QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
        assert(keep_clusters <= nb_clusters);

        *bytes = MIN(*bytes,
                 keep_clusters * s->cluster_size
                 - offset_into_cluster(s, guest_offset));

        ret = 1;
    } else {
        ret = 0;
    }

    /* Cleanup */
out:
    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);

    /* Only return a host offset if we actually made progress. Otherwise we
     * would make requirements for handle_alloc() that it can't fulfill */
    if (ret > 0) {
        *host_offset = (cluster_offset & L2E_OFFSET_MASK)
                     + offset_into_cluster(s, guest_offset);
    }

    return ret;
}

/*
 * Allocates new clusters for the given guest_offset.
 *
 * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
 * contain the number of clusters that have been allocated and are contiguous
 * in the image file.
 *
 * If *host_offset is non-zero, it specifies the offset in the image file at
 * which the new clusters must start. *nb_clusters can be 0 on return in this
 * case if the cluster at host_offset is already in use. If *host_offset is
 * zero, the clusters can be allocated anywhere in the image file.
 *
 * *host_offset is updated to contain the offset into the image file at which
 * the first allocated cluster starts.
 *
 * Return 0 on success and -errno in error cases. -EAGAIN means that the
 * function has been waiting for another request and the allocation must be
 * restarted, but the whole request should not be failed.
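 *
 * If *host_offset was non-zero, *nb_clusters may be reduced to the number of
 * clusters that could actually be allocated contiguously at that offset.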
 */
static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
                                   uint64_t *host_offset, uint64_t *nb_clusters)
{
    BDRVQcow2State *s = bs->opaque;

    trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
                                         *host_offset, *nb_clusters);

    /* Allocate new clusters */
    trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
    if (*host_offset == 0) {
        int64_t cluster_offset =
            qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
        if (cluster_offset < 0) {
            return cluster_offset;
        }
        *host_offset = cluster_offset;
        return 0;
    } else {
        int64_t ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
        if (ret < 0) {
            return ret;
        }
        *nb_clusters = ret;
        return 0;
    }
}

/*
 * Allocates new clusters for an area that either is yet unallocated or needs a
 * copy on write. If *host_offset is non-zero, clusters are only allocated if
 * the new allocation can match the specified host offset.
 *
 * Note that guest_offset may not be cluster aligned. In this case, the
 * returned *host_offset points to the exact byte referenced by guest_offset
 * and therefore isn't cluster aligned either.
 *
 * Returns:
 *   0:     if no clusters could be allocated. *bytes is set to 0,
 *          *host_offset is left unchanged.
 *
 *   1:     if new clusters were allocated. *bytes may be decreased if the
 *          new allocation doesn't cover all of the requested area.
 *          *host_offset is updated to contain the host offset of the first
 *          newly allocated cluster.
 *
 *  -errno: in error cases
 */
static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
    uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    int l2_index;
    uint64_t *l2_table;
    uint64_t entry;
    uint64_t nb_clusters;
    int ret;

    uint64_t alloc_cluster_offset;

    trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
                             *bytes);
    assert(*bytes > 0);

    /*
     * Calculate the number of clusters to look for. We stop at L2 table
     * boundaries to keep things simple.
     */
    nb_clusters =
        size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);

    l2_index = offset_to_l2_index(s, guest_offset);
    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
    assert(nb_clusters <= INT_MAX);

    /* Find L2 entry for the first involved cluster */
    ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
    if (ret < 0) {
        return ret;
    }

    entry = be64_to_cpu(l2_table[l2_index]);

    /* For the moment, overwrite compressed clusters one by one */
    if (entry & QCOW_OFLAG_COMPRESSED) {
        nb_clusters = 1;
    } else {
        nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
    }

    /* This function is only called when there were no non-COW clusters, so if
     * we can't find any unallocated or COW clusters either, something is
     * wrong with our code. */
    assert(nb_clusters > 0);

    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);

    /* Allocate, if necessary at a given offset in the image file */
    alloc_cluster_offset = start_of_cluster(s, *host_offset);
    ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
                                  &nb_clusters);
    if (ret < 0) {
        goto fail;
    }

    /* Can't extend contiguous allocation */
    if (nb_clusters == 0) {
        *bytes = 0;
        return 0;
    }

    /* !*host_offset would overwrite the image header and is reserved for "no
     * host offset preferred". If 0 was a valid host offset, it'd trigger the
     * following overlap check; do that now to avoid having an invalid value in
     * *host_offset. */
    if (!alloc_cluster_offset) {
        ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
                                            nb_clusters * s->cluster_size);
        assert(ret < 0);
        goto fail;
    }

    /*
     * Save info needed for meta data update.
     *
     * requested_bytes: Number of bytes from the start of the first
     * newly allocated cluster to the end of the (possibly shortened
     * before) write request.
     *
     * avail_bytes: Number of bytes from the start of the first
     * newly allocated to the end of the last newly allocated cluster.
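     * (i.e. nb_clusters * cluster_size, capped at INT_MAX)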
120210f0ed8bSKevin Wolf * 120385567393SKevin Wolf * nb_bytes: The number of bytes from the start of the first 120483baa9a4SKevin Wolf * newly allocated cluster to the end of the area that the write 120510f0ed8bSKevin Wolf * request actually writes to (excluding COW at the end) 120610f0ed8bSKevin Wolf */ 120785567393SKevin Wolf uint64_t requested_bytes = *bytes + offset_into_cluster(s, guest_offset); 120885567393SKevin Wolf int avail_bytes = MIN(INT_MAX, nb_clusters << s->cluster_bits); 120985567393SKevin Wolf int nb_bytes = MIN(requested_bytes, avail_bytes); 121088c6588cSKevin Wolf QCowL2Meta *old_m = *m; 121110f0ed8bSKevin Wolf 121210f0ed8bSKevin Wolf *m = g_malloc0(sizeof(**m)); 121310f0ed8bSKevin Wolf 121410f0ed8bSKevin Wolf **m = (QCowL2Meta) { 121588c6588cSKevin Wolf .next = old_m, 121688c6588cSKevin Wolf 1217411d62b0SKevin Wolf .alloc_offset = alloc_cluster_offset, 121883baa9a4SKevin Wolf .offset = start_of_cluster(s, guest_offset), 121910f0ed8bSKevin Wolf .nb_clusters = nb_clusters, 122010f0ed8bSKevin Wolf 122110f0ed8bSKevin Wolf .cow_start = { 122210f0ed8bSKevin Wolf .offset = 0, 122385567393SKevin Wolf .nb_bytes = offset_into_cluster(s, guest_offset), 122410f0ed8bSKevin Wolf }, 122510f0ed8bSKevin Wolf .cow_end = { 122685567393SKevin Wolf .offset = nb_bytes, 122785567393SKevin Wolf .nb_bytes = avail_bytes - nb_bytes, 122810f0ed8bSKevin Wolf }, 122910f0ed8bSKevin Wolf }; 123010f0ed8bSKevin Wolf qemu_co_queue_init(&(*m)->dependent_requests); 123110f0ed8bSKevin Wolf QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); 123210f0ed8bSKevin Wolf 1233411d62b0SKevin Wolf *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset); 123485567393SKevin Wolf *bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset)); 1235c37f4cd7SKevin Wolf assert(*bytes != 0); 123610f0ed8bSKevin Wolf 123710f0ed8bSKevin Wolf return 1; 123810f0ed8bSKevin Wolf 123910f0ed8bSKevin Wolf fail: 124010f0ed8bSKevin Wolf if (*m && (*m)->nb_clusters > 0) { 124110f0ed8bSKevin Wolf QLIST_REMOVE(*m, next_in_flight); 124210f0ed8bSKevin Wolf } 124310f0ed8bSKevin Wolf return ret; 124410f0ed8bSKevin Wolf } 124510f0ed8bSKevin Wolf 124610f0ed8bSKevin Wolf /* 124745aba42fSKevin Wolf * alloc_cluster_offset 124845aba42fSKevin Wolf * 1249250196f1SKevin Wolf * For a given offset on the virtual disk, find the cluster offset in qcow2 1250250196f1SKevin Wolf * file. If the offset is not found, allocate a new cluster. 125145aba42fSKevin Wolf * 1252250196f1SKevin Wolf * If the cluster was already allocated, m->nb_clusters is set to 0 and 1253a7912369SFrediano Ziglio * other fields in m are meaningless. 125445aba42fSKevin Wolf * 1255148da7eaSKevin Wolf * If the cluster is newly allocated, m->nb_clusters is set to the number of 125668d100e9SKevin Wolf * contiguous clusters that have been allocated. In this case, the other 125768d100e9SKevin Wolf * fields of m are valid and contain information about the first allocated 125868d100e9SKevin Wolf * cluster. 1259148da7eaSKevin Wolf * 126068d100e9SKevin Wolf * If the request conflicts with another write request in flight, the coroutine 126168d100e9SKevin Wolf * is queued and will be reentered when the dependency has completed. 
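 * On successful return, *bytes holds the number of bytes (contiguous in
 * the image file) that this call could handle; if that is less than the
 * requested amount, callers are typically expected to invoke the function
 * again for the remainder.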
1262148da7eaSKevin Wolf *
1263148da7eaSKevin Wolf * Return 0 on success and -errno in error cases
126445aba42fSKevin Wolf */
1265f4f0d391SKevin Wolf int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
1266d46a0bb2SKevin Wolf unsigned int *bytes, uint64_t *host_offset,
1267d46a0bb2SKevin Wolf QCowL2Meta **m)
126845aba42fSKevin Wolf {
1269ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque;
1270710c2496SKevin Wolf uint64_t start, remaining;
1271250196f1SKevin Wolf uint64_t cluster_offset;
127265eb2e35SKevin Wolf uint64_t cur_bytes;
1273710c2496SKevin Wolf int ret;
127445aba42fSKevin Wolf 
1275d46a0bb2SKevin Wolf trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *bytes);
1276710c2496SKevin Wolf 
127772424114SKevin Wolf again:
127816f0587eSHu Tao start = offset;
1279d46a0bb2SKevin Wolf remaining = *bytes;
12800af729ecSKevin Wolf cluster_offset = 0;
12810af729ecSKevin Wolf *host_offset = 0;
1282ecdd5333SKevin Wolf cur_bytes = 0;
1283ecdd5333SKevin Wolf *m = NULL;
12840af729ecSKevin Wolf 
12852c3b32d2SKevin Wolf while (true) {
1286ecdd5333SKevin Wolf 
1287ecdd5333SKevin Wolf if (!*host_offset) {
1288ecdd5333SKevin Wolf *host_offset = start_of_cluster(s, cluster_offset);
1289ecdd5333SKevin Wolf }
1290ecdd5333SKevin Wolf 
1291ecdd5333SKevin Wolf assert(remaining >= cur_bytes);
1292ecdd5333SKevin Wolf 
1293ecdd5333SKevin Wolf start += cur_bytes;
1294ecdd5333SKevin Wolf remaining -= cur_bytes;
1295ecdd5333SKevin Wolf cluster_offset += cur_bytes;
1296ecdd5333SKevin Wolf 
1297ecdd5333SKevin Wolf if (remaining == 0) {
1298ecdd5333SKevin Wolf break;
1299ecdd5333SKevin Wolf }
1300ecdd5333SKevin Wolf 
1301ecdd5333SKevin Wolf cur_bytes = remaining;
1302ecdd5333SKevin Wolf 
1303250196f1SKevin Wolf /*
130417a71e58SKevin Wolf * Now start gathering as many contiguous clusters as possible:
130517a71e58SKevin Wolf *
130617a71e58SKevin Wolf * 1. Check for overlaps with in-flight allocations
130717a71e58SKevin Wolf *
13082c3b32d2SKevin Wolf * a) Overlap not in the first cluster -> shorten this request and
13092c3b32d2SKevin Wolf * let the caller handle the rest in its next loop iteration.
131017a71e58SKevin Wolf *
13112c3b32d2SKevin Wolf * b) Real overlaps of two requests. Yield and restart the search
13122c3b32d2SKevin Wolf * for contiguous clusters (the situation could have changed
13132c3b32d2SKevin Wolf * while we were sleeping)
131417a71e58SKevin Wolf *
131517a71e58SKevin Wolf * c) TODO: Request starts in the same cluster as the in-flight
13162c3b32d2SKevin Wolf * allocation ends. Shorten the COW of the in-flight allocation,
13172c3b32d2SKevin Wolf * set cluster_offset to write to the same cluster and set up
13182c3b32d2SKevin Wolf * the right synchronisation between the in-flight request and
13192c3b32d2SKevin Wolf * the new one.
132017a71e58SKevin Wolf */
1321ecdd5333SKevin Wolf ret = handle_dependencies(bs, start, &cur_bytes, m);
132217a71e58SKevin Wolf if (ret == -EAGAIN) {
1323ecdd5333SKevin Wolf /* Currently handle_dependencies() doesn't yield if we already had
1324ecdd5333SKevin Wolf * an allocation. If it did, we would have to clean up the L2Meta
1325ecdd5333SKevin Wolf * structs before starting over.
*/ 1326ecdd5333SKevin Wolf assert(*m == NULL); 132717a71e58SKevin Wolf goto again; 132817a71e58SKevin Wolf } else if (ret < 0) { 132917a71e58SKevin Wolf return ret; 1330ecdd5333SKevin Wolf } else if (cur_bytes == 0) { 1331ecdd5333SKevin Wolf break; 133217a71e58SKevin Wolf } else { 133317a71e58SKevin Wolf /* handle_dependencies() may have decreased cur_bytes (shortened 133417a71e58SKevin Wolf * the allocations below) so that the next dependency is processed 133517a71e58SKevin Wolf * correctly during the next loop iteration. */ 133617a71e58SKevin Wolf } 133717a71e58SKevin Wolf 133872424114SKevin Wolf /* 13390af729ecSKevin Wolf * 2. Count contiguous COPIED clusters. 134072424114SKevin Wolf */ 1341710c2496SKevin Wolf ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m); 134272424114SKevin Wolf if (ret < 0) { 134372424114SKevin Wolf return ret; 13440af729ecSKevin Wolf } else if (ret) { 1345ecdd5333SKevin Wolf continue; 1346e62daaf6SKevin Wolf } else if (cur_bytes == 0) { 13472c3b32d2SKevin Wolf break; 134872424114SKevin Wolf } 134972424114SKevin Wolf 13500af729ecSKevin Wolf /* 13510af729ecSKevin Wolf * 3. If the request still hasn't completed, allocate new clusters, 13520af729ecSKevin Wolf * considering any cluster_offset of steps 1c or 2. 13530af729ecSKevin Wolf */ 1354710c2496SKevin Wolf ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m); 1355037689d8SKevin Wolf if (ret < 0) { 1356037689d8SKevin Wolf return ret; 1357710c2496SKevin Wolf } else if (ret) { 1358ecdd5333SKevin Wolf continue; 13592c3b32d2SKevin Wolf } else { 13602c3b32d2SKevin Wolf assert(cur_bytes == 0); 13612c3b32d2SKevin Wolf break; 13622c3b32d2SKevin Wolf } 1363710c2496SKevin Wolf } 1364250196f1SKevin Wolf 1365d46a0bb2SKevin Wolf *bytes -= remaining; 1366d46a0bb2SKevin Wolf assert(*bytes > 0); 1367710c2496SKevin Wolf assert(*host_offset != 0); 136845aba42fSKevin Wolf 1369148da7eaSKevin Wolf return 0; 137045aba42fSKevin Wolf } 137145aba42fSKevin Wolf 137245aba42fSKevin Wolf static int decompress_buffer(uint8_t *out_buf, int out_buf_size, 137345aba42fSKevin Wolf const uint8_t *buf, int buf_size) 137445aba42fSKevin Wolf { 137545aba42fSKevin Wolf z_stream strm1, *strm = &strm1; 137645aba42fSKevin Wolf int ret, out_len; 137745aba42fSKevin Wolf 137845aba42fSKevin Wolf memset(strm, 0, sizeof(*strm)); 137945aba42fSKevin Wolf 138045aba42fSKevin Wolf strm->next_in = (uint8_t *)buf; 138145aba42fSKevin Wolf strm->avail_in = buf_size; 138245aba42fSKevin Wolf strm->next_out = out_buf; 138345aba42fSKevin Wolf strm->avail_out = out_buf_size; 138445aba42fSKevin Wolf 138545aba42fSKevin Wolf ret = inflateInit2(strm, -12); 138645aba42fSKevin Wolf if (ret != Z_OK) 138745aba42fSKevin Wolf return -1; 138845aba42fSKevin Wolf ret = inflate(strm, Z_FINISH); 138945aba42fSKevin Wolf out_len = strm->next_out - out_buf; 139045aba42fSKevin Wolf if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || 139145aba42fSKevin Wolf out_len != out_buf_size) { 139245aba42fSKevin Wolf inflateEnd(strm); 139345aba42fSKevin Wolf return -1; 139445aba42fSKevin Wolf } 139545aba42fSKevin Wolf inflateEnd(strm); 139645aba42fSKevin Wolf return 0; 139745aba42fSKevin Wolf } 139845aba42fSKevin Wolf 139966f82ceeSKevin Wolf int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) 140045aba42fSKevin Wolf { 1401ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 140245aba42fSKevin Wolf int ret, csize, nb_csectors, sector_offset; 140345aba42fSKevin Wolf uint64_t coffset; 140445aba42fSKevin Wolf 140545aba42fSKevin Wolf coffset = cluster_offset & 
s->cluster_offset_mask; 140645aba42fSKevin Wolf if (s->cluster_cache_offset != coffset) { 140745aba42fSKevin Wolf nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1; 140845aba42fSKevin Wolf sector_offset = coffset & 511; 140945aba42fSKevin Wolf csize = nb_csectors * 512 - sector_offset; 141066f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED); 1411fbcbbf4eSKevin Wolf ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, 14129a4f4c31SKevin Wolf nb_csectors); 141345aba42fSKevin Wolf if (ret < 0) { 14148af36488SKevin Wolf return ret; 141545aba42fSKevin Wolf } 141645aba42fSKevin Wolf if (decompress_buffer(s->cluster_cache, s->cluster_size, 141745aba42fSKevin Wolf s->cluster_data + sector_offset, csize) < 0) { 14188af36488SKevin Wolf return -EIO; 141945aba42fSKevin Wolf } 142045aba42fSKevin Wolf s->cluster_cache_offset = coffset; 142145aba42fSKevin Wolf } 142245aba42fSKevin Wolf return 0; 142345aba42fSKevin Wolf } 14245ea929e3SKevin Wolf 14255ea929e3SKevin Wolf /* 14265ea929e3SKevin Wolf * This discards as many clusters of nb_clusters as possible at once (i.e. 14275ea929e3SKevin Wolf * all clusters in the same L2 table) and returns the number of discarded 14285ea929e3SKevin Wolf * clusters. 14295ea929e3SKevin Wolf */ 14305ea929e3SKevin Wolf static int discard_single_l2(BlockDriverState *bs, uint64_t offset, 1431b6d36defSMax Reitz uint64_t nb_clusters, enum qcow2_discard_type type, 1432b6d36defSMax Reitz bool full_discard) 14335ea929e3SKevin Wolf { 1434ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 14353948d1d4SKevin Wolf uint64_t *l2_table; 14365ea929e3SKevin Wolf int l2_index; 14375ea929e3SKevin Wolf int ret; 14385ea929e3SKevin Wolf int i; 14395ea929e3SKevin Wolf 14403948d1d4SKevin Wolf ret = get_cluster_table(bs, offset, &l2_table, &l2_index); 14415ea929e3SKevin Wolf if (ret < 0) { 14425ea929e3SKevin Wolf return ret; 14435ea929e3SKevin Wolf } 14445ea929e3SKevin Wolf 14455ea929e3SKevin Wolf /* Limit nb_clusters to one L2 table */ 14465ea929e3SKevin Wolf nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); 1447b6d36defSMax Reitz assert(nb_clusters <= INT_MAX); 14485ea929e3SKevin Wolf 14495ea929e3SKevin Wolf for (i = 0; i < nb_clusters; i++) { 1450c883db0dSMax Reitz uint64_t old_l2_entry; 14515ea929e3SKevin Wolf 1452c883db0dSMax Reitz old_l2_entry = be64_to_cpu(l2_table[l2_index + i]); 1453a71835a0SKevin Wolf 1454a71835a0SKevin Wolf /* 1455808c4b6fSMax Reitz * If full_discard is false, make sure that a discarded area reads back 1456808c4b6fSMax Reitz * as zeroes for v3 images (we cannot do it for v2 without actually 1457808c4b6fSMax Reitz * writing a zero-filled buffer). We can skip the operation if the 1458808c4b6fSMax Reitz * cluster is already marked as zero, or if it's unallocated and we 1459808c4b6fSMax Reitz * don't have a backing file. 1460a71835a0SKevin Wolf * 1461a71835a0SKevin Wolf * TODO We might want to use bdrv_get_block_status(bs) here, but we're 1462a71835a0SKevin Wolf * holding s->lock, so that doesn't work today. 1463808c4b6fSMax Reitz * 1464808c4b6fSMax Reitz * If full_discard is true, the sector should not read back as zeroes, 1465808c4b6fSMax Reitz * but rather fall through to the backing file. 
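 * For example, with a v3 image that has a backing file: a regular discard
 * turns the L2 entry into a zero cluster, so reads return zeroes, whereas a
 * full discard clears the entry completely, so reads fall through to the
 * backing file again.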
1466a71835a0SKevin Wolf */ 1467c883db0dSMax Reitz switch (qcow2_get_cluster_type(old_l2_entry)) { 1468c883db0dSMax Reitz case QCOW2_CLUSTER_UNALLOCATED: 1469760e0063SKevin Wolf if (full_discard || !bs->backing) { 1470a71835a0SKevin Wolf continue; 1471a71835a0SKevin Wolf } 1472c883db0dSMax Reitz break; 1473a71835a0SKevin Wolf 1474c883db0dSMax Reitz case QCOW2_CLUSTER_ZERO: 1475808c4b6fSMax Reitz if (!full_discard) { 14765ea929e3SKevin Wolf continue; 1477808c4b6fSMax Reitz } 1478808c4b6fSMax Reitz break; 1479c883db0dSMax Reitz 1480c883db0dSMax Reitz case QCOW2_CLUSTER_NORMAL: 1481c883db0dSMax Reitz case QCOW2_CLUSTER_COMPRESSED: 1482c883db0dSMax Reitz break; 1483c883db0dSMax Reitz 1484c883db0dSMax Reitz default: 1485c883db0dSMax Reitz abort(); 14865ea929e3SKevin Wolf } 14875ea929e3SKevin Wolf 14885ea929e3SKevin Wolf /* First remove L2 entries */ 148972e80b89SAlberto Garcia qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); 1490808c4b6fSMax Reitz if (!full_discard && s->qcow_version >= 3) { 1491a71835a0SKevin Wolf l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); 1492a71835a0SKevin Wolf } else { 14935ea929e3SKevin Wolf l2_table[l2_index + i] = cpu_to_be64(0); 1494a71835a0SKevin Wolf } 14955ea929e3SKevin Wolf 14965ea929e3SKevin Wolf /* Then decrease the refcount */ 1497c883db0dSMax Reitz qcow2_free_any_clusters(bs, old_l2_entry, 1, type); 14985ea929e3SKevin Wolf } 14995ea929e3SKevin Wolf 1500a3f1afb4SAlberto Garcia qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); 15015ea929e3SKevin Wolf 15025ea929e3SKevin Wolf return nb_clusters; 15035ea929e3SKevin Wolf } 15045ea929e3SKevin Wolf 15055ea929e3SKevin Wolf int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, 1506808c4b6fSMax Reitz int nb_sectors, enum qcow2_discard_type type, bool full_discard) 15075ea929e3SKevin Wolf { 1508ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 15095ea929e3SKevin Wolf uint64_t end_offset; 1510b6d36defSMax Reitz uint64_t nb_clusters; 15115ea929e3SKevin Wolf int ret; 15125ea929e3SKevin Wolf 15135ea929e3SKevin Wolf end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS); 15145ea929e3SKevin Wolf 15155ea929e3SKevin Wolf /* Round start up and end down */ 15165ea929e3SKevin Wolf offset = align_offset(offset, s->cluster_size); 1517ac95acdbSHu Tao end_offset = start_of_cluster(s, end_offset); 15185ea929e3SKevin Wolf 15195ea929e3SKevin Wolf if (offset > end_offset) { 15205ea929e3SKevin Wolf return 0; 15215ea929e3SKevin Wolf } 15225ea929e3SKevin Wolf 15235ea929e3SKevin Wolf nb_clusters = size_to_clusters(s, end_offset - offset); 15245ea929e3SKevin Wolf 15250b919faeSKevin Wolf s->cache_discards = true; 15260b919faeSKevin Wolf 15275ea929e3SKevin Wolf /* Each L2 table is handled by its own loop iteration */ 15285ea929e3SKevin Wolf while (nb_clusters > 0) { 1529808c4b6fSMax Reitz ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard); 15305ea929e3SKevin Wolf if (ret < 0) { 15310b919faeSKevin Wolf goto fail; 15325ea929e3SKevin Wolf } 15335ea929e3SKevin Wolf 15345ea929e3SKevin Wolf nb_clusters -= ret; 15355ea929e3SKevin Wolf offset += (ret * s->cluster_size); 15365ea929e3SKevin Wolf } 15375ea929e3SKevin Wolf 15380b919faeSKevin Wolf ret = 0; 15390b919faeSKevin Wolf fail: 15400b919faeSKevin Wolf s->cache_discards = false; 15410b919faeSKevin Wolf qcow2_process_discards(bs, ret); 15420b919faeSKevin Wolf 15430b919faeSKevin Wolf return ret; 15445ea929e3SKevin Wolf } 1545621f0589SKevin Wolf 1546621f0589SKevin Wolf /* 1547621f0589SKevin Wolf * This zeroes as many clusters of nb_clusters 
as possible at once (i.e. 1548621f0589SKevin Wolf * all clusters in the same L2 table) and returns the number of zeroed 1549621f0589SKevin Wolf * clusters. 1550621f0589SKevin Wolf */ 1551621f0589SKevin Wolf static int zero_single_l2(BlockDriverState *bs, uint64_t offset, 1552b6d36defSMax Reitz uint64_t nb_clusters) 1553621f0589SKevin Wolf { 1554ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1555621f0589SKevin Wolf uint64_t *l2_table; 1556621f0589SKevin Wolf int l2_index; 1557621f0589SKevin Wolf int ret; 1558621f0589SKevin Wolf int i; 1559621f0589SKevin Wolf 1560621f0589SKevin Wolf ret = get_cluster_table(bs, offset, &l2_table, &l2_index); 1561621f0589SKevin Wolf if (ret < 0) { 1562621f0589SKevin Wolf return ret; 1563621f0589SKevin Wolf } 1564621f0589SKevin Wolf 1565621f0589SKevin Wolf /* Limit nb_clusters to one L2 table */ 1566621f0589SKevin Wolf nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); 1567b6d36defSMax Reitz assert(nb_clusters <= INT_MAX); 1568621f0589SKevin Wolf 1569621f0589SKevin Wolf for (i = 0; i < nb_clusters; i++) { 1570621f0589SKevin Wolf uint64_t old_offset; 1571621f0589SKevin Wolf 1572621f0589SKevin Wolf old_offset = be64_to_cpu(l2_table[l2_index + i]); 1573621f0589SKevin Wolf 1574621f0589SKevin Wolf /* Update L2 entries */ 157572e80b89SAlberto Garcia qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); 1576621f0589SKevin Wolf if (old_offset & QCOW_OFLAG_COMPRESSED) { 1577621f0589SKevin Wolf l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); 15786cfcb9b8SKevin Wolf qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); 1579621f0589SKevin Wolf } else { 1580621f0589SKevin Wolf l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO); 1581621f0589SKevin Wolf } 1582621f0589SKevin Wolf } 1583621f0589SKevin Wolf 1584a3f1afb4SAlberto Garcia qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); 1585621f0589SKevin Wolf 1586621f0589SKevin Wolf return nb_clusters; 1587621f0589SKevin Wolf } 1588621f0589SKevin Wolf 1589621f0589SKevin Wolf int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) 1590621f0589SKevin Wolf { 1591ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1592b6d36defSMax Reitz uint64_t nb_clusters; 1593621f0589SKevin Wolf int ret; 1594621f0589SKevin Wolf 1595621f0589SKevin Wolf /* The zero flag is only supported by version 3 and newer */ 1596621f0589SKevin Wolf if (s->qcow_version < 3) { 1597621f0589SKevin Wolf return -ENOTSUP; 1598621f0589SKevin Wolf } 1599621f0589SKevin Wolf 1600621f0589SKevin Wolf /* Each L2 table is handled by its own loop iteration */ 1601621f0589SKevin Wolf nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS); 1602621f0589SKevin Wolf 16030b919faeSKevin Wolf s->cache_discards = true; 16040b919faeSKevin Wolf 1605621f0589SKevin Wolf while (nb_clusters > 0) { 1606621f0589SKevin Wolf ret = zero_single_l2(bs, offset, nb_clusters); 1607621f0589SKevin Wolf if (ret < 0) { 16080b919faeSKevin Wolf goto fail; 1609621f0589SKevin Wolf } 1610621f0589SKevin Wolf 1611621f0589SKevin Wolf nb_clusters -= ret; 1612621f0589SKevin Wolf offset += (ret * s->cluster_size); 1613621f0589SKevin Wolf } 1614621f0589SKevin Wolf 16150b919faeSKevin Wolf ret = 0; 16160b919faeSKevin Wolf fail: 16170b919faeSKevin Wolf s->cache_discards = false; 16180b919faeSKevin Wolf qcow2_process_discards(bs, ret); 16190b919faeSKevin Wolf 16200b919faeSKevin Wolf return ret; 1621621f0589SKevin Wolf } 162232b6444dSMax Reitz 162332b6444dSMax Reitz /* 162432b6444dSMax Reitz * Expands all zero clusters in a specific L1 
table (or deallocates them, for 162532b6444dSMax Reitz * non-backed non-pre-allocated zero clusters). 162632b6444dSMax Reitz * 16274057a2b2SMax Reitz * l1_entries and *visited_l1_entries are used to keep track of progress for 16284057a2b2SMax Reitz * status_cb(). l1_entries contains the total number of L1 entries and 16294057a2b2SMax Reitz * *visited_l1_entries counts all visited L1 entries. 163032b6444dSMax Reitz */ 163132b6444dSMax Reitz static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, 1632ecf58777SMax Reitz int l1_size, int64_t *visited_l1_entries, 16334057a2b2SMax Reitz int64_t l1_entries, 16348b13976dSMax Reitz BlockDriverAmendStatusCB *status_cb, 16358b13976dSMax Reitz void *cb_opaque) 163632b6444dSMax Reitz { 1637ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 163832b6444dSMax Reitz bool is_active_l1 = (l1_table == s->l1_table); 163932b6444dSMax Reitz uint64_t *l2_table = NULL; 164032b6444dSMax Reitz int ret; 164132b6444dSMax Reitz int i, j; 164232b6444dSMax Reitz 164332b6444dSMax Reitz if (!is_active_l1) { 164432b6444dSMax Reitz /* inactive L2 tables require a buffer to be stored in when loading 164532b6444dSMax Reitz * them from disk */ 16469a4f4c31SKevin Wolf l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size); 1647de82815dSKevin Wolf if (l2_table == NULL) { 1648de82815dSKevin Wolf return -ENOMEM; 1649de82815dSKevin Wolf } 165032b6444dSMax Reitz } 165132b6444dSMax Reitz 165232b6444dSMax Reitz for (i = 0; i < l1_size; i++) { 165332b6444dSMax Reitz uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK; 165432b6444dSMax Reitz bool l2_dirty = false; 16550e06528eSMax Reitz uint64_t l2_refcount; 165632b6444dSMax Reitz 165732b6444dSMax Reitz if (!l2_offset) { 165832b6444dSMax Reitz /* unallocated */ 16594057a2b2SMax Reitz (*visited_l1_entries)++; 16604057a2b2SMax Reitz if (status_cb) { 16618b13976dSMax Reitz status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque); 16624057a2b2SMax Reitz } 166332b6444dSMax Reitz continue; 166432b6444dSMax Reitz } 166532b6444dSMax Reitz 16668dd93d93SMax Reitz if (offset_into_cluster(s, l2_offset)) { 16678dd93d93SMax Reitz qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" 16688dd93d93SMax Reitz PRIx64 " unaligned (L1 index: %#x)", 16698dd93d93SMax Reitz l2_offset, i); 16708dd93d93SMax Reitz ret = -EIO; 16718dd93d93SMax Reitz goto fail; 16728dd93d93SMax Reitz } 16738dd93d93SMax Reitz 167432b6444dSMax Reitz if (is_active_l1) { 167532b6444dSMax Reitz /* get active L2 tables from cache */ 167632b6444dSMax Reitz ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, 167732b6444dSMax Reitz (void **)&l2_table); 167832b6444dSMax Reitz } else { 167932b6444dSMax Reitz /* load inactive L2 tables from disk */ 1680fbcbbf4eSKevin Wolf ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE, 168132b6444dSMax Reitz (void *)l2_table, s->cluster_sectors); 168232b6444dSMax Reitz } 168332b6444dSMax Reitz if (ret < 0) { 168432b6444dSMax Reitz goto fail; 168532b6444dSMax Reitz } 168632b6444dSMax Reitz 16877324c10fSMax Reitz ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits, 16887324c10fSMax Reitz &l2_refcount); 16897324c10fSMax Reitz if (ret < 0) { 1690ecf58777SMax Reitz goto fail; 1691ecf58777SMax Reitz } 1692ecf58777SMax Reitz 169332b6444dSMax Reitz for (j = 0; j < s->l2_size; j++) { 169432b6444dSMax Reitz uint64_t l2_entry = be64_to_cpu(l2_table[j]); 1695ecf58777SMax Reitz int64_t offset = l2_entry & L2E_OFFSET_MASK; 169632b6444dSMax Reitz int cluster_type = qcow2_get_cluster_type(l2_entry); 1697320c7066SMax 
Reitz bool preallocated = offset != 0; 169832b6444dSMax Reitz 1699ecf58777SMax Reitz if (cluster_type != QCOW2_CLUSTER_ZERO) { 170032b6444dSMax Reitz continue; 170132b6444dSMax Reitz } 170232b6444dSMax Reitz 1703320c7066SMax Reitz if (!preallocated) { 1704760e0063SKevin Wolf if (!bs->backing) { 170532b6444dSMax Reitz /* not backed; therefore we can simply deallocate the 170632b6444dSMax Reitz * cluster */ 170732b6444dSMax Reitz l2_table[j] = 0; 170832b6444dSMax Reitz l2_dirty = true; 170932b6444dSMax Reitz continue; 171032b6444dSMax Reitz } 171132b6444dSMax Reitz 171232b6444dSMax Reitz offset = qcow2_alloc_clusters(bs, s->cluster_size); 171332b6444dSMax Reitz if (offset < 0) { 171432b6444dSMax Reitz ret = offset; 171532b6444dSMax Reitz goto fail; 171632b6444dSMax Reitz } 1717ecf58777SMax Reitz 1718ecf58777SMax Reitz if (l2_refcount > 1) { 1719ecf58777SMax Reitz /* For shared L2 tables, set the refcount accordingly (it is 1720ecf58777SMax Reitz * already 1 and needs to be l2_refcount) */ 1721ecf58777SMax Reitz ret = qcow2_update_cluster_refcount(bs, 17222aabe7c7SMax Reitz offset >> s->cluster_bits, 17232aabe7c7SMax Reitz refcount_diff(1, l2_refcount), false, 1724ecf58777SMax Reitz QCOW2_DISCARD_OTHER); 1725ecf58777SMax Reitz if (ret < 0) { 1726ecf58777SMax Reitz qcow2_free_clusters(bs, offset, s->cluster_size, 1727ecf58777SMax Reitz QCOW2_DISCARD_OTHER); 1728ecf58777SMax Reitz goto fail; 1729ecf58777SMax Reitz } 1730ecf58777SMax Reitz } 173132b6444dSMax Reitz } 173232b6444dSMax Reitz 17338dd93d93SMax Reitz if (offset_into_cluster(s, offset)) { 17348dd93d93SMax Reitz qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset " 17358dd93d93SMax Reitz "%#" PRIx64 " unaligned (L2 offset: %#" 17368dd93d93SMax Reitz PRIx64 ", L2 index: %#x)", offset, 17378dd93d93SMax Reitz l2_offset, j); 17388dd93d93SMax Reitz if (!preallocated) { 17398dd93d93SMax Reitz qcow2_free_clusters(bs, offset, s->cluster_size, 17408dd93d93SMax Reitz QCOW2_DISCARD_ALWAYS); 17418dd93d93SMax Reitz } 17428dd93d93SMax Reitz ret = -EIO; 17438dd93d93SMax Reitz goto fail; 17448dd93d93SMax Reitz } 17458dd93d93SMax Reitz 1746231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size); 174732b6444dSMax Reitz if (ret < 0) { 1748320c7066SMax Reitz if (!preallocated) { 174932b6444dSMax Reitz qcow2_free_clusters(bs, offset, s->cluster_size, 175032b6444dSMax Reitz QCOW2_DISCARD_ALWAYS); 1751320c7066SMax Reitz } 175232b6444dSMax Reitz goto fail; 175332b6444dSMax Reitz } 175432b6444dSMax Reitz 1755720ff280SKevin Wolf ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0); 175632b6444dSMax Reitz if (ret < 0) { 1757320c7066SMax Reitz if (!preallocated) { 175832b6444dSMax Reitz qcow2_free_clusters(bs, offset, s->cluster_size, 175932b6444dSMax Reitz QCOW2_DISCARD_ALWAYS); 1760320c7066SMax Reitz } 176132b6444dSMax Reitz goto fail; 176232b6444dSMax Reitz } 176332b6444dSMax Reitz 1764ecf58777SMax Reitz if (l2_refcount == 1) { 176532b6444dSMax Reitz l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED); 1766ecf58777SMax Reitz } else { 1767ecf58777SMax Reitz l2_table[j] = cpu_to_be64(offset); 1768e390cf5aSMax Reitz } 1769ecf58777SMax Reitz l2_dirty = true; 177032b6444dSMax Reitz } 177132b6444dSMax Reitz 177232b6444dSMax Reitz if (is_active_l1) { 177332b6444dSMax Reitz if (l2_dirty) { 177472e80b89SAlberto Garcia qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); 177532b6444dSMax Reitz qcow2_cache_depends_on_flush(s->l2_table_cache); 177632b6444dSMax Reitz } 1777a3f1afb4SAlberto Garcia 
qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); 177832b6444dSMax Reitz } else { 177932b6444dSMax Reitz if (l2_dirty) { 1780231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, 1781231bb267SMax Reitz QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, l2_offset, 178232b6444dSMax Reitz s->cluster_size); 178332b6444dSMax Reitz if (ret < 0) { 178432b6444dSMax Reitz goto fail; 178532b6444dSMax Reitz } 178632b6444dSMax Reitz 178718d51c4bSKevin Wolf ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE, 178832b6444dSMax Reitz (void *)l2_table, s->cluster_sectors); 178932b6444dSMax Reitz if (ret < 0) { 179032b6444dSMax Reitz goto fail; 179132b6444dSMax Reitz } 179232b6444dSMax Reitz } 179332b6444dSMax Reitz } 17944057a2b2SMax Reitz 17954057a2b2SMax Reitz (*visited_l1_entries)++; 17964057a2b2SMax Reitz if (status_cb) { 17978b13976dSMax Reitz status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque); 17984057a2b2SMax Reitz } 179932b6444dSMax Reitz } 180032b6444dSMax Reitz 180132b6444dSMax Reitz ret = 0; 180232b6444dSMax Reitz 180332b6444dSMax Reitz fail: 180432b6444dSMax Reitz if (l2_table) { 180532b6444dSMax Reitz if (!is_active_l1) { 180632b6444dSMax Reitz qemu_vfree(l2_table); 180732b6444dSMax Reitz } else { 180832b6444dSMax Reitz qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); 180932b6444dSMax Reitz } 181032b6444dSMax Reitz } 181132b6444dSMax Reitz return ret; 181232b6444dSMax Reitz } 181332b6444dSMax Reitz 181432b6444dSMax Reitz /* 181532b6444dSMax Reitz * For backed images, expands all zero clusters on the image. For non-backed 181632b6444dSMax Reitz * images, deallocates all non-pre-allocated zero clusters (and claims the 181732b6444dSMax Reitz * allocation for pre-allocated ones). This is important for downgrading to a 181832b6444dSMax Reitz * qcow2 version which doesn't yet support metadata zero clusters. 181932b6444dSMax Reitz */ 18204057a2b2SMax Reitz int qcow2_expand_zero_clusters(BlockDriverState *bs, 18218b13976dSMax Reitz BlockDriverAmendStatusCB *status_cb, 18228b13976dSMax Reitz void *cb_opaque) 182332b6444dSMax Reitz { 1824ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 182532b6444dSMax Reitz uint64_t *l1_table = NULL; 18264057a2b2SMax Reitz int64_t l1_entries = 0, visited_l1_entries = 0; 182732b6444dSMax Reitz int ret; 182832b6444dSMax Reitz int i, j; 182932b6444dSMax Reitz 18304057a2b2SMax Reitz if (status_cb) { 18314057a2b2SMax Reitz l1_entries = s->l1_size; 18324057a2b2SMax Reitz for (i = 0; i < s->nb_snapshots; i++) { 18334057a2b2SMax Reitz l1_entries += s->snapshots[i].l1_size; 18344057a2b2SMax Reitz } 18354057a2b2SMax Reitz } 18364057a2b2SMax Reitz 183732b6444dSMax Reitz ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size, 18384057a2b2SMax Reitz &visited_l1_entries, l1_entries, 18398b13976dSMax Reitz status_cb, cb_opaque); 184032b6444dSMax Reitz if (ret < 0) { 184132b6444dSMax Reitz goto fail; 184232b6444dSMax Reitz } 184332b6444dSMax Reitz 184432b6444dSMax Reitz /* Inactive L1 tables may point to active L2 tables - therefore it is 184532b6444dSMax Reitz * necessary to flush the L2 table cache before trying to access the L2 184632b6444dSMax Reitz * tables pointed to by inactive L1 entries (else we might try to expand 184732b6444dSMax Reitz * zero clusters that have already been expanded); furthermore, it is also 184832b6444dSMax Reitz * necessary to empty the L2 table cache, since it may contain tables which 184932b6444dSMax Reitz * are now going to be modified directly on disk, bypassing the cache. 
185032b6444dSMax Reitz * qcow2_cache_empty() does both for us. */ 185132b6444dSMax Reitz ret = qcow2_cache_empty(bs, s->l2_table_cache); 185232b6444dSMax Reitz if (ret < 0) { 185332b6444dSMax Reitz goto fail; 185432b6444dSMax Reitz } 185532b6444dSMax Reitz 185632b6444dSMax Reitz for (i = 0; i < s->nb_snapshots; i++) { 1857d737b78cSLaurent Vivier int l1_sectors = DIV_ROUND_UP(s->snapshots[i].l1_size * 1858d737b78cSLaurent Vivier sizeof(uint64_t), BDRV_SECTOR_SIZE); 185932b6444dSMax Reitz 186032b6444dSMax Reitz l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE); 186132b6444dSMax Reitz 1862fbcbbf4eSKevin Wolf ret = bdrv_read(bs->file, 18639a4f4c31SKevin Wolf s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE, 18649a4f4c31SKevin Wolf (void *)l1_table, l1_sectors); 186532b6444dSMax Reitz if (ret < 0) { 186632b6444dSMax Reitz goto fail; 186732b6444dSMax Reitz } 186832b6444dSMax Reitz 186932b6444dSMax Reitz for (j = 0; j < s->snapshots[i].l1_size; j++) { 187032b6444dSMax Reitz be64_to_cpus(&l1_table[j]); 187132b6444dSMax Reitz } 187232b6444dSMax Reitz 187332b6444dSMax Reitz ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size, 18744057a2b2SMax Reitz &visited_l1_entries, l1_entries, 18758b13976dSMax Reitz status_cb, cb_opaque); 187632b6444dSMax Reitz if (ret < 0) { 187732b6444dSMax Reitz goto fail; 187832b6444dSMax Reitz } 187932b6444dSMax Reitz } 188032b6444dSMax Reitz 188132b6444dSMax Reitz ret = 0; 188232b6444dSMax Reitz 188332b6444dSMax Reitz fail: 188432b6444dSMax Reitz g_free(l1_table); 188532b6444dSMax Reitz return ret; 188632b6444dSMax Reitz } 1887
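To make the calling convention of qcow2_alloc_cluster_offset() concrete, the following is a minimal, hypothetical sketch of a caller driving it for a guest write. It is not part of block/qcow2-cluster.c; it omits coroutine context, locking and error recovery, and only hints in comments at the data write and the per-QCowL2Meta metadata commit that the real write path performs.

/* Illustrative sketch only, not part of the driver. */
static int example_alloc_loop(BlockDriverState *bs, uint64_t guest_offset,
                              unsigned int bytes)
{
    int ret;

    while (bytes > 0) {
        unsigned int cur_bytes = bytes;    /* in: requested, out: handled */
        uint64_t host_offset;
        QCowL2Meta *meta = NULL;

        /* Map (or allocate) a contiguous range in the image file */
        ret = qcow2_alloc_cluster_offset(bs, guest_offset, &cur_bytes,
                                         &host_offset, &meta);
        if (ret < 0) {
            return ret;
        }

        /* ... write cur_bytes of guest data into the clusters starting at
         * host_offset, perform the COW described by each QCowL2Meta in the
         * meta list, and commit the corresponding L2 table updates ... */

        guest_offset += cur_bytes;
        bytes -= cur_bytes;
    }

    return 0;
}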