145aba42fSKevin Wolf /* 245aba42fSKevin Wolf * Block driver for the QCOW version 2 format 345aba42fSKevin Wolf * 445aba42fSKevin Wolf * Copyright (c) 2004-2006 Fabrice Bellard 545aba42fSKevin Wolf * 645aba42fSKevin Wolf * Permission is hereby granted, free of charge, to any person obtaining a copy 745aba42fSKevin Wolf * of this software and associated documentation files (the "Software"), to deal 845aba42fSKevin Wolf * in the Software without restriction, including without limitation the rights 945aba42fSKevin Wolf * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 1045aba42fSKevin Wolf * copies of the Software, and to permit persons to whom the Software is 1145aba42fSKevin Wolf * furnished to do so, subject to the following conditions: 1245aba42fSKevin Wolf * 1345aba42fSKevin Wolf * The above copyright notice and this permission notice shall be included in 1445aba42fSKevin Wolf * all copies or substantial portions of the Software. 1545aba42fSKevin Wolf * 1645aba42fSKevin Wolf * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1745aba42fSKevin Wolf * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1845aba42fSKevin Wolf * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1945aba42fSKevin Wolf * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 2045aba42fSKevin Wolf * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 2145aba42fSKevin Wolf * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 2245aba42fSKevin Wolf * THE SOFTWARE. 2345aba42fSKevin Wolf */ 2445aba42fSKevin Wolf 2580c71a24SPeter Maydell #include "qemu/osdep.h" 2645aba42fSKevin Wolf #include <zlib.h> 2745aba42fSKevin Wolf 28c9a442e4SAlberto Garcia #include "qapi/error.h" 290d8c41daSMichael S. 
Tsirkin #include "qcow2.h" 3058369e22SPaolo Bonzini #include "qemu/bswap.h" 313cce16f4SKevin Wolf #include "trace.h" 3245aba42fSKevin Wolf 3346b732cdSPavel Butsykin int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t exact_size) 3446b732cdSPavel Butsykin { 3546b732cdSPavel Butsykin BDRVQcow2State *s = bs->opaque; 3646b732cdSPavel Butsykin int new_l1_size, i, ret; 3746b732cdSPavel Butsykin 3846b732cdSPavel Butsykin if (exact_size >= s->l1_size) { 3946b732cdSPavel Butsykin return 0; 4046b732cdSPavel Butsykin } 4146b732cdSPavel Butsykin 4246b732cdSPavel Butsykin new_l1_size = exact_size; 4346b732cdSPavel Butsykin 4446b732cdSPavel Butsykin #ifdef DEBUG_ALLOC2 4546b732cdSPavel Butsykin fprintf(stderr, "shrink l1_table from %d to %d\n", s->l1_size, new_l1_size); 4646b732cdSPavel Butsykin #endif 4746b732cdSPavel Butsykin 4846b732cdSPavel Butsykin BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE); 4946b732cdSPavel Butsykin ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset + 5046b732cdSPavel Butsykin new_l1_size * sizeof(uint64_t), 5146b732cdSPavel Butsykin (s->l1_size - new_l1_size) * sizeof(uint64_t), 0); 5246b732cdSPavel Butsykin if (ret < 0) { 5346b732cdSPavel Butsykin goto fail; 5446b732cdSPavel Butsykin } 5546b732cdSPavel Butsykin 5646b732cdSPavel Butsykin ret = bdrv_flush(bs->file->bs); 5746b732cdSPavel Butsykin if (ret < 0) { 5846b732cdSPavel Butsykin goto fail; 5946b732cdSPavel Butsykin } 6046b732cdSPavel Butsykin 6146b732cdSPavel Butsykin BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS); 6246b732cdSPavel Butsykin for (i = s->l1_size - 1; i > new_l1_size - 1; i--) { 6346b732cdSPavel Butsykin if ((s->l1_table[i] & L1E_OFFSET_MASK) == 0) { 6446b732cdSPavel Butsykin continue; 6546b732cdSPavel Butsykin } 6646b732cdSPavel Butsykin qcow2_free_clusters(bs, s->l1_table[i] & L1E_OFFSET_MASK, 6746b732cdSPavel Butsykin s->cluster_size, QCOW2_DISCARD_ALWAYS); 6846b732cdSPavel Butsykin s->l1_table[i] = 0; 6946b732cdSPavel Butsykin } 7046b732cdSPavel 
Butsykin return 0; 7146b732cdSPavel Butsykin 7246b732cdSPavel Butsykin fail: 7346b732cdSPavel Butsykin /* 7446b732cdSPavel Butsykin * If the write in the l1_table failed the image may contain a partially 7546b732cdSPavel Butsykin * overwritten l1_table. In this case it would be better to clear the 7646b732cdSPavel Butsykin * l1_table in memory to avoid possible image corruption. 7746b732cdSPavel Butsykin */ 7846b732cdSPavel Butsykin memset(s->l1_table + new_l1_size, 0, 7946b732cdSPavel Butsykin (s->l1_size - new_l1_size) * sizeof(uint64_t)); 8046b732cdSPavel Butsykin return ret; 8146b732cdSPavel Butsykin } 8246b732cdSPavel Butsykin 832cf7cfa1SKevin Wolf int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, 842cf7cfa1SKevin Wolf bool exact_size) 8545aba42fSKevin Wolf { 86ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 872cf7cfa1SKevin Wolf int new_l1_size2, ret, i; 8845aba42fSKevin Wolf uint64_t *new_l1_table; 89fda74f82SMax Reitz int64_t old_l1_table_offset, old_l1_size; 902cf7cfa1SKevin Wolf int64_t new_l1_table_offset, new_l1_size; 9145aba42fSKevin Wolf uint8_t data[12]; 9245aba42fSKevin Wolf 9372893756SStefan Hajnoczi if (min_size <= s->l1_size) 9445aba42fSKevin Wolf return 0; 9572893756SStefan Hajnoczi 96b93f9950SMax Reitz /* Do a sanity check on min_size before trying to calculate new_l1_size 97b93f9950SMax Reitz * (this prevents overflows during the while loop for the calculation of 98b93f9950SMax Reitz * new_l1_size) */ 99b93f9950SMax Reitz if (min_size > INT_MAX / sizeof(uint64_t)) { 100b93f9950SMax Reitz return -EFBIG; 101b93f9950SMax Reitz } 102b93f9950SMax Reitz 10372893756SStefan Hajnoczi if (exact_size) { 10472893756SStefan Hajnoczi new_l1_size = min_size; 10572893756SStefan Hajnoczi } else { 10672893756SStefan Hajnoczi /* Bump size up to reduce the number of times we have to grow */ 10772893756SStefan Hajnoczi new_l1_size = s->l1_size; 108d191d12dSStefan Weil if (new_l1_size == 0) { 109d191d12dSStefan Weil new_l1_size = 1; 
110d191d12dSStefan Weil } 11145aba42fSKevin Wolf while (min_size > new_l1_size) { 11221cf3e12SMarc-André Lureau new_l1_size = DIV_ROUND_UP(new_l1_size * 3, 2); 11345aba42fSKevin Wolf } 11472893756SStefan Hajnoczi } 11572893756SStefan Hajnoczi 11684c26520SMax Reitz QEMU_BUILD_BUG_ON(QCOW_MAX_L1_SIZE > INT_MAX); 11784c26520SMax Reitz if (new_l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) { 1182cf7cfa1SKevin Wolf return -EFBIG; 1192cf7cfa1SKevin Wolf } 1202cf7cfa1SKevin Wolf 12145aba42fSKevin Wolf #ifdef DEBUG_ALLOC2 1222cf7cfa1SKevin Wolf fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n", 1232cf7cfa1SKevin Wolf s->l1_size, new_l1_size); 12445aba42fSKevin Wolf #endif 12545aba42fSKevin Wolf 12645aba42fSKevin Wolf new_l1_size2 = sizeof(uint64_t) * new_l1_size; 127ef97d608SAlberto Garcia new_l1_table = qemu_try_blockalign(bs->file->bs, new_l1_size2); 128de82815dSKevin Wolf if (new_l1_table == NULL) { 129de82815dSKevin Wolf return -ENOMEM; 130de82815dSKevin Wolf } 131ef97d608SAlberto Garcia memset(new_l1_table, 0, new_l1_size2); 132de82815dSKevin Wolf 1330647d47cSStefan Hajnoczi if (s->l1_size) { 13445aba42fSKevin Wolf memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t)); 1350647d47cSStefan Hajnoczi } 13645aba42fSKevin Wolf 13745aba42fSKevin Wolf /* write new table (align to cluster) */ 13866f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE); 139ed6ccf0fSKevin Wolf new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2); 1405d757b56SKevin Wolf if (new_l1_table_offset < 0) { 141de82815dSKevin Wolf qemu_vfree(new_l1_table); 1425d757b56SKevin Wolf return new_l1_table_offset; 1435d757b56SKevin Wolf } 14429c1a730SKevin Wolf 14529c1a730SKevin Wolf ret = qcow2_cache_flush(bs, s->refcount_block_cache); 14629c1a730SKevin Wolf if (ret < 0) { 14780fa3341SKevin Wolf goto fail; 14829c1a730SKevin Wolf } 14945aba42fSKevin Wolf 150cf93980eSMax Reitz /* the L1 position has not yet been updated, so these clusters must 151cf93980eSMax Reitz * indeed 
be completely free */ 152231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset, 153966b000fSKevin Wolf new_l1_size2, false); 154cf93980eSMax Reitz if (ret < 0) { 155cf93980eSMax Reitz goto fail; 156cf93980eSMax Reitz } 157cf93980eSMax Reitz 15866f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE); 15945aba42fSKevin Wolf for(i = 0; i < s->l1_size; i++) 16045aba42fSKevin Wolf new_l1_table[i] = cpu_to_be64(new_l1_table[i]); 161d9ca2ea2SKevin Wolf ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, 1629a4f4c31SKevin Wolf new_l1_table, new_l1_size2); 1638b3b7206SKevin Wolf if (ret < 0) 16445aba42fSKevin Wolf goto fail; 16545aba42fSKevin Wolf for(i = 0; i < s->l1_size; i++) 16645aba42fSKevin Wolf new_l1_table[i] = be64_to_cpu(new_l1_table[i]); 16745aba42fSKevin Wolf 16845aba42fSKevin Wolf /* set new table */ 16966f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE); 170f1f7a1ddSPeter Maydell stl_be_p(data, new_l1_size); 171e4ef9f46SPeter Maydell stq_be_p(data + 4, new_l1_table_offset); 172d9ca2ea2SKevin Wolf ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), 1739a4f4c31SKevin Wolf data, sizeof(data)); 1748b3b7206SKevin Wolf if (ret < 0) { 17545aba42fSKevin Wolf goto fail; 176fb8fa77cSKevin Wolf } 177de82815dSKevin Wolf qemu_vfree(s->l1_table); 178fda74f82SMax Reitz old_l1_table_offset = s->l1_table_offset; 17945aba42fSKevin Wolf s->l1_table_offset = new_l1_table_offset; 18045aba42fSKevin Wolf s->l1_table = new_l1_table; 181fda74f82SMax Reitz old_l1_size = s->l1_size; 18245aba42fSKevin Wolf s->l1_size = new_l1_size; 183fda74f82SMax Reitz qcow2_free_clusters(bs, old_l1_table_offset, old_l1_size * sizeof(uint64_t), 184fda74f82SMax Reitz QCOW2_DISCARD_OTHER); 18545aba42fSKevin Wolf return 0; 18645aba42fSKevin Wolf fail: 187de82815dSKevin Wolf qemu_vfree(new_l1_table); 1886cfcb9b8SKevin Wolf qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2, 1896cfcb9b8SKevin Wolf QCOW2_DISCARD_OTHER); 
1908b3b7206SKevin Wolf return ret; 19145aba42fSKevin Wolf } 19245aba42fSKevin Wolf 19345aba42fSKevin Wolf /* 19445aba42fSKevin Wolf * l2_load 19545aba42fSKevin Wolf * 196e2b5713eSAlberto Garcia * @bs: The BlockDriverState 197e2b5713eSAlberto Garcia * @offset: A guest offset, used to calculate what slice of the L2 198e2b5713eSAlberto Garcia * table to load. 199e2b5713eSAlberto Garcia * @l2_offset: Offset to the L2 table in the image file. 200e2b5713eSAlberto Garcia * @l2_slice: Location to store the pointer to the L2 slice. 20145aba42fSKevin Wolf * 202e2b5713eSAlberto Garcia * Loads a L2 slice into memory (L2 slices are the parts of L2 tables 203e2b5713eSAlberto Garcia * that are loaded by the qcow2 cache). If the slice is in the cache, 204e2b5713eSAlberto Garcia * the cache is used; otherwise the L2 slice is loaded from the image 205e2b5713eSAlberto Garcia * file. 20645aba42fSKevin Wolf */ 207e2b5713eSAlberto Garcia static int l2_load(BlockDriverState *bs, uint64_t offset, 208e2b5713eSAlberto Garcia uint64_t l2_offset, uint64_t **l2_slice) 20945aba42fSKevin Wolf { 210ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 211c8fd8554SAlberto Garcia int start_of_slice = l2_entry_size(s) * 212e2b5713eSAlberto Garcia (offset_to_l2_index(s, offset) - offset_to_l2_slice_index(s, offset)); 21345aba42fSKevin Wolf 214e2b5713eSAlberto Garcia return qcow2_cache_get(bs, s->l2_table_cache, l2_offset + start_of_slice, 215e2b5713eSAlberto Garcia (void **)l2_slice); 21655c17e98SKevin Wolf } 21755c17e98SKevin Wolf 21845aba42fSKevin Wolf /* 219da86f8cbSAlberto Garcia * Writes an L1 entry to disk (note that depending on the alignment 220da86f8cbSAlberto Garcia * requirements this function may write more that just one entry in 221da86f8cbSAlberto Garcia * order to prevent bdrv_pwrite from performing a read-modify-write) 2226583e3c7SKevin Wolf */ 223e23e400eSMax Reitz int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index) 2246583e3c7SKevin Wolf { 225ff99129aSKevin Wolf 
BDRVQcow2State *s = bs->opaque; 2266583e3c7SKevin Wolf int l1_start_index; 227f7defcb6SKevin Wolf int i, ret; 228da86f8cbSAlberto Garcia int bufsize = MAX(sizeof(uint64_t), 229da86f8cbSAlberto Garcia MIN(bs->file->bs->bl.request_alignment, s->cluster_size)); 230da86f8cbSAlberto Garcia int nentries = bufsize / sizeof(uint64_t); 231da86f8cbSAlberto Garcia g_autofree uint64_t *buf = g_try_new0(uint64_t, nentries); 2326583e3c7SKevin Wolf 233da86f8cbSAlberto Garcia if (buf == NULL) { 234da86f8cbSAlberto Garcia return -ENOMEM; 235da86f8cbSAlberto Garcia } 236da86f8cbSAlberto Garcia 237da86f8cbSAlberto Garcia l1_start_index = QEMU_ALIGN_DOWN(l1_index, nentries); 238da86f8cbSAlberto Garcia for (i = 0; i < MIN(nentries, s->l1_size - l1_start_index); i++) { 2396583e3c7SKevin Wolf buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]); 2406583e3c7SKevin Wolf } 2416583e3c7SKevin Wolf 242231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1, 243da86f8cbSAlberto Garcia s->l1_table_offset + 8 * l1_start_index, bufsize, false); 244cf93980eSMax Reitz if (ret < 0) { 245cf93980eSMax Reitz return ret; 246cf93980eSMax Reitz } 247cf93980eSMax Reitz 24866f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 249d9ca2ea2SKevin Wolf ret = bdrv_pwrite_sync(bs->file, 2509a4f4c31SKevin Wolf s->l1_table_offset + 8 * l1_start_index, 251da86f8cbSAlberto Garcia buf, bufsize); 252f7defcb6SKevin Wolf if (ret < 0) { 253f7defcb6SKevin Wolf return ret; 2546583e3c7SKevin Wolf } 2556583e3c7SKevin Wolf 2566583e3c7SKevin Wolf return 0; 2576583e3c7SKevin Wolf } 2586583e3c7SKevin Wolf 2596583e3c7SKevin Wolf /* 26045aba42fSKevin Wolf * l2_allocate 26145aba42fSKevin Wolf * 26245aba42fSKevin Wolf * Allocate a new l2 entry in the file. If l1_index points to an already 26345aba42fSKevin Wolf * used entry in the L2 table (i.e. we are doing a copy on write for the L2 26445aba42fSKevin Wolf * table) copy the contents of the old L2 table into the newly allocated one. 
26545aba42fSKevin Wolf * Otherwise the new table is initialized with zeros. 26645aba42fSKevin Wolf * 26745aba42fSKevin Wolf */ 26845aba42fSKevin Wolf 2693861946aSAlberto Garcia static int l2_allocate(BlockDriverState *bs, int l1_index) 27045aba42fSKevin Wolf { 271ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 2726583e3c7SKevin Wolf uint64_t old_l2_offset; 2733861946aSAlberto Garcia uint64_t *l2_slice = NULL; 2743861946aSAlberto Garcia unsigned slice, slice_size2, n_slices; 275f4f0d391SKevin Wolf int64_t l2_offset; 276c46e1167SKevin Wolf int ret; 27745aba42fSKevin Wolf 27845aba42fSKevin Wolf old_l2_offset = s->l1_table[l1_index]; 27945aba42fSKevin Wolf 2803cce16f4SKevin Wolf trace_qcow2_l2_allocate(bs, l1_index); 2813cce16f4SKevin Wolf 28245aba42fSKevin Wolf /* allocate a new l2 entry */ 28345aba42fSKevin Wolf 284c8fd8554SAlberto Garcia l2_offset = qcow2_alloc_clusters(bs, s->l2_size * l2_entry_size(s)); 2855d757b56SKevin Wolf if (l2_offset < 0) { 286be0b742eSMax Reitz ret = l2_offset; 287be0b742eSMax Reitz goto fail; 2885d757b56SKevin Wolf } 28929c1a730SKevin Wolf 290c1c43990SAlberto Garcia /* The offset must fit in the offset field of the L1 table entry */ 291c1c43990SAlberto Garcia assert((l2_offset & L1E_OFFSET_MASK) == l2_offset); 292c1c43990SAlberto Garcia 29398839750SAlberto Garcia /* If we're allocating the table at offset 0 then something is wrong */ 29498839750SAlberto Garcia if (l2_offset == 0) { 29598839750SAlberto Garcia qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid " 29698839750SAlberto Garcia "allocation of L2 table at offset 0"); 29798839750SAlberto Garcia ret = -EIO; 29898839750SAlberto Garcia goto fail; 29998839750SAlberto Garcia } 30098839750SAlberto Garcia 30129c1a730SKevin Wolf ret = qcow2_cache_flush(bs, s->refcount_block_cache); 30229c1a730SKevin Wolf if (ret < 0) { 30329c1a730SKevin Wolf goto fail; 30429c1a730SKevin Wolf } 30545aba42fSKevin Wolf 30645aba42fSKevin Wolf /* allocate a new entry in the l2 cache */ 
30745aba42fSKevin Wolf 308c8fd8554SAlberto Garcia slice_size2 = s->l2_slice_size * l2_entry_size(s); 3093861946aSAlberto Garcia n_slices = s->cluster_size / slice_size2; 3103861946aSAlberto Garcia 3113cce16f4SKevin Wolf trace_qcow2_l2_allocate_get_empty(bs, l1_index); 3123861946aSAlberto Garcia for (slice = 0; slice < n_slices; slice++) { 3136580bb09SAlberto Garcia ret = qcow2_cache_get_empty(bs, s->l2_table_cache, 3143861946aSAlberto Garcia l2_offset + slice * slice_size2, 3153861946aSAlberto Garcia (void **) &l2_slice); 31629c1a730SKevin Wolf if (ret < 0) { 317be0b742eSMax Reitz goto fail; 31829c1a730SKevin Wolf } 31929c1a730SKevin Wolf 3208e37f681SKevin Wolf if ((old_l2_offset & L1E_OFFSET_MASK) == 0) { 3213861946aSAlberto Garcia /* if there was no old l2 table, clear the new slice */ 3223861946aSAlberto Garcia memset(l2_slice, 0, slice_size2); 32345aba42fSKevin Wolf } else { 3243861946aSAlberto Garcia uint64_t *old_slice; 3253861946aSAlberto Garcia uint64_t old_l2_slice_offset = 3263861946aSAlberto Garcia (old_l2_offset & L1E_OFFSET_MASK) + slice * slice_size2; 32729c1a730SKevin Wolf 3283861946aSAlberto Garcia /* if there was an old l2 table, read a slice from the disk */ 32966f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ); 3303861946aSAlberto Garcia ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_slice_offset, 3313861946aSAlberto Garcia (void **) &old_slice); 33229c1a730SKevin Wolf if (ret < 0) { 33329c1a730SKevin Wolf goto fail; 33429c1a730SKevin Wolf } 33529c1a730SKevin Wolf 3363861946aSAlberto Garcia memcpy(l2_slice, old_slice, slice_size2); 33729c1a730SKevin Wolf 3383861946aSAlberto Garcia qcow2_cache_put(s->l2_table_cache, (void **) &old_slice); 33945aba42fSKevin Wolf } 34029c1a730SKevin Wolf 3413861946aSAlberto Garcia /* write the l2 slice to the file */ 34266f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); 34329c1a730SKevin Wolf 3443cce16f4SKevin Wolf trace_qcow2_l2_allocate_write_l2(bs, l1_index); 
3453861946aSAlberto Garcia qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); 3463861946aSAlberto Garcia qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 3476580bb09SAlberto Garcia } 3486580bb09SAlberto Garcia 34929c1a730SKevin Wolf ret = qcow2_cache_flush(bs, s->l2_table_cache); 350c46e1167SKevin Wolf if (ret < 0) { 351175e1152SKevin Wolf goto fail; 352175e1152SKevin Wolf } 353175e1152SKevin Wolf 354175e1152SKevin Wolf /* update the L1 entry */ 3553cce16f4SKevin Wolf trace_qcow2_l2_allocate_write_l1(bs, l1_index); 356175e1152SKevin Wolf s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; 357e23e400eSMax Reitz ret = qcow2_write_l1_entry(bs, l1_index); 358175e1152SKevin Wolf if (ret < 0) { 359175e1152SKevin Wolf goto fail; 360c46e1167SKevin Wolf } 36145aba42fSKevin Wolf 3623cce16f4SKevin Wolf trace_qcow2_l2_allocate_done(bs, l1_index, 0); 363c46e1167SKevin Wolf return 0; 364175e1152SKevin Wolf 365175e1152SKevin Wolf fail: 3663cce16f4SKevin Wolf trace_qcow2_l2_allocate_done(bs, l1_index, ret); 3673861946aSAlberto Garcia if (l2_slice != NULL) { 3683861946aSAlberto Garcia qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 3698585afd8SMax Reitz } 37068dba0bfSKevin Wolf s->l1_table[l1_index] = old_l2_offset; 371e3b21ef9SMax Reitz if (l2_offset > 0) { 372c8fd8554SAlberto Garcia qcow2_free_clusters(bs, l2_offset, s->l2_size * l2_entry_size(s), 373e3b21ef9SMax Reitz QCOW2_DISCARD_ALWAYS); 374e3b21ef9SMax Reitz } 375175e1152SKevin Wolf return ret; 37645aba42fSKevin Wolf } 37745aba42fSKevin Wolf 3782bfcc4a0SKevin Wolf /* 37970d1cbaeSAlberto Garcia * For a given L2 entry, count the number of contiguous subclusters of 38070d1cbaeSAlberto Garcia * the same type starting from @sc_from. Compressed clusters are 38170d1cbaeSAlberto Garcia * treated as if they were divided into subclusters of size 38270d1cbaeSAlberto Garcia * s->subcluster_size. 
38370d1cbaeSAlberto Garcia * 38470d1cbaeSAlberto Garcia * Return the number of contiguous subclusters and set @type to the 38570d1cbaeSAlberto Garcia * subcluster type. 38670d1cbaeSAlberto Garcia * 38770d1cbaeSAlberto Garcia * If the L2 entry is invalid return -errno and set @type to 38870d1cbaeSAlberto Garcia * QCOW2_SUBCLUSTER_INVALID. 38970d1cbaeSAlberto Garcia */ 39070d1cbaeSAlberto Garcia static int qcow2_get_subcluster_range_type(BlockDriverState *bs, 39170d1cbaeSAlberto Garcia uint64_t l2_entry, 39270d1cbaeSAlberto Garcia uint64_t l2_bitmap, 39370d1cbaeSAlberto Garcia unsigned sc_from, 39470d1cbaeSAlberto Garcia QCow2SubclusterType *type) 39570d1cbaeSAlberto Garcia { 39670d1cbaeSAlberto Garcia BDRVQcow2State *s = bs->opaque; 39770d1cbaeSAlberto Garcia uint32_t val; 39870d1cbaeSAlberto Garcia 39970d1cbaeSAlberto Garcia *type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_from); 40070d1cbaeSAlberto Garcia 40170d1cbaeSAlberto Garcia if (*type == QCOW2_SUBCLUSTER_INVALID) { 40270d1cbaeSAlberto Garcia return -EINVAL; 40370d1cbaeSAlberto Garcia } else if (!has_subclusters(s) || *type == QCOW2_SUBCLUSTER_COMPRESSED) { 40470d1cbaeSAlberto Garcia return s->subclusters_per_cluster - sc_from; 40570d1cbaeSAlberto Garcia } 40670d1cbaeSAlberto Garcia 40770d1cbaeSAlberto Garcia switch (*type) { 40870d1cbaeSAlberto Garcia case QCOW2_SUBCLUSTER_NORMAL: 40970d1cbaeSAlberto Garcia val = l2_bitmap | QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from); 41070d1cbaeSAlberto Garcia return cto32(val) - sc_from; 41170d1cbaeSAlberto Garcia 41270d1cbaeSAlberto Garcia case QCOW2_SUBCLUSTER_ZERO_PLAIN: 41370d1cbaeSAlberto Garcia case QCOW2_SUBCLUSTER_ZERO_ALLOC: 41470d1cbaeSAlberto Garcia val = (l2_bitmap | QCOW_OFLAG_SUB_ZERO_RANGE(0, sc_from)) >> 32; 41570d1cbaeSAlberto Garcia return cto32(val) - sc_from; 41670d1cbaeSAlberto Garcia 41770d1cbaeSAlberto Garcia case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: 41870d1cbaeSAlberto Garcia case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: 41970d1cbaeSAlberto 
Garcia val = ((l2_bitmap >> 32) | l2_bitmap) 42070d1cbaeSAlberto Garcia & ~QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from); 42170d1cbaeSAlberto Garcia return ctz32(val) - sc_from; 42270d1cbaeSAlberto Garcia 42370d1cbaeSAlberto Garcia default: 42470d1cbaeSAlberto Garcia g_assert_not_reached(); 42570d1cbaeSAlberto Garcia } 42670d1cbaeSAlberto Garcia } 42770d1cbaeSAlberto Garcia 42870d1cbaeSAlberto Garcia /* 4293f9c6b3bSAlberto Garcia * Return the number of contiguous subclusters of the exact same type 4303f9c6b3bSAlberto Garcia * in a given L2 slice, starting from cluster @l2_index, subcluster 4313f9c6b3bSAlberto Garcia * @sc_index. Allocated subclusters are required to be contiguous in 4323f9c6b3bSAlberto Garcia * the image file. 4333f9c6b3bSAlberto Garcia * At most @nb_clusters are checked (note that this means clusters, 4343f9c6b3bSAlberto Garcia * not subclusters). 4353f9c6b3bSAlberto Garcia * Compressed clusters are always processed one by one but for the 4363f9c6b3bSAlberto Garcia * purpose of this count they are treated as if they were divided into 4373f9c6b3bSAlberto Garcia * subclusters of size s->subcluster_size. 4383f9c6b3bSAlberto Garcia * On failure return -errno and update @l2_index to point to the 4393f9c6b3bSAlberto Garcia * invalid entry. 
4402bfcc4a0SKevin Wolf */ 4413f9c6b3bSAlberto Garcia static int count_contiguous_subclusters(BlockDriverState *bs, int nb_clusters, 4423f9c6b3bSAlberto Garcia unsigned sc_index, uint64_t *l2_slice, 4433f9c6b3bSAlberto Garcia unsigned *l2_index) 44445aba42fSKevin Wolf { 44512c6aebeSAlberto Garcia BDRVQcow2State *s = bs->opaque; 4463f9c6b3bSAlberto Garcia int i, count = 0; 4473f9c6b3bSAlberto Garcia bool check_offset = false; 4483f9c6b3bSAlberto Garcia uint64_t expected_offset = 0; 4493f9c6b3bSAlberto Garcia QCow2SubclusterType expected_type = QCOW2_SUBCLUSTER_NORMAL, type; 45045aba42fSKevin Wolf 4513f9c6b3bSAlberto Garcia assert(*l2_index + nb_clusters <= s->l2_slice_size); 45215684a47SMax Reitz 45361653008SKevin Wolf for (i = 0; i < nb_clusters; i++) { 4543f9c6b3bSAlberto Garcia unsigned first_sc = (i == 0) ? sc_index : 0; 4553f9c6b3bSAlberto Garcia uint64_t l2_entry = get_l2_entry(s, l2_slice, *l2_index + i); 4563f9c6b3bSAlberto Garcia uint64_t l2_bitmap = get_l2_bitmap(s, l2_slice, *l2_index + i); 4573f9c6b3bSAlberto Garcia int ret = qcow2_get_subcluster_range_type(bs, l2_entry, l2_bitmap, 4583f9c6b3bSAlberto Garcia first_sc, &type); 4593f9c6b3bSAlberto Garcia if (ret < 0) { 4603f9c6b3bSAlberto Garcia *l2_index += i; /* Point to the invalid entry */ 4613f9c6b3bSAlberto Garcia return -EIO; 4623f9c6b3bSAlberto Garcia } 4633f9c6b3bSAlberto Garcia if (i == 0) { 4643f9c6b3bSAlberto Garcia if (type == QCOW2_SUBCLUSTER_COMPRESSED) { 4653f9c6b3bSAlberto Garcia /* Compressed clusters are always processed one by one */ 4663f9c6b3bSAlberto Garcia return ret; 4673f9c6b3bSAlberto Garcia } 4683f9c6b3bSAlberto Garcia expected_type = type; 4693f9c6b3bSAlberto Garcia expected_offset = l2_entry & L2E_OFFSET_MASK; 4703f9c6b3bSAlberto Garcia check_offset = (type == QCOW2_SUBCLUSTER_NORMAL || 4713f9c6b3bSAlberto Garcia type == QCOW2_SUBCLUSTER_ZERO_ALLOC || 4723f9c6b3bSAlberto Garcia type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC); 4733f9c6b3bSAlberto Garcia } else if (type != 
expected_type) { 4743f9c6b3bSAlberto Garcia break; 4753f9c6b3bSAlberto Garcia } else if (check_offset) { 4763f9c6b3bSAlberto Garcia expected_offset += s->cluster_size; 4773f9c6b3bSAlberto Garcia if (expected_offset != (l2_entry & L2E_OFFSET_MASK)) { 4783f9c6b3bSAlberto Garcia break; 4793f9c6b3bSAlberto Garcia } 4803f9c6b3bSAlberto Garcia } 4813f9c6b3bSAlberto Garcia count += ret; 4823f9c6b3bSAlberto Garcia /* Stop if there are type changes before the end of the cluster */ 4833f9c6b3bSAlberto Garcia if (first_sc + ret < s->subclusters_per_cluster) { 48445aba42fSKevin Wolf break; 4852bfcc4a0SKevin Wolf } 4862bfcc4a0SKevin Wolf } 48745aba42fSKevin Wolf 4883f9c6b3bSAlberto Garcia return count; 48945aba42fSKevin Wolf } 49045aba42fSKevin Wolf 491672f0f2cSAlberto Garcia static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, 492aaa4d20bSKevin Wolf uint64_t src_cluster_offset, 493e034f5bcSAlberto Garcia unsigned offset_in_cluster, 49486b862c4SAlberto Garcia QEMUIOVector *qiov) 49545aba42fSKevin Wolf { 496aaa4d20bSKevin Wolf int ret; 4971b9f1491SKevin Wolf 49886b862c4SAlberto Garcia if (qiov->size == 0) { 49999450c6fSAlberto Garcia return 0; 50099450c6fSAlberto Garcia } 50199450c6fSAlberto Garcia 50266f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_COW_READ); 503aef4acb6SStefan Hajnoczi 504dba28555SMax Reitz if (!bs->drv) { 505672f0f2cSAlberto Garcia return -ENOMEDIUM; 506dba28555SMax Reitz } 507dba28555SMax Reitz 508aef4acb6SStefan Hajnoczi /* Call .bdrv_co_readv() directly instead of using the public block-layer 509aef4acb6SStefan Hajnoczi * interface. This avoids double I/O throttling and request tracking, 510aef4acb6SStefan Hajnoczi * which can lead to deadlock when block layer copy-on-read is enabled. 
511aef4acb6SStefan Hajnoczi */ 512df893d25SVladimir Sementsov-Ogievskiy ret = bs->drv->bdrv_co_preadv_part(bs, 513df893d25SVladimir Sementsov-Ogievskiy src_cluster_offset + offset_in_cluster, 514df893d25SVladimir Sementsov-Ogievskiy qiov->size, qiov, 0, 0); 5151b9f1491SKevin Wolf if (ret < 0) { 516672f0f2cSAlberto Garcia return ret; 5171b9f1491SKevin Wolf } 5181b9f1491SKevin Wolf 519672f0f2cSAlberto Garcia return 0; 520672f0f2cSAlberto Garcia } 521672f0f2cSAlberto Garcia 522672f0f2cSAlberto Garcia static int coroutine_fn do_perform_cow_write(BlockDriverState *bs, 523672f0f2cSAlberto Garcia uint64_t cluster_offset, 524672f0f2cSAlberto Garcia unsigned offset_in_cluster, 52586b862c4SAlberto Garcia QEMUIOVector *qiov) 526672f0f2cSAlberto Garcia { 527966b000fSKevin Wolf BDRVQcow2State *s = bs->opaque; 528672f0f2cSAlberto Garcia int ret; 529672f0f2cSAlberto Garcia 53086b862c4SAlberto Garcia if (qiov->size == 0) { 531672f0f2cSAlberto Garcia return 0; 532672f0f2cSAlberto Garcia } 533672f0f2cSAlberto Garcia 534231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, 0, 535966b000fSKevin Wolf cluster_offset + offset_in_cluster, qiov->size, true); 536cf93980eSMax Reitz if (ret < 0) { 537672f0f2cSAlberto Garcia return ret; 538cf93980eSMax Reitz } 539cf93980eSMax Reitz 54066f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE); 541966b000fSKevin Wolf ret = bdrv_co_pwritev(s->data_file, cluster_offset + offset_in_cluster, 54286b862c4SAlberto Garcia qiov->size, qiov, 0); 5431b9f1491SKevin Wolf if (ret < 0) { 544672f0f2cSAlberto Garcia return ret; 5451b9f1491SKevin Wolf } 5461b9f1491SKevin Wolf 547672f0f2cSAlberto Garcia return 0; 54845aba42fSKevin Wolf } 54945aba42fSKevin Wolf 55045aba42fSKevin Wolf 55145aba42fSKevin Wolf /* 552388e5816SAlberto Garcia * get_host_offset 55345aba42fSKevin Wolf * 554388e5816SAlberto Garcia * For a given offset of the virtual disk find the equivalent host 555388e5816SAlberto Garcia * offset in the qcow2 file and store it in *host_offset. 
Neither 556388e5816SAlberto Garcia * offset needs to be aligned to a cluster boundary. 557388e5816SAlberto Garcia * 558388e5816SAlberto Garcia * If the cluster is unallocated then *host_offset will be 0. 559388e5816SAlberto Garcia * If the cluster is compressed then *host_offset will contain the 560388e5816SAlberto Garcia * complete compressed cluster descriptor. 56145aba42fSKevin Wolf * 562ecfe1863SKevin Wolf * On entry, *bytes is the maximum number of contiguous bytes starting at 563ecfe1863SKevin Wolf * offset that we are interested in. 56445aba42fSKevin Wolf * 565ecfe1863SKevin Wolf * On exit, *bytes is the number of bytes starting at offset that have the same 56610dabdc5SAlberto Garcia * subcluster type and (if applicable) are stored contiguously in the image 56710dabdc5SAlberto Garcia * file. The subcluster type is stored in *subcluster_type. 56810dabdc5SAlberto Garcia * Compressed clusters are always processed one by one. 56945aba42fSKevin Wolf * 570ca4a0bb8SAlberto Garcia * Returns 0 on success, -errno in error cases. 
 */
int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset,
                          unsigned int *bytes, uint64_t *host_offset,
                          QCow2SubclusterType *subcluster_type)
{
    BDRVQcow2State *s = bs->opaque;
    unsigned int l2_index, sc_index;
    uint64_t l1_index, l2_offset, *l2_slice, l2_entry, l2_bitmap;
    int sc;
    unsigned int offset_in_cluster;
    uint64_t bytes_available, bytes_needed, nb_clusters;
    QCow2SubclusterType type;
    int ret;

    offset_in_cluster = offset_into_cluster(s, offset);
    bytes_needed = (uint64_t) *bytes + offset_in_cluster;

    /* compute how many bytes there are between the start of the cluster
     * containing offset and the end of the l2 slice that contains
     * the entry pointing to it */
    bytes_available =
        ((uint64_t) (s->l2_slice_size - offset_to_l2_slice_index(s, offset)))
        << s->cluster_bits;

    if (bytes_needed > bytes_available) {
        bytes_needed = bytes_available;
    }

    /* Default: no host mapping; only overwritten for allocated/compressed
     * cluster types below */
    *host_offset = 0;

    /* seek to the l2 offset in the l1 table */

    l1_index = offset_to_l1_index(s, offset);
    if (l1_index >= s->l1_size) {
        /* Beyond the end of the L1 table: nothing is allocated there */
        type = QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN;
        goto out;
    }

    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
    if (!l2_offset) {
        /* Empty L1 entry: no L2 table, hence nothing allocated */
        type = QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN;
        goto out;
    }

    if (offset_into_cluster(s, l2_offset)) {
        qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
                                " unaligned (L1 index: %#" PRIx64 ")",
                                l2_offset, l1_index);
        return -EIO;
    }

    /* load the l2 slice in memory */

    ret = l2_load(bs, offset, l2_offset, &l2_slice);
    if (ret < 0) {
        return ret;
    }

    /* find the cluster offset for the given disk offset */

    l2_index = offset_to_l2_slice_index(s, offset);
    sc_index = offset_to_sc_index(s, offset);
    l2_entry = get_l2_entry(s, l2_slice, l2_index);
    l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);

    nb_clusters = size_to_clusters(s, bytes_needed);
    /* bytes_needed <= *bytes + offset_in_cluster, both of which are unsigned
     * integers; the minimum cluster size is 512, so this assertion is always
     * true */
    assert(nb_clusters <= INT_MAX);

    type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);
    if (s->qcow_version < 3 && (type == QCOW2_SUBCLUSTER_ZERO_PLAIN ||
                                type == QCOW2_SUBCLUSTER_ZERO_ALLOC)) {
        /* Zero clusters only exist since qcow2 v3; in a v2 image this
         * indicates on-disk corruption */
        qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
                                " in pre-v3 image (L2 offset: %#" PRIx64
                                ", L2 index: %#x)", l2_offset, l2_index);
        ret = -EIO;
        goto fail;
    }
    switch (type) {
    case QCOW2_SUBCLUSTER_INVALID:
        break; /* This is handled by count_contiguous_subclusters() below */
    case QCOW2_SUBCLUSTER_COMPRESSED:
        if (has_data_file(bs)) {
            qcow2_signal_corruption(bs, true, -1, -1, "Compressed cluster "
                                    "entry found in image with external data "
                                    "file (L2 offset: %#" PRIx64 ", L2 index: "
                                    "%#x)", l2_offset, l2_index);
            ret = -EIO;
            goto fail;
        }
        /* For compressed clusters the "host offset" is the full compressed
         * cluster descriptor (offset + size fields) */
        *host_offset = l2_entry & L2E_COMPRESSED_OFFSET_SIZE_MASK;
        break;
    case QCOW2_SUBCLUSTER_ZERO_PLAIN:
    case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
        break;
    case QCOW2_SUBCLUSTER_ZERO_ALLOC:
    case QCOW2_SUBCLUSTER_NORMAL:
    case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: {
        uint64_t host_cluster_offset = l2_entry & L2E_OFFSET_MASK;
        *host_offset = host_cluster_offset + offset_in_cluster;
        if (offset_into_cluster(s, host_cluster_offset)) {
            qcow2_signal_corruption(bs, true, -1, -1,
                                    "Cluster allocation offset %#"
                                    PRIx64 " unaligned (L2 offset: %#" PRIx64
                                    ", L2 index: %#x)", host_cluster_offset,
                                    l2_offset, l2_index);
            ret = -EIO;
            goto fail;
        }
        /* With an external data file, guest offsets map 1:1 to host offsets */
        if (has_data_file(bs) && *host_offset != offset) {
            qcow2_signal_corruption(bs, true, -1, -1,
                                    "External data file host cluster offset %#"
                                    PRIx64 " does not match guest cluster "
                                    "offset: %#" PRIx64
                                    ", L2 index: %#x)", host_cluster_offset,
                                    offset - offset_in_cluster, l2_index);
            ret = -EIO;
            goto fail;
        }
        break;
    }
    default:
        abort();
    }

    sc = count_contiguous_subclusters(bs, nb_clusters, sc_index,
                                      l2_slice, &l2_index);
    if (sc < 0) {
        qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster entry found "
                                " (L2 offset: %#" PRIx64 ", L2 index: %#x)",
                                l2_offset, l2_index);
        ret = -EIO;
        goto fail;
    }
    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

    bytes_available = ((int64_t)sc + sc_index) << s->subcluster_bits;

out:
    /* Clamp to the request and convert back from "bytes from cluster start"
     * to "bytes from the requested offset" */
    if (bytes_available > bytes_needed) {
        bytes_available = bytes_needed;
    }

    /* bytes_available <= bytes_needed <= *bytes + offset_in_cluster;
     * subtracting offset_in_cluster will therefore definitely yield something
     * not exceeding UINT_MAX */
    assert(bytes_available - offset_in_cluster <= UINT_MAX);
    *bytes = bytes_available - offset_in_cluster;

    *subcluster_type = type;

    return 0;

fail:
    qcow2_cache_put(s->l2_table_cache, (void **)&l2_slice);
    return ret;
}
/*
 * get_cluster_table
 *
 * for a given disk offset, load (and allocate if needed)
 * the appropriate slice of its l2 table.
 *
 * the cluster index in the l2 slice is given to the caller.
 *
 * On success, *new_l2_slice holds a reference taken from the L2 table
 * cache (the caller must drop it with qcow2_cache_put()) and
 * *new_l2_index is the index of the cluster's entry within that slice.
 *
 * Returns 0 on success, -errno in failure case
 */
static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
                             uint64_t **new_l2_slice,
                             int *new_l2_index)
{
    BDRVQcow2State *s = bs->opaque;
    unsigned int l2_index;
    uint64_t l1_index, l2_offset;
    uint64_t *l2_slice = NULL;
    int ret;

    /* seek to the l2 offset in the l1 table */

    l1_index = offset_to_l1_index(s, offset);
    if (l1_index >= s->l1_size) {
        /* Grow the L1 table so that it can hold an entry for this offset */
        ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
        if (ret < 0) {
            return ret;
        }
    }

    assert(l1_index < s->l1_size);
    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
    if (offset_into_cluster(s, l2_offset)) {
        qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
                                " unaligned (L1 index: %#" PRIx64 ")",
                                l2_offset, l1_index);
        return -EIO;
    }

    /* Without QCOW_OFLAG_COPIED the L2 table is shared (or missing) and
     * must not be written to in place */
    if (!(s->l1_table[l1_index] & QCOW_OFLAG_COPIED)) {
        /* First allocate a new L2 table (and do COW if needed) */
        ret = l2_allocate(bs, l1_index);
        if (ret < 0) {
            return ret;
        }

        /* Then decrease the refcount of the old table */
        if (l2_offset) {
            qcow2_free_clusters(bs, l2_offset, s->l2_size * l2_entry_size(s),
                                QCOW2_DISCARD_OTHER);
        }

        /* Get the offset of the newly-allocated l2 table */
        l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
        assert(offset_into_cluster(s, l2_offset) == 0);
    }

    /* load the l2 slice in memory */
    ret = l2_load(bs, offset, l2_offset, &l2_slice);
    if (ret < 0) {
        return ret;
    }

    /* find the cluster offset for the given disk offset */

    l2_index = offset_to_l2_slice_index(s, offset);

    *new_l2_slice = l2_slice;
    *new_l2_index = l2_index;

    return 0;
}
/*
 * alloc_compressed_cluster_offset
 *
 * For a given offset on the virtual disk, allocate a new compressed cluster
 * and put the host offset of the cluster into *host_offset. If a cluster is
 * already allocated at the offset, return an error.
 *
 * Return 0 on success and -errno in error cases
 */
int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                          uint64_t offset,
                                          int compressed_size,
                                          uint64_t *host_offset)
{
    BDRVQcow2State *s = bs->opaque;
    int l2_index, ret;
    uint64_t *l2_slice;
    int64_t cluster_offset;
    int nb_csectors;

    /* Images with an external data file do not support compressed clusters;
     * nothing to allocate in that case */
    if (has_data_file(bs)) {
        return 0;
    }

    ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
    if (ret < 0) {
        return ret;
    }

    /* Compression can't overwrite anything. Fail if the cluster was already
     * allocated. */
    cluster_offset = get_l2_entry(s, l2_slice, l2_index);
    if (cluster_offset & L2E_OFFSET_MASK) {
        qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
        return -EIO;
    }

    cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
    if (cluster_offset < 0) {
        /* Negative value is the -errno from the allocator */
        qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
        return cluster_offset;
    }

    /* Number of 512-byte compressed sectors occupied beyond the first one */
    nb_csectors =
        (cluster_offset + compressed_size - 1) / QCOW2_COMPRESSED_SECTOR_SIZE -
        (cluster_offset / QCOW2_COMPRESSED_SECTOR_SIZE);

    /* The offset and size must fit in their fields of the L2 table entry */
    assert((cluster_offset & s->cluster_offset_mask) == cluster_offset);
    assert((nb_csectors & s->csize_mask) == nb_csectors);

    cluster_offset |= QCOW_OFLAG_COMPRESSED |
                      ((uint64_t)nb_csectors << s->csize_shift);

    /* update L2 table */

    /* compressed clusters never have the copied flag */

    BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
    set_l2_entry(s, l2_slice, l2_index, cluster_offset);
    if (has_subclusters(s)) {
        /* Compressed clusters carry no subcluster allocation state */
        set_l2_bitmap(s, l2_slice, l2_index, 0);
    }
    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

    *host_offset = cluster_offset & s->cluster_offset_mask;
    return 0;
}
/*
 * Perform the copy-on-write for the regions described by @m (cow_start
 * and cow_end): read the old data, encrypt it if the image is encrypted,
 * and write it to the newly allocated cluster(s). If the guest data is
 * available in m->data_qiov, COW regions and guest data are written in a
 * single operation.
 *
 * Called with s->lock held; the lock is dropped around the actual I/O
 * and reacquired before returning.
 *
 * Returns 0 on success, -errno on failure.
 */
static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
{
    BDRVQcow2State *s = bs->opaque;
    Qcow2COWRegion *start = &m->cow_start;
    Qcow2COWRegion *end = &m->cow_end;
    unsigned buffer_size;
    /* Length of the guest-data gap between the two COW regions */
    unsigned data_bytes = end->offset - (start->offset + start->nb_bytes);
    bool merge_reads;
    uint8_t *start_buffer, *end_buffer;
    QEMUIOVector qiov;
    int ret;

    assert(start->nb_bytes <= UINT_MAX - end->nb_bytes);
    assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes);
    assert(start->offset + start->nb_bytes <= end->offset);

    if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) {
        return 0;
    }

    /* If we have to read both the start and end COW regions and the
     * middle region is not too large then perform just one read
     * operation */
    merge_reads = start->nb_bytes && end->nb_bytes && data_bytes <= 16384;
    if (merge_reads) {
        buffer_size = start->nb_bytes + data_bytes + end->nb_bytes;
    } else {
        /* If we have to do two reads, add some padding in the middle
         * if necessary to make sure that the end region is optimally
         * aligned. */
        size_t align = bdrv_opt_mem_align(bs);
        assert(align > 0 && align <= UINT_MAX);
        assert(QEMU_ALIGN_UP(start->nb_bytes, align) <=
               UINT_MAX - end->nb_bytes);
        buffer_size = QEMU_ALIGN_UP(start->nb_bytes, align) + end->nb_bytes;
    }

    /* Reserve a buffer large enough to store all the data that we're
     * going to read */
    start_buffer = qemu_try_blockalign(bs, buffer_size);
    if (start_buffer == NULL) {
        return -ENOMEM;
    }
    /* The part of the buffer where the end region is located */
    end_buffer = start_buffer + buffer_size - end->nb_bytes;

    /* 2 iovecs for the COW regions, plus those needed for the guest data */
    qemu_iovec_init(&qiov, 2 + (m->data_qiov ?
                                qemu_iovec_subvec_niov(m->data_qiov,
                                                       m->data_qiov_offset,
                                                       data_bytes)
                                : 0));

    qemu_co_mutex_unlock(&s->lock);
    /* First we read the existing data from both COW regions. We
     * either read the whole region in one go, or the start and end
     * regions separately. */
    if (merge_reads) {
        qemu_iovec_add(&qiov, start_buffer, buffer_size);
        ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
    } else {
        qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
        ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
        if (ret < 0) {
            goto fail;
        }

        qemu_iovec_reset(&qiov);
        qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
        ret = do_perform_cow_read(bs, m->offset, end->offset, &qiov);
    }
    if (ret < 0) {
        goto fail;
    }

    /* Encrypt the data if necessary before writing it */
    if (bs->encrypted) {
        ret = qcow2_co_encrypt(bs,
                               m->alloc_offset + start->offset,
                               m->offset + start->offset,
                               start_buffer, start->nb_bytes);
        if (ret < 0) {
            goto fail;
        }

        ret = qcow2_co_encrypt(bs,
                               m->alloc_offset + end->offset,
                               m->offset + end->offset,
                               end_buffer, end->nb_bytes);
        if (ret < 0) {
            goto fail;
        }
    }

    /* And now we can write everything. If we have the guest data we
     * can write everything in one single operation */
    if (m->data_qiov) {
        qemu_iovec_reset(&qiov);
        if (start->nb_bytes) {
            qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
        }
        qemu_iovec_concat(&qiov, m->data_qiov, m->data_qiov_offset, data_bytes);
        if (end->nb_bytes) {
            qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
        }
        /* NOTE: we have a write_aio blkdebug event here followed by
         * a cow_write one in do_perform_cow_write(), but there's only
         * one single I/O operation */
        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
        ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
    } else {
        /* If there's no guest data then write both COW regions separately */
        qemu_iovec_reset(&qiov);
        qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
        ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
        if (ret < 0) {
            goto fail;
        }

        qemu_iovec_reset(&qiov);
        qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
        ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov);
    }

fail:
    qemu_co_mutex_lock(&s->lock);

    /*
     * Before we update the L2 table to actually point to the new cluster, we
     * need to be sure that the refcounts have been increased and COW was
     * handled.
     */
    if (ret == 0) {
        qcow2_cache_depends_on_flush(s->l2_table_cache);
    }

    qemu_vfree(start_buffer);
    qemu_iovec_destroy(&qiov);
    return ret;
}
/*
 * Link the newly allocated cluster(s) described by @m into the L2 table:
 * perform the copy-on-write for the surrounding regions, write the new
 * L2 entries (and subcluster bitmaps, if the image has them), and free
 * any clusters that the new entries replace.
 *
 * Returns 0 on success, -errno on failure.
 */
int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
{
    BDRVQcow2State *s = bs->opaque;
    int i, j = 0, l2_index, ret;
    uint64_t *old_cluster, *l2_slice;
    uint64_t cluster_offset = m->alloc_offset;

    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
    assert(m->nb_clusters > 0);

    old_cluster = g_try_new(uint64_t, m->nb_clusters);
    if (old_cluster == NULL) {
        ret = -ENOMEM;
        goto err;
    }

    /* copy content of unmodified sectors */
    ret = perform_cow(bs, m);
    if (ret < 0) {
        goto err;
    }

    /* Update L2 table. */
    if (s->use_lazy_refcounts) {
        qcow2_mark_dirty(bs);
    }
    if (qcow2_need_accurate_refcounts(s)) {
        /* Make sure refcount blocks hit the disk before the L2 update */
        qcow2_cache_set_dependency(bs, s->l2_table_cache,
                                   s->refcount_block_cache);
    }

    ret = get_cluster_table(bs, m->offset, &l2_slice, &l2_index);
    if (ret < 0) {
        goto err;
    }
    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);

    assert(l2_index + m->nb_clusters <= s->l2_slice_size);
    for (i = 0; i < m->nb_clusters; i++) {
        uint64_t offset = cluster_offset + ((uint64_t)i << s->cluster_bits);
        /* if two concurrent writes happen to the same unallocated cluster
         * each write allocates separate cluster and writes data concurrently.
         * The first one to complete updates l2 table with pointer to its
         * cluster the second one has to do RMW (which is done above by
         * perform_cow()), update l2 table with its cluster pointer and free
         * old cluster. This is what this loop does */
        if (get_l2_entry(s, l2_slice, l2_index + i) != 0) {
            old_cluster[j++] = get_l2_entry(s, l2_slice, l2_index + i);
        }

        /* The offset must fit in the offset field of the L2 table entry */
        assert((offset & L2E_OFFSET_MASK) == offset);

        set_l2_entry(s, l2_slice, l2_index + i, offset | QCOW_OFLAG_COPIED);

        /* Update bitmap with the subclusters that were just written */
        if (has_subclusters(s) && !m->prealloc) {
            uint64_t l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i);
            unsigned written_from = m->cow_start.offset;
            /* GNU "?:": if the end COW region is empty, the write extends
             * to the end of the allocated area */
            unsigned written_to = m->cow_end.offset + m->cow_end.nb_bytes ?:
                m->nb_clusters << s->cluster_bits;
            int first_sc, last_sc;
            /* Narrow written_from and written_to down to the current cluster */
            written_from = MAX(written_from, i << s->cluster_bits);
            written_to = MIN(written_to, (i + 1) << s->cluster_bits);
            assert(written_from < written_to);
            first_sc = offset_to_sc_index(s, written_from);
            last_sc = offset_to_sc_index(s, written_to - 1);
            l2_bitmap |= QCOW_OFLAG_SUB_ALLOC_RANGE(first_sc, last_sc + 1);
            l2_bitmap &= ~QCOW_OFLAG_SUB_ZERO_RANGE(first_sc, last_sc + 1);
            set_l2_bitmap(s, l2_slice, l2_index + i, l2_bitmap);
        }
    }


    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

    /*
     * If this was a COW, we need to decrease the refcount of the old cluster.
     *
     * Don't discard clusters that reach a refcount of 0 (e.g. compressed
     * clusters), the next write will reuse them anyway.
     */
    if (!m->keep_old_clusters && j != 0) {
        for (i = 0; i < j; i++) {
            qcow2_free_any_clusters(bs, old_cluster[i], 1, QCOW2_DISCARD_NEVER);
        }
    }

    ret = 0;
err:
    g_free(old_cluster);
    return ret;
}
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 109045aba42fSKevin Wolf 10917ec5e6a4SKevin Wolf /* 10927ec5e6a4SKevin Wolf * If this was a COW, we need to decrease the refcount of the old cluster. 10936cfcb9b8SKevin Wolf * 10946cfcb9b8SKevin Wolf * Don't discard clusters that reach a refcount of 0 (e.g. compressed 10956cfcb9b8SKevin Wolf * clusters), the next write will reuse them anyway. 10967ec5e6a4SKevin Wolf */ 1097564a6b69SMax Reitz if (!m->keep_old_clusters && j != 0) { 10987ec5e6a4SKevin Wolf for (i = 0; i < j; i++) { 109912c6aebeSAlberto Garcia qcow2_free_any_clusters(bs, old_cluster[i], 1, QCOW2_DISCARD_NEVER); 11007ec5e6a4SKevin Wolf } 11017ec5e6a4SKevin Wolf } 110245aba42fSKevin Wolf 110345aba42fSKevin Wolf ret = 0; 110445aba42fSKevin Wolf err: 11057267c094SAnthony Liguori g_free(old_cluster); 110645aba42fSKevin Wolf return ret; 110745aba42fSKevin Wolf } 110845aba42fSKevin Wolf 11098b24cd14SKevin Wolf /** 11108b24cd14SKevin Wolf * Frees the allocated clusters because the request failed and they won't 11118b24cd14SKevin Wolf * actually be linked. 11128b24cd14SKevin Wolf */ 11138b24cd14SKevin Wolf void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) 11148b24cd14SKevin Wolf { 11158b24cd14SKevin Wolf BDRVQcow2State *s = bs->opaque; 11163ede935fSMax Reitz if (!has_data_file(bs) && !m->keep_old_clusters) { 1117c3b6658cSKevin Wolf qcow2_free_clusters(bs, m->alloc_offset, 1118c3b6658cSKevin Wolf m->nb_clusters << s->cluster_bits, 11198b24cd14SKevin Wolf QCOW2_DISCARD_NEVER); 11208b24cd14SKevin Wolf } 1121c3b6658cSKevin Wolf } 11228b24cd14SKevin Wolf 112345aba42fSKevin Wolf /* 11248f91d690SAlberto Garcia * For a given write request, create a new QCowL2Meta structure, add 112557538c86SAlberto Garcia * it to @m and the BDRVQcow2State.cluster_allocs list. If the write 112657538c86SAlberto Garcia * request does not need copy-on-write or changes to the L2 metadata 112757538c86SAlberto Garcia * then this function does nothing. 
11288f91d690SAlberto Garcia * 11298f91d690SAlberto Garcia * @host_cluster_offset points to the beginning of the first cluster. 11308f91d690SAlberto Garcia * 11318f91d690SAlberto Garcia * @guest_offset and @bytes indicate the offset and length of the 11328f91d690SAlberto Garcia * request. 11338f91d690SAlberto Garcia * 113457538c86SAlberto Garcia * @l2_slice contains the L2 entries of all clusters involved in this 113557538c86SAlberto Garcia * write request. 113657538c86SAlberto Garcia * 11378f91d690SAlberto Garcia * If @keep_old is true it means that the clusters were already 11388f91d690SAlberto Garcia * allocated and will be overwritten. If false then the clusters are 11398f91d690SAlberto Garcia * new and we have to decrease the reference count of the old ones. 1140d53ec3d8SAlberto Garcia * 1141d53ec3d8SAlberto Garcia * Returns 0 on success, -errno on failure. 11428f91d690SAlberto Garcia */ 1143d53ec3d8SAlberto Garcia static int calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset, 11448f91d690SAlberto Garcia uint64_t guest_offset, unsigned bytes, 114557538c86SAlberto Garcia uint64_t *l2_slice, QCowL2Meta **m, bool keep_old) 11468f91d690SAlberto Garcia { 11478f91d690SAlberto Garcia BDRVQcow2State *s = bs->opaque; 1148d53ec3d8SAlberto Garcia int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset); 1149d53ec3d8SAlberto Garcia uint64_t l2_entry, l2_bitmap; 115057538c86SAlberto Garcia unsigned cow_start_from, cow_end_to; 11518f91d690SAlberto Garcia unsigned cow_start_to = offset_into_cluster(s, guest_offset); 11528f91d690SAlberto Garcia unsigned cow_end_from = cow_start_to + bytes; 11538f91d690SAlberto Garcia unsigned nb_clusters = size_to_clusters(s, cow_end_from); 11548f91d690SAlberto Garcia QCowL2Meta *old_m = *m; 1155d53ec3d8SAlberto Garcia QCow2SubclusterType type; 1156d53ec3d8SAlberto Garcia int i; 1157d53ec3d8SAlberto Garcia bool skip_cow = keep_old; 115857538c86SAlberto Garcia 115957538c86SAlberto Garcia assert(nb_clusters <= 
s->l2_slice_size - l2_index); 116057538c86SAlberto Garcia 1161d53ec3d8SAlberto Garcia /* Check the type of all affected subclusters */ 116257538c86SAlberto Garcia for (i = 0; i < nb_clusters; i++) { 116312c6aebeSAlberto Garcia l2_entry = get_l2_entry(s, l2_slice, l2_index + i); 1164d53ec3d8SAlberto Garcia l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); 1165d53ec3d8SAlberto Garcia if (skip_cow) { 1166d53ec3d8SAlberto Garcia unsigned write_from = MAX(cow_start_to, i << s->cluster_bits); 1167d53ec3d8SAlberto Garcia unsigned write_to = MIN(cow_end_from, (i + 1) << s->cluster_bits); 1168d53ec3d8SAlberto Garcia int first_sc = offset_to_sc_index(s, write_from); 1169d53ec3d8SAlberto Garcia int last_sc = offset_to_sc_index(s, write_to - 1); 1170d53ec3d8SAlberto Garcia int cnt = qcow2_get_subcluster_range_type(bs, l2_entry, l2_bitmap, 1171d53ec3d8SAlberto Garcia first_sc, &type); 1172d53ec3d8SAlberto Garcia /* Is any of the subclusters of type != QCOW2_SUBCLUSTER_NORMAL ? */ 1173d53ec3d8SAlberto Garcia if (type != QCOW2_SUBCLUSTER_NORMAL || first_sc + cnt <= last_sc) { 1174d53ec3d8SAlberto Garcia skip_cow = false; 1175d53ec3d8SAlberto Garcia } 1176d53ec3d8SAlberto Garcia } else { 1177d53ec3d8SAlberto Garcia /* If we can't skip the cow we can still look for invalid entries */ 1178d53ec3d8SAlberto Garcia type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, 0); 1179d53ec3d8SAlberto Garcia } 1180d53ec3d8SAlberto Garcia if (type == QCOW2_SUBCLUSTER_INVALID) { 1181d53ec3d8SAlberto Garcia int l1_index = offset_to_l1_index(s, guest_offset); 1182d53ec3d8SAlberto Garcia uint64_t l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; 1183d53ec3d8SAlberto Garcia qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster " 1184d53ec3d8SAlberto Garcia "entry found (L2 offset: %#" PRIx64 1185d53ec3d8SAlberto Garcia ", L2 index: %#x)", 1186d53ec3d8SAlberto Garcia l2_offset, l2_index + i); 1187d53ec3d8SAlberto Garcia return -EIO; 118857538c86SAlberto Garcia } 118957538c86SAlberto 
Garcia } 1190d53ec3d8SAlberto Garcia 1191d53ec3d8SAlberto Garcia if (skip_cow) { 1192d53ec3d8SAlberto Garcia return 0; 119357538c86SAlberto Garcia } 119457538c86SAlberto Garcia 119557538c86SAlberto Garcia /* Get the L2 entry of the first cluster */ 119612c6aebeSAlberto Garcia l2_entry = get_l2_entry(s, l2_slice, l2_index); 1197d53ec3d8SAlberto Garcia l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); 1198d53ec3d8SAlberto Garcia sc_index = offset_to_sc_index(s, guest_offset); 1199d53ec3d8SAlberto Garcia type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index); 120057538c86SAlberto Garcia 1201d53ec3d8SAlberto Garcia if (!keep_old) { 1202d53ec3d8SAlberto Garcia switch (type) { 1203d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_COMPRESSED: 1204d53ec3d8SAlberto Garcia cow_start_from = 0; 1205d53ec3d8SAlberto Garcia break; 1206d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_NORMAL: 1207d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_ZERO_ALLOC: 1208d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: 1209d53ec3d8SAlberto Garcia if (has_subclusters(s)) { 1210d53ec3d8SAlberto Garcia /* Skip all leading zero and unallocated subclusters */ 1211d53ec3d8SAlberto Garcia uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC; 1212d53ec3d8SAlberto Garcia cow_start_from = 1213d53ec3d8SAlberto Garcia MIN(sc_index, ctz32(alloc_bitmap)) << s->subcluster_bits; 121457538c86SAlberto Garcia } else { 121557538c86SAlberto Garcia cow_start_from = 0; 121657538c86SAlberto Garcia } 1217d53ec3d8SAlberto Garcia break; 1218d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_ZERO_PLAIN: 1219d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: 1220d53ec3d8SAlberto Garcia cow_start_from = sc_index << s->subcluster_bits; 1221d53ec3d8SAlberto Garcia break; 1222d53ec3d8SAlberto Garcia default: 1223d53ec3d8SAlberto Garcia g_assert_not_reached(); 1224d53ec3d8SAlberto Garcia } 1225d53ec3d8SAlberto Garcia } else { 1226d53ec3d8SAlberto Garcia switch (type) { 1227d53ec3d8SAlberto 
Garcia case QCOW2_SUBCLUSTER_NORMAL: 1228d53ec3d8SAlberto Garcia cow_start_from = cow_start_to; 1229d53ec3d8SAlberto Garcia break; 1230d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_ZERO_ALLOC: 1231d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: 1232d53ec3d8SAlberto Garcia cow_start_from = sc_index << s->subcluster_bits; 1233d53ec3d8SAlberto Garcia break; 1234d53ec3d8SAlberto Garcia default: 1235d53ec3d8SAlberto Garcia g_assert_not_reached(); 1236d53ec3d8SAlberto Garcia } 1237d53ec3d8SAlberto Garcia } 123857538c86SAlberto Garcia 123957538c86SAlberto Garcia /* Get the L2 entry of the last cluster */ 1240d53ec3d8SAlberto Garcia l2_index += nb_clusters - 1; 1241d53ec3d8SAlberto Garcia l2_entry = get_l2_entry(s, l2_slice, l2_index); 1242d53ec3d8SAlberto Garcia l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); 1243d53ec3d8SAlberto Garcia sc_index = offset_to_sc_index(s, guest_offset + bytes - 1); 1244d53ec3d8SAlberto Garcia type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index); 124557538c86SAlberto Garcia 1246d53ec3d8SAlberto Garcia if (!keep_old) { 1247d53ec3d8SAlberto Garcia switch (type) { 1248d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_COMPRESSED: 124957538c86SAlberto Garcia cow_end_to = ROUND_UP(cow_end_from, s->cluster_size); 1250d53ec3d8SAlberto Garcia break; 1251d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_NORMAL: 1252d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_ZERO_ALLOC: 1253d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: 1254d53ec3d8SAlberto Garcia cow_end_to = ROUND_UP(cow_end_from, s->cluster_size); 1255d53ec3d8SAlberto Garcia if (has_subclusters(s)) { 1256d53ec3d8SAlberto Garcia /* Skip all trailing zero and unallocated subclusters */ 1257d53ec3d8SAlberto Garcia uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC; 1258d53ec3d8SAlberto Garcia cow_end_to -= 1259d53ec3d8SAlberto Garcia MIN(s->subclusters_per_cluster - sc_index - 1, 1260d53ec3d8SAlberto Garcia clz32(alloc_bitmap)) << 
s->subcluster_bits; 1261d53ec3d8SAlberto Garcia } 1262d53ec3d8SAlberto Garcia break; 1263d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_ZERO_PLAIN: 1264d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: 1265d53ec3d8SAlberto Garcia cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size); 1266d53ec3d8SAlberto Garcia break; 1267d53ec3d8SAlberto Garcia default: 1268d53ec3d8SAlberto Garcia g_assert_not_reached(); 1269d53ec3d8SAlberto Garcia } 1270d53ec3d8SAlberto Garcia } else { 1271d53ec3d8SAlberto Garcia switch (type) { 1272d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_NORMAL: 1273d53ec3d8SAlberto Garcia cow_end_to = cow_end_from; 1274d53ec3d8SAlberto Garcia break; 1275d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_ZERO_ALLOC: 1276d53ec3d8SAlberto Garcia case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: 1277d53ec3d8SAlberto Garcia cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size); 1278d53ec3d8SAlberto Garcia break; 1279d53ec3d8SAlberto Garcia default: 1280d53ec3d8SAlberto Garcia g_assert_not_reached(); 1281d53ec3d8SAlberto Garcia } 128257538c86SAlberto Garcia } 12838f91d690SAlberto Garcia 12848f91d690SAlberto Garcia *m = g_malloc0(sizeof(**m)); 12858f91d690SAlberto Garcia **m = (QCowL2Meta) { 12868f91d690SAlberto Garcia .next = old_m, 12878f91d690SAlberto Garcia 12888f91d690SAlberto Garcia .alloc_offset = host_cluster_offset, 12898f91d690SAlberto Garcia .offset = start_of_cluster(s, guest_offset), 12908f91d690SAlberto Garcia .nb_clusters = nb_clusters, 12918f91d690SAlberto Garcia 12928f91d690SAlberto Garcia .keep_old_clusters = keep_old, 12938f91d690SAlberto Garcia 12948f91d690SAlberto Garcia .cow_start = { 12958f91d690SAlberto Garcia .offset = cow_start_from, 12968f91d690SAlberto Garcia .nb_bytes = cow_start_to - cow_start_from, 12978f91d690SAlberto Garcia }, 12988f91d690SAlberto Garcia .cow_end = { 12998f91d690SAlberto Garcia .offset = cow_end_from, 13008f91d690SAlberto Garcia .nb_bytes = cow_end_to - cow_end_from, 13018f91d690SAlberto Garcia }, 
13028f91d690SAlberto Garcia }; 13038f91d690SAlberto Garcia 13048f91d690SAlberto Garcia qemu_co_queue_init(&(*m)->dependent_requests); 13058f91d690SAlberto Garcia QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); 1306d53ec3d8SAlberto Garcia 1307d53ec3d8SAlberto Garcia return 0; 13088f91d690SAlberto Garcia } 13098f91d690SAlberto Garcia 131057538c86SAlberto Garcia /* 131157538c86SAlberto Garcia * Returns true if writing to the cluster pointed to by @l2_entry 131257538c86SAlberto Garcia * requires a new allocation (that is, if the cluster is unallocated 131357538c86SAlberto Garcia * or has refcount > 1 and therefore cannot be written in-place). 131457538c86SAlberto Garcia */ 131557538c86SAlberto Garcia static bool cluster_needs_new_alloc(BlockDriverState *bs, uint64_t l2_entry) 1316c1587d87SAlberto Garcia { 1317c1587d87SAlberto Garcia switch (qcow2_get_cluster_type(bs, l2_entry)) { 1318c1587d87SAlberto Garcia case QCOW2_CLUSTER_NORMAL: 131957538c86SAlberto Garcia case QCOW2_CLUSTER_ZERO_ALLOC: 1320c1587d87SAlberto Garcia if (l2_entry & QCOW_OFLAG_COPIED) { 1321c1587d87SAlberto Garcia return false; 1322c1587d87SAlberto Garcia } 1323*b9be6faeSThomas Huth /* fallthrough */ 1324c1587d87SAlberto Garcia case QCOW2_CLUSTER_UNALLOCATED: 1325c1587d87SAlberto Garcia case QCOW2_CLUSTER_COMPRESSED: 1326c1587d87SAlberto Garcia case QCOW2_CLUSTER_ZERO_PLAIN: 1327c1587d87SAlberto Garcia return true; 1328c1587d87SAlberto Garcia default: 1329c1587d87SAlberto Garcia abort(); 1330c1587d87SAlberto Garcia } 1331c1587d87SAlberto Garcia } 1332c1587d87SAlberto Garcia 13338f91d690SAlberto Garcia /* 133457538c86SAlberto Garcia * Returns the number of contiguous clusters that can be written to 133557538c86SAlberto Garcia * using one single write request, starting from @l2_index. 133657538c86SAlberto Garcia * At most @nb_clusters are checked. 
133757538c86SAlberto Garcia * 133857538c86SAlberto Garcia * If @new_alloc is true this counts clusters that are either 133957538c86SAlberto Garcia * unallocated, or allocated but with refcount > 1 (so they need to be 134057538c86SAlberto Garcia * newly allocated and COWed). 134157538c86SAlberto Garcia * 134257538c86SAlberto Garcia * If @new_alloc is false this counts clusters that are already 134357538c86SAlberto Garcia * allocated and can be overwritten in-place (this includes clusters 134457538c86SAlberto Garcia * of type QCOW2_CLUSTER_ZERO_ALLOC). 1345bf319eceSKevin Wolf */ 134657538c86SAlberto Garcia static int count_single_write_clusters(BlockDriverState *bs, int nb_clusters, 134757538c86SAlberto Garcia uint64_t *l2_slice, int l2_index, 134857538c86SAlberto Garcia bool new_alloc) 1349bf319eceSKevin Wolf { 135057538c86SAlberto Garcia BDRVQcow2State *s = bs->opaque; 135112c6aebeSAlberto Garcia uint64_t l2_entry = get_l2_entry(s, l2_slice, l2_index); 135257538c86SAlberto Garcia uint64_t expected_offset = l2_entry & L2E_OFFSET_MASK; 1353143550a8SKevin Wolf int i; 1354bf319eceSKevin Wolf 1355143550a8SKevin Wolf for (i = 0; i < nb_clusters; i++) { 135612c6aebeSAlberto Garcia l2_entry = get_l2_entry(s, l2_slice, l2_index + i); 135757538c86SAlberto Garcia if (cluster_needs_new_alloc(bs, l2_entry) != new_alloc) { 1358bf319eceSKevin Wolf break; 1359143550a8SKevin Wolf } 136057538c86SAlberto Garcia if (!new_alloc) { 136157538c86SAlberto Garcia if (expected_offset != (l2_entry & L2E_OFFSET_MASK)) { 136257538c86SAlberto Garcia break; 136357538c86SAlberto Garcia } 136457538c86SAlberto Garcia expected_offset += s->cluster_size; 136557538c86SAlberto Garcia } 1366bf319eceSKevin Wolf } 1367bf319eceSKevin Wolf 1368bf319eceSKevin Wolf assert(i <= nb_clusters); 1369bf319eceSKevin Wolf return i; 1370bf319eceSKevin Wolf } 1371bf319eceSKevin Wolf 1372bf319eceSKevin Wolf /* 1373250196f1SKevin Wolf * Check if there already is an AIO write request in flight which allocates 
1374250196f1SKevin Wolf * the same cluster. In this case we need to wait until the previous 1375250196f1SKevin Wolf * request has completed and updated the L2 table accordingly. 137665eb2e35SKevin Wolf * 137765eb2e35SKevin Wolf * Returns: 137865eb2e35SKevin Wolf * 0 if there was no dependency. *cur_bytes indicates the number of 137965eb2e35SKevin Wolf * bytes from guest_offset that can be read before the next 138065eb2e35SKevin Wolf * dependency must be processed (or the request is complete) 138165eb2e35SKevin Wolf * 138265eb2e35SKevin Wolf * -EAGAIN if we had to wait for another request, previously gathered 138365eb2e35SKevin Wolf * information on cluster allocation may be invalid now. The caller 138465eb2e35SKevin Wolf * must start over anyway, so consider *cur_bytes undefined. 1385250196f1SKevin Wolf */ 1386226c3c26SKevin Wolf static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, 1387ecdd5333SKevin Wolf uint64_t *cur_bytes, QCowL2Meta **m) 1388226c3c26SKevin Wolf { 1389ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1390226c3c26SKevin Wolf QCowL2Meta *old_alloc; 139165eb2e35SKevin Wolf uint64_t bytes = *cur_bytes; 1392226c3c26SKevin Wolf 1393250196f1SKevin Wolf QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) { 1394250196f1SKevin Wolf 139565eb2e35SKevin Wolf uint64_t start = guest_offset; 139665eb2e35SKevin Wolf uint64_t end = start + bytes; 1397d53ec3d8SAlberto Garcia uint64_t old_start = start_of_cluster(s, l2meta_cow_start(old_alloc)); 1398d53ec3d8SAlberto Garcia uint64_t old_end = ROUND_UP(l2meta_cow_end(old_alloc), s->cluster_size); 1399250196f1SKevin Wolf 1400d9d74f41SKevin Wolf if (end <= old_start || start >= old_end) { 1401250196f1SKevin Wolf /* No intersection */ 1402250196f1SKevin Wolf } else { 1403250196f1SKevin Wolf if (start < old_start) { 1404250196f1SKevin Wolf /* Stop at the start of a running allocation */ 140565eb2e35SKevin Wolf bytes = old_start - start; 1406250196f1SKevin Wolf } else { 140765eb2e35SKevin 
Wolf bytes = 0; 1408250196f1SKevin Wolf } 1409250196f1SKevin Wolf 1410ecdd5333SKevin Wolf /* Stop if already an l2meta exists. After yielding, it wouldn't 1411ecdd5333SKevin Wolf * be valid any more, so we'd have to clean up the old L2Metas 1412ecdd5333SKevin Wolf * and deal with requests depending on them before starting to 1413ecdd5333SKevin Wolf * gather new ones. Not worth the trouble. */ 1414ecdd5333SKevin Wolf if (bytes == 0 && *m) { 1415ecdd5333SKevin Wolf *cur_bytes = 0; 1416ecdd5333SKevin Wolf return 0; 1417ecdd5333SKevin Wolf } 1418ecdd5333SKevin Wolf 141965eb2e35SKevin Wolf if (bytes == 0) { 1420250196f1SKevin Wolf /* Wait for the dependency to complete. We need to recheck 1421250196f1SKevin Wolf * the free/allocated clusters when we continue. */ 14221ace7ceaSPaolo Bonzini qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock); 1423250196f1SKevin Wolf return -EAGAIN; 1424250196f1SKevin Wolf } 1425250196f1SKevin Wolf } 1426250196f1SKevin Wolf } 1427250196f1SKevin Wolf 142865eb2e35SKevin Wolf /* Make sure that existing clusters and new allocations are only used up to 142965eb2e35SKevin Wolf * the next dependency if we shortened the request above */ 143065eb2e35SKevin Wolf *cur_bytes = bytes; 1431250196f1SKevin Wolf 1432226c3c26SKevin Wolf return 0; 1433226c3c26SKevin Wolf } 1434226c3c26SKevin Wolf 1435226c3c26SKevin Wolf /* 143657538c86SAlberto Garcia * Checks how many already allocated clusters that don't require a new 143757538c86SAlberto Garcia * allocation there are at the given guest_offset (up to *bytes). 143857538c86SAlberto Garcia * If *host_offset is not INV_OFFSET, only physically contiguous clusters 143957538c86SAlberto Garcia * beginning at this host offset are counted. 14400af729ecSKevin Wolf * 1441411d62b0SKevin Wolf * Note that guest_offset may not be cluster aligned. 
/*
 * Checks how many already allocated clusters that don't require a new
 * allocation there are at the given guest_offset (up to *bytes).
 * If *host_offset is not INV_OFFSET, only physically contiguous clusters
 * beginning at this host offset are counted.
 *
 * Note that guest_offset may not be cluster aligned. In this case, the
 * returned *host_offset points to exact byte referenced by guest_offset and
 * therefore isn't cluster aligned as well.
 *
 * On success with ret == 1, *m is extended (via calculate_l2_meta()) with
 * the COW regions needed for this in-place write.
 *
 * Returns:
 *   0: if no allocated clusters are available at the given offset.
 *      *bytes is normally unchanged. It is set to 0 if the cluster
 *      is allocated and can be overwritten in-place but doesn't have
 *      the right physical offset.
 *
 *   1: if allocated clusters that can be overwritten in place are
 *      available at the requested offset. *bytes may have decreased
 *      and describes the length of the area that can be written to.
 *
 *   -errno: in error cases (-EIO if the image is found to be corrupted)
 */
static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
                         uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    int l2_index;
    uint64_t l2_entry, cluster_offset;
    uint64_t *l2_slice;
    uint64_t nb_clusters;
    unsigned int keep_clusters;
    int ret;

    trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
                              *bytes);

    /* If a host offset is given, it must be consistent with guest_offset's
     * sub-cluster alignment */
    assert(*host_offset == INV_OFFSET || offset_into_cluster(s, guest_offset)
           == offset_into_cluster(s, *host_offset));

    /*
     * Calculate the number of clusters to look for. We stop at L2 slice
     * boundaries to keep things simple.
     */
    nb_clusters =
        size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);

    l2_index = offset_to_l2_slice_index(s, guest_offset);
    nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
    /* Limit total byte count to BDRV_REQUEST_MAX_BYTES */
    nb_clusters = MIN(nb_clusters, BDRV_REQUEST_MAX_BYTES >> s->cluster_bits);

    /* Find L2 entry for the first involved cluster */
    ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index);
    if (ret < 0) {
        return ret;
    }

    l2_entry = get_l2_entry(s, l2_slice, l2_index);
    cluster_offset = l2_entry & L2E_OFFSET_MASK;

    if (!cluster_needs_new_alloc(bs, l2_entry)) {
        /* An unaligned data/zero-alloc cluster offset means on-disk
         * corruption; report it and fail the request */
        if (offset_into_cluster(s, cluster_offset)) {
            qcow2_signal_corruption(bs, true, -1, -1, "%s cluster offset "
                                    "%#" PRIx64 " unaligned (guest offset: %#"
                                    PRIx64 ")", l2_entry & QCOW_OFLAG_ZERO ?
                                    "Preallocated zero" : "Data",
                                    cluster_offset, guest_offset);
            ret = -EIO;
            goto out;
        }

        /* If a specific host_offset is required, check it */
        if (*host_offset != INV_OFFSET && cluster_offset != *host_offset) {
            *bytes = 0;
            ret = 0;
            goto out;
        }

        /* We keep all QCOW_OFLAG_COPIED clusters */
        keep_clusters = count_single_write_clusters(bs, nb_clusters, l2_slice,
                                                    l2_index, false);
        assert(keep_clusters <= nb_clusters);

        /* Shorten the request to the contiguous in-place area found */
        *bytes = MIN(*bytes,
                     keep_clusters * s->cluster_size
                     - offset_into_cluster(s, guest_offset));
        assert(*bytes != 0);

        ret = calculate_l2_meta(bs, cluster_offset, guest_offset,
                                *bytes, l2_slice, m, true);
        if (ret < 0) {
            goto out;
        }

        ret = 1;
    } else {
        ret = 0;
    }

    /* Cleanup */
out:
    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

    /* Only return a host offset if we actually made progress. Otherwise we
     * would make requirements for handle_alloc() that it can't fulfill */
    if (ret > 0) {
        *host_offset = cluster_offset + offset_into_cluster(s, guest_offset);
    }

    return ret;
}
1565226c3c26SKevin Wolf */ 1566226c3c26SKevin Wolf static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, 1567b6d36defSMax Reitz uint64_t *host_offset, uint64_t *nb_clusters) 1568226c3c26SKevin Wolf { 1569ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1570226c3c26SKevin Wolf 1571226c3c26SKevin Wolf trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, 1572226c3c26SKevin Wolf *host_offset, *nb_clusters); 1573226c3c26SKevin Wolf 1574966b000fSKevin Wolf if (has_data_file(bs)) { 1575966b000fSKevin Wolf assert(*host_offset == INV_OFFSET || 1576966b000fSKevin Wolf *host_offset == start_of_cluster(s, guest_offset)); 1577966b000fSKevin Wolf *host_offset = start_of_cluster(s, guest_offset); 1578966b000fSKevin Wolf return 0; 1579966b000fSKevin Wolf } 1580966b000fSKevin Wolf 1581250196f1SKevin Wolf /* Allocate new clusters */ 1582250196f1SKevin Wolf trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); 1583c6d619ccSKevin Wolf if (*host_offset == INV_OFFSET) { 1584df021791SKevin Wolf int64_t cluster_offset = 1585df021791SKevin Wolf qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size); 1586250196f1SKevin Wolf if (cluster_offset < 0) { 1587250196f1SKevin Wolf return cluster_offset; 1588250196f1SKevin Wolf } 1589250196f1SKevin Wolf *host_offset = cluster_offset; 1590250196f1SKevin Wolf return 0; 1591df021791SKevin Wolf } else { 1592b6d36defSMax Reitz int64_t ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); 1593df021791SKevin Wolf if (ret < 0) { 1594df021791SKevin Wolf return ret; 1595df021791SKevin Wolf } 1596df021791SKevin Wolf *nb_clusters = ret; 1597df021791SKevin Wolf return 0; 1598df021791SKevin Wolf } 1599250196f1SKevin Wolf } 1600250196f1SKevin Wolf 1601250196f1SKevin Wolf /* 160257538c86SAlberto Garcia * Allocates new clusters for an area that is either still unallocated or 160357538c86SAlberto Garcia * cannot be overwritten in-place. 
/*
 * Allocates new clusters for an area that is either still unallocated or
 * cannot be overwritten in-place. If *host_offset is not INV_OFFSET,
 * clusters are only allocated if the new allocation can match the specified
 * host offset.
 *
 * Note that guest_offset may not be cluster aligned. In this case, the
 * returned *host_offset points to exact byte referenced by guest_offset and
 * therefore isn't cluster aligned as well.
 *
 * On success with ret == 1, *m is extended (via calculate_l2_meta()) with
 * the COW regions for the new allocation.
 *
 * Returns:
 *   0: if no clusters could be allocated. *bytes is set to 0,
 *      *host_offset is left unchanged.
 *
 *   1: if new clusters were allocated. *bytes may be decreased if the
 *      new allocation doesn't cover all of the requested area.
 *      *host_offset is updated to contain the host offset of the first
 *      newly allocated cluster.
 *
 *   -errno: in error cases
 */
static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
                        uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    int l2_index;
    uint64_t *l2_slice;
    uint64_t nb_clusters;
    int ret;

    uint64_t alloc_cluster_offset;

    trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
                             *bytes);
    assert(*bytes > 0);

    /*
     * Calculate the number of clusters to look for. We stop at L2 slice
     * boundaries to keep things simple.
     */
    nb_clusters =
        size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);

    l2_index = offset_to_l2_slice_index(s, guest_offset);
    nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
    /* Limit total allocation byte count to BDRV_REQUEST_MAX_BYTES */
    nb_clusters = MIN(nb_clusters, BDRV_REQUEST_MAX_BYTES >> s->cluster_bits);

    /* Find L2 entry for the first involved cluster */
    ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index);
    if (ret < 0) {
        return ret;
    }

    nb_clusters = count_single_write_clusters(bs, nb_clusters,
                                              l2_slice, l2_index, true);

    /* This function is only called when there were no non-COW clusters, so if
     * we can't find any unallocated or COW clusters either, something is
     * wrong with our code. */
    assert(nb_clusters > 0);

    /* Allocate at a given offset in the image file */
    alloc_cluster_offset = *host_offset == INV_OFFSET ? INV_OFFSET :
        start_of_cluster(s, *host_offset);
    ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
                                  &nb_clusters);
    if (ret < 0) {
        goto out;
    }

    /* Can't extend contiguous allocation */
    if (nb_clusters == 0) {
        *bytes = 0;
        ret = 0;
        goto out;
    }

    assert(alloc_cluster_offset != INV_OFFSET);

    /*
     * Save info needed for meta data update.
     *
     * requested_bytes: Number of bytes from the start of the first
     * newly allocated cluster to the end of the (possibly shortened
     * before) write request.
     *
     * avail_bytes: Number of bytes from the start of the first
     * newly allocated to the end of the last newly allocated cluster.
     *
     * nb_bytes: The number of bytes from the start of the first
     * newly allocated cluster to the end of the area that the write
     * request actually writes to (excluding COW at the end)
     */
    uint64_t requested_bytes = *bytes + offset_into_cluster(s, guest_offset);
    /* Fits in an int: nb_clusters was capped to BDRV_REQUEST_MAX_BYTES
     * worth of clusters above */
    int avail_bytes = nb_clusters << s->cluster_bits;
    int nb_bytes = MIN(requested_bytes, avail_bytes);

    *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
    *bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset));
    assert(*bytes != 0);

    ret = calculate_l2_meta(bs, alloc_cluster_offset, guest_offset, *bytes,
                            l2_slice, m, false);
    if (ret < 0) {
        goto out;
    }

    ret = 1;

out:
    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
    /* On failure, drop the L2Meta that calculate_l2_meta() may already have
     * queued in s->cluster_allocs */
    if (ret < 0 && *m && (*m)->nb_clusters > 0) {
        QLIST_REMOVE(*m, next_in_flight);
    }
    return ret;
}
/*
 * alloc_cluster_offset
 *
 * For a given offset on the virtual disk, find the cluster offset in qcow2
 * file. If the offset is not found, allocate a new cluster.
 *
 * If the cluster was already allocated, m->nb_clusters is set to 0 and
 * other fields in m are meaningless.
 *
 * If the cluster is newly allocated, m->nb_clusters is set to the number of
 * contiguous clusters that have been allocated. In this case, the other
 * fields of m are valid and contain information about the first allocated
 * cluster.
 *
 * If the request conflicts with another write request in flight, the coroutine
 * is queued and will be reentered when the dependency has completed.
 *
 * On return, *bytes is the number of contiguous bytes starting at offset
 * that can be written through *host_offset (possibly shortened), and *m is
 * the chain of L2Meta structs describing the required COW/metadata updates.
 *
 * Return 0 on success and -errno in error cases
 */
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
                               unsigned int *bytes, uint64_t *host_offset,
                               QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t start, remaining;
    uint64_t cluster_offset;
    uint64_t cur_bytes;
    int ret;

    trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *bytes);

again:
    start = offset;
    remaining = *bytes;
    cluster_offset = INV_OFFSET;
    *host_offset = INV_OFFSET;
    cur_bytes = 0;
    *m = NULL;

    while (true) {

        /* Publish the host offset once the first chunk has been gathered */
        if (*host_offset == INV_OFFSET && cluster_offset != INV_OFFSET) {
            *host_offset = start_of_cluster(s, cluster_offset);
        }

        assert(remaining >= cur_bytes);

        /* Advance past the chunk handled in the previous iteration */
        start += cur_bytes;
        remaining -= cur_bytes;

        if (cluster_offset != INV_OFFSET) {
            cluster_offset += cur_bytes;
        }

        if (remaining == 0) {
            break;
        }

        cur_bytes = remaining;

        /*
         * Now start gathering as many contiguous clusters as possible:
         *
         * 1. Check for overlaps with in-flight allocations
         *
         *    a) Overlap not in the first cluster -> shorten this request and
         *       let the caller handle the rest in its next loop iteration.
         *
         *    b) Real overlaps of two requests. Yield and restart the search
         *       for contiguous clusters (the situation could have changed
         *       while we were sleeping)
         *
         *    c) TODO: Request starts in the same cluster as the in-flight
         *       allocation ends. Shorten the COW of the in-fight allocation,
         *       set cluster_offset to write to the same cluster and set up
         *       the right synchronisation between the in-flight request and
         *       the new one.
         */
        ret = handle_dependencies(bs, start, &cur_bytes, m);
        if (ret == -EAGAIN) {
            /* Currently handle_dependencies() doesn't yield if we already had
             * an allocation. If it did, we would have to clean up the L2Meta
             * structs before starting over. */
            assert(*m == NULL);
            goto again;
        } else if (ret < 0) {
            return ret;
        } else if (cur_bytes == 0) {
            break;
        } else {
            /* handle_dependencies() may have decreased cur_bytes (shortened
             * the allocations below) so that the next dependency is processed
             * correctly during the next loop iteration. */
        }

        /*
         * 2. Count contiguous COPIED clusters.
         */
        ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            continue;
        } else if (cur_bytes == 0) {
            break;
        }

        /*
         * 3. If the request still hasn't completed, allocate new clusters,
         *    considering any cluster_offset of steps 1c or 2.
         */
        ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            continue;
        } else {
            assert(cur_bytes == 0);
            break;
        }
    }

    *bytes -= remaining;
    assert(*bytes > 0);
    assert(*host_offset != INV_OFFSET);

    return 0;
}
1873a68cd703SAlberto Garcia uint64_t old_l2_entry = get_l2_entry(s, l2_slice, l2_index + i); 1874a68cd703SAlberto Garcia uint64_t old_l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); 1875a68cd703SAlberto Garcia uint64_t new_l2_entry = old_l2_entry; 1876a68cd703SAlberto Garcia uint64_t new_l2_bitmap = old_l2_bitmap; 1877a68cd703SAlberto Garcia QCow2ClusterType cluster_type = 1878a68cd703SAlberto Garcia qcow2_get_cluster_type(bs, old_l2_entry); 1879a71835a0SKevin Wolf 1880a71835a0SKevin Wolf /* 1881a68cd703SAlberto Garcia * If full_discard is true, the cluster should not read back as zeroes, 1882a68cd703SAlberto Garcia * but rather fall through to the backing file. 1883a68cd703SAlberto Garcia * 1884808c4b6fSMax Reitz * If full_discard is false, make sure that a discarded area reads back 1885808c4b6fSMax Reitz * as zeroes for v3 images (we cannot do it for v2 without actually 1886808c4b6fSMax Reitz * writing a zero-filled buffer). We can skip the operation if the 1887808c4b6fSMax Reitz * cluster is already marked as zero, or if it's unallocated and we 1888808c4b6fSMax Reitz * don't have a backing file. 1889a71835a0SKevin Wolf * 1890237d78f8SEric Blake * TODO We might want to use bdrv_block_status(bs) here, but we're 1891a71835a0SKevin Wolf * holding s->lock, so that doesn't work today. 1892a71835a0SKevin Wolf */ 1893a68cd703SAlberto Garcia if (full_discard) { 1894a68cd703SAlberto Garcia new_l2_entry = new_l2_bitmap = 0; 1895a68cd703SAlberto Garcia } else if (bs->backing || qcow2_cluster_is_allocated(cluster_type)) { 1896a68cd703SAlberto Garcia if (has_subclusters(s)) { 1897a68cd703SAlberto Garcia new_l2_entry = 0; 1898a68cd703SAlberto Garcia new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES; 1899a68cd703SAlberto Garcia } else { 1900a68cd703SAlberto Garcia new_l2_entry = s->qcow_version >= 3 ? 
QCOW_OFLAG_ZERO : 0; 1901a71835a0SKevin Wolf } 1902808c4b6fSMax Reitz } 1903c883db0dSMax Reitz 1904a68cd703SAlberto Garcia if (old_l2_entry == new_l2_entry && old_l2_bitmap == new_l2_bitmap) { 1905a68cd703SAlberto Garcia continue; 19065ea929e3SKevin Wolf } 19075ea929e3SKevin Wolf 19085ea929e3SKevin Wolf /* First remove L2 entries */ 190921ab3addSAlberto Garcia qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); 1910a68cd703SAlberto Garcia set_l2_entry(s, l2_slice, l2_index + i, new_l2_entry); 1911a68cd703SAlberto Garcia if (has_subclusters(s)) { 1912a68cd703SAlberto Garcia set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap); 1913a71835a0SKevin Wolf } 19145ea929e3SKevin Wolf /* Then decrease the refcount */ 1915c883db0dSMax Reitz qcow2_free_any_clusters(bs, old_l2_entry, 1, type); 19165ea929e3SKevin Wolf } 19175ea929e3SKevin Wolf 191821ab3addSAlberto Garcia qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 19195ea929e3SKevin Wolf 19205ea929e3SKevin Wolf return nb_clusters; 19215ea929e3SKevin Wolf } 19225ea929e3SKevin Wolf 1923d2cb36afSEric Blake int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, 1924d2cb36afSEric Blake uint64_t bytes, enum qcow2_discard_type type, 1925d2cb36afSEric Blake bool full_discard) 19265ea929e3SKevin Wolf { 1927ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1928d2cb36afSEric Blake uint64_t end_offset = offset + bytes; 1929b6d36defSMax Reitz uint64_t nb_clusters; 1930d2cb36afSEric Blake int64_t cleared; 19315ea929e3SKevin Wolf int ret; 19325ea929e3SKevin Wolf 1933f10ee139SEric Blake /* Caller must pass aligned values, except at image end */ 19340c1bd469SEric Blake assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); 1935f10ee139SEric Blake assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || 1936f10ee139SEric Blake end_offset == bs->total_sectors << BDRV_SECTOR_BITS); 19375ea929e3SKevin Wolf 1938d2cb36afSEric Blake nb_clusters = size_to_clusters(s, bytes); 19395ea929e3SKevin Wolf 19400b919faeSKevin Wolf 
s->cache_discards = true; 19410b919faeSKevin Wolf 194221ab3addSAlberto Garcia /* Each L2 slice is handled by its own loop iteration */ 19435ea929e3SKevin Wolf while (nb_clusters > 0) { 194421ab3addSAlberto Garcia cleared = discard_in_l2_slice(bs, offset, nb_clusters, type, 1945d2cb36afSEric Blake full_discard); 1946d2cb36afSEric Blake if (cleared < 0) { 1947d2cb36afSEric Blake ret = cleared; 19480b919faeSKevin Wolf goto fail; 19495ea929e3SKevin Wolf } 19505ea929e3SKevin Wolf 1951d2cb36afSEric Blake nb_clusters -= cleared; 1952d2cb36afSEric Blake offset += (cleared * s->cluster_size); 19535ea929e3SKevin Wolf } 19545ea929e3SKevin Wolf 19550b919faeSKevin Wolf ret = 0; 19560b919faeSKevin Wolf fail: 19570b919faeSKevin Wolf s->cache_discards = false; 19580b919faeSKevin Wolf qcow2_process_discards(bs, ret); 19590b919faeSKevin Wolf 19600b919faeSKevin Wolf return ret; 19615ea929e3SKevin Wolf } 1962621f0589SKevin Wolf 1963621f0589SKevin Wolf /* 1964621f0589SKevin Wolf * This zeroes as many clusters of nb_clusters as possible at once (i.e. 1965a9a9f8f0SAlberto Garcia * all clusters in the same L2 slice) and returns the number of zeroed 1966621f0589SKevin Wolf * clusters. 
1967621f0589SKevin Wolf */ 1968a9a9f8f0SAlberto Garcia static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset, 1969170f4b2eSFam Zheng uint64_t nb_clusters, int flags) 1970621f0589SKevin Wolf { 1971ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1972a9a9f8f0SAlberto Garcia uint64_t *l2_slice; 1973621f0589SKevin Wolf int l2_index; 1974621f0589SKevin Wolf int ret; 1975621f0589SKevin Wolf int i; 1976621f0589SKevin Wolf 1977a9a9f8f0SAlberto Garcia ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); 1978621f0589SKevin Wolf if (ret < 0) { 1979621f0589SKevin Wolf return ret; 1980621f0589SKevin Wolf } 1981621f0589SKevin Wolf 1982a9a9f8f0SAlberto Garcia /* Limit nb_clusters to one L2 slice */ 1983a9a9f8f0SAlberto Garcia nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); 1984b6d36defSMax Reitz assert(nb_clusters <= INT_MAX); 1985621f0589SKevin Wolf 1986621f0589SKevin Wolf for (i = 0; i < nb_clusters; i++) { 1987205fa507SAlberto Garcia uint64_t old_l2_entry = get_l2_entry(s, l2_slice, l2_index + i); 1988205fa507SAlberto Garcia uint64_t old_l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); 1989205fa507SAlberto Garcia QCow2ClusterType type = qcow2_get_cluster_type(bs, old_l2_entry); 1990205fa507SAlberto Garcia bool unmap = (type == QCOW2_CLUSTER_COMPRESSED) || 1991205fa507SAlberto Garcia ((flags & BDRV_REQ_MAY_UNMAP) && qcow2_cluster_is_allocated(type)); 1992205fa507SAlberto Garcia uint64_t new_l2_entry = unmap ? 
0 : old_l2_entry; 1993205fa507SAlberto Garcia uint64_t new_l2_bitmap = old_l2_bitmap; 1994621f0589SKevin Wolf 1995205fa507SAlberto Garcia if (has_subclusters(s)) { 1996205fa507SAlberto Garcia new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES; 1997205fa507SAlberto Garcia } else { 1998205fa507SAlberto Garcia new_l2_entry |= QCOW_OFLAG_ZERO; 1999205fa507SAlberto Garcia } 2000621f0589SKevin Wolf 2001205fa507SAlberto Garcia if (old_l2_entry == new_l2_entry && old_l2_bitmap == new_l2_bitmap) { 200206cc5e2bSEric Blake continue; 200306cc5e2bSEric Blake } 200406cc5e2bSEric Blake 2005a9a9f8f0SAlberto Garcia qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); 2006205fa507SAlberto Garcia if (unmap) { 2007205fa507SAlberto Garcia qcow2_free_any_clusters(bs, old_l2_entry, 1, QCOW2_DISCARD_REQUEST); 2008205fa507SAlberto Garcia } 2009205fa507SAlberto Garcia set_l2_entry(s, l2_slice, l2_index + i, new_l2_entry); 2010205fa507SAlberto Garcia if (has_subclusters(s)) { 2011205fa507SAlberto Garcia set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap); 2012621f0589SKevin Wolf } 2013621f0589SKevin Wolf } 2014621f0589SKevin Wolf 2015a9a9f8f0SAlberto Garcia qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 2016621f0589SKevin Wolf 2017621f0589SKevin Wolf return nb_clusters; 2018621f0589SKevin Wolf } 2019621f0589SKevin Wolf 2020a6841a2dSAlberto Garcia static int zero_l2_subclusters(BlockDriverState *bs, uint64_t offset, 2021a6841a2dSAlberto Garcia unsigned nb_subclusters) 2022a6841a2dSAlberto Garcia { 2023a6841a2dSAlberto Garcia BDRVQcow2State *s = bs->opaque; 2024a6841a2dSAlberto Garcia uint64_t *l2_slice; 2025a6841a2dSAlberto Garcia uint64_t old_l2_bitmap, l2_bitmap; 2026a6841a2dSAlberto Garcia int l2_index, ret, sc = offset_to_sc_index(s, offset); 2027a6841a2dSAlberto Garcia 2028a6841a2dSAlberto Garcia /* For full clusters use zero_in_l2_slice() instead */ 2029a6841a2dSAlberto Garcia assert(nb_subclusters > 0 && nb_subclusters < s->subclusters_per_cluster); 2030a6841a2dSAlberto 
Garcia assert(sc + nb_subclusters <= s->subclusters_per_cluster); 2031a6841a2dSAlberto Garcia assert(offset_into_subcluster(s, offset) == 0); 2032a6841a2dSAlberto Garcia 2033a6841a2dSAlberto Garcia ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); 2034a6841a2dSAlberto Garcia if (ret < 0) { 2035a6841a2dSAlberto Garcia return ret; 2036a6841a2dSAlberto Garcia } 2037a6841a2dSAlberto Garcia 2038a6841a2dSAlberto Garcia switch (qcow2_get_cluster_type(bs, get_l2_entry(s, l2_slice, l2_index))) { 2039a6841a2dSAlberto Garcia case QCOW2_CLUSTER_COMPRESSED: 2040a6841a2dSAlberto Garcia ret = -ENOTSUP; /* We cannot partially zeroize compressed clusters */ 2041a6841a2dSAlberto Garcia goto out; 2042a6841a2dSAlberto Garcia case QCOW2_CLUSTER_NORMAL: 2043a6841a2dSAlberto Garcia case QCOW2_CLUSTER_UNALLOCATED: 2044a6841a2dSAlberto Garcia break; 2045a6841a2dSAlberto Garcia default: 2046a6841a2dSAlberto Garcia g_assert_not_reached(); 2047a6841a2dSAlberto Garcia } 2048a6841a2dSAlberto Garcia 2049a6841a2dSAlberto Garcia old_l2_bitmap = l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); 2050a6841a2dSAlberto Garcia 2051a6841a2dSAlberto Garcia l2_bitmap |= QCOW_OFLAG_SUB_ZERO_RANGE(sc, sc + nb_subclusters); 2052a6841a2dSAlberto Garcia l2_bitmap &= ~QCOW_OFLAG_SUB_ALLOC_RANGE(sc, sc + nb_subclusters); 2053a6841a2dSAlberto Garcia 2054a6841a2dSAlberto Garcia if (old_l2_bitmap != l2_bitmap) { 2055a6841a2dSAlberto Garcia set_l2_bitmap(s, l2_slice, l2_index, l2_bitmap); 2056a6841a2dSAlberto Garcia qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); 2057a6841a2dSAlberto Garcia } 2058a6841a2dSAlberto Garcia 2059a6841a2dSAlberto Garcia ret = 0; 2060a6841a2dSAlberto Garcia out: 2061a6841a2dSAlberto Garcia qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 2062a6841a2dSAlberto Garcia 2063a6841a2dSAlberto Garcia return ret; 2064a6841a2dSAlberto Garcia } 2065a6841a2dSAlberto Garcia 2066a6841a2dSAlberto Garcia int qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, 
2067d2cb36afSEric Blake uint64_t bytes, int flags) 2068621f0589SKevin Wolf { 2069ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 2070d2cb36afSEric Blake uint64_t end_offset = offset + bytes; 2071b6d36defSMax Reitz uint64_t nb_clusters; 2072a6841a2dSAlberto Garcia unsigned head, tail; 2073d2cb36afSEric Blake int64_t cleared; 2074621f0589SKevin Wolf int ret; 2075621f0589SKevin Wolf 20766c3944dcSKevin Wolf /* If we have to stay in sync with an external data file, zero out 20776c3944dcSKevin Wolf * s->data_file first. */ 20786c3944dcSKevin Wolf if (data_file_is_raw(bs)) { 20796c3944dcSKevin Wolf assert(has_data_file(bs)); 20806c3944dcSKevin Wolf ret = bdrv_co_pwrite_zeroes(s->data_file, offset, bytes, flags); 20816c3944dcSKevin Wolf if (ret < 0) { 20826c3944dcSKevin Wolf return ret; 20836c3944dcSKevin Wolf } 20846c3944dcSKevin Wolf } 20856c3944dcSKevin Wolf 2086f10ee139SEric Blake /* Caller must pass aligned values, except at image end */ 2087a6841a2dSAlberto Garcia assert(offset_into_subcluster(s, offset) == 0); 2088a6841a2dSAlberto Garcia assert(offset_into_subcluster(s, end_offset) == 0 || 2089f01643fbSKevin Wolf end_offset >= bs->total_sectors << BDRV_SECTOR_BITS); 2090f10ee139SEric Blake 209161b30439SKevin Wolf /* 209261b30439SKevin Wolf * The zero flag is only supported by version 3 and newer. However, if we 209361b30439SKevin Wolf * have no backing file, we can resort to discard in version 2. 
209461b30439SKevin Wolf */ 2095621f0589SKevin Wolf if (s->qcow_version < 3) { 209661b30439SKevin Wolf if (!bs->backing) { 209761b30439SKevin Wolf return qcow2_cluster_discard(bs, offset, bytes, 209861b30439SKevin Wolf QCOW2_DISCARD_REQUEST, false); 209961b30439SKevin Wolf } 2100621f0589SKevin Wolf return -ENOTSUP; 2101621f0589SKevin Wolf } 2102621f0589SKevin Wolf 2103a6841a2dSAlberto Garcia head = MIN(end_offset, ROUND_UP(offset, s->cluster_size)) - offset; 2104a6841a2dSAlberto Garcia offset += head; 2105a6841a2dSAlberto Garcia 2106a6841a2dSAlberto Garcia tail = (end_offset >= bs->total_sectors << BDRV_SECTOR_BITS) ? 0 : 2107a6841a2dSAlberto Garcia end_offset - MAX(offset, start_of_cluster(s, end_offset)); 2108a6841a2dSAlberto Garcia end_offset -= tail; 2109621f0589SKevin Wolf 21100b919faeSKevin Wolf s->cache_discards = true; 21110b919faeSKevin Wolf 2112a6841a2dSAlberto Garcia if (head) { 2113a6841a2dSAlberto Garcia ret = zero_l2_subclusters(bs, offset - head, 2114a6841a2dSAlberto Garcia size_to_subclusters(s, head)); 2115a6841a2dSAlberto Garcia if (ret < 0) { 2116a6841a2dSAlberto Garcia goto fail; 2117a6841a2dSAlberto Garcia } 2118a6841a2dSAlberto Garcia } 2119a6841a2dSAlberto Garcia 2120a6841a2dSAlberto Garcia /* Each L2 slice is handled by its own loop iteration */ 2121a6841a2dSAlberto Garcia nb_clusters = size_to_clusters(s, end_offset - offset); 2122a6841a2dSAlberto Garcia 2123621f0589SKevin Wolf while (nb_clusters > 0) { 2124a9a9f8f0SAlberto Garcia cleared = zero_in_l2_slice(bs, offset, nb_clusters, flags); 2125d2cb36afSEric Blake if (cleared < 0) { 2126d2cb36afSEric Blake ret = cleared; 21270b919faeSKevin Wolf goto fail; 2128621f0589SKevin Wolf } 2129621f0589SKevin Wolf 2130d2cb36afSEric Blake nb_clusters -= cleared; 2131d2cb36afSEric Blake offset += (cleared * s->cluster_size); 2132621f0589SKevin Wolf } 2133621f0589SKevin Wolf 2134a6841a2dSAlberto Garcia if (tail) { 2135a6841a2dSAlberto Garcia ret = zero_l2_subclusters(bs, end_offset, size_to_subclusters(s, 
tail)); 2136a6841a2dSAlberto Garcia if (ret < 0) { 2137a6841a2dSAlberto Garcia goto fail; 2138a6841a2dSAlberto Garcia } 2139a6841a2dSAlberto Garcia } 2140a6841a2dSAlberto Garcia 21410b919faeSKevin Wolf ret = 0; 21420b919faeSKevin Wolf fail: 21430b919faeSKevin Wolf s->cache_discards = false; 21440b919faeSKevin Wolf qcow2_process_discards(bs, ret); 21450b919faeSKevin Wolf 21460b919faeSKevin Wolf return ret; 2147621f0589SKevin Wolf } 214832b6444dSMax Reitz 214932b6444dSMax Reitz /* 215032b6444dSMax Reitz * Expands all zero clusters in a specific L1 table (or deallocates them, for 215132b6444dSMax Reitz * non-backed non-pre-allocated zero clusters). 215232b6444dSMax Reitz * 21534057a2b2SMax Reitz * l1_entries and *visited_l1_entries are used to keep track of progress for 21544057a2b2SMax Reitz * status_cb(). l1_entries contains the total number of L1 entries and 21554057a2b2SMax Reitz * *visited_l1_entries counts all visited L1 entries. 215632b6444dSMax Reitz */ 215732b6444dSMax Reitz static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, 2158ecf58777SMax Reitz int l1_size, int64_t *visited_l1_entries, 21594057a2b2SMax Reitz int64_t l1_entries, 21608b13976dSMax Reitz BlockDriverAmendStatusCB *status_cb, 21618b13976dSMax Reitz void *cb_opaque) 216232b6444dSMax Reitz { 2163ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 216432b6444dSMax Reitz bool is_active_l1 = (l1_table == s->l1_table); 2165415184f5SAlberto Garcia uint64_t *l2_slice = NULL; 2166415184f5SAlberto Garcia unsigned slice, slice_size2, n_slices; 216732b6444dSMax Reitz int ret; 216832b6444dSMax Reitz int i, j; 216932b6444dSMax Reitz 21707bbb5920SAlberto Garcia /* qcow2_downgrade() is not allowed in images with subclusters */ 21717bbb5920SAlberto Garcia assert(!has_subclusters(s)); 21727bbb5920SAlberto Garcia 2173c8fd8554SAlberto Garcia slice_size2 = s->l2_slice_size * l2_entry_size(s); 2174415184f5SAlberto Garcia n_slices = s->cluster_size / slice_size2; 2175415184f5SAlberto 
Garcia 217632b6444dSMax Reitz if (!is_active_l1) { 217732b6444dSMax Reitz /* inactive L2 tables require a buffer to be stored in when loading 217832b6444dSMax Reitz * them from disk */ 2179415184f5SAlberto Garcia l2_slice = qemu_try_blockalign(bs->file->bs, slice_size2); 2180415184f5SAlberto Garcia if (l2_slice == NULL) { 2181de82815dSKevin Wolf return -ENOMEM; 2182de82815dSKevin Wolf } 218332b6444dSMax Reitz } 218432b6444dSMax Reitz 218532b6444dSMax Reitz for (i = 0; i < l1_size; i++) { 218632b6444dSMax Reitz uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK; 21870e06528eSMax Reitz uint64_t l2_refcount; 218832b6444dSMax Reitz 218932b6444dSMax Reitz if (!l2_offset) { 219032b6444dSMax Reitz /* unallocated */ 21914057a2b2SMax Reitz (*visited_l1_entries)++; 21924057a2b2SMax Reitz if (status_cb) { 21938b13976dSMax Reitz status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque); 21944057a2b2SMax Reitz } 219532b6444dSMax Reitz continue; 219632b6444dSMax Reitz } 219732b6444dSMax Reitz 21988dd93d93SMax Reitz if (offset_into_cluster(s, l2_offset)) { 21998dd93d93SMax Reitz qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" 22008dd93d93SMax Reitz PRIx64 " unaligned (L1 index: %#x)", 22018dd93d93SMax Reitz l2_offset, i); 22028dd93d93SMax Reitz ret = -EIO; 22038dd93d93SMax Reitz goto fail; 22048dd93d93SMax Reitz } 22058dd93d93SMax Reitz 22069b765486SAlberto Garcia ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits, 22079b765486SAlberto Garcia &l2_refcount); 22089b765486SAlberto Garcia if (ret < 0) { 22099b765486SAlberto Garcia goto fail; 22109b765486SAlberto Garcia } 22119b765486SAlberto Garcia 2212415184f5SAlberto Garcia for (slice = 0; slice < n_slices; slice++) { 2213415184f5SAlberto Garcia uint64_t slice_offset = l2_offset + slice * slice_size2; 2214415184f5SAlberto Garcia bool l2_dirty = false; 221532b6444dSMax Reitz if (is_active_l1) { 221632b6444dSMax Reitz /* get active L2 tables from cache */ 2217415184f5SAlberto Garcia ret = qcow2_cache_get(bs, 
s->l2_table_cache, slice_offset, 2218415184f5SAlberto Garcia (void **)&l2_slice); 221932b6444dSMax Reitz } else { 222032b6444dSMax Reitz /* load inactive L2 tables from disk */ 2221415184f5SAlberto Garcia ret = bdrv_pread(bs->file, slice_offset, l2_slice, slice_size2); 222232b6444dSMax Reitz } 222332b6444dSMax Reitz if (ret < 0) { 222432b6444dSMax Reitz goto fail; 222532b6444dSMax Reitz } 222632b6444dSMax Reitz 2227415184f5SAlberto Garcia for (j = 0; j < s->l2_slice_size; j++) { 222812c6aebeSAlberto Garcia uint64_t l2_entry = get_l2_entry(s, l2_slice, j); 2229ecf58777SMax Reitz int64_t offset = l2_entry & L2E_OFFSET_MASK; 2230226494ffSAlberto Garcia QCow2ClusterType cluster_type = 2231808c2bb4SKevin Wolf qcow2_get_cluster_type(bs, l2_entry); 223232b6444dSMax Reitz 2233fdfab37dSEric Blake if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN && 2234fdfab37dSEric Blake cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) { 223532b6444dSMax Reitz continue; 223632b6444dSMax Reitz } 223732b6444dSMax Reitz 2238fdfab37dSEric Blake if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { 2239760e0063SKevin Wolf if (!bs->backing) { 22407bbb5920SAlberto Garcia /* 22417bbb5920SAlberto Garcia * not backed; therefore we can simply deallocate the 22427bbb5920SAlberto Garcia * cluster. No need to call set_l2_bitmap(), this 22437bbb5920SAlberto Garcia * function doesn't support images with subclusters. 
22447bbb5920SAlberto Garcia */ 224512c6aebeSAlberto Garcia set_l2_entry(s, l2_slice, j, 0); 224632b6444dSMax Reitz l2_dirty = true; 224732b6444dSMax Reitz continue; 224832b6444dSMax Reitz } 224932b6444dSMax Reitz 225032b6444dSMax Reitz offset = qcow2_alloc_clusters(bs, s->cluster_size); 225132b6444dSMax Reitz if (offset < 0) { 225232b6444dSMax Reitz ret = offset; 225332b6444dSMax Reitz goto fail; 225432b6444dSMax Reitz } 2255ecf58777SMax Reitz 22563a75a870SAlberto Garcia /* The offset must fit in the offset field */ 22573a75a870SAlberto Garcia assert((offset & L2E_OFFSET_MASK) == offset); 22583a75a870SAlberto Garcia 2259ecf58777SMax Reitz if (l2_refcount > 1) { 2260226494ffSAlberto Garcia /* For shared L2 tables, set the refcount accordingly 2261226494ffSAlberto Garcia * (it is already 1 and needs to be l2_refcount) */ 2262226494ffSAlberto Garcia ret = qcow2_update_cluster_refcount( 2263226494ffSAlberto Garcia bs, offset >> s->cluster_bits, 22642aabe7c7SMax Reitz refcount_diff(1, l2_refcount), false, 2265ecf58777SMax Reitz QCOW2_DISCARD_OTHER); 2266ecf58777SMax Reitz if (ret < 0) { 2267ecf58777SMax Reitz qcow2_free_clusters(bs, offset, s->cluster_size, 2268ecf58777SMax Reitz QCOW2_DISCARD_OTHER); 2269ecf58777SMax Reitz goto fail; 2270ecf58777SMax Reitz } 2271ecf58777SMax Reitz } 227232b6444dSMax Reitz } 227332b6444dSMax Reitz 22748dd93d93SMax Reitz if (offset_into_cluster(s, offset)) { 2275415184f5SAlberto Garcia int l2_index = slice * s->l2_slice_size + j; 2276226494ffSAlberto Garcia qcow2_signal_corruption( 2277226494ffSAlberto Garcia bs, true, -1, -1, 2278bcb07dbaSEric Blake "Cluster allocation offset " 22798dd93d93SMax Reitz "%#" PRIx64 " unaligned (L2 offset: %#" 22808dd93d93SMax Reitz PRIx64 ", L2 index: %#x)", offset, 2281415184f5SAlberto Garcia l2_offset, l2_index); 2282fdfab37dSEric Blake if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { 22838dd93d93SMax Reitz qcow2_free_clusters(bs, offset, s->cluster_size, 22848dd93d93SMax Reitz QCOW2_DISCARD_ALWAYS); 
22858dd93d93SMax Reitz } 22868dd93d93SMax Reitz ret = -EIO; 22878dd93d93SMax Reitz goto fail; 22888dd93d93SMax Reitz } 22898dd93d93SMax Reitz 2290226494ffSAlberto Garcia ret = qcow2_pre_write_overlap_check(bs, 0, offset, 2291966b000fSKevin Wolf s->cluster_size, true); 229232b6444dSMax Reitz if (ret < 0) { 2293fdfab37dSEric Blake if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { 229432b6444dSMax Reitz qcow2_free_clusters(bs, offset, s->cluster_size, 229532b6444dSMax Reitz QCOW2_DISCARD_ALWAYS); 2296320c7066SMax Reitz } 229732b6444dSMax Reitz goto fail; 229832b6444dSMax Reitz } 229932b6444dSMax Reitz 2300966b000fSKevin Wolf ret = bdrv_pwrite_zeroes(s->data_file, offset, 2301966b000fSKevin Wolf s->cluster_size, 0); 230232b6444dSMax Reitz if (ret < 0) { 2303fdfab37dSEric Blake if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { 230432b6444dSMax Reitz qcow2_free_clusters(bs, offset, s->cluster_size, 230532b6444dSMax Reitz QCOW2_DISCARD_ALWAYS); 2306320c7066SMax Reitz } 230732b6444dSMax Reitz goto fail; 230832b6444dSMax Reitz } 230932b6444dSMax Reitz 2310ecf58777SMax Reitz if (l2_refcount == 1) { 231112c6aebeSAlberto Garcia set_l2_entry(s, l2_slice, j, offset | QCOW_OFLAG_COPIED); 2312ecf58777SMax Reitz } else { 231312c6aebeSAlberto Garcia set_l2_entry(s, l2_slice, j, offset); 2314e390cf5aSMax Reitz } 23157bbb5920SAlberto Garcia /* 23167bbb5920SAlberto Garcia * No need to call set_l2_bitmap() after set_l2_entry() because 23177bbb5920SAlberto Garcia * this function doesn't support images with subclusters. 
23187bbb5920SAlberto Garcia */ 2319ecf58777SMax Reitz l2_dirty = true; 232032b6444dSMax Reitz } 232132b6444dSMax Reitz 232232b6444dSMax Reitz if (is_active_l1) { 232332b6444dSMax Reitz if (l2_dirty) { 2324415184f5SAlberto Garcia qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); 232532b6444dSMax Reitz qcow2_cache_depends_on_flush(s->l2_table_cache); 232632b6444dSMax Reitz } 2327415184f5SAlberto Garcia qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 232832b6444dSMax Reitz } else { 232932b6444dSMax Reitz if (l2_dirty) { 2330226494ffSAlberto Garcia ret = qcow2_pre_write_overlap_check( 2331226494ffSAlberto Garcia bs, QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, 2332966b000fSKevin Wolf slice_offset, slice_size2, false); 233332b6444dSMax Reitz if (ret < 0) { 233432b6444dSMax Reitz goto fail; 233532b6444dSMax Reitz } 233632b6444dSMax Reitz 2337415184f5SAlberto Garcia ret = bdrv_pwrite(bs->file, slice_offset, 2338415184f5SAlberto Garcia l2_slice, slice_size2); 233932b6444dSMax Reitz if (ret < 0) { 234032b6444dSMax Reitz goto fail; 234132b6444dSMax Reitz } 234232b6444dSMax Reitz } 234332b6444dSMax Reitz } 2344226494ffSAlberto Garcia } 23454057a2b2SMax Reitz 23464057a2b2SMax Reitz (*visited_l1_entries)++; 23474057a2b2SMax Reitz if (status_cb) { 23488b13976dSMax Reitz status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque); 23494057a2b2SMax Reitz } 235032b6444dSMax Reitz } 235132b6444dSMax Reitz 235232b6444dSMax Reitz ret = 0; 235332b6444dSMax Reitz 235432b6444dSMax Reitz fail: 2355415184f5SAlberto Garcia if (l2_slice) { 235632b6444dSMax Reitz if (!is_active_l1) { 2357415184f5SAlberto Garcia qemu_vfree(l2_slice); 235832b6444dSMax Reitz } else { 2359415184f5SAlberto Garcia qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 236032b6444dSMax Reitz } 236132b6444dSMax Reitz } 236232b6444dSMax Reitz return ret; 236332b6444dSMax Reitz } 236432b6444dSMax Reitz 236532b6444dSMax Reitz /* 236632b6444dSMax Reitz * For backed images, expands all zero clusters on the 
image. For non-backed 236732b6444dSMax Reitz * images, deallocates all non-pre-allocated zero clusters (and claims the 236832b6444dSMax Reitz * allocation for pre-allocated ones). This is important for downgrading to a 236932b6444dSMax Reitz * qcow2 version which doesn't yet support metadata zero clusters. 237032b6444dSMax Reitz */ 23714057a2b2SMax Reitz int qcow2_expand_zero_clusters(BlockDriverState *bs, 23728b13976dSMax Reitz BlockDriverAmendStatusCB *status_cb, 23738b13976dSMax Reitz void *cb_opaque) 237432b6444dSMax Reitz { 2375ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 237632b6444dSMax Reitz uint64_t *l1_table = NULL; 23774057a2b2SMax Reitz int64_t l1_entries = 0, visited_l1_entries = 0; 237832b6444dSMax Reitz int ret; 237932b6444dSMax Reitz int i, j; 238032b6444dSMax Reitz 23814057a2b2SMax Reitz if (status_cb) { 23824057a2b2SMax Reitz l1_entries = s->l1_size; 23834057a2b2SMax Reitz for (i = 0; i < s->nb_snapshots; i++) { 23844057a2b2SMax Reitz l1_entries += s->snapshots[i].l1_size; 23854057a2b2SMax Reitz } 23864057a2b2SMax Reitz } 23874057a2b2SMax Reitz 238832b6444dSMax Reitz ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size, 23894057a2b2SMax Reitz &visited_l1_entries, l1_entries, 23908b13976dSMax Reitz status_cb, cb_opaque); 239132b6444dSMax Reitz if (ret < 0) { 239232b6444dSMax Reitz goto fail; 239332b6444dSMax Reitz } 239432b6444dSMax Reitz 239532b6444dSMax Reitz /* Inactive L1 tables may point to active L2 tables - therefore it is 239632b6444dSMax Reitz * necessary to flush the L2 table cache before trying to access the L2 239732b6444dSMax Reitz * tables pointed to by inactive L1 entries (else we might try to expand 239832b6444dSMax Reitz * zero clusters that have already been expanded); furthermore, it is also 239932b6444dSMax Reitz * necessary to empty the L2 table cache, since it may contain tables which 240032b6444dSMax Reitz * are now going to be modified directly on disk, bypassing the cache. 
240132b6444dSMax Reitz * qcow2_cache_empty() does both for us. */ 240232b6444dSMax Reitz ret = qcow2_cache_empty(bs, s->l2_table_cache); 240332b6444dSMax Reitz if (ret < 0) { 240432b6444dSMax Reitz goto fail; 240532b6444dSMax Reitz } 240632b6444dSMax Reitz 240732b6444dSMax Reitz for (i = 0; i < s->nb_snapshots; i++) { 2408c9a442e4SAlberto Garcia int l1_size2; 2409c9a442e4SAlberto Garcia uint64_t *new_l1_table; 2410c9a442e4SAlberto Garcia Error *local_err = NULL; 241132b6444dSMax Reitz 2412c9a442e4SAlberto Garcia ret = qcow2_validate_table(bs, s->snapshots[i].l1_table_offset, 2413c9a442e4SAlberto Garcia s->snapshots[i].l1_size, sizeof(uint64_t), 2414c9a442e4SAlberto Garcia QCOW_MAX_L1_SIZE, "Snapshot L1 table", 2415c9a442e4SAlberto Garcia &local_err); 2416c9a442e4SAlberto Garcia if (ret < 0) { 2417c9a442e4SAlberto Garcia error_report_err(local_err); 2418c9a442e4SAlberto Garcia goto fail; 2419c9a442e4SAlberto Garcia } 2420c9a442e4SAlberto Garcia 2421c9a442e4SAlberto Garcia l1_size2 = s->snapshots[i].l1_size * sizeof(uint64_t); 2422c9a442e4SAlberto Garcia new_l1_table = g_try_realloc(l1_table, l1_size2); 2423de7269d2SAlberto Garcia 2424de7269d2SAlberto Garcia if (!new_l1_table) { 2425de7269d2SAlberto Garcia ret = -ENOMEM; 2426de7269d2SAlberto Garcia goto fail; 2427de7269d2SAlberto Garcia } 2428de7269d2SAlberto Garcia 2429de7269d2SAlberto Garcia l1_table = new_l1_table; 243032b6444dSMax Reitz 2431c9a442e4SAlberto Garcia ret = bdrv_pread(bs->file, s->snapshots[i].l1_table_offset, 2432c9a442e4SAlberto Garcia l1_table, l1_size2); 243332b6444dSMax Reitz if (ret < 0) { 243432b6444dSMax Reitz goto fail; 243532b6444dSMax Reitz } 243632b6444dSMax Reitz 243732b6444dSMax Reitz for (j = 0; j < s->snapshots[i].l1_size; j++) { 243832b6444dSMax Reitz be64_to_cpus(&l1_table[j]); 243932b6444dSMax Reitz } 244032b6444dSMax Reitz 244132b6444dSMax Reitz ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size, 24424057a2b2SMax Reitz &visited_l1_entries, l1_entries, 
24438b13976dSMax Reitz status_cb, cb_opaque); 244432b6444dSMax Reitz if (ret < 0) { 244532b6444dSMax Reitz goto fail; 244632b6444dSMax Reitz } 244732b6444dSMax Reitz } 244832b6444dSMax Reitz 244932b6444dSMax Reitz ret = 0; 245032b6444dSMax Reitz 245132b6444dSMax Reitz fail: 245232b6444dSMax Reitz g_free(l1_table); 245332b6444dSMax Reitz return ret; 245432b6444dSMax Reitz } 2455