145aba42fSKevin Wolf /* 245aba42fSKevin Wolf * Block driver for the QCOW version 2 format 345aba42fSKevin Wolf * 445aba42fSKevin Wolf * Copyright (c) 2004-2006 Fabrice Bellard 545aba42fSKevin Wolf * 645aba42fSKevin Wolf * Permission is hereby granted, free of charge, to any person obtaining a copy 745aba42fSKevin Wolf * of this software and associated documentation files (the "Software"), to deal 845aba42fSKevin Wolf * in the Software without restriction, including without limitation the rights 945aba42fSKevin Wolf * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 1045aba42fSKevin Wolf * copies of the Software, and to permit persons to whom the Software is 1145aba42fSKevin Wolf * furnished to do so, subject to the following conditions: 1245aba42fSKevin Wolf * 1345aba42fSKevin Wolf * The above copyright notice and this permission notice shall be included in 1445aba42fSKevin Wolf * all copies or substantial portions of the Software. 1545aba42fSKevin Wolf * 1645aba42fSKevin Wolf * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1745aba42fSKevin Wolf * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1845aba42fSKevin Wolf * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1945aba42fSKevin Wolf * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 2045aba42fSKevin Wolf * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 2145aba42fSKevin Wolf * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 2245aba42fSKevin Wolf * THE SOFTWARE. 
 */

#include <zlib.h>

#include "qemu-common.h"
#include "block_int.h"
#include "block/qcow2.h"

/*
 * Grows the in-memory and on-disk L1 table so that it holds at least
 * min_size entries.
 *
 * If exact_size is true the table is grown to exactly min_size entries;
 * otherwise the size is bumped by repeated *1.5 steps so that frequent
 * small grows don't each rewrite the table.
 *
 * Returns 0 on success (s->l1_table/l1_size/l1_table_offset updated),
 * -errno on failure (old L1 table left fully intact).
 */
int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
{
    BDRVQcowState *s = bs->opaque;
    int new_l1_size, new_l1_size2, ret, i;
    uint64_t *new_l1_table;
    int64_t new_l1_table_offset;
    uint8_t data[12];    /* 4 bytes l1_size + 8 bytes l1_table_offset */

    if (min_size <= s->l1_size)
        return 0;

    if (exact_size) {
        new_l1_size = min_size;
    } else {
        /* Bump size up to reduce the number of times we have to grow */
        new_l1_size = s->l1_size;
        if (new_l1_size == 0) {
            new_l1_size = 1;
        }
        while (min_size > new_l1_size) {
            new_l1_size = (new_l1_size * 3 + 1) / 2;
        }
    }

#ifdef DEBUG_ALLOC2
    fprintf(stderr, "grow l1_table from %d to %d\n", s->l1_size, new_l1_size);
#endif

    /* Copy the old entries into a zero-padded, 512-byte-aligned buffer */
    new_l1_size2 = sizeof(uint64_t) * new_l1_size;
    new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
    memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));

    /* write new table (align to cluster) */
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);

    new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
    if (new_l1_table_offset < 0) {
        g_free(new_l1_table);
        return new_l1_table_offset;
    }

    /* Make sure the refcounts for the newly allocated clusters are on disk
     * before anything can point at the new table */
    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* The new table is written out (big-endian) before the header is
     * switched over, so a crash in between leaves the old table active */
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
    for(i = 0; i < s->l1_size; i++)
        new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
    if (ret < 0)
        goto fail;
    /* convert back so the in-memory copy stays host-endian */
    for(i = 0; i < s->l1_size; i++)
        new_l1_table[i] = be64_to_cpu(new_l1_table[i]);

    /* set new table: update l1_size and l1_table_offset in the header in
     * one 12-byte write */
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
    cpu_to_be32w((uint32_t*)data, new_l1_size);
    cpu_to_be64wu((uint64_t*)(data + 4), new_l1_table_offset);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
    if (ret < 0) {
        goto fail;
    }
    /* Switch-over complete: drop the old in-memory table and free the old
     * on-disk clusters */
    g_free(s->l1_table);
    qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t));
    s->l1_table_offset = new_l1_table_offset;
    s->l1_table = new_l1_table;
    s->l1_size = new_l1_size;
    return 0;
 fail:
    g_free(new_l1_table);
    qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2);
    return ret;
}
10445aba42fSKevin Wolf 10545aba42fSKevin Wolf /* 10645aba42fSKevin Wolf * l2_load 10745aba42fSKevin Wolf * 10845aba42fSKevin Wolf * Loads a L2 table into memory. If the table is in the cache, the cache 10945aba42fSKevin Wolf * is used; otherwise the L2 table is loaded from the image file. 11045aba42fSKevin Wolf * 11145aba42fSKevin Wolf * Returns a pointer to the L2 table on success, or NULL if the read from 11245aba42fSKevin Wolf * the image file failed. 11345aba42fSKevin Wolf */ 11445aba42fSKevin Wolf 11555c17e98SKevin Wolf static int l2_load(BlockDriverState *bs, uint64_t l2_offset, 11655c17e98SKevin Wolf uint64_t **l2_table) 11745aba42fSKevin Wolf { 11845aba42fSKevin Wolf BDRVQcowState *s = bs->opaque; 11955c17e98SKevin Wolf int ret; 12045aba42fSKevin Wolf 12129c1a730SKevin Wolf ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table); 12245aba42fSKevin Wolf 12355c17e98SKevin Wolf return ret; 12455c17e98SKevin Wolf } 12555c17e98SKevin Wolf 12645aba42fSKevin Wolf /* 1276583e3c7SKevin Wolf * Writes one sector of the L1 table to the disk (can't update single entries 1286583e3c7SKevin Wolf * and we really don't want bdrv_pread to perform a read-modify-write) 1296583e3c7SKevin Wolf */ 1306583e3c7SKevin Wolf #define L1_ENTRIES_PER_SECTOR (512 / 8) 13166f82ceeSKevin Wolf static int write_l1_entry(BlockDriverState *bs, int l1_index) 1326583e3c7SKevin Wolf { 13366f82ceeSKevin Wolf BDRVQcowState *s = bs->opaque; 1346583e3c7SKevin Wolf uint64_t buf[L1_ENTRIES_PER_SECTOR]; 1356583e3c7SKevin Wolf int l1_start_index; 136f7defcb6SKevin Wolf int i, ret; 1376583e3c7SKevin Wolf 1386583e3c7SKevin Wolf l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1); 1396583e3c7SKevin Wolf for (i = 0; i < L1_ENTRIES_PER_SECTOR; i++) { 1406583e3c7SKevin Wolf buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]); 1416583e3c7SKevin Wolf } 1426583e3c7SKevin Wolf 14366f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 1448b3b7206SKevin Wolf ret = 
bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index, 145f7defcb6SKevin Wolf buf, sizeof(buf)); 146f7defcb6SKevin Wolf if (ret < 0) { 147f7defcb6SKevin Wolf return ret; 1486583e3c7SKevin Wolf } 1496583e3c7SKevin Wolf 1506583e3c7SKevin Wolf return 0; 1516583e3c7SKevin Wolf } 1526583e3c7SKevin Wolf 1536583e3c7SKevin Wolf /* 15445aba42fSKevin Wolf * l2_allocate 15545aba42fSKevin Wolf * 15645aba42fSKevin Wolf * Allocate a new l2 entry in the file. If l1_index points to an already 15745aba42fSKevin Wolf * used entry in the L2 table (i.e. we are doing a copy on write for the L2 15845aba42fSKevin Wolf * table) copy the contents of the old L2 table into the newly allocated one. 15945aba42fSKevin Wolf * Otherwise the new table is initialized with zeros. 16045aba42fSKevin Wolf * 16145aba42fSKevin Wolf */ 16245aba42fSKevin Wolf 163c46e1167SKevin Wolf static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) 16445aba42fSKevin Wolf { 16545aba42fSKevin Wolf BDRVQcowState *s = bs->opaque; 1666583e3c7SKevin Wolf uint64_t old_l2_offset; 167f4f0d391SKevin Wolf uint64_t *l2_table; 168f4f0d391SKevin Wolf int64_t l2_offset; 169c46e1167SKevin Wolf int ret; 17045aba42fSKevin Wolf 17145aba42fSKevin Wolf old_l2_offset = s->l1_table[l1_index]; 17245aba42fSKevin Wolf 17345aba42fSKevin Wolf /* allocate a new l2 entry */ 17445aba42fSKevin Wolf 175ed6ccf0fSKevin Wolf l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t)); 1765d757b56SKevin Wolf if (l2_offset < 0) { 177c46e1167SKevin Wolf return l2_offset; 1785d757b56SKevin Wolf } 17929c1a730SKevin Wolf 18029c1a730SKevin Wolf ret = qcow2_cache_flush(bs, s->refcount_block_cache); 18129c1a730SKevin Wolf if (ret < 0) { 18229c1a730SKevin Wolf goto fail; 18329c1a730SKevin Wolf } 18445aba42fSKevin Wolf 18545aba42fSKevin Wolf /* allocate a new entry in the l2 cache */ 18645aba42fSKevin Wolf 18729c1a730SKevin Wolf ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table); 
18829c1a730SKevin Wolf if (ret < 0) { 18929c1a730SKevin Wolf return ret; 19029c1a730SKevin Wolf } 19129c1a730SKevin Wolf 19229c1a730SKevin Wolf l2_table = *table; 19345aba42fSKevin Wolf 19445aba42fSKevin Wolf if (old_l2_offset == 0) { 19545aba42fSKevin Wolf /* if there was no old l2 table, clear the new table */ 19645aba42fSKevin Wolf memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); 19745aba42fSKevin Wolf } else { 19829c1a730SKevin Wolf uint64_t* old_table; 19929c1a730SKevin Wolf 20045aba42fSKevin Wolf /* if there was an old l2 table, read it from the disk */ 20166f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ); 20229c1a730SKevin Wolf ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_offset, 20329c1a730SKevin Wolf (void**) &old_table); 20429c1a730SKevin Wolf if (ret < 0) { 20529c1a730SKevin Wolf goto fail; 20629c1a730SKevin Wolf } 20729c1a730SKevin Wolf 20829c1a730SKevin Wolf memcpy(l2_table, old_table, s->cluster_size); 20929c1a730SKevin Wolf 21029c1a730SKevin Wolf ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table); 211c46e1167SKevin Wolf if (ret < 0) { 212175e1152SKevin Wolf goto fail; 213c46e1167SKevin Wolf } 21445aba42fSKevin Wolf } 21529c1a730SKevin Wolf 21645aba42fSKevin Wolf /* write the l2 table to the file */ 21766f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); 21829c1a730SKevin Wolf 21929c1a730SKevin Wolf qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); 22029c1a730SKevin Wolf ret = qcow2_cache_flush(bs, s->l2_table_cache); 221c46e1167SKevin Wolf if (ret < 0) { 222175e1152SKevin Wolf goto fail; 223175e1152SKevin Wolf } 224175e1152SKevin Wolf 225175e1152SKevin Wolf /* update the L1 entry */ 226175e1152SKevin Wolf s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; 227175e1152SKevin Wolf ret = write_l1_entry(bs, l1_index); 228175e1152SKevin Wolf if (ret < 0) { 229175e1152SKevin Wolf goto fail; 230c46e1167SKevin Wolf } 23145aba42fSKevin Wolf 232c46e1167SKevin Wolf *table = l2_table; 
233c46e1167SKevin Wolf return 0; 234175e1152SKevin Wolf 235175e1152SKevin Wolf fail: 23629c1a730SKevin Wolf qcow2_cache_put(bs, s->l2_table_cache, (void**) table); 23768dba0bfSKevin Wolf s->l1_table[l1_index] = old_l2_offset; 238175e1152SKevin Wolf return ret; 23945aba42fSKevin Wolf } 24045aba42fSKevin Wolf 24145aba42fSKevin Wolf static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size, 24245aba42fSKevin Wolf uint64_t *l2_table, uint64_t start, uint64_t mask) 24345aba42fSKevin Wolf { 24445aba42fSKevin Wolf int i; 24545aba42fSKevin Wolf uint64_t offset = be64_to_cpu(l2_table[0]) & ~mask; 24645aba42fSKevin Wolf 24745aba42fSKevin Wolf if (!offset) 24845aba42fSKevin Wolf return 0; 24945aba42fSKevin Wolf 25045aba42fSKevin Wolf for (i = start; i < start + nb_clusters; i++) 25180ee15a6SKevin Wolf if (offset + (uint64_t) i * cluster_size != (be64_to_cpu(l2_table[i]) & ~mask)) 25245aba42fSKevin Wolf break; 25345aba42fSKevin Wolf 25445aba42fSKevin Wolf return (i - start); 25545aba42fSKevin Wolf } 25645aba42fSKevin Wolf 25745aba42fSKevin Wolf static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table) 25845aba42fSKevin Wolf { 25945aba42fSKevin Wolf int i = 0; 26045aba42fSKevin Wolf 26145aba42fSKevin Wolf while(nb_clusters-- && l2_table[i] == 0) 26245aba42fSKevin Wolf i++; 26345aba42fSKevin Wolf 26445aba42fSKevin Wolf return i; 26545aba42fSKevin Wolf } 26645aba42fSKevin Wolf 26745aba42fSKevin Wolf /* The crypt function is compatible with the linux cryptoloop 26845aba42fSKevin Wolf algorithm for < 4 GB images. 
NOTE: out_buf == in_buf is 26945aba42fSKevin Wolf supported */ 270ed6ccf0fSKevin Wolf void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, 27145aba42fSKevin Wolf uint8_t *out_buf, const uint8_t *in_buf, 27245aba42fSKevin Wolf int nb_sectors, int enc, 27345aba42fSKevin Wolf const AES_KEY *key) 27445aba42fSKevin Wolf { 27545aba42fSKevin Wolf union { 27645aba42fSKevin Wolf uint64_t ll[2]; 27745aba42fSKevin Wolf uint8_t b[16]; 27845aba42fSKevin Wolf } ivec; 27945aba42fSKevin Wolf int i; 28045aba42fSKevin Wolf 28145aba42fSKevin Wolf for(i = 0; i < nb_sectors; i++) { 28245aba42fSKevin Wolf ivec.ll[0] = cpu_to_le64(sector_num); 28345aba42fSKevin Wolf ivec.ll[1] = 0; 28445aba42fSKevin Wolf AES_cbc_encrypt(in_buf, out_buf, 512, key, 28545aba42fSKevin Wolf ivec.b, enc); 28645aba42fSKevin Wolf sector_num++; 28745aba42fSKevin Wolf in_buf += 512; 28845aba42fSKevin Wolf out_buf += 512; 28945aba42fSKevin Wolf } 29045aba42fSKevin Wolf } 29145aba42fSKevin Wolf 29245aba42fSKevin Wolf 2937c80ab3fSJes Sorensen static int qcow2_read(BlockDriverState *bs, int64_t sector_num, 29472ecf02dSKevin Wolf uint8_t *buf, int nb_sectors) 29545aba42fSKevin Wolf { 29645aba42fSKevin Wolf BDRVQcowState *s = bs->opaque; 29745aba42fSKevin Wolf int ret, index_in_cluster, n, n1; 29845aba42fSKevin Wolf uint64_t cluster_offset; 299bd28f835SKevin Wolf struct iovec iov; 300bd28f835SKevin Wolf QEMUIOVector qiov; 30145aba42fSKevin Wolf 30245aba42fSKevin Wolf while (nb_sectors > 0) { 30345aba42fSKevin Wolf n = nb_sectors; 3041c46efaaSKevin Wolf 3051c46efaaSKevin Wolf ret = qcow2_get_cluster_offset(bs, sector_num << 9, &n, 3061c46efaaSKevin Wolf &cluster_offset); 3071c46efaaSKevin Wolf if (ret < 0) { 3081c46efaaSKevin Wolf return ret; 3091c46efaaSKevin Wolf } 3101c46efaaSKevin Wolf 31145aba42fSKevin Wolf index_in_cluster = sector_num & (s->cluster_sectors - 1); 31245aba42fSKevin Wolf if (!cluster_offset) { 31345aba42fSKevin Wolf if (bs->backing_hd) { 31445aba42fSKevin Wolf /* read from the base image 
*/ 315bd28f835SKevin Wolf iov.iov_base = buf; 316bd28f835SKevin Wolf iov.iov_len = n * 512; 317bd28f835SKevin Wolf qemu_iovec_init_external(&qiov, &iov, 1); 318bd28f835SKevin Wolf 319bd28f835SKevin Wolf n1 = qcow2_backing_read1(bs->backing_hd, &qiov, sector_num, n); 32045aba42fSKevin Wolf if (n1 > 0) { 32166f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING); 32245aba42fSKevin Wolf ret = bdrv_read(bs->backing_hd, sector_num, buf, n1); 32345aba42fSKevin Wolf if (ret < 0) 32445aba42fSKevin Wolf return -1; 32545aba42fSKevin Wolf } 32645aba42fSKevin Wolf } else { 32745aba42fSKevin Wolf memset(buf, 0, 512 * n); 32845aba42fSKevin Wolf } 32945aba42fSKevin Wolf } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { 33066f82ceeSKevin Wolf if (qcow2_decompress_cluster(bs, cluster_offset) < 0) 33145aba42fSKevin Wolf return -1; 33245aba42fSKevin Wolf memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n); 33345aba42fSKevin Wolf } else { 33466f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_READ); 33566f82ceeSKevin Wolf ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512); 33645aba42fSKevin Wolf if (ret != n * 512) 33745aba42fSKevin Wolf return -1; 33845aba42fSKevin Wolf if (s->crypt_method) { 339ed6ccf0fSKevin Wolf qcow2_encrypt_sectors(s, sector_num, buf, buf, n, 0, 34045aba42fSKevin Wolf &s->aes_decrypt_key); 34145aba42fSKevin Wolf } 34245aba42fSKevin Wolf } 34345aba42fSKevin Wolf nb_sectors -= n; 34445aba42fSKevin Wolf sector_num += n; 34545aba42fSKevin Wolf buf += n * 512; 34645aba42fSKevin Wolf } 34745aba42fSKevin Wolf return 0; 34845aba42fSKevin Wolf } 34945aba42fSKevin Wolf 35045aba42fSKevin Wolf static int copy_sectors(BlockDriverState *bs, uint64_t start_sect, 35145aba42fSKevin Wolf uint64_t cluster_offset, int n_start, int n_end) 35245aba42fSKevin Wolf { 35345aba42fSKevin Wolf BDRVQcowState *s = bs->opaque; 35445aba42fSKevin Wolf int n, ret; 35545aba42fSKevin Wolf 35645aba42fSKevin Wolf n = n_end - n_start; 
35745aba42fSKevin Wolf if (n <= 0) 35845aba42fSKevin Wolf return 0; 35966f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_COW_READ); 3607c80ab3fSJes Sorensen ret = qcow2_read(bs, start_sect + n_start, s->cluster_data, n); 36145aba42fSKevin Wolf if (ret < 0) 36245aba42fSKevin Wolf return ret; 36345aba42fSKevin Wolf if (s->crypt_method) { 364ed6ccf0fSKevin Wolf qcow2_encrypt_sectors(s, start_sect + n_start, 36545aba42fSKevin Wolf s->cluster_data, 36645aba42fSKevin Wolf s->cluster_data, n, 1, 36745aba42fSKevin Wolf &s->aes_encrypt_key); 36845aba42fSKevin Wolf } 36966f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE); 3709f8e668eSKevin Wolf ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start, 37145aba42fSKevin Wolf s->cluster_data, n); 37245aba42fSKevin Wolf if (ret < 0) 37345aba42fSKevin Wolf return ret; 37445aba42fSKevin Wolf return 0; 37545aba42fSKevin Wolf } 37645aba42fSKevin Wolf 37745aba42fSKevin Wolf 37845aba42fSKevin Wolf /* 37945aba42fSKevin Wolf * get_cluster_offset 38045aba42fSKevin Wolf * 3811c46efaaSKevin Wolf * For a given offset of the disk image, find the cluster offset in 3821c46efaaSKevin Wolf * qcow2 file. The offset is stored in *cluster_offset. 38345aba42fSKevin Wolf * 384d57237f2SDevin Nakamura * on entry, *num is the number of contiguous sectors we'd like to 38545aba42fSKevin Wolf * access following offset. 38645aba42fSKevin Wolf * 387d57237f2SDevin Nakamura * on exit, *num is the number of contiguous sectors we can read. 38845aba42fSKevin Wolf * 3891c46efaaSKevin Wolf * Return 0, if the offset is found 3901c46efaaSKevin Wolf * Return -errno, otherwise. 
 *
 */

int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
    int *num, uint64_t *cluster_offset)
{
    BDRVQcowState *s = bs->opaque;
    unsigned int l1_index, l2_index;
    uint64_t l2_offset, *l2_table;
    int l1_bits, c;
    unsigned int index_in_cluster, nb_clusters;
    uint64_t nb_available, nb_needed;
    int ret;

    /* sector position of offset inside its cluster */
    index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
    /* sectors needed counted from the start of the cluster */
    nb_needed = *num + index_in_cluster;

    /* one L1 entry covers 2^(l2_bits + cluster_bits) guest bytes */
    l1_bits = s->l2_bits + s->cluster_bits;

    /* compute how many bytes there are between the offset and
     * the end of the l1 entry
     */

    nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));

    /* compute the number of available sectors */

    nb_available = (nb_available >> 9) + index_in_cluster;

    /* never look past the end of the current L1 entry */
    if (nb_needed > nb_available) {
        nb_needed = nb_available;
    }

    *cluster_offset = 0;

    /* seek the l2 offset in the l1 table */

    l1_index = offset >> l1_bits;
    if (l1_index >= s->l1_size)
        goto out;    /* beyond the L1 table: unallocated */

    l2_offset = s->l1_table[l1_index];

    /* seek the l2 table of the given l2 offset */

    if (!l2_offset)
        goto out;    /* no L2 table: unallocated */

    /* load the l2 table in memory */

    l2_offset &= ~QCOW_OFLAG_COPIED;
    ret = l2_load(bs, l2_offset, &l2_table);
    if (ret < 0) {
        return ret;
    }

    /* find the cluster offset for the given disk offset */

    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
    *cluster_offset = be64_to_cpu(l2_table[l2_index]);
    nb_clusters = size_to_clusters(s, nb_needed << 9);

    if (!*cluster_offset) {
        /* how many empty clusters ? */
        c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
    } else {
        /* how many allocated clusters ? */
        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
                &l2_table[l2_index], 0, QCOW_OFLAG_COPIED);
    }

    /* release the cache reference taken by l2_load() */
    qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);

    nb_available = (c * s->cluster_sectors);
out:
    if (nb_available > nb_needed)
        nb_available = nb_needed;

    /* convert back from cluster-relative to request-relative sectors */
    *num = nb_available - index_in_cluster;

    *cluster_offset &=~QCOW_OFLAG_COPIED;
    return 0;
}

/*
 * get_cluster_table
 *
 * for a given disk offset, load (and allocate if needed)
 * the l2 table.
 *
 * the l2 table offset in the qcow2 file and the cluster index
 * in the l2 table are given to the caller.
 *
 * Returns 0 on success, -errno in failure case
 */
static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
    uint64_t **new_l2_table,
    uint64_t *new_l2_offset,
    int *new_l2_index)
{
    BDRVQcowState *s = bs->opaque;
    unsigned int l1_index, l2_index;
    uint64_t l2_offset;
    uint64_t *l2_table = NULL;
    int ret;

    /* seek the l2 offset in the l1 table */

    l1_index = offset >> (s->l2_bits + s->cluster_bits);
    if (l1_index >= s->l1_size) {
        /* grow the L1 table on demand so it covers this offset */
        ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
        if (ret < 0) {
            return ret;
        }
    }
    l2_offset = s->l1_table[l1_index];

    /* seek the l2 table of the given l2 offset */

    if (l2_offset & QCOW_OFLAG_COPIED) {
        /* COPIED means refcount == 1: the table is writable in place.
         * load the l2 table in memory */
        l2_offset &= ~QCOW_OFLAG_COPIED;
        ret = l2_load(bs, l2_offset, &l2_table);
        if (ret < 0) {
            return ret;
        }
    } else {
        /* First allocate a new L2 table (and do COW if needed) */
        ret = l2_allocate(bs, l1_index, &l2_table);
        if (ret < 0) {
            return ret;
        }

        /* Then decrease the refcount of the old table */
        if (l2_offset) {
            qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t));
        }
        /* l2_allocate() updated the L1 entry; re-read the new offset */
        l2_offset = s->l1_table[l1_index] & ~QCOW_OFLAG_COPIED;
    }

    /* find the cluster offset for the given disk offset */

    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);

    /* caller owns the cache reference in *new_l2_table and must release it
     * with qcow2_cache_put() */
    *new_l2_table = l2_table;
    *new_l2_offset = l2_offset;
    *new_l2_index = l2_index;

    return 0;
}

/*
 * alloc_compressed_cluster_offset
 *
 * For a given offset of the disk image, return cluster offset in
 * qcow2 file.
 *
 * If the offset is not found, allocate a new compressed cluster.
 *
 * Return the cluster offset if successful,
 * Return 0, otherwise.
55245aba42fSKevin Wolf * 55345aba42fSKevin Wolf */ 55445aba42fSKevin Wolf 555ed6ccf0fSKevin Wolf uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, 55645aba42fSKevin Wolf uint64_t offset, 55745aba42fSKevin Wolf int compressed_size) 55845aba42fSKevin Wolf { 55945aba42fSKevin Wolf BDRVQcowState *s = bs->opaque; 56045aba42fSKevin Wolf int l2_index, ret; 561f4f0d391SKevin Wolf uint64_t l2_offset, *l2_table; 562f4f0d391SKevin Wolf int64_t cluster_offset; 56345aba42fSKevin Wolf int nb_csectors; 56445aba42fSKevin Wolf 56545aba42fSKevin Wolf ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); 5661e3e8f1aSKevin Wolf if (ret < 0) { 56745aba42fSKevin Wolf return 0; 5681e3e8f1aSKevin Wolf } 56945aba42fSKevin Wolf 57045aba42fSKevin Wolf cluster_offset = be64_to_cpu(l2_table[l2_index]); 57145aba42fSKevin Wolf if (cluster_offset & QCOW_OFLAG_COPIED) 57245aba42fSKevin Wolf return cluster_offset & ~QCOW_OFLAG_COPIED; 57345aba42fSKevin Wolf 57445aba42fSKevin Wolf if (cluster_offset) 575ed6ccf0fSKevin Wolf qcow2_free_any_clusters(bs, cluster_offset, 1); 57645aba42fSKevin Wolf 577ed6ccf0fSKevin Wolf cluster_offset = qcow2_alloc_bytes(bs, compressed_size); 5785d757b56SKevin Wolf if (cluster_offset < 0) { 57929c1a730SKevin Wolf qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); 5805d757b56SKevin Wolf return 0; 5815d757b56SKevin Wolf } 5825d757b56SKevin Wolf 58345aba42fSKevin Wolf nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) - 58445aba42fSKevin Wolf (cluster_offset >> 9); 58545aba42fSKevin Wolf 58645aba42fSKevin Wolf cluster_offset |= QCOW_OFLAG_COMPRESSED | 58745aba42fSKevin Wolf ((uint64_t)nb_csectors << s->csize_shift); 58845aba42fSKevin Wolf 58945aba42fSKevin Wolf /* update L2 table */ 59045aba42fSKevin Wolf 59145aba42fSKevin Wolf /* compressed clusters never have the copied flag */ 59245aba42fSKevin Wolf 59366f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED); 59429c1a730SKevin Wolf 
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); 59545aba42fSKevin Wolf l2_table[l2_index] = cpu_to_be64(cluster_offset); 59629c1a730SKevin Wolf ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); 59729c1a730SKevin Wolf if (ret < 0) { 59845aba42fSKevin Wolf return 0; 59929c1a730SKevin Wolf } 60045aba42fSKevin Wolf 60145aba42fSKevin Wolf return cluster_offset; 60245aba42fSKevin Wolf } 60345aba42fSKevin Wolf 604148da7eaSKevin Wolf int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) 60545aba42fSKevin Wolf { 60645aba42fSKevin Wolf BDRVQcowState *s = bs->opaque; 60745aba42fSKevin Wolf int i, j = 0, l2_index, ret; 60845aba42fSKevin Wolf uint64_t *old_cluster, start_sect, l2_offset, *l2_table; 609148da7eaSKevin Wolf uint64_t cluster_offset = m->cluster_offset; 61029c1a730SKevin Wolf bool cow = false; 61145aba42fSKevin Wolf 61245aba42fSKevin Wolf if (m->nb_clusters == 0) 61345aba42fSKevin Wolf return 0; 61445aba42fSKevin Wolf 6157267c094SAnthony Liguori old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t)); 61645aba42fSKevin Wolf 61745aba42fSKevin Wolf /* copy content of unmodified sectors */ 61845aba42fSKevin Wolf start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9; 61945aba42fSKevin Wolf if (m->n_start) { 62029c1a730SKevin Wolf cow = true; 62145aba42fSKevin Wolf ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start); 62245aba42fSKevin Wolf if (ret < 0) 62345aba42fSKevin Wolf goto err; 62445aba42fSKevin Wolf } 62545aba42fSKevin Wolf 62645aba42fSKevin Wolf if (m->nb_available & (s->cluster_sectors - 1)) { 62745aba42fSKevin Wolf uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1); 62829c1a730SKevin Wolf cow = true; 62945aba42fSKevin Wolf ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9), 63045aba42fSKevin Wolf m->nb_available - end, s->cluster_sectors); 63145aba42fSKevin Wolf if (ret < 0) 63245aba42fSKevin Wolf goto err; 63345aba42fSKevin Wolf } 63445aba42fSKevin Wolf 
    /*
     * Update L2 table.
     *
     * Before we update the L2 table to actually point to the new cluster, we
     * need to be sure that the refcounts have been increased and COW was
     * handled.
     */
    if (cow) {
        /* L2 update must not hit the disk before the COW data does */
        qcow2_cache_depends_on_flush(s->l2_table_cache);
    }

    /* Refcount blocks must be written before the L2 entries referencing them */
    qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
    ret = get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index);
    if (ret < 0) {
        goto err;
    }
    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);

    for (i = 0; i < m->nb_clusters; i++) {
        /* if two concurrent writes happen to the same unallocated cluster
         * each write allocates separate cluster and writes data concurrently.
         * The first one to complete updates l2 table with pointer to its
         * cluster the second one has to do RMW (which is done above by
         * copy_sectors()), update l2 table with its cluster pointer and free
         * old cluster. This is what this loop does */
        if(l2_table[l2_index + i] != 0)
            old_cluster[j++] = l2_table[l2_index + i];

        l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
                    (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
     }


    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
    if (ret < 0) {
        goto err;
    }

    /*
     * If this was a COW, we need to decrease the refcount of the old cluster.
     * Also flush bs->file to get the right order for L2 and refcount update.
     */
    if (j != 0) {
        for (i = 0; i < j; i++) {
            qcow2_free_any_clusters(bs,
                be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, 1);
        }
    }

    ret = 0;
err:
    g_free(old_cluster);
    return ret;
}

/*
 * alloc_cluster_offset
 *
 * For a given offset of the disk image, return cluster offset in qcow2 file.
 * If the offset is not found, allocate a new cluster.
 *
 * If the cluster was already allocated, m->nb_clusters is set to 0,
 * other fields in m are meaningless.
 *
 * If the cluster is newly allocated, m->nb_clusters is set to the number of
 * contiguous clusters that have been allocated. In this case, the other
 * fields of m are valid and contain information about the first allocated
 * cluster.
 *
 * If the request conflicts with another write request in flight, the coroutine
 * is queued and will be reentered when the dependency has completed.
 *
 * Return 0 on success and -errno in error cases
 */
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
    int n_start, int n_end, int *num, QCowL2Meta *m)
{
    BDRVQcowState *s = bs->opaque;
    int l2_index, ret;
    uint64_t l2_offset, *l2_table;
    int64_t cluster_offset;
    unsigned int nb_clusters, i = 0;
    QCowL2Meta *old_alloc;

    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
    if (ret < 0) {
        return ret;
    }

    /* Re-entered after waiting for an overlapping in-flight allocation;
     * the cluster state must be recomputed from scratch. */
again:
    /* n_end is in 512-byte sectors; << 9 converts to bytes */
    nb_clusters = size_to_clusters(s, n_end << 9);

    /* Never allocate past the end of the current L2 table */
    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);

    cluster_offset = be64_to_cpu(l2_table[l2_index]);

    /* We keep all QCOW_OFLAG_COPIED clusters */

    if (cluster_offset & QCOW_OFLAG_COPIED) {
        /* Cluster is already writable in place; report zero newly allocated
         * clusters and hand back the existing mapping. */
        nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
                &l2_table[l2_index], 0, 0);

        cluster_offset &= ~QCOW_OFLAG_COPIED;
        m->nb_clusters = 0;

        goto out;
    }

    /* for the moment, multiple compressed clusters are not managed */

    if (cluster_offset & QCOW_OFLAG_COMPRESSED)
        nb_clusters = 1;

    /* how many available clusters ? */

    /* Walk alternating runs of contiguous allocated and contiguous free
     * clusters; stop early at a COPIED or COMPRESSED entry, which cannot be
     * part of this allocation. */
    while (i < nb_clusters) {
        i += count_contiguous_clusters(nb_clusters - i, s->cluster_size,
                &l2_table[l2_index], i, 0);
        if ((i >= nb_clusters) || be64_to_cpu(l2_table[l2_index + i])) {
            break;
        }

        i += count_contiguous_free_clusters(nb_clusters - i,
                &l2_table[l2_index + i]);
        if (i >= nb_clusters) {
            break;
        }

        cluster_offset = be64_to_cpu(l2_table[l2_index + i]);

        if ((cluster_offset & QCOW_OFLAG_COPIED) ||
                (cluster_offset & QCOW_OFLAG_COMPRESSED))
            break;
    }
    assert(i <= nb_clusters);
    nb_clusters = i;

    /*
     * Check if there already is an AIO write request in flight which allocates
     * the same cluster. In this case we need to wait until the previous
     * request has completed and updated the L2 table accordingly.
     */
    QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {

        uint64_t end_offset = offset + nb_clusters * s->cluster_size;
        uint64_t old_offset = old_alloc->offset;
        uint64_t old_end_offset = old_alloc->offset +
            old_alloc->nb_clusters * s->cluster_size;

        /* NOTE(review): ranges that merely touch (end == start) are treated
         * as intersecting here; conservative, but worth confirming it is
         * intentional. */
        if (end_offset < old_offset || offset > old_end_offset) {
            /* No intersection */
        } else {
            if (offset < old_offset) {
                /* Stop at the start of a running allocation */
                nb_clusters = (old_offset - offset) >> s->cluster_bits;
            } else {
                nb_clusters = 0;
            }

            if (nb_clusters == 0) {
                /* Wait for the dependency to complete. We need to recheck
                 * the free/allocated clusters when we continue. */
                qemu_co_mutex_unlock(&s->lock);
                qemu_co_queue_wait(&old_alloc->dependent_requests);
                qemu_co_mutex_lock(&s->lock);
                goto again;
            }
        }
    }

    /* A zero-sized allocation would have waited on a dependency above, so
     * reaching this point with nb_clusters == 0 indicates a logic error. */
    if (!nb_clusters) {
        abort();
    }

    /* Publish this allocation so overlapping writers can depend on it */
    QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);

    /* allocate a new cluster */

    cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size);
    if (cluster_offset < 0) {
        ret = cluster_offset;
        goto fail;
    }

    /* save info needed for meta data update */
    m->offset = offset;
    m->n_start = n_start;
    m->nb_clusters = nb_clusters;

out:
    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
    if (ret < 0) {
        /* NOTE(review): on the already-allocated path (goto out with
         * m->nb_clusters == 0) m was never inserted into cluster_allocs,
         * yet fail_put unconditionally does QLIST_REMOVE(m) — verify this
         * cannot corrupt the list. */
        goto fail_put;
    }

    /* nb_available is in 512-byte sectors, capped at the requested n_end */
    m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);
    m->cluster_offset = cluster_offset;

    *num = m->nb_available - n_start;

    return 0;

fail:
    qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
fail_put:
    QLIST_REMOVE(m, next_in_flight);
    return ret;
}

static int
decompress_buffer(uint8_t *out_buf, int out_buf_size,
                  const uint8_t *buf, int buf_size)
{
    z_stream strm1, *strm = &strm1;
    int ret, out_len;

    memset(strm, 0, sizeof(*strm));

    strm->next_in = (uint8_t *)buf;
    strm->avail_in = buf_size;
    strm->next_out = out_buf;
    strm->avail_out = out_buf_size;

    /* Negative windowBits selects a raw deflate stream (no zlib header),
     * as used by qcow2 compressed clusters */
    ret = inflateInit2(strm, -12);
    if (ret != Z_OK)
        return -1;
    ret = inflate(strm, Z_FINISH);
    out_len = strm->next_out - out_buf;
    /* Success requires the output to fill out_buf exactly; Z_BUF_ERROR is
     * tolerated because the input may contain trailing padding sectors */
    if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
        out_len != out_buf_size) {
        inflateEnd(strm);
        return -1;
    }
    inflateEnd(strm);
    return 0;
}

/*
 * Read and decompress the compressed cluster described by cluster_offset
 * into s->cluster_cache. A one-entry cache (s->cluster_cache_offset) avoids
 * re-reading the same compressed cluster twice in a row.
 *
 * Returns 0 on success, -errno on read failure, -EIO on corrupt data.
 */
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
{
    BDRVQcowState *s = bs->opaque;
    int ret, csize, nb_csectors, sector_offset;
    uint64_t coffset;

    coffset = cluster_offset & s->cluster_offset_mask;
    if (s->cluster_cache_offset != coffset) {
        /* The sector count of the compressed data is encoded in the high
         * bits of the L2 entry */
        nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
        sector_offset = coffset & 511;
        csize = nb_csectors * 512 - sector_offset;
        BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
        ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors);
        if (ret < 0) {
            return ret;
        }
        if (decompress_buffer(s->cluster_cache, s->cluster_size,
                              s->cluster_data + sector_offset, csize) < 0) {
            return -EIO;
        }
        s->cluster_cache_offset = coffset;
    }
    return 0;
}

/*
 * This discards as many clusters of nb_clusters as possible at once (i.e.
 * all clusters in the same L2 table) and returns the number of discarded
 * clusters.
 */
static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
    unsigned int nb_clusters)
{
    BDRVQcowState *s = bs->opaque;
    uint64_t l2_offset, *l2_table;
    int l2_index;
    int ret;
    int i;

    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
    if (ret < 0) {
        return ret;
    }

    /* Limit nb_clusters to one L2 table */
    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);

    for (i = 0; i < nb_clusters; i++) {
        uint64_t old_offset;

        old_offset = be64_to_cpu(l2_table[l2_index + i]);
        old_offset &= ~QCOW_OFLAG_COPIED;

        /* Unallocated clusters have nothing to discard */
        if (old_offset == 0) {
            continue;
        }

        /* First remove L2 entries */
        /* Zero the L2 entry (and mark the table dirty) before dropping the
         * refcount, so a crash can at worst leak clusters, never reference
         * freed ones */
        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
        l2_table[l2_index + i] = cpu_to_be64(0);

        /* Then decrease the refcount */
        qcow2_free_any_clusters(bs, old_offset, 1);
    }

    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
    if (ret < 0) {
        return ret;
    }

    return nb_clusters;
}

/*
 * Discard the sector range [offset, offset + nb_sectors). Only whole
 * clusters fully contained in the range are discarded: the start is rounded
 * up and the end rounded down to cluster boundaries.
 *
 * Returns 0 on success, -errno on failure.
 */
int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
    int nb_sectors)
{
    BDRVQcowState *s = bs->opaque;
    uint64_t end_offset;
    unsigned int nb_clusters;
    int ret;

    /* NOTE(review): nb_sectors is int, so nb_sectors << BDRV_SECTOR_BITS
     * could overflow for very large requests before widening to uint64_t —
     * confirm callers bound nb_sectors. */
    end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);

    /* Round start up and end down */
    offset = align_offset(offset, s->cluster_size);
    end_offset &= ~(s->cluster_size - 1);

    /* No complete cluster inside the range: nothing to do */
    if (offset > end_offset) {
        return 0;
    }

    nb_clusters = size_to_clusters(s, end_offset - offset);

    /* Each L2 table is handled by its own loop iteration */
    while (nb_clusters > 0) {
        ret = discard_single_l2(bs, offset, nb_clusters);
        if (ret < 0) {
            return ret;
        }

        nb_clusters -= ret;
        offset += (ret * s->cluster_size);
    }

    return 0;
}