xref: /qemu/block/qcow2-cluster.c (revision a7912369)
145aba42fSKevin Wolf /*
245aba42fSKevin Wolf  * Block driver for the QCOW version 2 format
345aba42fSKevin Wolf  *
445aba42fSKevin Wolf  * Copyright (c) 2004-2006 Fabrice Bellard
545aba42fSKevin Wolf  *
645aba42fSKevin Wolf  * Permission is hereby granted, free of charge, to any person obtaining a copy
745aba42fSKevin Wolf  * of this software and associated documentation files (the "Software"), to deal
845aba42fSKevin Wolf  * in the Software without restriction, including without limitation the rights
945aba42fSKevin Wolf  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1045aba42fSKevin Wolf  * copies of the Software, and to permit persons to whom the Software is
1145aba42fSKevin Wolf  * furnished to do so, subject to the following conditions:
1245aba42fSKevin Wolf  *
1345aba42fSKevin Wolf  * The above copyright notice and this permission notice shall be included in
1445aba42fSKevin Wolf  * all copies or substantial portions of the Software.
1545aba42fSKevin Wolf  *
1645aba42fSKevin Wolf  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1745aba42fSKevin Wolf  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1845aba42fSKevin Wolf  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
1945aba42fSKevin Wolf  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2045aba42fSKevin Wolf  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2145aba42fSKevin Wolf  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
2245aba42fSKevin Wolf  * THE SOFTWARE.
2345aba42fSKevin Wolf  */
2445aba42fSKevin Wolf 
2545aba42fSKevin Wolf #include <zlib.h>
2645aba42fSKevin Wolf 
2745aba42fSKevin Wolf #include "qemu-common.h"
2845aba42fSKevin Wolf #include "block_int.h"
2945aba42fSKevin Wolf #include "block/qcow2.h"
3045aba42fSKevin Wolf 
3172893756SStefan Hajnoczi int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
3245aba42fSKevin Wolf {
3345aba42fSKevin Wolf     BDRVQcowState *s = bs->opaque;
3445aba42fSKevin Wolf     int new_l1_size, new_l1_size2, ret, i;
3545aba42fSKevin Wolf     uint64_t *new_l1_table;
365d757b56SKevin Wolf     int64_t new_l1_table_offset;
3745aba42fSKevin Wolf     uint8_t data[12];
3845aba42fSKevin Wolf 
3972893756SStefan Hajnoczi     if (min_size <= s->l1_size)
4045aba42fSKevin Wolf         return 0;
4172893756SStefan Hajnoczi 
4272893756SStefan Hajnoczi     if (exact_size) {
4372893756SStefan Hajnoczi         new_l1_size = min_size;
4472893756SStefan Hajnoczi     } else {
4572893756SStefan Hajnoczi         /* Bump size up to reduce the number of times we have to grow */
4672893756SStefan Hajnoczi         new_l1_size = s->l1_size;
47d191d12dSStefan Weil         if (new_l1_size == 0) {
48d191d12dSStefan Weil             new_l1_size = 1;
49d191d12dSStefan Weil         }
5045aba42fSKevin Wolf         while (min_size > new_l1_size) {
5145aba42fSKevin Wolf             new_l1_size = (new_l1_size * 3 + 1) / 2;
5245aba42fSKevin Wolf         }
5372893756SStefan Hajnoczi     }
5472893756SStefan Hajnoczi 
5545aba42fSKevin Wolf #ifdef DEBUG_ALLOC2
5635ee5e39SFrediano Ziglio     fprintf(stderr, "grow l1_table from %d to %d\n", s->l1_size, new_l1_size);
5745aba42fSKevin Wolf #endif
5845aba42fSKevin Wolf 
5945aba42fSKevin Wolf     new_l1_size2 = sizeof(uint64_t) * new_l1_size;
607267c094SAnthony Liguori     new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
6145aba42fSKevin Wolf     memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
6245aba42fSKevin Wolf 
6345aba42fSKevin Wolf     /* write new table (align to cluster) */
6466f82ceeSKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
65ed6ccf0fSKevin Wolf     new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
665d757b56SKevin Wolf     if (new_l1_table_offset < 0) {
677267c094SAnthony Liguori         g_free(new_l1_table);
685d757b56SKevin Wolf         return new_l1_table_offset;
695d757b56SKevin Wolf     }
7029c1a730SKevin Wolf 
7129c1a730SKevin Wolf     ret = qcow2_cache_flush(bs, s->refcount_block_cache);
7229c1a730SKevin Wolf     if (ret < 0) {
7380fa3341SKevin Wolf         goto fail;
7429c1a730SKevin Wolf     }
7545aba42fSKevin Wolf 
7666f82ceeSKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
7745aba42fSKevin Wolf     for(i = 0; i < s->l1_size; i++)
7845aba42fSKevin Wolf         new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
798b3b7206SKevin Wolf     ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
808b3b7206SKevin Wolf     if (ret < 0)
8145aba42fSKevin Wolf         goto fail;
8245aba42fSKevin Wolf     for(i = 0; i < s->l1_size; i++)
8345aba42fSKevin Wolf         new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
8445aba42fSKevin Wolf 
8545aba42fSKevin Wolf     /* set new table */
8666f82ceeSKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
8745aba42fSKevin Wolf     cpu_to_be32w((uint32_t*)data, new_l1_size);
88653df36bSAurelien Jarno     cpu_to_be64wu((uint64_t*)(data + 4), new_l1_table_offset);
898b3b7206SKevin Wolf     ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
908b3b7206SKevin Wolf     if (ret < 0) {
9145aba42fSKevin Wolf         goto fail;
92fb8fa77cSKevin Wolf     }
937267c094SAnthony Liguori     g_free(s->l1_table);
94ed6ccf0fSKevin Wolf     qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t));
9545aba42fSKevin Wolf     s->l1_table_offset = new_l1_table_offset;
9645aba42fSKevin Wolf     s->l1_table = new_l1_table;
9745aba42fSKevin Wolf     s->l1_size = new_l1_size;
9845aba42fSKevin Wolf     return 0;
9945aba42fSKevin Wolf  fail:
1007267c094SAnthony Liguori     g_free(new_l1_table);
101fb8fa77cSKevin Wolf     qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2);
1028b3b7206SKevin Wolf     return ret;
10345aba42fSKevin Wolf }
10445aba42fSKevin Wolf 
/*
 * l2_load
 *
 * Loads a L2 table into memory. If the table is in the cache, the cache
 * is used; otherwise the L2 table is loaded from the image file.
 *
 * Returns 0 on success, or a negative errno value if reading the table
 * from the image file failed. On success, *l2_table points to the table.
 */
11445aba42fSKevin Wolf 
11555c17e98SKevin Wolf static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
11655c17e98SKevin Wolf     uint64_t **l2_table)
11745aba42fSKevin Wolf {
11845aba42fSKevin Wolf     BDRVQcowState *s = bs->opaque;
11955c17e98SKevin Wolf     int ret;
12045aba42fSKevin Wolf 
12129c1a730SKevin Wolf     ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);
12245aba42fSKevin Wolf 
12355c17e98SKevin Wolf     return ret;
12455c17e98SKevin Wolf }
12555c17e98SKevin Wolf 
12645aba42fSKevin Wolf /*
1276583e3c7SKevin Wolf  * Writes one sector of the L1 table to the disk (can't update single entries
1286583e3c7SKevin Wolf  * and we really don't want bdrv_pread to perform a read-modify-write)
1296583e3c7SKevin Wolf  */
1306583e3c7SKevin Wolf #define L1_ENTRIES_PER_SECTOR (512 / 8)
13166f82ceeSKevin Wolf static int write_l1_entry(BlockDriverState *bs, int l1_index)
1326583e3c7SKevin Wolf {
13366f82ceeSKevin Wolf     BDRVQcowState *s = bs->opaque;
1346583e3c7SKevin Wolf     uint64_t buf[L1_ENTRIES_PER_SECTOR];
1356583e3c7SKevin Wolf     int l1_start_index;
136f7defcb6SKevin Wolf     int i, ret;
1376583e3c7SKevin Wolf 
1386583e3c7SKevin Wolf     l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
1396583e3c7SKevin Wolf     for (i = 0; i < L1_ENTRIES_PER_SECTOR; i++) {
1406583e3c7SKevin Wolf         buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
1416583e3c7SKevin Wolf     }
1426583e3c7SKevin Wolf 
14366f82ceeSKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
1448b3b7206SKevin Wolf     ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
145f7defcb6SKevin Wolf         buf, sizeof(buf));
146f7defcb6SKevin Wolf     if (ret < 0) {
147f7defcb6SKevin Wolf         return ret;
1486583e3c7SKevin Wolf     }
1496583e3c7SKevin Wolf 
1506583e3c7SKevin Wolf     return 0;
1516583e3c7SKevin Wolf }
1526583e3c7SKevin Wolf 
1536583e3c7SKevin Wolf /*
15445aba42fSKevin Wolf  * l2_allocate
15545aba42fSKevin Wolf  *
15645aba42fSKevin Wolf  * Allocate a new l2 entry in the file. If l1_index points to an already
15745aba42fSKevin Wolf  * used entry in the L2 table (i.e. we are doing a copy on write for the L2
15845aba42fSKevin Wolf  * table) copy the contents of the old L2 table into the newly allocated one.
15945aba42fSKevin Wolf  * Otherwise the new table is initialized with zeros.
16045aba42fSKevin Wolf  *
16145aba42fSKevin Wolf  */
16245aba42fSKevin Wolf 
163c46e1167SKevin Wolf static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
16445aba42fSKevin Wolf {
16545aba42fSKevin Wolf     BDRVQcowState *s = bs->opaque;
1666583e3c7SKevin Wolf     uint64_t old_l2_offset;
167f4f0d391SKevin Wolf     uint64_t *l2_table;
168f4f0d391SKevin Wolf     int64_t l2_offset;
169c46e1167SKevin Wolf     int ret;
17045aba42fSKevin Wolf 
17145aba42fSKevin Wolf     old_l2_offset = s->l1_table[l1_index];
17245aba42fSKevin Wolf 
17345aba42fSKevin Wolf     /* allocate a new l2 entry */
17445aba42fSKevin Wolf 
175ed6ccf0fSKevin Wolf     l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
1765d757b56SKevin Wolf     if (l2_offset < 0) {
177c46e1167SKevin Wolf         return l2_offset;
1785d757b56SKevin Wolf     }
17929c1a730SKevin Wolf 
18029c1a730SKevin Wolf     ret = qcow2_cache_flush(bs, s->refcount_block_cache);
18129c1a730SKevin Wolf     if (ret < 0) {
18229c1a730SKevin Wolf         goto fail;
18329c1a730SKevin Wolf     }
18445aba42fSKevin Wolf 
18545aba42fSKevin Wolf     /* allocate a new entry in the l2 cache */
18645aba42fSKevin Wolf 
18729c1a730SKevin Wolf     ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
18829c1a730SKevin Wolf     if (ret < 0) {
18929c1a730SKevin Wolf         return ret;
19029c1a730SKevin Wolf     }
19129c1a730SKevin Wolf 
19229c1a730SKevin Wolf     l2_table = *table;
19345aba42fSKevin Wolf 
19445aba42fSKevin Wolf     if (old_l2_offset == 0) {
19545aba42fSKevin Wolf         /* if there was no old l2 table, clear the new table */
19645aba42fSKevin Wolf         memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
19745aba42fSKevin Wolf     } else {
19829c1a730SKevin Wolf         uint64_t* old_table;
19929c1a730SKevin Wolf 
20045aba42fSKevin Wolf         /* if there was an old l2 table, read it from the disk */
20166f82ceeSKevin Wolf         BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
20229c1a730SKevin Wolf         ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_offset,
20329c1a730SKevin Wolf             (void**) &old_table);
20429c1a730SKevin Wolf         if (ret < 0) {
20529c1a730SKevin Wolf             goto fail;
20629c1a730SKevin Wolf         }
20729c1a730SKevin Wolf 
20829c1a730SKevin Wolf         memcpy(l2_table, old_table, s->cluster_size);
20929c1a730SKevin Wolf 
21029c1a730SKevin Wolf         ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table);
211c46e1167SKevin Wolf         if (ret < 0) {
212175e1152SKevin Wolf             goto fail;
213c46e1167SKevin Wolf         }
21445aba42fSKevin Wolf     }
21529c1a730SKevin Wolf 
21645aba42fSKevin Wolf     /* write the l2 table to the file */
21766f82ceeSKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
21829c1a730SKevin Wolf 
21929c1a730SKevin Wolf     qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
22029c1a730SKevin Wolf     ret = qcow2_cache_flush(bs, s->l2_table_cache);
221c46e1167SKevin Wolf     if (ret < 0) {
222175e1152SKevin Wolf         goto fail;
223175e1152SKevin Wolf     }
224175e1152SKevin Wolf 
225175e1152SKevin Wolf     /* update the L1 entry */
226175e1152SKevin Wolf     s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
227175e1152SKevin Wolf     ret = write_l1_entry(bs, l1_index);
228175e1152SKevin Wolf     if (ret < 0) {
229175e1152SKevin Wolf         goto fail;
230c46e1167SKevin Wolf     }
23145aba42fSKevin Wolf 
232c46e1167SKevin Wolf     *table = l2_table;
233c46e1167SKevin Wolf     return 0;
234175e1152SKevin Wolf 
235175e1152SKevin Wolf fail:
23629c1a730SKevin Wolf     qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
23768dba0bfSKevin Wolf     s->l1_table[l1_index] = old_l2_offset;
238175e1152SKevin Wolf     return ret;
23945aba42fSKevin Wolf }
24045aba42fSKevin Wolf 
/*
 * Counts how many entries, starting at index 'start', point at clusters
 * that are physically contiguous with the run beginning at entry 0
 * (offsets compared with 'mask' bits stripped).  Returns 0 if the first
 * entry is unallocated.
 */
static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
        uint64_t *l2_table, uint64_t start, uint64_t mask)
{
    uint64_t expected_base = be64_to_cpu(l2_table[0]) & ~mask;
    int i;

    if (expected_base == 0) {
        return 0;
    }

    for (i = start; i < start + nb_clusters; i++) {
        uint64_t entry = be64_to_cpu(l2_table[i]) & ~mask;
        if (entry != expected_base + (uint64_t) i * cluster_size) {
            break;
        }
    }

    return (i - start);
}
25645aba42fSKevin Wolf 
/*
 * Counts the leading run of unallocated (zero) L2 entries, looking at
 * no more than nb_clusters entries.
 */
static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
{
    int count = 0;

    while (nb_clusters > 0 && l2_table[count] == 0) {
        count++;
        nb_clusters--;
    }

    return count;
}
26645aba42fSKevin Wolf 
26745aba42fSKevin Wolf /* The crypt function is compatible with the linux cryptoloop
26845aba42fSKevin Wolf    algorithm for < 4 GB images. NOTE: out_buf == in_buf is
26945aba42fSKevin Wolf    supported */
270ed6ccf0fSKevin Wolf void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
27145aba42fSKevin Wolf                            uint8_t *out_buf, const uint8_t *in_buf,
27245aba42fSKevin Wolf                            int nb_sectors, int enc,
27345aba42fSKevin Wolf                            const AES_KEY *key)
27445aba42fSKevin Wolf {
27545aba42fSKevin Wolf     union {
27645aba42fSKevin Wolf         uint64_t ll[2];
27745aba42fSKevin Wolf         uint8_t b[16];
27845aba42fSKevin Wolf     } ivec;
27945aba42fSKevin Wolf     int i;
28045aba42fSKevin Wolf 
28145aba42fSKevin Wolf     for(i = 0; i < nb_sectors; i++) {
28245aba42fSKevin Wolf         ivec.ll[0] = cpu_to_le64(sector_num);
28345aba42fSKevin Wolf         ivec.ll[1] = 0;
28445aba42fSKevin Wolf         AES_cbc_encrypt(in_buf, out_buf, 512, key,
28545aba42fSKevin Wolf                         ivec.b, enc);
28645aba42fSKevin Wolf         sector_num++;
28745aba42fSKevin Wolf         in_buf += 512;
28845aba42fSKevin Wolf         out_buf += 512;
28945aba42fSKevin Wolf     }
29045aba42fSKevin Wolf }
29145aba42fSKevin Wolf 
29245aba42fSKevin Wolf 
/*
 * Synchronous qcow2 read: reads nb_sectors starting at sector_num into
 * buf, iterating over at most one contiguous cluster run per loop pass.
 * Each run is one of: unallocated (backed by the backing file or zeros),
 * compressed, or plain data (optionally encrypted).
 *
 * Returns 0 on success; on failure returns -errno from the mapping call,
 * or -1 on the legacy I/O error paths below.
 */
static int qcow2_read(BlockDriverState *bs, int64_t sector_num,
                      uint8_t *buf, int nb_sectors)
{
    BDRVQcowState *s = bs->opaque;
    int ret, index_in_cluster, n, n1;
    uint64_t cluster_offset;
    struct iovec iov;
    QEMUIOVector qiov;

    while (nb_sectors > 0) {
        /* Request everything that is left; qcow2_get_cluster_offset()
         * shrinks n to the contiguous sectors it can actually map. */
        n = nb_sectors;

        ret = qcow2_get_cluster_offset(bs, sector_num << 9, &n,
            &cluster_offset);
        if (ret < 0) {
            return ret;
        }

        index_in_cluster = sector_num & (s->cluster_sectors - 1);
        if (!cluster_offset) {
            if (bs->backing_hd) {
                /* read from the base image */
                iov.iov_base = buf;
                iov.iov_len = n * 512;
                qemu_iovec_init_external(&qiov, &iov, 1);

                /* n1 is presumably the part of the request that lies
                 * within the backing file; the remainder is left as-is
                 * (TODO confirm against qcow2_backing_read1()). */
                n1 = qcow2_backing_read1(bs->backing_hd, &qiov, sector_num, n);
                if (n1 > 0) {
                    BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING);
                    ret = bdrv_read(bs->backing_hd, sector_num, buf, n1);
                    if (ret < 0)
                        return -1;
                }
            } else {
                /* Unallocated and no backing file: reads as zeros */
                memset(buf, 0, 512 * n);
            }
        } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
            /* Decompress the whole cluster into s->cluster_cache, then
             * copy the requested slice out of it. */
            if (qcow2_decompress_cluster(bs, cluster_offset) < 0)
                return -1;
            memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
        } else {
            BLKDBG_EVENT(bs->file, BLKDBG_READ);
            ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512);
            if (ret != n * 512)
                return -1;
            if (s->crypt_method) {
                /* Decrypt in place (enc == 0) */
                qcow2_encrypt_sectors(s, sector_num, buf, buf, n, 0,
                                &s->aes_decrypt_key);
            }
        }
        nb_sectors -= n;
        sector_num += n;
        buf += n * 512;
    }
    return 0;
}
34945aba42fSKevin Wolf 
35045aba42fSKevin Wolf static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
35145aba42fSKevin Wolf                         uint64_t cluster_offset, int n_start, int n_end)
35245aba42fSKevin Wolf {
35345aba42fSKevin Wolf     BDRVQcowState *s = bs->opaque;
35445aba42fSKevin Wolf     int n, ret;
35545aba42fSKevin Wolf 
35645aba42fSKevin Wolf     n = n_end - n_start;
35745aba42fSKevin Wolf     if (n <= 0)
35845aba42fSKevin Wolf         return 0;
35966f82ceeSKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
3607c80ab3fSJes Sorensen     ret = qcow2_read(bs, start_sect + n_start, s->cluster_data, n);
36145aba42fSKevin Wolf     if (ret < 0)
36245aba42fSKevin Wolf         return ret;
36345aba42fSKevin Wolf     if (s->crypt_method) {
364ed6ccf0fSKevin Wolf         qcow2_encrypt_sectors(s, start_sect + n_start,
36545aba42fSKevin Wolf                         s->cluster_data,
36645aba42fSKevin Wolf                         s->cluster_data, n, 1,
36745aba42fSKevin Wolf                         &s->aes_encrypt_key);
36845aba42fSKevin Wolf     }
36966f82ceeSKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
3709f8e668eSKevin Wolf     ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start,
37145aba42fSKevin Wolf         s->cluster_data, n);
37245aba42fSKevin Wolf     if (ret < 0)
37345aba42fSKevin Wolf         return ret;
37445aba42fSKevin Wolf     return 0;
37545aba42fSKevin Wolf }
37645aba42fSKevin Wolf 
37745aba42fSKevin Wolf 
37845aba42fSKevin Wolf /*
37945aba42fSKevin Wolf  * get_cluster_offset
38045aba42fSKevin Wolf  *
3811c46efaaSKevin Wolf  * For a given offset of the disk image, find the cluster offset in
3821c46efaaSKevin Wolf  * qcow2 file. The offset is stored in *cluster_offset.
38345aba42fSKevin Wolf  *
384d57237f2SDevin Nakamura  * on entry, *num is the number of contiguous sectors we'd like to
38545aba42fSKevin Wolf  * access following offset.
38645aba42fSKevin Wolf  *
387d57237f2SDevin Nakamura  * on exit, *num is the number of contiguous sectors we can read.
38845aba42fSKevin Wolf  *
3891c46efaaSKevin Wolf  * Return 0, if the offset is found
3901c46efaaSKevin Wolf  * Return -errno, otherwise.
39145aba42fSKevin Wolf  *
39245aba42fSKevin Wolf  */
39345aba42fSKevin Wolf 
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
    int *num, uint64_t *cluster_offset)
{
    BDRVQcowState *s = bs->opaque;
    unsigned int l1_index, l2_index;
    uint64_t l2_offset, *l2_table;
    int l1_bits, c;
    unsigned int index_in_cluster, nb_clusters;
    uint64_t nb_available, nb_needed;
    int ret;

    /* Sectors needed, counted from the start of the cluster containing
     * offset (the caller's request plus the intra-cluster lead-in). */
    index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
    nb_needed = *num + index_in_cluster;

    l1_bits = s->l2_bits + s->cluster_bits;

    /* compute how many bytes there are between the offset and
     * the end of the l1 entry
     */

    nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));

    /* compute the number of available sectors */

    nb_available = (nb_available >> 9) + index_in_cluster;

    /* Clamp the request so it never crosses an L1 entry boundary */
    if (nb_needed > nb_available) {
        nb_needed = nb_available;
    }

    *cluster_offset = 0;

    /* seek the the l2 offset in the l1 table */

    l1_index = offset >> l1_bits;
    /* Beyond the end of the L1 table: treated as unallocated.
     * nb_available was set above, so the 'out' path is well-defined. */
    if (l1_index >= s->l1_size)
        goto out;

    l2_offset = s->l1_table[l1_index];

    /* seek the l2 table of the given l2 offset */

    /* No L2 table for this L1 entry: unallocated */
    if (!l2_offset)
        goto out;

    /* load the l2 table in memory */

    l2_offset &= ~QCOW_OFLAG_COPIED;
    ret = l2_load(bs, l2_offset, &l2_table);
    if (ret < 0) {
        return ret;
    }

    /* find the cluster offset for the given disk offset */

    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
    *cluster_offset = be64_to_cpu(l2_table[l2_index]);
    nb_clusters = size_to_clusters(s, nb_needed << 9);

    if (!*cluster_offset) {
        /* how many empty clusters ? */
        c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
    } else {
        /* how many allocated clusters ? */
        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
                &l2_table[l2_index], 0, QCOW_OFLAG_COPIED);
    }

    /* Release the cache reference taken by l2_load() */
    qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);

   nb_available = (c * s->cluster_sectors);
out:
    if (nb_available > nb_needed)
        nb_available = nb_needed;

    /* *num becomes the contiguous sector count usable from offset itself */
    *num = nb_available - index_in_cluster;

    /* Strip the COPIED flag so callers see a plain host offset; the
     * COMPRESSED flag (and compressed-size bits) are left in place. */
    *cluster_offset &=~QCOW_OFLAG_COPIED;
    return 0;
}
47445aba42fSKevin Wolf 
47545aba42fSKevin Wolf /*
47645aba42fSKevin Wolf  * get_cluster_table
47745aba42fSKevin Wolf  *
47845aba42fSKevin Wolf  * for a given disk offset, load (and allocate if needed)
47945aba42fSKevin Wolf  * the l2 table.
48045aba42fSKevin Wolf  *
48145aba42fSKevin Wolf  * the l2 table offset in the qcow2 file and the cluster index
48245aba42fSKevin Wolf  * in the l2 table are given to the caller.
48345aba42fSKevin Wolf  *
4841e3e8f1aSKevin Wolf  * Returns 0 on success, -errno in failure case
48545aba42fSKevin Wolf  */
static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
                             uint64_t **new_l2_table,
                             uint64_t *new_l2_offset,
                             int *new_l2_index)
{
    BDRVQcowState *s = bs->opaque;
    unsigned int l1_index, l2_index;
    uint64_t l2_offset;
    uint64_t *l2_table = NULL;
    int ret;

    /* seek the the l2 offset in the l1 table */

    l1_index = offset >> (s->l2_bits + s->cluster_bits);
    if (l1_index >= s->l1_size) {
        /* Offset lies beyond the current L1 table: grow it first */
        ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
        if (ret < 0) {
            return ret;
        }
    }
    l2_offset = s->l1_table[l1_index];

    /* seek the l2 table of the given l2 offset */

    if (l2_offset & QCOW_OFLAG_COPIED) {
        /* COPIED flag set: the table can be modified in place (per the
         * qcow2 format this indicates refcount == 1 — NOTE(review):
         * semantics taken from the format spec, confirm), so just load it */
        /* load the l2 table in memory */
        l2_offset &= ~QCOW_OFLAG_COPIED;
        ret = l2_load(bs, l2_offset, &l2_table);
        if (ret < 0) {
            return ret;
        }
    } else {
        /* First allocate a new L2 table (and do COW if needed) */
        ret = l2_allocate(bs, l1_index, &l2_table);
        if (ret < 0) {
            return ret;
        }

        /* Then decrease the refcount of the old table */
        if (l2_offset) {
            qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t));
        }
        /* l2_allocate() stored the new table's offset in the L1 entry;
         * re-read it (minus the COPIED flag) for the caller */
        l2_offset = s->l1_table[l1_index] & ~QCOW_OFLAG_COPIED;
    }

    /* find the cluster offset for the given disk offset */

    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);

    /* The caller receives a referenced cache entry in *new_l2_table and
     * must eventually qcow2_cache_put() it */
    *new_l2_table = l2_table;
    *new_l2_offset = l2_offset;
    *new_l2_index = l2_index;

    return 0;
}
54145aba42fSKevin Wolf 
54245aba42fSKevin Wolf /*
54345aba42fSKevin Wolf  * alloc_compressed_cluster_offset
54445aba42fSKevin Wolf  *
54545aba42fSKevin Wolf  * For a given offset of the disk image, return cluster offset in
54645aba42fSKevin Wolf  * qcow2 file.
54745aba42fSKevin Wolf  *
54845aba42fSKevin Wolf  * If the offset is not found, allocate a new compressed cluster.
54945aba42fSKevin Wolf  *
55045aba42fSKevin Wolf  * Return the cluster offset if successful,
55145aba42fSKevin Wolf  * Return 0, otherwise.
55245aba42fSKevin Wolf  *
55345aba42fSKevin Wolf  */
55445aba42fSKevin Wolf 
555ed6ccf0fSKevin Wolf uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
55645aba42fSKevin Wolf                                                uint64_t offset,
55745aba42fSKevin Wolf                                                int compressed_size)
55845aba42fSKevin Wolf {
55945aba42fSKevin Wolf     BDRVQcowState *s = bs->opaque;
56045aba42fSKevin Wolf     int l2_index, ret;
561f4f0d391SKevin Wolf     uint64_t l2_offset, *l2_table;
562f4f0d391SKevin Wolf     int64_t cluster_offset;
56345aba42fSKevin Wolf     int nb_csectors;
56445aba42fSKevin Wolf 
56545aba42fSKevin Wolf     ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
5661e3e8f1aSKevin Wolf     if (ret < 0) {
56745aba42fSKevin Wolf         return 0;
5681e3e8f1aSKevin Wolf     }
56945aba42fSKevin Wolf 
57045aba42fSKevin Wolf     cluster_offset = be64_to_cpu(l2_table[l2_index]);
57145aba42fSKevin Wolf     if (cluster_offset & QCOW_OFLAG_COPIED)
57245aba42fSKevin Wolf         return cluster_offset & ~QCOW_OFLAG_COPIED;
57345aba42fSKevin Wolf 
57445aba42fSKevin Wolf     if (cluster_offset)
575ed6ccf0fSKevin Wolf         qcow2_free_any_clusters(bs, cluster_offset, 1);
57645aba42fSKevin Wolf 
577ed6ccf0fSKevin Wolf     cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
5785d757b56SKevin Wolf     if (cluster_offset < 0) {
57929c1a730SKevin Wolf         qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
5805d757b56SKevin Wolf         return 0;
5815d757b56SKevin Wolf     }
5825d757b56SKevin Wolf 
58345aba42fSKevin Wolf     nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
58445aba42fSKevin Wolf                   (cluster_offset >> 9);
58545aba42fSKevin Wolf 
58645aba42fSKevin Wolf     cluster_offset |= QCOW_OFLAG_COMPRESSED |
58745aba42fSKevin Wolf                       ((uint64_t)nb_csectors << s->csize_shift);
58845aba42fSKevin Wolf 
58945aba42fSKevin Wolf     /* update L2 table */
59045aba42fSKevin Wolf 
59145aba42fSKevin Wolf     /* compressed clusters never have the copied flag */
59245aba42fSKevin Wolf 
59366f82ceeSKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
59429c1a730SKevin Wolf     qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
59545aba42fSKevin Wolf     l2_table[l2_index] = cpu_to_be64(cluster_offset);
59629c1a730SKevin Wolf     ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
59729c1a730SKevin Wolf     if (ret < 0) {
59845aba42fSKevin Wolf         return 0;
59929c1a730SKevin Wolf     }
60045aba42fSKevin Wolf 
60145aba42fSKevin Wolf     return cluster_offset;
60245aba42fSKevin Wolf }
60345aba42fSKevin Wolf 
/*
 * Link the newly allocated clusters described by m into the L2 table,
 * after performing copy-on-write of the head and tail sectors that the
 * guest write does not cover.  Old L2 entries that get replaced are
 * collected and their clusters freed afterwards.
 *
 * Returns 0 on success, -errno on failure.
 */
int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
{
    BDRVQcowState *s = bs->opaque;
    int i, j = 0, l2_index, ret;
    uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
    uint64_t cluster_offset = m->cluster_offset;
    bool cow = false;

    /* Nothing was newly allocated, so there is nothing to link */
    if (m->nb_clusters == 0)
        return 0;

    /* Buffer for the old L2 entries we replace; their clusters are freed
     * at the end, after the L2 update has been queued */
    old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));

    /* copy content of unmodified sectors */
    start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
    if (m->n_start) {
        /* COW the head of the first cluster that precedes the guest write */
        cow = true;
        ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start);
        if (ret < 0)
            goto err;
    }

    if (m->nb_available & (s->cluster_sectors - 1)) {
        /* COW the tail: from the end of the written data up to the end of
         * the last (partially written) cluster */
        uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1);
        cow = true;
        ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9),
                m->nb_available - end, s->cluster_sectors);
        if (ret < 0)
            goto err;
    }

    /*
     * Update L2 table.
     *
     * Before we update the L2 table to actually point to the new cluster, we
     * need to be sure that the refcounts have been increased and COW was
     * handled.
     */
    if (cow) {
        /* The COW data must hit the disk before the L2 update does */
        qcow2_cache_depends_on_flush(s->l2_table_cache);
    }

    /* Refcount blocks must be written before the L2 table references the
     * new clusters, or a crash could leak/corrupt them */
    qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
    ret = get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index);
    if (ret < 0) {
        goto err;
    }
    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);

    for (i = 0; i < m->nb_clusters; i++) {
        /* if two concurrent writes happen to the same unallocated cluster
         * each write allocates separate cluster and writes data concurrently.
         * The first one to complete updates l2 table with pointer to its
         * cluster the second one has to do RMW (which is done above by
         * copy_sectors()), update l2 table with its cluster pointer and free
         * old cluster. This is what this loop does */
        if(l2_table[l2_index + i] != 0)
            old_cluster[j++] = l2_table[l2_index + i];

        l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
                    (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
     }


    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
    if (ret < 0) {
        goto err;
    }

    /*
     * If this was a COW, we need to decrease the refcount of the old cluster.
     * Also flush bs->file to get the right order for L2 and refcount update.
     */
    if (j != 0) {
        for (i = 0; i < j; i++) {
            /* old_cluster[] holds raw big-endian L2 entries; strip the
             * COPIED flag to get the plain cluster offset */
            qcow2_free_any_clusters(bs,
                be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, 1);
        }
    }

    ret = 0;
err:
    g_free(old_cluster);
    return ret;
 }
68945aba42fSKevin Wolf 
69045aba42fSKevin Wolf /*
69145aba42fSKevin Wolf  * alloc_cluster_offset
69245aba42fSKevin Wolf  *
693148da7eaSKevin Wolf  * For a given offset of the disk image, return cluster offset in qcow2 file.
69445aba42fSKevin Wolf  * If the offset is not found, allocate a new cluster.
69545aba42fSKevin Wolf  *
696148da7eaSKevin Wolf  * If the cluster was already allocated, m->nb_clusters is set to 0,
697*a7912369SFrediano Ziglio  * other fields in m are meaningless.
69845aba42fSKevin Wolf  *
699148da7eaSKevin Wolf  * If the cluster is newly allocated, m->nb_clusters is set to the number of
70068d100e9SKevin Wolf  * contiguous clusters that have been allocated. In this case, the other
70168d100e9SKevin Wolf  * fields of m are valid and contain information about the first allocated
70268d100e9SKevin Wolf  * cluster.
703148da7eaSKevin Wolf  *
70468d100e9SKevin Wolf  * If the request conflicts with another write request in flight, the coroutine
70568d100e9SKevin Wolf  * is queued and will be reentered when the dependency has completed.
706148da7eaSKevin Wolf  *
707148da7eaSKevin Wolf  * Return 0 on success and -errno in error cases
70845aba42fSKevin Wolf  */
709f4f0d391SKevin Wolf int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
710f4f0d391SKevin Wolf     int n_start, int n_end, int *num, QCowL2Meta *m)
71145aba42fSKevin Wolf {
71245aba42fSKevin Wolf     BDRVQcowState *s = bs->opaque;
71345aba42fSKevin Wolf     int l2_index, ret;
7145d757b56SKevin Wolf     uint64_t l2_offset, *l2_table;
7155d757b56SKevin Wolf     int64_t cluster_offset;
71680ee15a6SKevin Wolf     unsigned int nb_clusters, i = 0;
717f214978aSKevin Wolf     QCowL2Meta *old_alloc;
71845aba42fSKevin Wolf 
71945aba42fSKevin Wolf     ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
7201e3e8f1aSKevin Wolf     if (ret < 0) {
721148da7eaSKevin Wolf         return ret;
7221e3e8f1aSKevin Wolf     }
72345aba42fSKevin Wolf 
72468d100e9SKevin Wolf again:
72545aba42fSKevin Wolf     nb_clusters = size_to_clusters(s, n_end << 9);
72645aba42fSKevin Wolf 
72745aba42fSKevin Wolf     nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
72845aba42fSKevin Wolf 
72945aba42fSKevin Wolf     cluster_offset = be64_to_cpu(l2_table[l2_index]);
73045aba42fSKevin Wolf 
73145aba42fSKevin Wolf     /* We keep all QCOW_OFLAG_COPIED clusters */
73245aba42fSKevin Wolf 
73345aba42fSKevin Wolf     if (cluster_offset & QCOW_OFLAG_COPIED) {
73445aba42fSKevin Wolf         nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
73545aba42fSKevin Wolf                 &l2_table[l2_index], 0, 0);
73645aba42fSKevin Wolf 
73745aba42fSKevin Wolf         cluster_offset &= ~QCOW_OFLAG_COPIED;
73845aba42fSKevin Wolf         m->nb_clusters = 0;
73945aba42fSKevin Wolf 
74045aba42fSKevin Wolf         goto out;
74145aba42fSKevin Wolf     }
74245aba42fSKevin Wolf 
74345aba42fSKevin Wolf     /* for the moment, multiple compressed clusters are not managed */
74445aba42fSKevin Wolf 
74545aba42fSKevin Wolf     if (cluster_offset & QCOW_OFLAG_COMPRESSED)
74645aba42fSKevin Wolf         nb_clusters = 1;
74745aba42fSKevin Wolf 
74845aba42fSKevin Wolf     /* how many available clusters ? */
74945aba42fSKevin Wolf 
75045aba42fSKevin Wolf     while (i < nb_clusters) {
75145aba42fSKevin Wolf         i += count_contiguous_clusters(nb_clusters - i, s->cluster_size,
75245aba42fSKevin Wolf                 &l2_table[l2_index], i, 0);
7534805bb66SKevin Wolf         if ((i >= nb_clusters) || be64_to_cpu(l2_table[l2_index + i])) {
75445aba42fSKevin Wolf             break;
7554805bb66SKevin Wolf         }
75645aba42fSKevin Wolf 
75745aba42fSKevin Wolf         i += count_contiguous_free_clusters(nb_clusters - i,
75845aba42fSKevin Wolf                 &l2_table[l2_index + i]);
7594805bb66SKevin Wolf         if (i >= nb_clusters) {
7604805bb66SKevin Wolf             break;
7614805bb66SKevin Wolf         }
76245aba42fSKevin Wolf 
76345aba42fSKevin Wolf         cluster_offset = be64_to_cpu(l2_table[l2_index + i]);
76445aba42fSKevin Wolf 
76545aba42fSKevin Wolf         if ((cluster_offset & QCOW_OFLAG_COPIED) ||
76645aba42fSKevin Wolf                 (cluster_offset & QCOW_OFLAG_COMPRESSED))
76745aba42fSKevin Wolf             break;
76845aba42fSKevin Wolf     }
7694805bb66SKevin Wolf     assert(i <= nb_clusters);
77045aba42fSKevin Wolf     nb_clusters = i;
77145aba42fSKevin Wolf 
772f214978aSKevin Wolf     /*
773f214978aSKevin Wolf      * Check if there already is an AIO write request in flight which allocates
774f214978aSKevin Wolf      * the same cluster. In this case we need to wait until the previous
775f214978aSKevin Wolf      * request has completed and updated the L2 table accordingly.
776f214978aSKevin Wolf      */
77772cf2d4fSBlue Swirl     QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
778f214978aSKevin Wolf 
779f214978aSKevin Wolf         uint64_t end_offset = offset + nb_clusters * s->cluster_size;
780f214978aSKevin Wolf         uint64_t old_offset = old_alloc->offset;
781f214978aSKevin Wolf         uint64_t old_end_offset = old_alloc->offset +
782f214978aSKevin Wolf             old_alloc->nb_clusters * s->cluster_size;
783f214978aSKevin Wolf 
784f214978aSKevin Wolf         if (end_offset < old_offset || offset > old_end_offset) {
785f214978aSKevin Wolf             /* No intersection */
786f214978aSKevin Wolf         } else {
787f214978aSKevin Wolf             if (offset < old_offset) {
788f214978aSKevin Wolf                 /* Stop at the start of a running allocation */
789f214978aSKevin Wolf                 nb_clusters = (old_offset - offset) >> s->cluster_bits;
790f214978aSKevin Wolf             } else {
791f214978aSKevin Wolf                 nb_clusters = 0;
792f214978aSKevin Wolf             }
793f214978aSKevin Wolf 
794f214978aSKevin Wolf             if (nb_clusters == 0) {
79568d100e9SKevin Wolf                 /* Wait for the dependency to complete. We need to recheck
79668d100e9SKevin Wolf                  * the free/allocated clusters when we continue. */
79768d100e9SKevin Wolf                 qemu_co_mutex_unlock(&s->lock);
79868d100e9SKevin Wolf                 qemu_co_queue_wait(&old_alloc->dependent_requests);
79968d100e9SKevin Wolf                 qemu_co_mutex_lock(&s->lock);
80068d100e9SKevin Wolf                 goto again;
801f214978aSKevin Wolf             }
802f214978aSKevin Wolf         }
803f214978aSKevin Wolf     }
804f214978aSKevin Wolf 
805f214978aSKevin Wolf     if (!nb_clusters) {
806f214978aSKevin Wolf         abort();
807f214978aSKevin Wolf     }
808f214978aSKevin Wolf 
80972cf2d4fSBlue Swirl     QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
810f214978aSKevin Wolf 
81145aba42fSKevin Wolf     /* allocate a new cluster */
81245aba42fSKevin Wolf 
813ed6ccf0fSKevin Wolf     cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size);
8145d757b56SKevin Wolf     if (cluster_offset < 0) {
81529c1a730SKevin Wolf         ret = cluster_offset;
81629c1a730SKevin Wolf         goto fail;
8175d757b56SKevin Wolf     }
81845aba42fSKevin Wolf 
81945aba42fSKevin Wolf     /* save info needed for meta data update */
82045aba42fSKevin Wolf     m->offset = offset;
82145aba42fSKevin Wolf     m->n_start = n_start;
82245aba42fSKevin Wolf     m->nb_clusters = nb_clusters;
82345aba42fSKevin Wolf 
82445aba42fSKevin Wolf out:
82529c1a730SKevin Wolf     ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
82629c1a730SKevin Wolf     if (ret < 0) {
8279e2a3701SKevin Wolf         goto fail_put;
82829c1a730SKevin Wolf     }
82929c1a730SKevin Wolf 
83045aba42fSKevin Wolf     m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);
831148da7eaSKevin Wolf     m->cluster_offset = cluster_offset;
83245aba42fSKevin Wolf 
83345aba42fSKevin Wolf     *num = m->nb_available - n_start;
83445aba42fSKevin Wolf 
835148da7eaSKevin Wolf     return 0;
83629c1a730SKevin Wolf 
83729c1a730SKevin Wolf fail:
83829c1a730SKevin Wolf     qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
8399e2a3701SKevin Wolf fail_put:
8409e2a3701SKevin Wolf     QLIST_REMOVE(m, next_in_flight);
84129c1a730SKevin Wolf     return ret;
84245aba42fSKevin Wolf }
84345aba42fSKevin Wolf 
84445aba42fSKevin Wolf static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
84545aba42fSKevin Wolf                              const uint8_t *buf, int buf_size)
84645aba42fSKevin Wolf {
84745aba42fSKevin Wolf     z_stream strm1, *strm = &strm1;
84845aba42fSKevin Wolf     int ret, out_len;
84945aba42fSKevin Wolf 
85045aba42fSKevin Wolf     memset(strm, 0, sizeof(*strm));
85145aba42fSKevin Wolf 
85245aba42fSKevin Wolf     strm->next_in = (uint8_t *)buf;
85345aba42fSKevin Wolf     strm->avail_in = buf_size;
85445aba42fSKevin Wolf     strm->next_out = out_buf;
85545aba42fSKevin Wolf     strm->avail_out = out_buf_size;
85645aba42fSKevin Wolf 
85745aba42fSKevin Wolf     ret = inflateInit2(strm, -12);
85845aba42fSKevin Wolf     if (ret != Z_OK)
85945aba42fSKevin Wolf         return -1;
86045aba42fSKevin Wolf     ret = inflate(strm, Z_FINISH);
86145aba42fSKevin Wolf     out_len = strm->next_out - out_buf;
86245aba42fSKevin Wolf     if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
86345aba42fSKevin Wolf         out_len != out_buf_size) {
86445aba42fSKevin Wolf         inflateEnd(strm);
86545aba42fSKevin Wolf         return -1;
86645aba42fSKevin Wolf     }
86745aba42fSKevin Wolf     inflateEnd(strm);
86845aba42fSKevin Wolf     return 0;
86945aba42fSKevin Wolf }
87045aba42fSKevin Wolf 
87166f82ceeSKevin Wolf int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
87245aba42fSKevin Wolf {
87366f82ceeSKevin Wolf     BDRVQcowState *s = bs->opaque;
87445aba42fSKevin Wolf     int ret, csize, nb_csectors, sector_offset;
87545aba42fSKevin Wolf     uint64_t coffset;
87645aba42fSKevin Wolf 
87745aba42fSKevin Wolf     coffset = cluster_offset & s->cluster_offset_mask;
87845aba42fSKevin Wolf     if (s->cluster_cache_offset != coffset) {
87945aba42fSKevin Wolf         nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
88045aba42fSKevin Wolf         sector_offset = coffset & 511;
88145aba42fSKevin Wolf         csize = nb_csectors * 512 - sector_offset;
88266f82ceeSKevin Wolf         BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
88366f82ceeSKevin Wolf         ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors);
88445aba42fSKevin Wolf         if (ret < 0) {
8858af36488SKevin Wolf             return ret;
88645aba42fSKevin Wolf         }
88745aba42fSKevin Wolf         if (decompress_buffer(s->cluster_cache, s->cluster_size,
88845aba42fSKevin Wolf                               s->cluster_data + sector_offset, csize) < 0) {
8898af36488SKevin Wolf             return -EIO;
89045aba42fSKevin Wolf         }
89145aba42fSKevin Wolf         s->cluster_cache_offset = coffset;
89245aba42fSKevin Wolf     }
89345aba42fSKevin Wolf     return 0;
89445aba42fSKevin Wolf }
8955ea929e3SKevin Wolf 
8965ea929e3SKevin Wolf /*
8975ea929e3SKevin Wolf  * This discards as many clusters of nb_clusters as possible at once (i.e.
8985ea929e3SKevin Wolf  * all clusters in the same L2 table) and returns the number of discarded
8995ea929e3SKevin Wolf  * clusters.
9005ea929e3SKevin Wolf  */
9015ea929e3SKevin Wolf static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
9025ea929e3SKevin Wolf     unsigned int nb_clusters)
9035ea929e3SKevin Wolf {
9045ea929e3SKevin Wolf     BDRVQcowState *s = bs->opaque;
9055ea929e3SKevin Wolf     uint64_t l2_offset, *l2_table;
9065ea929e3SKevin Wolf     int l2_index;
9075ea929e3SKevin Wolf     int ret;
9085ea929e3SKevin Wolf     int i;
9095ea929e3SKevin Wolf 
9105ea929e3SKevin Wolf     ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
9115ea929e3SKevin Wolf     if (ret < 0) {
9125ea929e3SKevin Wolf         return ret;
9135ea929e3SKevin Wolf     }
9145ea929e3SKevin Wolf 
9155ea929e3SKevin Wolf     /* Limit nb_clusters to one L2 table */
9165ea929e3SKevin Wolf     nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
9175ea929e3SKevin Wolf 
9185ea929e3SKevin Wolf     for (i = 0; i < nb_clusters; i++) {
9195ea929e3SKevin Wolf         uint64_t old_offset;
9205ea929e3SKevin Wolf 
9215ea929e3SKevin Wolf         old_offset = be64_to_cpu(l2_table[l2_index + i]);
9225ea929e3SKevin Wolf         old_offset &= ~QCOW_OFLAG_COPIED;
9235ea929e3SKevin Wolf 
9245ea929e3SKevin Wolf         if (old_offset == 0) {
9255ea929e3SKevin Wolf             continue;
9265ea929e3SKevin Wolf         }
9275ea929e3SKevin Wolf 
9285ea929e3SKevin Wolf         /* First remove L2 entries */
9295ea929e3SKevin Wolf         qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
9305ea929e3SKevin Wolf         l2_table[l2_index + i] = cpu_to_be64(0);
9315ea929e3SKevin Wolf 
9325ea929e3SKevin Wolf         /* Then decrease the refcount */
9335ea929e3SKevin Wolf         qcow2_free_any_clusters(bs, old_offset, 1);
9345ea929e3SKevin Wolf     }
9355ea929e3SKevin Wolf 
9365ea929e3SKevin Wolf     ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
9375ea929e3SKevin Wolf     if (ret < 0) {
9385ea929e3SKevin Wolf         return ret;
9395ea929e3SKevin Wolf     }
9405ea929e3SKevin Wolf 
9415ea929e3SKevin Wolf     return nb_clusters;
9425ea929e3SKevin Wolf }
9435ea929e3SKevin Wolf 
9445ea929e3SKevin Wolf int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
9455ea929e3SKevin Wolf     int nb_sectors)
9465ea929e3SKevin Wolf {
9475ea929e3SKevin Wolf     BDRVQcowState *s = bs->opaque;
9485ea929e3SKevin Wolf     uint64_t end_offset;
9495ea929e3SKevin Wolf     unsigned int nb_clusters;
9505ea929e3SKevin Wolf     int ret;
9515ea929e3SKevin Wolf 
9525ea929e3SKevin Wolf     end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
9535ea929e3SKevin Wolf 
9545ea929e3SKevin Wolf     /* Round start up and end down */
9555ea929e3SKevin Wolf     offset = align_offset(offset, s->cluster_size);
9565ea929e3SKevin Wolf     end_offset &= ~(s->cluster_size - 1);
9575ea929e3SKevin Wolf 
9585ea929e3SKevin Wolf     if (offset > end_offset) {
9595ea929e3SKevin Wolf         return 0;
9605ea929e3SKevin Wolf     }
9615ea929e3SKevin Wolf 
9625ea929e3SKevin Wolf     nb_clusters = size_to_clusters(s, end_offset - offset);
9635ea929e3SKevin Wolf 
9645ea929e3SKevin Wolf     /* Each L2 table is handled by its own loop iteration */
9655ea929e3SKevin Wolf     while (nb_clusters > 0) {
9665ea929e3SKevin Wolf         ret = discard_single_l2(bs, offset, nb_clusters);
9675ea929e3SKevin Wolf         if (ret < 0) {
9685ea929e3SKevin Wolf             return ret;
9695ea929e3SKevin Wolf         }
9705ea929e3SKevin Wolf 
9715ea929e3SKevin Wolf         nb_clusters -= ret;
9725ea929e3SKevin Wolf         offset += (ret * s->cluster_size);
9735ea929e3SKevin Wolf     }
9745ea929e3SKevin Wolf 
9755ea929e3SKevin Wolf     return 0;
9765ea929e3SKevin Wolf }
977