xref: /qemu/block/qcow.c (revision b6d5066d)
1019d6b8fSAnthony Liguori /*
2019d6b8fSAnthony Liguori  * Block driver for the QCOW format
3019d6b8fSAnthony Liguori  *
4019d6b8fSAnthony Liguori  * Copyright (c) 2004-2006 Fabrice Bellard
5019d6b8fSAnthony Liguori  *
6019d6b8fSAnthony Liguori  * Permission is hereby granted, free of charge, to any person obtaining a copy
7019d6b8fSAnthony Liguori  * of this software and associated documentation files (the "Software"), to deal
8019d6b8fSAnthony Liguori  * in the Software without restriction, including without limitation the rights
9019d6b8fSAnthony Liguori  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10019d6b8fSAnthony Liguori  * copies of the Software, and to permit persons to whom the Software is
11019d6b8fSAnthony Liguori  * furnished to do so, subject to the following conditions:
12019d6b8fSAnthony Liguori  *
13019d6b8fSAnthony Liguori  * The above copyright notice and this permission notice shall be included in
14019d6b8fSAnthony Liguori  * all copies or substantial portions of the Software.
15019d6b8fSAnthony Liguori  *
16019d6b8fSAnthony Liguori  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17019d6b8fSAnthony Liguori  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18019d6b8fSAnthony Liguori  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19019d6b8fSAnthony Liguori  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20019d6b8fSAnthony Liguori  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21019d6b8fSAnthony Liguori  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22019d6b8fSAnthony Liguori  * THE SOFTWARE.
23019d6b8fSAnthony Liguori  */
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
#include <inttypes.h>
#include <limits.h>
#include <zlib.h>
#include "qemu/aes.h"
#include "migration/migration.h"
30019d6b8fSAnthony Liguori 
31019d6b8fSAnthony Liguori /**************************************************************/
32019d6b8fSAnthony Liguori /* QEMU COW block driver with compression and encryption support */
33019d6b8fSAnthony Liguori 
/* Magic number at offset 0 of every QCOW image: the bytes 'Q' 'F' 'I' 0xfb. */
#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
/* Only format version handled by this driver. */
#define QCOW_VERSION 1

/* Values of the header's crypt_method field. */
#define QCOW_CRYPT_NONE 0
#define QCOW_CRYPT_AES  1

/*
 * Top bit of an L2 entry: set when the entry describes a compressed cluster.
 * The constant is unsigned because shifting a signed 1 into bit 63 is
 * undefined behaviour; every user combines it with uint64_t values.
 */
#define QCOW_OFLAG_COMPRESSED (1ULL << 63)
41019d6b8fSAnthony Liguori 
/*
 * On-disk image header.  It is read straight from offset 0 of the image
 * file and all multi-byte fields are stored big-endian, so the in-memory
 * layout of this struct must match the file layout byte for byte.
 */
typedef struct QCowHeader {
    uint32_t magic;               /* QCOW_MAGIC */
    uint32_t version;             /* QCOW_VERSION */
    uint64_t backing_file_offset; /* file offset of the backing file name, 0 if none */
    uint32_t backing_file_size;   /* length in bytes of the backing file name */
    uint32_t mtime;
    uint64_t size; /* in bytes */
    uint8_t cluster_bits;         /* log2 of the cluster size in bytes */
    uint8_t l2_bits;              /* log2 of the number of entries per L2 table */
    /*
     * Two bytes the compiler would otherwise insert silently to align
     * crypt_method.  Spelling them out keeps the layout independent of the
     * target ABI's alignment rules.
     */
    uint16_t padding;
    uint32_t crypt_method;        /* QCOW_CRYPT_NONE or QCOW_CRYPT_AES */
    uint64_t l1_table_offset;     /* file offset of the L1 table */
} QCowHeader;
54019d6b8fSAnthony Liguori 
55019d6b8fSAnthony Liguori #define L2_CACHE_SIZE 16
56019d6b8fSAnthony Liguori 
57019d6b8fSAnthony Liguori typedef struct BDRVQcowState {
58019d6b8fSAnthony Liguori     int cluster_bits;
59019d6b8fSAnthony Liguori     int cluster_size;
60019d6b8fSAnthony Liguori     int cluster_sectors;
61019d6b8fSAnthony Liguori     int l2_bits;
62019d6b8fSAnthony Liguori     int l2_size;
63019d6b8fSAnthony Liguori     int l1_size;
64019d6b8fSAnthony Liguori     uint64_t cluster_offset_mask;
65019d6b8fSAnthony Liguori     uint64_t l1_table_offset;
66019d6b8fSAnthony Liguori     uint64_t *l1_table;
67019d6b8fSAnthony Liguori     uint64_t *l2_cache;
68019d6b8fSAnthony Liguori     uint64_t l2_cache_offsets[L2_CACHE_SIZE];
69019d6b8fSAnthony Liguori     uint32_t l2_cache_counts[L2_CACHE_SIZE];
70019d6b8fSAnthony Liguori     uint8_t *cluster_cache;
71019d6b8fSAnthony Liguori     uint8_t *cluster_data;
72019d6b8fSAnthony Liguori     uint64_t cluster_cache_offset;
73019d6b8fSAnthony Liguori     uint32_t crypt_method; /* current crypt method, 0 if no key yet */
74019d6b8fSAnthony Liguori     uint32_t crypt_method_header;
75019d6b8fSAnthony Liguori     AES_KEY aes_encrypt_key;
76019d6b8fSAnthony Liguori     AES_KEY aes_decrypt_key;
7752b8eb60SKevin Wolf     CoMutex lock;
78fd9f102cSKevin Wolf     Error *migration_blocker;
79019d6b8fSAnthony Liguori } BDRVQcowState;
80019d6b8fSAnthony Liguori 
8166f82ceeSKevin Wolf static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
82019d6b8fSAnthony Liguori 
83019d6b8fSAnthony Liguori static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
84019d6b8fSAnthony Liguori {
85019d6b8fSAnthony Liguori     const QCowHeader *cow_header = (const void *)buf;
86019d6b8fSAnthony Liguori 
87019d6b8fSAnthony Liguori     if (buf_size >= sizeof(QCowHeader) &&
88019d6b8fSAnthony Liguori         be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
89019d6b8fSAnthony Liguori         be32_to_cpu(cow_header->version) == QCOW_VERSION)
90019d6b8fSAnthony Liguori         return 100;
91019d6b8fSAnthony Liguori     else
92019d6b8fSAnthony Liguori         return 0;
93019d6b8fSAnthony Liguori }
94019d6b8fSAnthony Liguori 
95015a1036SMax Reitz static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
96015a1036SMax Reitz                      Error **errp)
97019d6b8fSAnthony Liguori {
98019d6b8fSAnthony Liguori     BDRVQcowState *s = bs->opaque;
9984b0ec02SLi Zhi Hui     int len, i, shift, ret;
100019d6b8fSAnthony Liguori     QCowHeader header;
101019d6b8fSAnthony Liguori 
10284b0ec02SLi Zhi Hui     ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
10384b0ec02SLi Zhi Hui     if (ret < 0) {
104019d6b8fSAnthony Liguori         goto fail;
10584b0ec02SLi Zhi Hui     }
106019d6b8fSAnthony Liguori     be32_to_cpus(&header.magic);
107019d6b8fSAnthony Liguori     be32_to_cpus(&header.version);
108019d6b8fSAnthony Liguori     be64_to_cpus(&header.backing_file_offset);
109019d6b8fSAnthony Liguori     be32_to_cpus(&header.backing_file_size);
110019d6b8fSAnthony Liguori     be32_to_cpus(&header.mtime);
111019d6b8fSAnthony Liguori     be64_to_cpus(&header.size);
112019d6b8fSAnthony Liguori     be32_to_cpus(&header.crypt_method);
113019d6b8fSAnthony Liguori     be64_to_cpus(&header.l1_table_offset);
114019d6b8fSAnthony Liguori 
11584b0ec02SLi Zhi Hui     if (header.magic != QCOW_MAGIC) {
11615bac0d5SStefan Weil         ret = -EMEDIUMTYPE;
117019d6b8fSAnthony Liguori         goto fail;
11884b0ec02SLi Zhi Hui     }
11984b0ec02SLi Zhi Hui     if (header.version != QCOW_VERSION) {
12084b0ec02SLi Zhi Hui         char version[64];
12184b0ec02SLi Zhi Hui         snprintf(version, sizeof(version), "QCOW version %d", header.version);
122*b6d5066dSPaolo Bonzini         error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
12384b0ec02SLi Zhi Hui                   bs->device_name, "qcow", version);
12484b0ec02SLi Zhi Hui         ret = -ENOTSUP;
125019d6b8fSAnthony Liguori         goto fail;
12684b0ec02SLi Zhi Hui     }
12784b0ec02SLi Zhi Hui 
12884b0ec02SLi Zhi Hui     if (header.size <= 1 || header.cluster_bits < 9) {
129*b6d5066dSPaolo Bonzini         error_setg(errp, "invalid value in qcow header");
13084b0ec02SLi Zhi Hui         ret = -EINVAL;
131019d6b8fSAnthony Liguori         goto fail;
13284b0ec02SLi Zhi Hui     }
13384b0ec02SLi Zhi Hui     if (header.crypt_method > QCOW_CRYPT_AES) {
134*b6d5066dSPaolo Bonzini         error_setg(errp, "invalid encryption method in qcow header");
13584b0ec02SLi Zhi Hui         ret = -EINVAL;
13684b0ec02SLi Zhi Hui         goto fail;
13784b0ec02SLi Zhi Hui     }
138019d6b8fSAnthony Liguori     s->crypt_method_header = header.crypt_method;
13984b0ec02SLi Zhi Hui     if (s->crypt_method_header) {
140019d6b8fSAnthony Liguori         bs->encrypted = 1;
14184b0ec02SLi Zhi Hui     }
142019d6b8fSAnthony Liguori     s->cluster_bits = header.cluster_bits;
143019d6b8fSAnthony Liguori     s->cluster_size = 1 << s->cluster_bits;
144019d6b8fSAnthony Liguori     s->cluster_sectors = 1 << (s->cluster_bits - 9);
145019d6b8fSAnthony Liguori     s->l2_bits = header.l2_bits;
146019d6b8fSAnthony Liguori     s->l2_size = 1 << s->l2_bits;
147019d6b8fSAnthony Liguori     bs->total_sectors = header.size / 512;
148019d6b8fSAnthony Liguori     s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
149019d6b8fSAnthony Liguori 
150019d6b8fSAnthony Liguori     /* read the level 1 table */
151019d6b8fSAnthony Liguori     shift = s->cluster_bits + s->l2_bits;
152019d6b8fSAnthony Liguori     s->l1_size = (header.size + (1LL << shift) - 1) >> shift;
153019d6b8fSAnthony Liguori 
154019d6b8fSAnthony Liguori     s->l1_table_offset = header.l1_table_offset;
1557267c094SAnthony Liguori     s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
15684b0ec02SLi Zhi Hui 
15784b0ec02SLi Zhi Hui     ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
15884b0ec02SLi Zhi Hui                s->l1_size * sizeof(uint64_t));
15984b0ec02SLi Zhi Hui     if (ret < 0) {
160019d6b8fSAnthony Liguori         goto fail;
16184b0ec02SLi Zhi Hui     }
16284b0ec02SLi Zhi Hui 
163019d6b8fSAnthony Liguori     for(i = 0;i < s->l1_size; i++) {
164019d6b8fSAnthony Liguori         be64_to_cpus(&s->l1_table[i]);
165019d6b8fSAnthony Liguori     }
166019d6b8fSAnthony Liguori     /* alloc L2 cache */
1677267c094SAnthony Liguori     s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
1687267c094SAnthony Liguori     s->cluster_cache = g_malloc(s->cluster_size);
1697267c094SAnthony Liguori     s->cluster_data = g_malloc(s->cluster_size);
170019d6b8fSAnthony Liguori     s->cluster_cache_offset = -1;
171019d6b8fSAnthony Liguori 
172019d6b8fSAnthony Liguori     /* read the backing file name */
173019d6b8fSAnthony Liguori     if (header.backing_file_offset != 0) {
174019d6b8fSAnthony Liguori         len = header.backing_file_size;
17584b0ec02SLi Zhi Hui         if (len > 1023) {
176019d6b8fSAnthony Liguori             len = 1023;
17784b0ec02SLi Zhi Hui         }
17884b0ec02SLi Zhi Hui         ret = bdrv_pread(bs->file, header.backing_file_offset,
17984b0ec02SLi Zhi Hui                    bs->backing_file, len);
18084b0ec02SLi Zhi Hui         if (ret < 0) {
181019d6b8fSAnthony Liguori             goto fail;
18284b0ec02SLi Zhi Hui         }
183019d6b8fSAnthony Liguori         bs->backing_file[len] = '\0';
184019d6b8fSAnthony Liguori     }
185de33b1f3SScott Wood 
186fd9f102cSKevin Wolf     /* Disable migration when qcow images are used */
187fd9f102cSKevin Wolf     error_set(&s->migration_blocker,
188fd9f102cSKevin Wolf               QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
189fd9f102cSKevin Wolf               "qcow", bs->device_name, "live migration");
190fd9f102cSKevin Wolf     migrate_add_blocker(s->migration_blocker);
191fd9f102cSKevin Wolf 
192de33b1f3SScott Wood     qemu_co_mutex_init(&s->lock);
193019d6b8fSAnthony Liguori     return 0;
194019d6b8fSAnthony Liguori 
195019d6b8fSAnthony Liguori  fail:
1967267c094SAnthony Liguori     g_free(s->l1_table);
1977267c094SAnthony Liguori     g_free(s->l2_cache);
1987267c094SAnthony Liguori     g_free(s->cluster_cache);
1997267c094SAnthony Liguori     g_free(s->cluster_data);
20084b0ec02SLi Zhi Hui     return ret;
201019d6b8fSAnthony Liguori }
202019d6b8fSAnthony Liguori 
203d177692eSJeff Cody 
204d177692eSJeff Cody /* We have nothing to do for QCOW reopen, stubs just return
205d177692eSJeff Cody  * success */
206d177692eSJeff Cody static int qcow_reopen_prepare(BDRVReopenState *state,
207d177692eSJeff Cody                                BlockReopenQueue *queue, Error **errp)
208d177692eSJeff Cody {
209d177692eSJeff Cody     return 0;
210d177692eSJeff Cody }
211d177692eSJeff Cody 
212019d6b8fSAnthony Liguori static int qcow_set_key(BlockDriverState *bs, const char *key)
213019d6b8fSAnthony Liguori {
214019d6b8fSAnthony Liguori     BDRVQcowState *s = bs->opaque;
215019d6b8fSAnthony Liguori     uint8_t keybuf[16];
216019d6b8fSAnthony Liguori     int len, i;
217019d6b8fSAnthony Liguori 
218019d6b8fSAnthony Liguori     memset(keybuf, 0, 16);
219019d6b8fSAnthony Liguori     len = strlen(key);
220019d6b8fSAnthony Liguori     if (len > 16)
221019d6b8fSAnthony Liguori         len = 16;
222019d6b8fSAnthony Liguori     /* XXX: we could compress the chars to 7 bits to increase
223019d6b8fSAnthony Liguori        entropy */
224019d6b8fSAnthony Liguori     for(i = 0;i < len;i++) {
225019d6b8fSAnthony Liguori         keybuf[i] = key[i];
226019d6b8fSAnthony Liguori     }
227019d6b8fSAnthony Liguori     s->crypt_method = s->crypt_method_header;
228019d6b8fSAnthony Liguori 
229019d6b8fSAnthony Liguori     if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
230019d6b8fSAnthony Liguori         return -1;
231019d6b8fSAnthony Liguori     if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
232019d6b8fSAnthony Liguori         return -1;
233019d6b8fSAnthony Liguori     return 0;
234019d6b8fSAnthony Liguori }
235019d6b8fSAnthony Liguori 
236019d6b8fSAnthony Liguori /* The crypt function is compatible with the linux cryptoloop
237019d6b8fSAnthony Liguori    algorithm for < 4 GB images. NOTE: out_buf == in_buf is
238019d6b8fSAnthony Liguori    supported */
239019d6b8fSAnthony Liguori static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
240019d6b8fSAnthony Liguori                             uint8_t *out_buf, const uint8_t *in_buf,
241019d6b8fSAnthony Liguori                             int nb_sectors, int enc,
242019d6b8fSAnthony Liguori                             const AES_KEY *key)
243019d6b8fSAnthony Liguori {
244019d6b8fSAnthony Liguori     union {
245019d6b8fSAnthony Liguori         uint64_t ll[2];
246019d6b8fSAnthony Liguori         uint8_t b[16];
247019d6b8fSAnthony Liguori     } ivec;
248019d6b8fSAnthony Liguori     int i;
249019d6b8fSAnthony Liguori 
250019d6b8fSAnthony Liguori     for(i = 0; i < nb_sectors; i++) {
251019d6b8fSAnthony Liguori         ivec.ll[0] = cpu_to_le64(sector_num);
252019d6b8fSAnthony Liguori         ivec.ll[1] = 0;
253019d6b8fSAnthony Liguori         AES_cbc_encrypt(in_buf, out_buf, 512, key,
254019d6b8fSAnthony Liguori                         ivec.b, enc);
255019d6b8fSAnthony Liguori         sector_num++;
256019d6b8fSAnthony Liguori         in_buf += 512;
257019d6b8fSAnthony Liguori         out_buf += 512;
258019d6b8fSAnthony Liguori     }
259019d6b8fSAnthony Liguori }
260019d6b8fSAnthony Liguori 
261019d6b8fSAnthony Liguori /* 'allocate' is:
262019d6b8fSAnthony Liguori  *
263019d6b8fSAnthony Liguori  * 0 to not allocate.
264019d6b8fSAnthony Liguori  *
265019d6b8fSAnthony Liguori  * 1 to allocate a normal cluster (for sector indexes 'n_start' to
266019d6b8fSAnthony Liguori  * 'n_end')
267019d6b8fSAnthony Liguori  *
268019d6b8fSAnthony Liguori  * 2 to allocate a compressed cluster of size
269019d6b8fSAnthony Liguori  * 'compressed_size'. 'compressed_size' must be > 0 and <
270019d6b8fSAnthony Liguori  * cluster_size
271019d6b8fSAnthony Liguori  *
272019d6b8fSAnthony Liguori  * return 0 if not allocated.
273019d6b8fSAnthony Liguori  */
274019d6b8fSAnthony Liguori static uint64_t get_cluster_offset(BlockDriverState *bs,
275019d6b8fSAnthony Liguori                                    uint64_t offset, int allocate,
276019d6b8fSAnthony Liguori                                    int compressed_size,
277019d6b8fSAnthony Liguori                                    int n_start, int n_end)
278019d6b8fSAnthony Liguori {
279019d6b8fSAnthony Liguori     BDRVQcowState *s = bs->opaque;
280019d6b8fSAnthony Liguori     int min_index, i, j, l1_index, l2_index;
281019d6b8fSAnthony Liguori     uint64_t l2_offset, *l2_table, cluster_offset, tmp;
282019d6b8fSAnthony Liguori     uint32_t min_count;
283019d6b8fSAnthony Liguori     int new_l2_table;
284019d6b8fSAnthony Liguori 
285019d6b8fSAnthony Liguori     l1_index = offset >> (s->l2_bits + s->cluster_bits);
286019d6b8fSAnthony Liguori     l2_offset = s->l1_table[l1_index];
287019d6b8fSAnthony Liguori     new_l2_table = 0;
288019d6b8fSAnthony Liguori     if (!l2_offset) {
289019d6b8fSAnthony Liguori         if (!allocate)
290019d6b8fSAnthony Liguori             return 0;
291019d6b8fSAnthony Liguori         /* allocate a new l2 entry */
29266f82ceeSKevin Wolf         l2_offset = bdrv_getlength(bs->file);
293019d6b8fSAnthony Liguori         /* round to cluster size */
294019d6b8fSAnthony Liguori         l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
295019d6b8fSAnthony Liguori         /* update the L1 entry */
296019d6b8fSAnthony Liguori         s->l1_table[l1_index] = l2_offset;
297019d6b8fSAnthony Liguori         tmp = cpu_to_be64(l2_offset);
2985e5557d9SKevin Wolf         if (bdrv_pwrite_sync(bs->file,
2995e5557d9SKevin Wolf                 s->l1_table_offset + l1_index * sizeof(tmp),
3005e5557d9SKevin Wolf                 &tmp, sizeof(tmp)) < 0)
301019d6b8fSAnthony Liguori             return 0;
302019d6b8fSAnthony Liguori         new_l2_table = 1;
303019d6b8fSAnthony Liguori     }
304019d6b8fSAnthony Liguori     for(i = 0; i < L2_CACHE_SIZE; i++) {
305019d6b8fSAnthony Liguori         if (l2_offset == s->l2_cache_offsets[i]) {
306019d6b8fSAnthony Liguori             /* increment the hit count */
307019d6b8fSAnthony Liguori             if (++s->l2_cache_counts[i] == 0xffffffff) {
308019d6b8fSAnthony Liguori                 for(j = 0; j < L2_CACHE_SIZE; j++) {
309019d6b8fSAnthony Liguori                     s->l2_cache_counts[j] >>= 1;
310019d6b8fSAnthony Liguori                 }
311019d6b8fSAnthony Liguori             }
312019d6b8fSAnthony Liguori             l2_table = s->l2_cache + (i << s->l2_bits);
313019d6b8fSAnthony Liguori             goto found;
314019d6b8fSAnthony Liguori         }
315019d6b8fSAnthony Liguori     }
316019d6b8fSAnthony Liguori     /* not found: load a new entry in the least used one */
317019d6b8fSAnthony Liguori     min_index = 0;
318019d6b8fSAnthony Liguori     min_count = 0xffffffff;
319019d6b8fSAnthony Liguori     for(i = 0; i < L2_CACHE_SIZE; i++) {
320019d6b8fSAnthony Liguori         if (s->l2_cache_counts[i] < min_count) {
321019d6b8fSAnthony Liguori             min_count = s->l2_cache_counts[i];
322019d6b8fSAnthony Liguori             min_index = i;
323019d6b8fSAnthony Liguori         }
324019d6b8fSAnthony Liguori     }
325019d6b8fSAnthony Liguori     l2_table = s->l2_cache + (min_index << s->l2_bits);
326019d6b8fSAnthony Liguori     if (new_l2_table) {
327019d6b8fSAnthony Liguori         memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
3285e5557d9SKevin Wolf         if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
3295e5557d9SKevin Wolf                 s->l2_size * sizeof(uint64_t)) < 0)
330019d6b8fSAnthony Liguori             return 0;
331019d6b8fSAnthony Liguori     } else {
33266f82ceeSKevin Wolf         if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
333019d6b8fSAnthony Liguori             s->l2_size * sizeof(uint64_t))
334019d6b8fSAnthony Liguori             return 0;
335019d6b8fSAnthony Liguori     }
336019d6b8fSAnthony Liguori     s->l2_cache_offsets[min_index] = l2_offset;
337019d6b8fSAnthony Liguori     s->l2_cache_counts[min_index] = 1;
338019d6b8fSAnthony Liguori  found:
339019d6b8fSAnthony Liguori     l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
340019d6b8fSAnthony Liguori     cluster_offset = be64_to_cpu(l2_table[l2_index]);
341019d6b8fSAnthony Liguori     if (!cluster_offset ||
342019d6b8fSAnthony Liguori         ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
343019d6b8fSAnthony Liguori         if (!allocate)
344019d6b8fSAnthony Liguori             return 0;
345019d6b8fSAnthony Liguori         /* allocate a new cluster */
346019d6b8fSAnthony Liguori         if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
347019d6b8fSAnthony Liguori             (n_end - n_start) < s->cluster_sectors) {
348019d6b8fSAnthony Liguori             /* if the cluster is already compressed, we must
349019d6b8fSAnthony Liguori                decompress it in the case it is not completely
350019d6b8fSAnthony Liguori                overwritten */
35166f82ceeSKevin Wolf             if (decompress_cluster(bs, cluster_offset) < 0)
352019d6b8fSAnthony Liguori                 return 0;
35366f82ceeSKevin Wolf             cluster_offset = bdrv_getlength(bs->file);
354019d6b8fSAnthony Liguori             cluster_offset = (cluster_offset + s->cluster_size - 1) &
355019d6b8fSAnthony Liguori                 ~(s->cluster_size - 1);
356019d6b8fSAnthony Liguori             /* write the cluster content */
35766f82ceeSKevin Wolf             if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) !=
358019d6b8fSAnthony Liguori                 s->cluster_size)
359019d6b8fSAnthony Liguori                 return -1;
360019d6b8fSAnthony Liguori         } else {
36166f82ceeSKevin Wolf             cluster_offset = bdrv_getlength(bs->file);
362019d6b8fSAnthony Liguori             if (allocate == 1) {
363019d6b8fSAnthony Liguori                 /* round to cluster size */
364019d6b8fSAnthony Liguori                 cluster_offset = (cluster_offset + s->cluster_size - 1) &
365019d6b8fSAnthony Liguori                     ~(s->cluster_size - 1);
36666f82ceeSKevin Wolf                 bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
367019d6b8fSAnthony Liguori                 /* if encrypted, we must initialize the cluster
368019d6b8fSAnthony Liguori                    content which won't be written */
369019d6b8fSAnthony Liguori                 if (s->crypt_method &&
370019d6b8fSAnthony Liguori                     (n_end - n_start) < s->cluster_sectors) {
371019d6b8fSAnthony Liguori                     uint64_t start_sect;
372019d6b8fSAnthony Liguori                     start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
373019d6b8fSAnthony Liguori                     memset(s->cluster_data + 512, 0x00, 512);
374019d6b8fSAnthony Liguori                     for(i = 0; i < s->cluster_sectors; i++) {
375019d6b8fSAnthony Liguori                         if (i < n_start || i >= n_end) {
376019d6b8fSAnthony Liguori                             encrypt_sectors(s, start_sect + i,
377019d6b8fSAnthony Liguori                                             s->cluster_data,
378019d6b8fSAnthony Liguori                                             s->cluster_data + 512, 1, 1,
379019d6b8fSAnthony Liguori                                             &s->aes_encrypt_key);
38066f82ceeSKevin Wolf                             if (bdrv_pwrite(bs->file, cluster_offset + i * 512,
381019d6b8fSAnthony Liguori                                             s->cluster_data, 512) != 512)
382019d6b8fSAnthony Liguori                                 return -1;
383019d6b8fSAnthony Liguori                         }
384019d6b8fSAnthony Liguori                     }
385019d6b8fSAnthony Liguori                 }
386019d6b8fSAnthony Liguori             } else if (allocate == 2) {
387019d6b8fSAnthony Liguori                 cluster_offset |= QCOW_OFLAG_COMPRESSED |
388019d6b8fSAnthony Liguori                     (uint64_t)compressed_size << (63 - s->cluster_bits);
389019d6b8fSAnthony Liguori             }
390019d6b8fSAnthony Liguori         }
391019d6b8fSAnthony Liguori         /* update L2 table */
392019d6b8fSAnthony Liguori         tmp = cpu_to_be64(cluster_offset);
393019d6b8fSAnthony Liguori         l2_table[l2_index] = tmp;
3945e5557d9SKevin Wolf         if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
3955e5557d9SKevin Wolf                 &tmp, sizeof(tmp)) < 0)
396019d6b8fSAnthony Liguori             return 0;
397019d6b8fSAnthony Liguori     }
398019d6b8fSAnthony Liguori     return cluster_offset;
399019d6b8fSAnthony Liguori }
400019d6b8fSAnthony Liguori 
401b6b8a333SPaolo Bonzini static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
402f8a2e5e3SStefan Hajnoczi         int64_t sector_num, int nb_sectors, int *pnum)
403019d6b8fSAnthony Liguori {
404019d6b8fSAnthony Liguori     BDRVQcowState *s = bs->opaque;
405019d6b8fSAnthony Liguori     int index_in_cluster, n;
406019d6b8fSAnthony Liguori     uint64_t cluster_offset;
407019d6b8fSAnthony Liguori 
408f8a2e5e3SStefan Hajnoczi     qemu_co_mutex_lock(&s->lock);
409019d6b8fSAnthony Liguori     cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
410f8a2e5e3SStefan Hajnoczi     qemu_co_mutex_unlock(&s->lock);
411019d6b8fSAnthony Liguori     index_in_cluster = sector_num & (s->cluster_sectors - 1);
412019d6b8fSAnthony Liguori     n = s->cluster_sectors - index_in_cluster;
413019d6b8fSAnthony Liguori     if (n > nb_sectors)
414019d6b8fSAnthony Liguori         n = nb_sectors;
415019d6b8fSAnthony Liguori     *pnum = n;
4164bc74be9SPaolo Bonzini     if (!cluster_offset) {
4174bc74be9SPaolo Bonzini         return 0;
4184bc74be9SPaolo Bonzini     }
4194bc74be9SPaolo Bonzini     if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypt_method) {
4204bc74be9SPaolo Bonzini         return BDRV_BLOCK_DATA;
4214bc74be9SPaolo Bonzini     }
4224bc74be9SPaolo Bonzini     cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
4234bc74be9SPaolo Bonzini     return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset;
424019d6b8fSAnthony Liguori }
425019d6b8fSAnthony Liguori 
426019d6b8fSAnthony Liguori static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
427019d6b8fSAnthony Liguori                              const uint8_t *buf, int buf_size)
428019d6b8fSAnthony Liguori {
429019d6b8fSAnthony Liguori     z_stream strm1, *strm = &strm1;
430019d6b8fSAnthony Liguori     int ret, out_len;
431019d6b8fSAnthony Liguori 
432019d6b8fSAnthony Liguori     memset(strm, 0, sizeof(*strm));
433019d6b8fSAnthony Liguori 
434019d6b8fSAnthony Liguori     strm->next_in = (uint8_t *)buf;
435019d6b8fSAnthony Liguori     strm->avail_in = buf_size;
436019d6b8fSAnthony Liguori     strm->next_out = out_buf;
437019d6b8fSAnthony Liguori     strm->avail_out = out_buf_size;
438019d6b8fSAnthony Liguori 
439019d6b8fSAnthony Liguori     ret = inflateInit2(strm, -12);
440019d6b8fSAnthony Liguori     if (ret != Z_OK)
441019d6b8fSAnthony Liguori         return -1;
442019d6b8fSAnthony Liguori     ret = inflate(strm, Z_FINISH);
443019d6b8fSAnthony Liguori     out_len = strm->next_out - out_buf;
444019d6b8fSAnthony Liguori     if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
445019d6b8fSAnthony Liguori         out_len != out_buf_size) {
446019d6b8fSAnthony Liguori         inflateEnd(strm);
447019d6b8fSAnthony Liguori         return -1;
448019d6b8fSAnthony Liguori     }
449019d6b8fSAnthony Liguori     inflateEnd(strm);
450019d6b8fSAnthony Liguori     return 0;
451019d6b8fSAnthony Liguori }
452019d6b8fSAnthony Liguori 
45366f82ceeSKevin Wolf static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
454019d6b8fSAnthony Liguori {
45566f82ceeSKevin Wolf     BDRVQcowState *s = bs->opaque;
456019d6b8fSAnthony Liguori     int ret, csize;
457019d6b8fSAnthony Liguori     uint64_t coffset;
458019d6b8fSAnthony Liguori 
459019d6b8fSAnthony Liguori     coffset = cluster_offset & s->cluster_offset_mask;
460019d6b8fSAnthony Liguori     if (s->cluster_cache_offset != coffset) {
461019d6b8fSAnthony Liguori         csize = cluster_offset >> (63 - s->cluster_bits);
462019d6b8fSAnthony Liguori         csize &= (s->cluster_size - 1);
46366f82ceeSKevin Wolf         ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
464019d6b8fSAnthony Liguori         if (ret != csize)
465019d6b8fSAnthony Liguori             return -1;
466019d6b8fSAnthony Liguori         if (decompress_buffer(s->cluster_cache, s->cluster_size,
467019d6b8fSAnthony Liguori                               s->cluster_data, csize) < 0) {
468019d6b8fSAnthony Liguori             return -1;
469019d6b8fSAnthony Liguori         }
470019d6b8fSAnthony Liguori         s->cluster_cache_offset = coffset;
471019d6b8fSAnthony Liguori     }
472019d6b8fSAnthony Liguori     return 0;
473019d6b8fSAnthony Liguori }
474019d6b8fSAnthony Liguori 
475a968168cSDong Xu Wang static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
47627deebe8SFrediano Ziglio                          int nb_sectors, QEMUIOVector *qiov)
477ad53089bSChristoph Hellwig {
478019d6b8fSAnthony Liguori     BDRVQcowState *s = bs->opaque;
479019d6b8fSAnthony Liguori     int index_in_cluster;
48027deebe8SFrediano Ziglio     int ret = 0, n;
48143ca85b5SFrediano Ziglio     uint64_t cluster_offset;
482430bbaaaSFrediano Ziglio     struct iovec hd_iov;
483430bbaaaSFrediano Ziglio     QEMUIOVector hd_qiov;
48427deebe8SFrediano Ziglio     uint8_t *buf;
48527deebe8SFrediano Ziglio     void *orig_buf;
486019d6b8fSAnthony Liguori 
48727deebe8SFrediano Ziglio     if (qiov->niov > 1) {
48827deebe8SFrediano Ziglio         buf = orig_buf = qemu_blockalign(bs, qiov->size);
48927deebe8SFrediano Ziglio     } else {
49027deebe8SFrediano Ziglio         orig_buf = NULL;
49127deebe8SFrediano Ziglio         buf = (uint8_t *)qiov->iov->iov_base;
492019d6b8fSAnthony Liguori     }
493019d6b8fSAnthony Liguori 
49427deebe8SFrediano Ziglio     qemu_co_mutex_lock(&s->lock);
49527deebe8SFrediano Ziglio 
49627deebe8SFrediano Ziglio     while (nb_sectors != 0) {
49743ca85b5SFrediano Ziglio         /* prepare next request */
49827deebe8SFrediano Ziglio         cluster_offset = get_cluster_offset(bs, sector_num << 9,
499019d6b8fSAnthony Liguori                                                  0, 0, 0, 0);
50027deebe8SFrediano Ziglio         index_in_cluster = sector_num & (s->cluster_sectors - 1);
501430bbaaaSFrediano Ziglio         n = s->cluster_sectors - index_in_cluster;
50227deebe8SFrediano Ziglio         if (n > nb_sectors) {
50327deebe8SFrediano Ziglio             n = nb_sectors;
504430bbaaaSFrediano Ziglio         }
505019d6b8fSAnthony Liguori 
506430bbaaaSFrediano Ziglio         if (!cluster_offset) {
507019d6b8fSAnthony Liguori             if (bs->backing_hd) {
508019d6b8fSAnthony Liguori                 /* read from the base image */
50927deebe8SFrediano Ziglio                 hd_iov.iov_base = (void *)buf;
510430bbaaaSFrediano Ziglio                 hd_iov.iov_len = n * 512;
511430bbaaaSFrediano Ziglio                 qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
51252b8eb60SKevin Wolf                 qemu_co_mutex_unlock(&s->lock);
51327deebe8SFrediano Ziglio                 ret = bdrv_co_readv(bs->backing_hd, sector_num,
514430bbaaaSFrediano Ziglio                                     n, &hd_qiov);
51552b8eb60SKevin Wolf                 qemu_co_mutex_lock(&s->lock);
51652b8eb60SKevin Wolf                 if (ret < 0) {
51727deebe8SFrediano Ziglio                     goto fail;
5185614c188SStefan Weil                 }
519019d6b8fSAnthony Liguori             } else {
520019d6b8fSAnthony Liguori                 /* Note: in this case, no need to wait */
52127deebe8SFrediano Ziglio                 memset(buf, 0, 512 * n);
522019d6b8fSAnthony Liguori             }
523430bbaaaSFrediano Ziglio         } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
524019d6b8fSAnthony Liguori             /* add AIO support for compressed blocks ? */
525430bbaaaSFrediano Ziglio             if (decompress_cluster(bs, cluster_offset) < 0) {
52627deebe8SFrediano Ziglio                 goto fail;
5275614c188SStefan Weil             }
52827deebe8SFrediano Ziglio             memcpy(buf,
529430bbaaaSFrediano Ziglio                    s->cluster_cache + index_in_cluster * 512, 512 * n);
530019d6b8fSAnthony Liguori         } else {
531430bbaaaSFrediano Ziglio             if ((cluster_offset & 511) != 0) {
53227deebe8SFrediano Ziglio                 goto fail;
533019d6b8fSAnthony Liguori             }
53427deebe8SFrediano Ziglio             hd_iov.iov_base = (void *)buf;
535430bbaaaSFrediano Ziglio             hd_iov.iov_len = n * 512;
536430bbaaaSFrediano Ziglio             qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
53752b8eb60SKevin Wolf             qemu_co_mutex_unlock(&s->lock);
53852b8eb60SKevin Wolf             ret = bdrv_co_readv(bs->file,
539430bbaaaSFrediano Ziglio                                 (cluster_offset >> 9) + index_in_cluster,
540430bbaaaSFrediano Ziglio                                 n, &hd_qiov);
54152b8eb60SKevin Wolf             qemu_co_mutex_lock(&s->lock);
54252b8eb60SKevin Wolf             if (ret < 0) {
54327deebe8SFrediano Ziglio                 break;
544019d6b8fSAnthony Liguori             }
54543ca85b5SFrediano Ziglio             if (s->crypt_method) {
54627deebe8SFrediano Ziglio                 encrypt_sectors(s, sector_num, buf, buf,
54743ca85b5SFrediano Ziglio                                 n, 0,
54843ca85b5SFrediano Ziglio                                 &s->aes_decrypt_key);
54943ca85b5SFrediano Ziglio             }
55043ca85b5SFrediano Ziglio         }
55127deebe8SFrediano Ziglio         ret = 0;
55243ca85b5SFrediano Ziglio 
55327deebe8SFrediano Ziglio         nb_sectors -= n;
55427deebe8SFrediano Ziglio         sector_num += n;
55527deebe8SFrediano Ziglio         buf += n * 512;
55652b8eb60SKevin Wolf     }
557019d6b8fSAnthony Liguori 
55827deebe8SFrediano Ziglio done:
55952b8eb60SKevin Wolf     qemu_co_mutex_unlock(&s->lock);
56052b8eb60SKevin Wolf 
56127deebe8SFrediano Ziglio     if (qiov->niov > 1) {
56203396148SMichael Tokarev         qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size);
56327deebe8SFrediano Ziglio         qemu_vfree(orig_buf);
564019d6b8fSAnthony Liguori     }
56552b8eb60SKevin Wolf 
56652b8eb60SKevin Wolf     return ret;
56727deebe8SFrediano Ziglio 
56827deebe8SFrediano Ziglio fail:
56927deebe8SFrediano Ziglio     ret = -EIO;
57027deebe8SFrediano Ziglio     goto done;
571019d6b8fSAnthony Liguori }
572019d6b8fSAnthony Liguori 
573a968168cSDong Xu Wang static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
57427deebe8SFrediano Ziglio                           int nb_sectors, QEMUIOVector *qiov)
575019d6b8fSAnthony Liguori {
576019d6b8fSAnthony Liguori     BDRVQcowState *s = bs->opaque;
577019d6b8fSAnthony Liguori     int index_in_cluster;
578019d6b8fSAnthony Liguori     uint64_t cluster_offset;
579019d6b8fSAnthony Liguori     const uint8_t *src_buf;
58027deebe8SFrediano Ziglio     int ret = 0, n;
581430bbaaaSFrediano Ziglio     uint8_t *cluster_data = NULL;
582430bbaaaSFrediano Ziglio     struct iovec hd_iov;
583430bbaaaSFrediano Ziglio     QEMUIOVector hd_qiov;
58427deebe8SFrediano Ziglio     uint8_t *buf;
58527deebe8SFrediano Ziglio     void *orig_buf;
586019d6b8fSAnthony Liguori 
58727deebe8SFrediano Ziglio     s->cluster_cache_offset = -1; /* disable compressed cache */
58827deebe8SFrediano Ziglio 
58927deebe8SFrediano Ziglio     if (qiov->niov > 1) {
59027deebe8SFrediano Ziglio         buf = orig_buf = qemu_blockalign(bs, qiov->size);
591d5e6b161SMichael Tokarev         qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
59227deebe8SFrediano Ziglio     } else {
59327deebe8SFrediano Ziglio         orig_buf = NULL;
59427deebe8SFrediano Ziglio         buf = (uint8_t *)qiov->iov->iov_base;
595019d6b8fSAnthony Liguori     }
596019d6b8fSAnthony Liguori 
59727deebe8SFrediano Ziglio     qemu_co_mutex_lock(&s->lock);
59827deebe8SFrediano Ziglio 
59927deebe8SFrediano Ziglio     while (nb_sectors != 0) {
60027deebe8SFrediano Ziglio 
60127deebe8SFrediano Ziglio         index_in_cluster = sector_num & (s->cluster_sectors - 1);
602430bbaaaSFrediano Ziglio         n = s->cluster_sectors - index_in_cluster;
60327deebe8SFrediano Ziglio         if (n > nb_sectors) {
60427deebe8SFrediano Ziglio             n = nb_sectors;
605430bbaaaSFrediano Ziglio         }
60627deebe8SFrediano Ziglio         cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
607019d6b8fSAnthony Liguori                                             index_in_cluster,
608430bbaaaSFrediano Ziglio                                             index_in_cluster + n);
609019d6b8fSAnthony Liguori         if (!cluster_offset || (cluster_offset & 511) != 0) {
61027deebe8SFrediano Ziglio             ret = -EIO;
61127deebe8SFrediano Ziglio             break;
612019d6b8fSAnthony Liguori         }
613019d6b8fSAnthony Liguori         if (s->crypt_method) {
614430bbaaaSFrediano Ziglio             if (!cluster_data) {
615430bbaaaSFrediano Ziglio                 cluster_data = g_malloc0(s->cluster_size);
616019d6b8fSAnthony Liguori             }
61727deebe8SFrediano Ziglio             encrypt_sectors(s, sector_num, cluster_data, buf,
618430bbaaaSFrediano Ziglio                             n, 1, &s->aes_encrypt_key);
619430bbaaaSFrediano Ziglio             src_buf = cluster_data;
620019d6b8fSAnthony Liguori         } else {
62127deebe8SFrediano Ziglio             src_buf = buf;
622019d6b8fSAnthony Liguori         }
623019d6b8fSAnthony Liguori 
624430bbaaaSFrediano Ziglio         hd_iov.iov_base = (void *)src_buf;
625430bbaaaSFrediano Ziglio         hd_iov.iov_len = n * 512;
626430bbaaaSFrediano Ziglio         qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
62752b8eb60SKevin Wolf         qemu_co_mutex_unlock(&s->lock);
62852b8eb60SKevin Wolf         ret = bdrv_co_writev(bs->file,
629019d6b8fSAnthony Liguori                              (cluster_offset >> 9) + index_in_cluster,
630430bbaaaSFrediano Ziglio                              n, &hd_qiov);
63152b8eb60SKevin Wolf         qemu_co_mutex_lock(&s->lock);
63252b8eb60SKevin Wolf         if (ret < 0) {
63327deebe8SFrediano Ziglio             break;
6345614c188SStefan Weil         }
63527deebe8SFrediano Ziglio         ret = 0;
63643ca85b5SFrediano Ziglio 
63727deebe8SFrediano Ziglio         nb_sectors -= n;
63827deebe8SFrediano Ziglio         sector_num += n;
63927deebe8SFrediano Ziglio         buf += n * 512;
640019d6b8fSAnthony Liguori     }
64152b8eb60SKevin Wolf     qemu_co_mutex_unlock(&s->lock);
642019d6b8fSAnthony Liguori 
64327deebe8SFrediano Ziglio     if (qiov->niov > 1) {
64427deebe8SFrediano Ziglio         qemu_vfree(orig_buf);
645b11a24deSKevin Wolf     }
646add8d262SStefan Weil     g_free(cluster_data);
647b11a24deSKevin Wolf 
64852b8eb60SKevin Wolf     return ret;
649019d6b8fSAnthony Liguori }
650019d6b8fSAnthony Liguori 
651019d6b8fSAnthony Liguori static void qcow_close(BlockDriverState *bs)
652019d6b8fSAnthony Liguori {
653019d6b8fSAnthony Liguori     BDRVQcowState *s = bs->opaque;
654fd9f102cSKevin Wolf 
6557267c094SAnthony Liguori     g_free(s->l1_table);
6567267c094SAnthony Liguori     g_free(s->l2_cache);
6577267c094SAnthony Liguori     g_free(s->cluster_cache);
6587267c094SAnthony Liguori     g_free(s->cluster_data);
659fd9f102cSKevin Wolf 
660fd9f102cSKevin Wolf     migrate_del_blocker(s->migration_blocker);
661fd9f102cSKevin Wolf     error_free(s->migration_blocker);
662019d6b8fSAnthony Liguori }
663019d6b8fSAnthony Liguori 
664d5124c00SMax Reitz static int qcow_create(const char *filename, QEMUOptionParameter *options,
665d5124c00SMax Reitz                        Error **errp)
666019d6b8fSAnthony Liguori {
6672b16c9ffSLi Zhi Hui     int header_size, backing_filename_len, l1_size, shift, i;
668019d6b8fSAnthony Liguori     QCowHeader header;
6692b16c9ffSLi Zhi Hui     uint8_t *tmp;
6700e7e1989SKevin Wolf     int64_t total_size = 0;
6710e7e1989SKevin Wolf     const char *backing_file = NULL;
6720e7e1989SKevin Wolf     int flags = 0;
67334b5d2c6SMax Reitz     Error *local_err = NULL;
6743e1a8134SKirill A. Shutemov     int ret;
6752b16c9ffSLi Zhi Hui     BlockDriverState *qcow_bs;
6760e7e1989SKevin Wolf 
6770e7e1989SKevin Wolf     /* Read out options */
6780e7e1989SKevin Wolf     while (options && options->name) {
6790e7e1989SKevin Wolf         if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
6800e7e1989SKevin Wolf             total_size = options->value.n / 512;
6810e7e1989SKevin Wolf         } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
6820e7e1989SKevin Wolf             backing_file = options->value.s;
6830e7e1989SKevin Wolf         } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
6840e7e1989SKevin Wolf             flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
6850e7e1989SKevin Wolf         }
6860e7e1989SKevin Wolf         options++;
6870e7e1989SKevin Wolf     }
688019d6b8fSAnthony Liguori 
689cc84d90fSMax Reitz     ret = bdrv_create_file(filename, options, &local_err);
6902b16c9ffSLi Zhi Hui     if (ret < 0) {
691*b6d5066dSPaolo Bonzini         error_propagate(errp, local_err);
6922b16c9ffSLi Zhi Hui         return ret;
6932b16c9ffSLi Zhi Hui     }
6942b16c9ffSLi Zhi Hui 
6952e40134bSMax Reitz     qcow_bs = NULL;
6962e40134bSMax Reitz     ret = bdrv_open(&qcow_bs, filename, NULL, NULL,
6972e40134bSMax Reitz                     BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
6982b16c9ffSLi Zhi Hui     if (ret < 0) {
699*b6d5066dSPaolo Bonzini         error_propagate(errp, local_err);
7002b16c9ffSLi Zhi Hui         return ret;
7012b16c9ffSLi Zhi Hui     }
7022b16c9ffSLi Zhi Hui 
7032b16c9ffSLi Zhi Hui     ret = bdrv_truncate(qcow_bs, 0);
7042b16c9ffSLi Zhi Hui     if (ret < 0) {
7052b16c9ffSLi Zhi Hui         goto exit;
7062b16c9ffSLi Zhi Hui     }
7072b16c9ffSLi Zhi Hui 
708019d6b8fSAnthony Liguori     memset(&header, 0, sizeof(header));
709019d6b8fSAnthony Liguori     header.magic = cpu_to_be32(QCOW_MAGIC);
710019d6b8fSAnthony Liguori     header.version = cpu_to_be32(QCOW_VERSION);
711019d6b8fSAnthony Liguori     header.size = cpu_to_be64(total_size * 512);
712019d6b8fSAnthony Liguori     header_size = sizeof(header);
713019d6b8fSAnthony Liguori     backing_filename_len = 0;
714019d6b8fSAnthony Liguori     if (backing_file) {
715019d6b8fSAnthony Liguori         if (strcmp(backing_file, "fat:")) {
716019d6b8fSAnthony Liguori             header.backing_file_offset = cpu_to_be64(header_size);
717019d6b8fSAnthony Liguori             backing_filename_len = strlen(backing_file);
718019d6b8fSAnthony Liguori             header.backing_file_size = cpu_to_be32(backing_filename_len);
719019d6b8fSAnthony Liguori             header_size += backing_filename_len;
720019d6b8fSAnthony Liguori         } else {
721019d6b8fSAnthony Liguori             /* special backing file for vvfat */
722019d6b8fSAnthony Liguori             backing_file = NULL;
723019d6b8fSAnthony Liguori         }
724019d6b8fSAnthony Liguori         header.cluster_bits = 9; /* 512 byte cluster to avoid copying
725019d6b8fSAnthony Liguori                                     unmodifyed sectors */
726019d6b8fSAnthony Liguori         header.l2_bits = 12; /* 32 KB L2 tables */
727019d6b8fSAnthony Liguori     } else {
728019d6b8fSAnthony Liguori         header.cluster_bits = 12; /* 4 KB clusters */
729019d6b8fSAnthony Liguori         header.l2_bits = 9; /* 4 KB L2 tables */
730019d6b8fSAnthony Liguori     }
731019d6b8fSAnthony Liguori     header_size = (header_size + 7) & ~7;
732019d6b8fSAnthony Liguori     shift = header.cluster_bits + header.l2_bits;
733019d6b8fSAnthony Liguori     l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;
734019d6b8fSAnthony Liguori 
735019d6b8fSAnthony Liguori     header.l1_table_offset = cpu_to_be64(header_size);
736019d6b8fSAnthony Liguori     if (flags & BLOCK_FLAG_ENCRYPT) {
737019d6b8fSAnthony Liguori         header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
738019d6b8fSAnthony Liguori     } else {
739019d6b8fSAnthony Liguori         header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
740019d6b8fSAnthony Liguori     }
741019d6b8fSAnthony Liguori 
742019d6b8fSAnthony Liguori     /* write all the data */
7432b16c9ffSLi Zhi Hui     ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
7443e1a8134SKirill A. Shutemov     if (ret != sizeof(header)) {
7453e1a8134SKirill A. Shutemov         goto exit;
7463e1a8134SKirill A. Shutemov     }
7473e1a8134SKirill A. Shutemov 
748019d6b8fSAnthony Liguori     if (backing_file) {
7492b16c9ffSLi Zhi Hui         ret = bdrv_pwrite(qcow_bs, sizeof(header),
7502b16c9ffSLi Zhi Hui             backing_file, backing_filename_len);
7513e1a8134SKirill A. Shutemov         if (ret != backing_filename_len) {
7523e1a8134SKirill A. Shutemov             goto exit;
753019d6b8fSAnthony Liguori         }
7543e1a8134SKirill A. Shutemov     }
7553e1a8134SKirill A. Shutemov 
7562b16c9ffSLi Zhi Hui     tmp = g_malloc0(BDRV_SECTOR_SIZE);
7572b16c9ffSLi Zhi Hui     for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
7582b16c9ffSLi Zhi Hui         BDRV_SECTOR_SIZE); i++) {
7592b16c9ffSLi Zhi Hui         ret = bdrv_pwrite(qcow_bs, header_size +
7602b16c9ffSLi Zhi Hui             BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
7612b16c9ffSLi Zhi Hui         if (ret != BDRV_SECTOR_SIZE) {
7622b16c9ffSLi Zhi Hui             g_free(tmp);
7632b16c9ffSLi Zhi Hui             goto exit;
7642b16c9ffSLi Zhi Hui         }
7652b16c9ffSLi Zhi Hui     }
7662b16c9ffSLi Zhi Hui 
7672b16c9ffSLi Zhi Hui     g_free(tmp);
7683e1a8134SKirill A. Shutemov     ret = 0;
7693e1a8134SKirill A. Shutemov exit:
7704f6fd349SFam Zheng     bdrv_unref(qcow_bs);
7713e1a8134SKirill A. Shutemov     return ret;
772019d6b8fSAnthony Liguori }
773019d6b8fSAnthony Liguori 
774019d6b8fSAnthony Liguori static int qcow_make_empty(BlockDriverState *bs)
775019d6b8fSAnthony Liguori {
776019d6b8fSAnthony Liguori     BDRVQcowState *s = bs->opaque;
777019d6b8fSAnthony Liguori     uint32_t l1_length = s->l1_size * sizeof(uint64_t);
778019d6b8fSAnthony Liguori     int ret;
779019d6b8fSAnthony Liguori 
780019d6b8fSAnthony Liguori     memset(s->l1_table, 0, l1_length);
7815e5557d9SKevin Wolf     if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
7825e5557d9SKevin Wolf             l1_length) < 0)
783019d6b8fSAnthony Liguori         return -1;
78466f82ceeSKevin Wolf     ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
785019d6b8fSAnthony Liguori     if (ret < 0)
786019d6b8fSAnthony Liguori         return ret;
787019d6b8fSAnthony Liguori 
788019d6b8fSAnthony Liguori     memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
789019d6b8fSAnthony Liguori     memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
790019d6b8fSAnthony Liguori     memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
791019d6b8fSAnthony Liguori 
792019d6b8fSAnthony Liguori     return 0;
793019d6b8fSAnthony Liguori }
794019d6b8fSAnthony Liguori 
795019d6b8fSAnthony Liguori /* XXX: put compressed sectors first, then all the cluster aligned
796019d6b8fSAnthony Liguori    tables to avoid losing bytes in alignment */
797019d6b8fSAnthony Liguori static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
798019d6b8fSAnthony Liguori                                  const uint8_t *buf, int nb_sectors)
799019d6b8fSAnthony Liguori {
800019d6b8fSAnthony Liguori     BDRVQcowState *s = bs->opaque;
801019d6b8fSAnthony Liguori     z_stream strm;
802019d6b8fSAnthony Liguori     int ret, out_len;
803019d6b8fSAnthony Liguori     uint8_t *out_buf;
804019d6b8fSAnthony Liguori     uint64_t cluster_offset;
805019d6b8fSAnthony Liguori 
80616b3c5cdSStefan Hajnoczi     if (nb_sectors != s->cluster_sectors) {
80716b3c5cdSStefan Hajnoczi         ret = -EINVAL;
80816b3c5cdSStefan Hajnoczi 
80916b3c5cdSStefan Hajnoczi         /* Zero-pad last write if image size is not cluster aligned */
81016b3c5cdSStefan Hajnoczi         if (sector_num + nb_sectors == bs->total_sectors &&
81116b3c5cdSStefan Hajnoczi             nb_sectors < s->cluster_sectors) {
81216b3c5cdSStefan Hajnoczi             uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
81316b3c5cdSStefan Hajnoczi             memset(pad_buf, 0, s->cluster_size);
81416b3c5cdSStefan Hajnoczi             memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
81516b3c5cdSStefan Hajnoczi             ret = qcow_write_compressed(bs, sector_num,
81616b3c5cdSStefan Hajnoczi                                         pad_buf, s->cluster_sectors);
81716b3c5cdSStefan Hajnoczi             qemu_vfree(pad_buf);
81816b3c5cdSStefan Hajnoczi         }
81916b3c5cdSStefan Hajnoczi         return ret;
82016b3c5cdSStefan Hajnoczi     }
821019d6b8fSAnthony Liguori 
8227267c094SAnthony Liguori     out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
823019d6b8fSAnthony Liguori 
824019d6b8fSAnthony Liguori     /* best compression, small window, no zlib header */
825019d6b8fSAnthony Liguori     memset(&strm, 0, sizeof(strm));
826019d6b8fSAnthony Liguori     ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
827019d6b8fSAnthony Liguori                        Z_DEFLATED, -12,
828019d6b8fSAnthony Liguori                        9, Z_DEFAULT_STRATEGY);
829019d6b8fSAnthony Liguori     if (ret != 0) {
83064ebe71aSKevin Wolf         ret = -EINVAL;
83164ebe71aSKevin Wolf         goto fail;
832019d6b8fSAnthony Liguori     }
833019d6b8fSAnthony Liguori 
834019d6b8fSAnthony Liguori     strm.avail_in = s->cluster_size;
835019d6b8fSAnthony Liguori     strm.next_in = (uint8_t *)buf;
836019d6b8fSAnthony Liguori     strm.avail_out = s->cluster_size;
837019d6b8fSAnthony Liguori     strm.next_out = out_buf;
838019d6b8fSAnthony Liguori 
839019d6b8fSAnthony Liguori     ret = deflate(&strm, Z_FINISH);
840019d6b8fSAnthony Liguori     if (ret != Z_STREAM_END && ret != Z_OK) {
841019d6b8fSAnthony Liguori         deflateEnd(&strm);
84264ebe71aSKevin Wolf         ret = -EINVAL;
84364ebe71aSKevin Wolf         goto fail;
844019d6b8fSAnthony Liguori     }
845019d6b8fSAnthony Liguori     out_len = strm.next_out - out_buf;
846019d6b8fSAnthony Liguori 
847019d6b8fSAnthony Liguori     deflateEnd(&strm);
848019d6b8fSAnthony Liguori 
849019d6b8fSAnthony Liguori     if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
850019d6b8fSAnthony Liguori         /* could not compress: write normal cluster */
85164ebe71aSKevin Wolf         ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
85264ebe71aSKevin Wolf         if (ret < 0) {
85364ebe71aSKevin Wolf             goto fail;
85464ebe71aSKevin Wolf         }
855019d6b8fSAnthony Liguori     } else {
856019d6b8fSAnthony Liguori         cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
857019d6b8fSAnthony Liguori                                             out_len, 0, 0);
85864ebe71aSKevin Wolf         if (cluster_offset == 0) {
85964ebe71aSKevin Wolf             ret = -EIO;
86064ebe71aSKevin Wolf             goto fail;
86164ebe71aSKevin Wolf         }
86264ebe71aSKevin Wolf 
863019d6b8fSAnthony Liguori         cluster_offset &= s->cluster_offset_mask;
86464ebe71aSKevin Wolf         ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
86564ebe71aSKevin Wolf         if (ret < 0) {
86664ebe71aSKevin Wolf             goto fail;
867019d6b8fSAnthony Liguori         }
868019d6b8fSAnthony Liguori     }
869019d6b8fSAnthony Liguori 
87064ebe71aSKevin Wolf     ret = 0;
87164ebe71aSKevin Wolf fail:
8727267c094SAnthony Liguori     g_free(out_buf);
87364ebe71aSKevin Wolf     return ret;
874019d6b8fSAnthony Liguori }
875019d6b8fSAnthony Liguori 
876019d6b8fSAnthony Liguori static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
877019d6b8fSAnthony Liguori {
878019d6b8fSAnthony Liguori     BDRVQcowState *s = bs->opaque;
879019d6b8fSAnthony Liguori     bdi->cluster_size = s->cluster_size;
880019d6b8fSAnthony Liguori     return 0;
881019d6b8fSAnthony Liguori }
882019d6b8fSAnthony Liguori 
8830e7e1989SKevin Wolf 
8840e7e1989SKevin Wolf static QEMUOptionParameter qcow_create_options[] = {
885db08adf5SKevin Wolf     {
886db08adf5SKevin Wolf         .name = BLOCK_OPT_SIZE,
887db08adf5SKevin Wolf         .type = OPT_SIZE,
888db08adf5SKevin Wolf         .help = "Virtual disk size"
889db08adf5SKevin Wolf     },
890db08adf5SKevin Wolf     {
891db08adf5SKevin Wolf         .name = BLOCK_OPT_BACKING_FILE,
892db08adf5SKevin Wolf         .type = OPT_STRING,
893db08adf5SKevin Wolf         .help = "File name of a base image"
894db08adf5SKevin Wolf     },
895db08adf5SKevin Wolf     {
896db08adf5SKevin Wolf         .name = BLOCK_OPT_ENCRYPT,
897db08adf5SKevin Wolf         .type = OPT_FLAG,
898db08adf5SKevin Wolf         .help = "Encrypt the image"
899db08adf5SKevin Wolf     },
9000e7e1989SKevin Wolf     { NULL }
9010e7e1989SKevin Wolf };
9020e7e1989SKevin Wolf 
903019d6b8fSAnthony Liguori static BlockDriver bdrv_qcow = {
904019d6b8fSAnthony Liguori     .format_name	= "qcow",
905019d6b8fSAnthony Liguori     .instance_size	= sizeof(BDRVQcowState),
906019d6b8fSAnthony Liguori     .bdrv_probe		= qcow_probe,
907019d6b8fSAnthony Liguori     .bdrv_open		= qcow_open,
908019d6b8fSAnthony Liguori     .bdrv_close		= qcow_close,
909d177692eSJeff Cody     .bdrv_reopen_prepare = qcow_reopen_prepare,
910019d6b8fSAnthony Liguori     .bdrv_create	= qcow_create,
9113ac21627SPeter Lieven     .bdrv_has_zero_init     = bdrv_has_zero_init_1,
912c68b89acSKevin Wolf 
91352b8eb60SKevin Wolf     .bdrv_co_readv          = qcow_co_readv,
91452b8eb60SKevin Wolf     .bdrv_co_writev         = qcow_co_writev,
915b6b8a333SPaolo Bonzini     .bdrv_co_get_block_status   = qcow_co_get_block_status,
916c68b89acSKevin Wolf 
917c68b89acSKevin Wolf     .bdrv_set_key           = qcow_set_key,
918c68b89acSKevin Wolf     .bdrv_make_empty        = qcow_make_empty,
919019d6b8fSAnthony Liguori     .bdrv_write_compressed  = qcow_write_compressed,
920019d6b8fSAnthony Liguori     .bdrv_get_info          = qcow_get_info,
9210e7e1989SKevin Wolf 
9220e7e1989SKevin Wolf     .create_options = qcow_create_options,
923019d6b8fSAnthony Liguori };
924019d6b8fSAnthony Liguori 
925019d6b8fSAnthony Liguori static void bdrv_qcow_init(void)
926019d6b8fSAnthony Liguori {
927019d6b8fSAnthony Liguori     bdrv_register(&bdrv_qcow);
928019d6b8fSAnthony Liguori }
929019d6b8fSAnthony Liguori 
930019d6b8fSAnthony Liguori block_init(bdrv_qcow_init);
931