1*019d6b8fSAnthony Liguori /* 2*019d6b8fSAnthony Liguori * Block driver for the QCOW format 3*019d6b8fSAnthony Liguori * 4*019d6b8fSAnthony Liguori * Copyright (c) 2004-2006 Fabrice Bellard 5*019d6b8fSAnthony Liguori * 6*019d6b8fSAnthony Liguori * Permission is hereby granted, free of charge, to any person obtaining a copy 7*019d6b8fSAnthony Liguori * of this software and associated documentation files (the "Software"), to deal 8*019d6b8fSAnthony Liguori * in the Software without restriction, including without limitation the rights 9*019d6b8fSAnthony Liguori * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10*019d6b8fSAnthony Liguori * copies of the Software, and to permit persons to whom the Software is 11*019d6b8fSAnthony Liguori * furnished to do so, subject to the following conditions: 12*019d6b8fSAnthony Liguori * 13*019d6b8fSAnthony Liguori * The above copyright notice and this permission notice shall be included in 14*019d6b8fSAnthony Liguori * all copies or substantial portions of the Software. 15*019d6b8fSAnthony Liguori * 16*019d6b8fSAnthony Liguori * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17*019d6b8fSAnthony Liguori * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18*019d6b8fSAnthony Liguori * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19*019d6b8fSAnthony Liguori * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20*019d6b8fSAnthony Liguori * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21*019d6b8fSAnthony Liguori * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22*019d6b8fSAnthony Liguori * THE SOFTWARE. 23*019d6b8fSAnthony Liguori */ 24*019d6b8fSAnthony Liguori #include "qemu-common.h" 25*019d6b8fSAnthony Liguori #include "block_int.h" 26*019d6b8fSAnthony Liguori #include "module.h" 27*019d6b8fSAnthony Liguori #include <zlib.h> 28*019d6b8fSAnthony Liguori #include "aes.h" 29*019d6b8fSAnthony Liguori 30*019d6b8fSAnthony Liguori /**************************************************************/ 31*019d6b8fSAnthony Liguori /* QEMU COW block driver with compression and encryption support */ 32*019d6b8fSAnthony Liguori 33*019d6b8fSAnthony Liguori #define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) 34*019d6b8fSAnthony Liguori #define QCOW_VERSION 1 35*019d6b8fSAnthony Liguori 36*019d6b8fSAnthony Liguori #define QCOW_CRYPT_NONE 0 37*019d6b8fSAnthony Liguori #define QCOW_CRYPT_AES 1 38*019d6b8fSAnthony Liguori 39*019d6b8fSAnthony Liguori #define QCOW_OFLAG_COMPRESSED (1LL << 63) 40*019d6b8fSAnthony Liguori 41*019d6b8fSAnthony Liguori typedef struct QCowHeader { 42*019d6b8fSAnthony Liguori uint32_t magic; 43*019d6b8fSAnthony Liguori uint32_t version; 44*019d6b8fSAnthony Liguori uint64_t backing_file_offset; 45*019d6b8fSAnthony Liguori uint32_t backing_file_size; 46*019d6b8fSAnthony Liguori uint32_t mtime; 47*019d6b8fSAnthony Liguori uint64_t size; /* in bytes */ 48*019d6b8fSAnthony Liguori uint8_t cluster_bits; 49*019d6b8fSAnthony Liguori uint8_t l2_bits; 50*019d6b8fSAnthony Liguori uint32_t crypt_method; 51*019d6b8fSAnthony Liguori uint64_t l1_table_offset; 52*019d6b8fSAnthony Liguori } QCowHeader; 53*019d6b8fSAnthony Liguori 54*019d6b8fSAnthony Liguori #define L2_CACHE_SIZE 16 55*019d6b8fSAnthony Liguori 56*019d6b8fSAnthony Liguori typedef struct BDRVQcowState { 57*019d6b8fSAnthony Liguori BlockDriverState *hd; 58*019d6b8fSAnthony Liguori int cluster_bits; 59*019d6b8fSAnthony Liguori int cluster_size; 60*019d6b8fSAnthony Liguori int cluster_sectors; 61*019d6b8fSAnthony Liguori int l2_bits; 62*019d6b8fSAnthony Liguori int l2_size; 63*019d6b8fSAnthony Liguori int l1_size; 64*019d6b8fSAnthony Liguori uint64_t cluster_offset_mask; 65*019d6b8fSAnthony Liguori uint64_t l1_table_offset; 66*019d6b8fSAnthony Liguori uint64_t *l1_table; 67*019d6b8fSAnthony Liguori uint64_t *l2_cache; 68*019d6b8fSAnthony Liguori uint64_t l2_cache_offsets[L2_CACHE_SIZE]; 69*019d6b8fSAnthony Liguori uint32_t l2_cache_counts[L2_CACHE_SIZE]; 70*019d6b8fSAnthony Liguori uint8_t *cluster_cache; 71*019d6b8fSAnthony Liguori uint8_t *cluster_data; 72*019d6b8fSAnthony Liguori uint64_t cluster_cache_offset; 73*019d6b8fSAnthony Liguori uint32_t crypt_method; /* current crypt method, 0 if no key yet */ 74*019d6b8fSAnthony Liguori uint32_t crypt_method_header; 75*019d6b8fSAnthony Liguori AES_KEY aes_encrypt_key; 76*019d6b8fSAnthony Liguori AES_KEY aes_decrypt_key; 77*019d6b8fSAnthony Liguori } BDRVQcowState; 78*019d6b8fSAnthony Liguori 79*019d6b8fSAnthony Liguori static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset); 80*019d6b8fSAnthony Liguori 81*019d6b8fSAnthony Liguori static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename) 82*019d6b8fSAnthony Liguori { 83*019d6b8fSAnthony Liguori const QCowHeader *cow_header = (const void *)buf; 84*019d6b8fSAnthony Liguori 85*019d6b8fSAnthony Liguori if (buf_size >= sizeof(QCowHeader) && 86*019d6b8fSAnthony Liguori be32_to_cpu(cow_header->magic) == QCOW_MAGIC && 87*019d6b8fSAnthony Liguori be32_to_cpu(cow_header->version) == QCOW_VERSION) 88*019d6b8fSAnthony Liguori return 100; 89*019d6b8fSAnthony Liguori else 90*019d6b8fSAnthony Liguori return 0; 91*019d6b8fSAnthony Liguori } 92*019d6b8fSAnthony Liguori 93*019d6b8fSAnthony Liguori static int qcow_open(BlockDriverState *bs, const char *filename, int flags) 94*019d6b8fSAnthony Liguori { 95*019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 96*019d6b8fSAnthony Liguori int len, i, shift, ret; 97*019d6b8fSAnthony Liguori QCowHeader header; 98*019d6b8fSAnthony Liguori 99*019d6b8fSAnthony Liguori ret = bdrv_file_open(&s->hd, filename, flags); 100*019d6b8fSAnthony Liguori if (ret < 0) 101*019d6b8fSAnthony Liguori return ret; 102*019d6b8fSAnthony Liguori if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header)) 103*019d6b8fSAnthony Liguori goto fail; 104*019d6b8fSAnthony Liguori be32_to_cpus(&header.magic); 105*019d6b8fSAnthony Liguori be32_to_cpus(&header.version); 106*019d6b8fSAnthony Liguori be64_to_cpus(&header.backing_file_offset); 107*019d6b8fSAnthony Liguori be32_to_cpus(&header.backing_file_size); 108*019d6b8fSAnthony Liguori be32_to_cpus(&header.mtime); 109*019d6b8fSAnthony Liguori be64_to_cpus(&header.size); 110*019d6b8fSAnthony Liguori be32_to_cpus(&header.crypt_method); 111*019d6b8fSAnthony Liguori be64_to_cpus(&header.l1_table_offset); 112*019d6b8fSAnthony Liguori 113*019d6b8fSAnthony Liguori if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION) 114*019d6b8fSAnthony Liguori goto fail; 115*019d6b8fSAnthony Liguori if (header.size <= 1 || header.cluster_bits < 9) 116*019d6b8fSAnthony Liguori goto fail; 117*019d6b8fSAnthony Liguori if (header.crypt_method > QCOW_CRYPT_AES) 118*019d6b8fSAnthony Liguori goto fail; 119*019d6b8fSAnthony Liguori s->crypt_method_header = header.crypt_method; 120*019d6b8fSAnthony Liguori if (s->crypt_method_header) 121*019d6b8fSAnthony Liguori bs->encrypted = 1; 122*019d6b8fSAnthony Liguori s->cluster_bits = header.cluster_bits; 123*019d6b8fSAnthony Liguori s->cluster_size = 1 << s->cluster_bits; 124*019d6b8fSAnthony Liguori s->cluster_sectors = 1 << (s->cluster_bits - 9); 125*019d6b8fSAnthony Liguori s->l2_bits = header.l2_bits; 126*019d6b8fSAnthony Liguori s->l2_size = 1 << s->l2_bits; 127*019d6b8fSAnthony Liguori bs->total_sectors = header.size / 512; 128*019d6b8fSAnthony Liguori s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1; 129*019d6b8fSAnthony Liguori 130*019d6b8fSAnthony Liguori /* read the level 1 table */ 131*019d6b8fSAnthony Liguori shift = s->cluster_bits + s->l2_bits; 132*019d6b8fSAnthony Liguori s->l1_size = (header.size + (1LL << shift) - 1) >> shift; 133*019d6b8fSAnthony Liguori 134*019d6b8fSAnthony Liguori s->l1_table_offset = header.l1_table_offset; 135*019d6b8fSAnthony Liguori s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t)); 136*019d6b8fSAnthony Liguori if (!s->l1_table) 137*019d6b8fSAnthony Liguori goto fail; 138*019d6b8fSAnthony Liguori if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) != 139*019d6b8fSAnthony Liguori s->l1_size * sizeof(uint64_t)) 140*019d6b8fSAnthony Liguori goto fail; 141*019d6b8fSAnthony Liguori for(i = 0;i < s->l1_size; i++) { 142*019d6b8fSAnthony Liguori be64_to_cpus(&s->l1_table[i]); 143*019d6b8fSAnthony Liguori } 144*019d6b8fSAnthony Liguori /* alloc L2 cache */ 145*019d6b8fSAnthony Liguori s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); 146*019d6b8fSAnthony Liguori if (!s->l2_cache) 147*019d6b8fSAnthony Liguori goto fail; 148*019d6b8fSAnthony Liguori s->cluster_cache = qemu_malloc(s->cluster_size); 149*019d6b8fSAnthony Liguori if (!s->cluster_cache) 150*019d6b8fSAnthony Liguori goto fail; 151*019d6b8fSAnthony Liguori s->cluster_data = qemu_malloc(s->cluster_size); 152*019d6b8fSAnthony Liguori if (!s->cluster_data) 153*019d6b8fSAnthony Liguori goto fail; 154*019d6b8fSAnthony Liguori s->cluster_cache_offset = -1; 155*019d6b8fSAnthony Liguori 156*019d6b8fSAnthony Liguori /* read the backing file name */ 157*019d6b8fSAnthony Liguori if (header.backing_file_offset != 0) { 158*019d6b8fSAnthony Liguori len = header.backing_file_size; 159*019d6b8fSAnthony Liguori if (len > 1023) 160*019d6b8fSAnthony Liguori len = 1023; 161*019d6b8fSAnthony Liguori if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len) 162*019d6b8fSAnthony Liguori goto fail; 163*019d6b8fSAnthony Liguori bs->backing_file[len] = '\0'; 164*019d6b8fSAnthony Liguori } 165*019d6b8fSAnthony Liguori return 0; 166*019d6b8fSAnthony Liguori 167*019d6b8fSAnthony Liguori fail: 168*019d6b8fSAnthony Liguori qemu_free(s->l1_table); 169*019d6b8fSAnthony Liguori qemu_free(s->l2_cache); 170*019d6b8fSAnthony Liguori qemu_free(s->cluster_cache); 171*019d6b8fSAnthony Liguori qemu_free(s->cluster_data); 172*019d6b8fSAnthony Liguori bdrv_delete(s->hd); 173*019d6b8fSAnthony Liguori return -1; 174*019d6b8fSAnthony Liguori } 175*019d6b8fSAnthony Liguori 176*019d6b8fSAnthony Liguori static int qcow_set_key(BlockDriverState *bs, const char *key) 177*019d6b8fSAnthony Liguori { 178*019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 179*019d6b8fSAnthony Liguori uint8_t keybuf[16]; 180*019d6b8fSAnthony Liguori int len, i; 181*019d6b8fSAnthony Liguori 182*019d6b8fSAnthony Liguori memset(keybuf, 0, 16); 183*019d6b8fSAnthony Liguori len = strlen(key); 184*019d6b8fSAnthony Liguori if (len > 16) 185*019d6b8fSAnthony Liguori len = 16; 186*019d6b8fSAnthony Liguori /* XXX: we could compress the chars to 7 bits to increase 187*019d6b8fSAnthony Liguori entropy */ 188*019d6b8fSAnthony Liguori for(i = 0;i < len;i++) { 189*019d6b8fSAnthony Liguori keybuf[i] = key[i]; 190*019d6b8fSAnthony Liguori } 191*019d6b8fSAnthony Liguori s->crypt_method = s->crypt_method_header; 192*019d6b8fSAnthony Liguori 193*019d6b8fSAnthony Liguori if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0) 194*019d6b8fSAnthony Liguori return -1; 195*019d6b8fSAnthony Liguori if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) 196*019d6b8fSAnthony Liguori return -1; 197*019d6b8fSAnthony Liguori #if 0 198*019d6b8fSAnthony Liguori /* test */ 199*019d6b8fSAnthony Liguori { 200*019d6b8fSAnthony Liguori uint8_t in[16]; 201*019d6b8fSAnthony Liguori uint8_t out[16]; 202*019d6b8fSAnthony Liguori uint8_t tmp[16]; 203*019d6b8fSAnthony Liguori for(i=0;i<16;i++) 204*019d6b8fSAnthony Liguori in[i] = i; 205*019d6b8fSAnthony Liguori AES_encrypt(in, tmp, &s->aes_encrypt_key); 206*019d6b8fSAnthony Liguori AES_decrypt(tmp, out, &s->aes_decrypt_key); 207*019d6b8fSAnthony Liguori for(i = 0; i < 16; i++) 208*019d6b8fSAnthony Liguori printf(" %02x", tmp[i]); 209*019d6b8fSAnthony Liguori printf("\n"); 210*019d6b8fSAnthony Liguori for(i = 0; i < 16; i++) 211*019d6b8fSAnthony Liguori printf(" %02x", out[i]); 212*019d6b8fSAnthony Liguori printf("\n"); 213*019d6b8fSAnthony Liguori } 214*019d6b8fSAnthony Liguori #endif 215*019d6b8fSAnthony Liguori return 0; 216*019d6b8fSAnthony Liguori } 217*019d6b8fSAnthony Liguori 218*019d6b8fSAnthony Liguori /* The crypt function is compatible with the linux cryptoloop 219*019d6b8fSAnthony Liguori algorithm for < 4 GB images. NOTE: out_buf == in_buf is 220*019d6b8fSAnthony Liguori supported */ 221*019d6b8fSAnthony Liguori static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num, 222*019d6b8fSAnthony Liguori uint8_t *out_buf, const uint8_t *in_buf, 223*019d6b8fSAnthony Liguori int nb_sectors, int enc, 224*019d6b8fSAnthony Liguori const AES_KEY *key) 225*019d6b8fSAnthony Liguori { 226*019d6b8fSAnthony Liguori union { 227*019d6b8fSAnthony Liguori uint64_t ll[2]; 228*019d6b8fSAnthony Liguori uint8_t b[16]; 229*019d6b8fSAnthony Liguori } ivec; 230*019d6b8fSAnthony Liguori int i; 231*019d6b8fSAnthony Liguori 232*019d6b8fSAnthony Liguori for(i = 0; i < nb_sectors; i++) { 233*019d6b8fSAnthony Liguori ivec.ll[0] = cpu_to_le64(sector_num); 234*019d6b8fSAnthony Liguori ivec.ll[1] = 0; 235*019d6b8fSAnthony Liguori AES_cbc_encrypt(in_buf, out_buf, 512, key, 236*019d6b8fSAnthony Liguori ivec.b, enc); 237*019d6b8fSAnthony Liguori sector_num++; 238*019d6b8fSAnthony Liguori in_buf += 512; 239*019d6b8fSAnthony Liguori out_buf += 512; 240*019d6b8fSAnthony Liguori } 241*019d6b8fSAnthony Liguori } 242*019d6b8fSAnthony Liguori 243*019d6b8fSAnthony Liguori /* 'allocate' is: 244*019d6b8fSAnthony Liguori * 245*019d6b8fSAnthony Liguori * 0 to not allocate. 246*019d6b8fSAnthony Liguori * 247*019d6b8fSAnthony Liguori * 1 to allocate a normal cluster (for sector indexes 'n_start' to 248*019d6b8fSAnthony Liguori * 'n_end') 249*019d6b8fSAnthony Liguori * 250*019d6b8fSAnthony Liguori * 2 to allocate a compressed cluster of size 251*019d6b8fSAnthony Liguori * 'compressed_size'. 'compressed_size' must be > 0 and < 252*019d6b8fSAnthony Liguori * cluster_size 253*019d6b8fSAnthony Liguori * 254*019d6b8fSAnthony Liguori * return 0 if not allocated. 255*019d6b8fSAnthony Liguori */ 256*019d6b8fSAnthony Liguori static uint64_t get_cluster_offset(BlockDriverState *bs, 257*019d6b8fSAnthony Liguori uint64_t offset, int allocate, 258*019d6b8fSAnthony Liguori int compressed_size, 259*019d6b8fSAnthony Liguori int n_start, int n_end) 260*019d6b8fSAnthony Liguori { 261*019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 262*019d6b8fSAnthony Liguori int min_index, i, j, l1_index, l2_index; 263*019d6b8fSAnthony Liguori uint64_t l2_offset, *l2_table, cluster_offset, tmp; 264*019d6b8fSAnthony Liguori uint32_t min_count; 265*019d6b8fSAnthony Liguori int new_l2_table; 266*019d6b8fSAnthony Liguori 267*019d6b8fSAnthony Liguori l1_index = offset >> (s->l2_bits + s->cluster_bits); 268*019d6b8fSAnthony Liguori l2_offset = s->l1_table[l1_index]; 269*019d6b8fSAnthony Liguori new_l2_table = 0; 270*019d6b8fSAnthony Liguori if (!l2_offset) { 271*019d6b8fSAnthony Liguori if (!allocate) 272*019d6b8fSAnthony Liguori return 0; 273*019d6b8fSAnthony Liguori /* allocate a new l2 entry */ 274*019d6b8fSAnthony Liguori l2_offset = bdrv_getlength(s->hd); 275*019d6b8fSAnthony Liguori /* round to cluster size */ 276*019d6b8fSAnthony Liguori l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); 277*019d6b8fSAnthony Liguori /* update the L1 entry */ 278*019d6b8fSAnthony Liguori s->l1_table[l1_index] = l2_offset; 279*019d6b8fSAnthony Liguori tmp = cpu_to_be64(l2_offset); 280*019d6b8fSAnthony Liguori if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp), 281*019d6b8fSAnthony Liguori &tmp, sizeof(tmp)) != sizeof(tmp)) 282*019d6b8fSAnthony Liguori return 0; 283*019d6b8fSAnthony Liguori new_l2_table = 1; 284*019d6b8fSAnthony Liguori } 285*019d6b8fSAnthony Liguori for(i = 0; i < L2_CACHE_SIZE; i++) { 286*019d6b8fSAnthony Liguori if (l2_offset == s->l2_cache_offsets[i]) { 287*019d6b8fSAnthony Liguori /* increment the hit count */ 288*019d6b8fSAnthony Liguori if (++s->l2_cache_counts[i] == 0xffffffff) { 289*019d6b8fSAnthony Liguori for(j = 0; j < L2_CACHE_SIZE; j++) { 290*019d6b8fSAnthony Liguori s->l2_cache_counts[j] >>= 1; 291*019d6b8fSAnthony Liguori } 292*019d6b8fSAnthony Liguori } 293*019d6b8fSAnthony Liguori l2_table = s->l2_cache + (i << s->l2_bits); 294*019d6b8fSAnthony Liguori goto found; 295*019d6b8fSAnthony Liguori } 296*019d6b8fSAnthony Liguori } 297*019d6b8fSAnthony Liguori /* not found: load a new entry in the least used one */ 298*019d6b8fSAnthony Liguori min_index = 0; 299*019d6b8fSAnthony Liguori min_count = 0xffffffff; 300*019d6b8fSAnthony Liguori for(i = 0; i < L2_CACHE_SIZE; i++) { 301*019d6b8fSAnthony Liguori if (s->l2_cache_counts[i] < min_count) { 302*019d6b8fSAnthony Liguori min_count = s->l2_cache_counts[i]; 303*019d6b8fSAnthony Liguori min_index = i; 304*019d6b8fSAnthony Liguori } 305*019d6b8fSAnthony Liguori } 306*019d6b8fSAnthony Liguori l2_table = s->l2_cache + (min_index << s->l2_bits); 307*019d6b8fSAnthony Liguori if (new_l2_table) { 308*019d6b8fSAnthony Liguori memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); 309*019d6b8fSAnthony Liguori if (bdrv_pwrite(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != 310*019d6b8fSAnthony Liguori s->l2_size * sizeof(uint64_t)) 311*019d6b8fSAnthony Liguori return 0; 312*019d6b8fSAnthony Liguori } else { 313*019d6b8fSAnthony Liguori if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != 314*019d6b8fSAnthony Liguori s->l2_size * sizeof(uint64_t)) 315*019d6b8fSAnthony Liguori return 0; 316*019d6b8fSAnthony Liguori } 317*019d6b8fSAnthony Liguori s->l2_cache_offsets[min_index] = l2_offset; 318*019d6b8fSAnthony Liguori s->l2_cache_counts[min_index] = 1; 319*019d6b8fSAnthony Liguori found: 320*019d6b8fSAnthony Liguori l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); 321*019d6b8fSAnthony Liguori cluster_offset = be64_to_cpu(l2_table[l2_index]); 322*019d6b8fSAnthony Liguori if (!cluster_offset || 323*019d6b8fSAnthony Liguori ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) { 324*019d6b8fSAnthony Liguori if (!allocate) 325*019d6b8fSAnthony Liguori return 0; 326*019d6b8fSAnthony Liguori /* allocate a new cluster */ 327*019d6b8fSAnthony Liguori if ((cluster_offset & QCOW_OFLAG_COMPRESSED) && 328*019d6b8fSAnthony Liguori (n_end - n_start) < s->cluster_sectors) { 329*019d6b8fSAnthony Liguori /* if the cluster is already compressed, we must 330*019d6b8fSAnthony Liguori decompress it in the case it is not completely 331*019d6b8fSAnthony Liguori overwritten */ 332*019d6b8fSAnthony Liguori if (decompress_cluster(s, cluster_offset) < 0) 333*019d6b8fSAnthony Liguori return 0; 334*019d6b8fSAnthony Liguori cluster_offset = bdrv_getlength(s->hd); 335*019d6b8fSAnthony Liguori cluster_offset = (cluster_offset + s->cluster_size - 1) & 336*019d6b8fSAnthony Liguori ~(s->cluster_size - 1); 337*019d6b8fSAnthony Liguori /* write the cluster content */ 338*019d6b8fSAnthony Liguori if (bdrv_pwrite(s->hd, cluster_offset, s->cluster_cache, s->cluster_size) != 339*019d6b8fSAnthony Liguori s->cluster_size) 340*019d6b8fSAnthony Liguori return -1; 341*019d6b8fSAnthony Liguori } else { 342*019d6b8fSAnthony Liguori cluster_offset = bdrv_getlength(s->hd); 343*019d6b8fSAnthony Liguori if (allocate == 1) { 344*019d6b8fSAnthony Liguori /* round to cluster size */ 345*019d6b8fSAnthony Liguori cluster_offset = (cluster_offset + s->cluster_size - 1) & 346*019d6b8fSAnthony Liguori ~(s->cluster_size - 1); 347*019d6b8fSAnthony Liguori bdrv_truncate(s->hd, cluster_offset + s->cluster_size); 348*019d6b8fSAnthony Liguori /* if encrypted, we must initialize the cluster 349*019d6b8fSAnthony Liguori content which won't be written */ 350*019d6b8fSAnthony Liguori if (s->crypt_method && 351*019d6b8fSAnthony Liguori (n_end - n_start) < s->cluster_sectors) { 352*019d6b8fSAnthony Liguori uint64_t start_sect; 353*019d6b8fSAnthony Liguori start_sect = (offset & ~(s->cluster_size - 1)) >> 9; 354*019d6b8fSAnthony Liguori memset(s->cluster_data + 512, 0x00, 512); 355*019d6b8fSAnthony Liguori for(i = 0; i < s->cluster_sectors; i++) { 356*019d6b8fSAnthony Liguori if (i < n_start || i >= n_end) { 357*019d6b8fSAnthony Liguori encrypt_sectors(s, start_sect + i, 358*019d6b8fSAnthony Liguori s->cluster_data, 359*019d6b8fSAnthony Liguori s->cluster_data + 512, 1, 1, 360*019d6b8fSAnthony Liguori &s->aes_encrypt_key); 361*019d6b8fSAnthony Liguori if (bdrv_pwrite(s->hd, cluster_offset + i * 512, 362*019d6b8fSAnthony Liguori s->cluster_data, 512) != 512) 363*019d6b8fSAnthony Liguori return -1; 364*019d6b8fSAnthony Liguori } 365*019d6b8fSAnthony Liguori } 366*019d6b8fSAnthony Liguori } 367*019d6b8fSAnthony Liguori } else if (allocate == 2) { 368*019d6b8fSAnthony Liguori cluster_offset |= QCOW_OFLAG_COMPRESSED | 369*019d6b8fSAnthony Liguori (uint64_t)compressed_size << (63 - s->cluster_bits); 370*019d6b8fSAnthony Liguori } 371*019d6b8fSAnthony Liguori } 372*019d6b8fSAnthony Liguori /* update L2 table */ 373*019d6b8fSAnthony Liguori tmp = cpu_to_be64(cluster_offset); 374*019d6b8fSAnthony Liguori l2_table[l2_index] = tmp; 375*019d6b8fSAnthony Liguori if (bdrv_pwrite(s->hd, 376*019d6b8fSAnthony Liguori l2_offset + l2_index * sizeof(tmp), &tmp, sizeof(tmp)) != sizeof(tmp)) 377*019d6b8fSAnthony Liguori return 0; 378*019d6b8fSAnthony Liguori } 379*019d6b8fSAnthony Liguori return cluster_offset; 380*019d6b8fSAnthony Liguori } 381*019d6b8fSAnthony Liguori 382*019d6b8fSAnthony Liguori static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num, 383*019d6b8fSAnthony Liguori int nb_sectors, int *pnum) 384*019d6b8fSAnthony Liguori { 385*019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 386*019d6b8fSAnthony Liguori int index_in_cluster, n; 387*019d6b8fSAnthony Liguori uint64_t cluster_offset; 388*019d6b8fSAnthony Liguori 389*019d6b8fSAnthony Liguori cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); 390*019d6b8fSAnthony Liguori index_in_cluster = sector_num & (s->cluster_sectors - 1); 391*019d6b8fSAnthony Liguori n = s->cluster_sectors - index_in_cluster; 392*019d6b8fSAnthony Liguori if (n > nb_sectors) 393*019d6b8fSAnthony Liguori n = nb_sectors; 394*019d6b8fSAnthony Liguori *pnum = n; 395*019d6b8fSAnthony Liguori return (cluster_offset != 0); 396*019d6b8fSAnthony Liguori } 397*019d6b8fSAnthony Liguori 398*019d6b8fSAnthony Liguori static int decompress_buffer(uint8_t *out_buf, int out_buf_size, 399*019d6b8fSAnthony Liguori const uint8_t *buf, int buf_size) 400*019d6b8fSAnthony Liguori { 401*019d6b8fSAnthony Liguori z_stream strm1, *strm = &strm1; 402*019d6b8fSAnthony Liguori int ret, out_len; 403*019d6b8fSAnthony Liguori 404*019d6b8fSAnthony Liguori memset(strm, 0, sizeof(*strm)); 405*019d6b8fSAnthony Liguori 406*019d6b8fSAnthony Liguori strm->next_in = (uint8_t *)buf; 407*019d6b8fSAnthony Liguori strm->avail_in = buf_size; 408*019d6b8fSAnthony Liguori strm->next_out = out_buf; 409*019d6b8fSAnthony Liguori strm->avail_out = out_buf_size; 410*019d6b8fSAnthony Liguori 411*019d6b8fSAnthony Liguori ret = inflateInit2(strm, -12); 412*019d6b8fSAnthony Liguori if (ret != Z_OK) 413*019d6b8fSAnthony Liguori return -1; 414*019d6b8fSAnthony Liguori ret = inflate(strm, Z_FINISH); 415*019d6b8fSAnthony Liguori out_len = strm->next_out - out_buf; 416*019d6b8fSAnthony Liguori if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || 417*019d6b8fSAnthony Liguori out_len != out_buf_size) { 418*019d6b8fSAnthony Liguori inflateEnd(strm); 419*019d6b8fSAnthony Liguori return -1; 420*019d6b8fSAnthony Liguori } 421*019d6b8fSAnthony Liguori inflateEnd(strm); 422*019d6b8fSAnthony Liguori return 0; 423*019d6b8fSAnthony Liguori } 424*019d6b8fSAnthony Liguori 425*019d6b8fSAnthony Liguori static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset) 426*019d6b8fSAnthony Liguori { 427*019d6b8fSAnthony Liguori int ret, csize; 428*019d6b8fSAnthony Liguori uint64_t coffset; 429*019d6b8fSAnthony Liguori 430*019d6b8fSAnthony Liguori coffset = cluster_offset & s->cluster_offset_mask; 431*019d6b8fSAnthony Liguori if (s->cluster_cache_offset != coffset) { 432*019d6b8fSAnthony Liguori csize = cluster_offset >> (63 - s->cluster_bits); 433*019d6b8fSAnthony Liguori csize &= (s->cluster_size - 1); 434*019d6b8fSAnthony Liguori ret = bdrv_pread(s->hd, coffset, s->cluster_data, csize); 435*019d6b8fSAnthony Liguori if (ret != csize) 436*019d6b8fSAnthony Liguori return -1; 437*019d6b8fSAnthony Liguori if (decompress_buffer(s->cluster_cache, s->cluster_size, 438*019d6b8fSAnthony Liguori s->cluster_data, csize) < 0) { 439*019d6b8fSAnthony Liguori return -1; 440*019d6b8fSAnthony Liguori } 441*019d6b8fSAnthony Liguori s->cluster_cache_offset = coffset; 442*019d6b8fSAnthony Liguori } 443*019d6b8fSAnthony Liguori return 0; 444*019d6b8fSAnthony Liguori } 445*019d6b8fSAnthony Liguori 446*019d6b8fSAnthony Liguori #if 0 447*019d6b8fSAnthony Liguori 448*019d6b8fSAnthony Liguori static int qcow_read(BlockDriverState *bs, int64_t sector_num, 449*019d6b8fSAnthony Liguori uint8_t *buf, int nb_sectors) 450*019d6b8fSAnthony Liguori { 451*019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 452*019d6b8fSAnthony Liguori int ret, index_in_cluster, n; 453*019d6b8fSAnthony Liguori uint64_t cluster_offset; 454*019d6b8fSAnthony Liguori 455*019d6b8fSAnthony Liguori while (nb_sectors > 0) { 456*019d6b8fSAnthony Liguori cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); 457*019d6b8fSAnthony Liguori index_in_cluster = sector_num & (s->cluster_sectors - 1); 458*019d6b8fSAnthony Liguori n = s->cluster_sectors - index_in_cluster; 459*019d6b8fSAnthony Liguori if (n > nb_sectors) 460*019d6b8fSAnthony Liguori n = nb_sectors; 461*019d6b8fSAnthony Liguori if (!cluster_offset) { 462*019d6b8fSAnthony Liguori if (bs->backing_hd) { 463*019d6b8fSAnthony Liguori /* read from the base image */ 464*019d6b8fSAnthony Liguori ret = bdrv_read(bs->backing_hd, sector_num, buf, n); 465*019d6b8fSAnthony Liguori if (ret < 0) 466*019d6b8fSAnthony Liguori return -1; 467*019d6b8fSAnthony Liguori } else { 468*019d6b8fSAnthony Liguori memset(buf, 0, 512 * n); 469*019d6b8fSAnthony Liguori } 470*019d6b8fSAnthony Liguori } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { 471*019d6b8fSAnthony Liguori if (decompress_cluster(s, cluster_offset) < 0) 472*019d6b8fSAnthony Liguori return -1; 473*019d6b8fSAnthony Liguori memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n); 474*019d6b8fSAnthony Liguori } else { 475*019d6b8fSAnthony Liguori ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512); 476*019d6b8fSAnthony Liguori if (ret != n * 512) 477*019d6b8fSAnthony Liguori return -1; 478*019d6b8fSAnthony Liguori if (s->crypt_method) { 479*019d6b8fSAnthony Liguori encrypt_sectors(s, sector_num, buf, buf, n, 0, 480*019d6b8fSAnthony Liguori &s->aes_decrypt_key); 481*019d6b8fSAnthony Liguori } 482*019d6b8fSAnthony Liguori } 483*019d6b8fSAnthony Liguori nb_sectors -= n; 484*019d6b8fSAnthony Liguori sector_num += n; 485*019d6b8fSAnthony Liguori buf += n * 512; 486*019d6b8fSAnthony Liguori } 487*019d6b8fSAnthony Liguori return 0; 488*019d6b8fSAnthony Liguori } 489*019d6b8fSAnthony Liguori #endif 490*019d6b8fSAnthony Liguori 491*019d6b8fSAnthony Liguori static int qcow_write(BlockDriverState *bs, int64_t sector_num, 492*019d6b8fSAnthony Liguori const uint8_t *buf, int nb_sectors) 493*019d6b8fSAnthony Liguori { 494*019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 495*019d6b8fSAnthony Liguori int ret, index_in_cluster, n; 496*019d6b8fSAnthony Liguori uint64_t cluster_offset; 497*019d6b8fSAnthony Liguori 498*019d6b8fSAnthony Liguori while (nb_sectors > 0) { 499*019d6b8fSAnthony Liguori index_in_cluster = sector_num & (s->cluster_sectors - 1); 500*019d6b8fSAnthony Liguori n = s->cluster_sectors - index_in_cluster; 501*019d6b8fSAnthony Liguori if (n > nb_sectors) 502*019d6b8fSAnthony Liguori n = nb_sectors; 503*019d6b8fSAnthony Liguori cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0, 504*019d6b8fSAnthony Liguori index_in_cluster, 505*019d6b8fSAnthony Liguori index_in_cluster + n); 506*019d6b8fSAnthony Liguori if (!cluster_offset) 507*019d6b8fSAnthony Liguori return -1; 508*019d6b8fSAnthony Liguori if (s->crypt_method) { 509*019d6b8fSAnthony Liguori encrypt_sectors(s, sector_num, s->cluster_data, buf, n, 1, 510*019d6b8fSAnthony Liguori &s->aes_encrypt_key); 511*019d6b8fSAnthony Liguori ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, 512*019d6b8fSAnthony Liguori s->cluster_data, n * 512); 513*019d6b8fSAnthony Liguori } else { 514*019d6b8fSAnthony Liguori ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512); 515*019d6b8fSAnthony Liguori } 516*019d6b8fSAnthony Liguori if (ret != n * 512) 517*019d6b8fSAnthony Liguori return -1; 518*019d6b8fSAnthony Liguori nb_sectors -= n; 519*019d6b8fSAnthony Liguori sector_num += n; 520*019d6b8fSAnthony Liguori buf += n * 512; 521*019d6b8fSAnthony Liguori } 522*019d6b8fSAnthony Liguori s->cluster_cache_offset = -1; /* disable compressed cache */ 523*019d6b8fSAnthony Liguori return 0; 524*019d6b8fSAnthony Liguori } 525*019d6b8fSAnthony Liguori 526*019d6b8fSAnthony Liguori typedef struct QCowAIOCB { 527*019d6b8fSAnthony Liguori BlockDriverAIOCB common; 528*019d6b8fSAnthony Liguori int64_t sector_num; 529*019d6b8fSAnthony Liguori QEMUIOVector *qiov; 530*019d6b8fSAnthony Liguori uint8_t *buf; 531*019d6b8fSAnthony Liguori void *orig_buf; 532*019d6b8fSAnthony Liguori int nb_sectors; 533*019d6b8fSAnthony Liguori int n; 534*019d6b8fSAnthony Liguori uint64_t cluster_offset; 535*019d6b8fSAnthony Liguori uint8_t *cluster_data; 536*019d6b8fSAnthony Liguori struct iovec hd_iov; 537*019d6b8fSAnthony Liguori QEMUIOVector hd_qiov; 538*019d6b8fSAnthony Liguori BlockDriverAIOCB *hd_aiocb; 539*019d6b8fSAnthony Liguori } QCowAIOCB; 540*019d6b8fSAnthony Liguori 541*019d6b8fSAnthony Liguori static void qcow_aio_read_cb(void *opaque, int ret) 542*019d6b8fSAnthony Liguori { 543*019d6b8fSAnthony Liguori QCowAIOCB *acb = opaque; 544*019d6b8fSAnthony Liguori BlockDriverState *bs = acb->common.bs; 545*019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 546*019d6b8fSAnthony Liguori int index_in_cluster; 547*019d6b8fSAnthony Liguori 548*019d6b8fSAnthony Liguori acb->hd_aiocb = NULL; 549*019d6b8fSAnthony Liguori if (ret < 0) 550*019d6b8fSAnthony Liguori goto done; 551*019d6b8fSAnthony Liguori 552*019d6b8fSAnthony Liguori redo: 553*019d6b8fSAnthony Liguori /* post process the read buffer */ 554*019d6b8fSAnthony Liguori if (!acb->cluster_offset) { 555*019d6b8fSAnthony Liguori /* nothing to do */ 556*019d6b8fSAnthony Liguori } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { 557*019d6b8fSAnthony Liguori /* nothing to do */ 558*019d6b8fSAnthony Liguori } else { 559*019d6b8fSAnthony Liguori if (s->crypt_method) { 560*019d6b8fSAnthony Liguori encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf, 561*019d6b8fSAnthony Liguori acb->n, 0, 562*019d6b8fSAnthony Liguori &s->aes_decrypt_key); 563*019d6b8fSAnthony Liguori } 564*019d6b8fSAnthony Liguori } 565*019d6b8fSAnthony Liguori 566*019d6b8fSAnthony Liguori acb->nb_sectors -= acb->n; 567*019d6b8fSAnthony Liguori acb->sector_num += acb->n; 568*019d6b8fSAnthony Liguori acb->buf += acb->n * 512; 569*019d6b8fSAnthony Liguori 570*019d6b8fSAnthony Liguori if (acb->nb_sectors == 0) { 571*019d6b8fSAnthony Liguori /* request completed */ 572*019d6b8fSAnthony Liguori ret = 0; 573*019d6b8fSAnthony Liguori goto done; 574*019d6b8fSAnthony Liguori } 575*019d6b8fSAnthony Liguori 576*019d6b8fSAnthony Liguori /* prepare next AIO request */ 577*019d6b8fSAnthony Liguori acb->cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, 578*019d6b8fSAnthony Liguori 0, 0, 0, 0); 579*019d6b8fSAnthony Liguori index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); 580*019d6b8fSAnthony Liguori acb->n = s->cluster_sectors - index_in_cluster; 581*019d6b8fSAnthony Liguori if (acb->n > acb->nb_sectors) 582*019d6b8fSAnthony Liguori acb->n = acb->nb_sectors; 583*019d6b8fSAnthony Liguori 584*019d6b8fSAnthony Liguori if (!acb->cluster_offset) { 585*019d6b8fSAnthony Liguori if (bs->backing_hd) { 586*019d6b8fSAnthony Liguori /* read from the base image */ 587*019d6b8fSAnthony Liguori acb->hd_iov.iov_base = (void *)acb->buf; 588*019d6b8fSAnthony Liguori acb->hd_iov.iov_len = acb->n * 512; 589*019d6b8fSAnthony Liguori qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); 590*019d6b8fSAnthony Liguori acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num, 591*019d6b8fSAnthony Liguori &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); 592*019d6b8fSAnthony Liguori if (acb->hd_aiocb == NULL) 593*019d6b8fSAnthony Liguori goto done; 594*019d6b8fSAnthony Liguori } else { 595*019d6b8fSAnthony Liguori /* Note: in this case, no need to wait */ 596*019d6b8fSAnthony Liguori memset(acb->buf, 0, 512 * acb->n); 597*019d6b8fSAnthony Liguori goto redo; 598*019d6b8fSAnthony Liguori } 599*019d6b8fSAnthony Liguori } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { 600*019d6b8fSAnthony Liguori /* add AIO support for compressed blocks ? */ 601*019d6b8fSAnthony Liguori if (decompress_cluster(s, acb->cluster_offset) < 0) 602*019d6b8fSAnthony Liguori goto done; 603*019d6b8fSAnthony Liguori memcpy(acb->buf, 604*019d6b8fSAnthony Liguori s->cluster_cache + index_in_cluster * 512, 512 * acb->n); 605*019d6b8fSAnthony Liguori goto redo; 606*019d6b8fSAnthony Liguori } else { 607*019d6b8fSAnthony Liguori if ((acb->cluster_offset & 511) != 0) { 608*019d6b8fSAnthony Liguori ret = -EIO; 609*019d6b8fSAnthony Liguori goto done; 610*019d6b8fSAnthony Liguori } 611*019d6b8fSAnthony Liguori acb->hd_iov.iov_base = (void *)acb->buf; 612*019d6b8fSAnthony Liguori acb->hd_iov.iov_len = acb->n * 512; 613*019d6b8fSAnthony Liguori qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); 614*019d6b8fSAnthony Liguori acb->hd_aiocb = bdrv_aio_readv(s->hd, 615*019d6b8fSAnthony Liguori (acb->cluster_offset >> 9) + index_in_cluster, 616*019d6b8fSAnthony Liguori &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); 617*019d6b8fSAnthony Liguori if (acb->hd_aiocb == NULL) 618*019d6b8fSAnthony Liguori goto done; 619*019d6b8fSAnthony Liguori } 620*019d6b8fSAnthony Liguori 621*019d6b8fSAnthony Liguori return; 622*019d6b8fSAnthony Liguori 623*019d6b8fSAnthony Liguori done: 624*019d6b8fSAnthony Liguori if (acb->qiov->niov > 1) { 625*019d6b8fSAnthony Liguori qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size); 626*019d6b8fSAnthony Liguori qemu_vfree(acb->orig_buf); 627*019d6b8fSAnthony Liguori } 628*019d6b8fSAnthony Liguori acb->common.cb(acb->common.opaque, ret); 629*019d6b8fSAnthony Liguori qemu_aio_release(acb); 630*019d6b8fSAnthony Liguori } 631*019d6b8fSAnthony Liguori 632*019d6b8fSAnthony Liguori static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs, 633*019d6b8fSAnthony Liguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 634*019d6b8fSAnthony Liguori BlockDriverCompletionFunc *cb, void *opaque) 635*019d6b8fSAnthony Liguori { 636*019d6b8fSAnthony Liguori QCowAIOCB *acb; 637*019d6b8fSAnthony Liguori 638*019d6b8fSAnthony Liguori acb = qemu_aio_get(bs, cb, opaque); 639*019d6b8fSAnthony Liguori if (!acb) 640*019d6b8fSAnthony Liguori return NULL; 641*019d6b8fSAnthony Liguori acb->hd_aiocb = NULL; 642*019d6b8fSAnthony Liguori acb->sector_num = sector_num; 643*019d6b8fSAnthony Liguori acb->qiov = qiov; 644*019d6b8fSAnthony Liguori if (qiov->niov > 1) 645*019d6b8fSAnthony Liguori acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size); 646*019d6b8fSAnthony Liguori else 647*019d6b8fSAnthony Liguori acb->buf = (uint8_t *)qiov->iov->iov_base; 648*019d6b8fSAnthony Liguori acb->nb_sectors = nb_sectors; 649*019d6b8fSAnthony Liguori acb->n = 0; 650*019d6b8fSAnthony Liguori acb->cluster_offset = 0; 651*019d6b8fSAnthony Liguori 652*019d6b8fSAnthony Liguori qcow_aio_read_cb(acb, 0); 653*019d6b8fSAnthony Liguori return &acb->common; 654*019d6b8fSAnthony Liguori } 655*019d6b8fSAnthony Liguori 656*019d6b8fSAnthony Liguori static void qcow_aio_write_cb(void *opaque, int ret) 657*019d6b8fSAnthony Liguori { 658*019d6b8fSAnthony Liguori QCowAIOCB *acb = opaque; 659*019d6b8fSAnthony Liguori BlockDriverState *bs = acb->common.bs; 660*019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 661*019d6b8fSAnthony Liguori int index_in_cluster; 662*019d6b8fSAnthony Liguori uint64_t cluster_offset; 663*019d6b8fSAnthony Liguori const uint8_t *src_buf; 664*019d6b8fSAnthony Liguori 665*019d6b8fSAnthony Liguori acb->hd_aiocb = NULL; 666*019d6b8fSAnthony Liguori 667*019d6b8fSAnthony Liguori if (ret < 0) 668*019d6b8fSAnthony Liguori goto done; 669*019d6b8fSAnthony Liguori 670*019d6b8fSAnthony Liguori acb->nb_sectors -= acb->n; 671*019d6b8fSAnthony Liguori acb->sector_num += acb->n; 672*019d6b8fSAnthony Liguori acb->buf += acb->n * 512; 673*019d6b8fSAnthony Liguori 674*019d6b8fSAnthony Liguori if (acb->nb_sectors == 0) { 675*019d6b8fSAnthony Liguori /* request completed */ 676*019d6b8fSAnthony Liguori ret = 0; 677*019d6b8fSAnthony Liguori goto done; 678*019d6b8fSAnthony Liguori } 679*019d6b8fSAnthony Liguori 680*019d6b8fSAnthony Liguori index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); 681*019d6b8fSAnthony Liguori acb->n = s->cluster_sectors - index_in_cluster; 682*019d6b8fSAnthony Liguori if (acb->n > acb->nb_sectors) 683*019d6b8fSAnthony Liguori acb->n = acb->nb_sectors; 684*019d6b8fSAnthony Liguori cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, 1, 0, 685*019d6b8fSAnthony Liguori index_in_cluster, 686*019d6b8fSAnthony Liguori index_in_cluster + acb->n); 687*019d6b8fSAnthony Liguori if (!cluster_offset || (cluster_offset & 511) != 0) { 688*019d6b8fSAnthony Liguori ret = -EIO; 689*019d6b8fSAnthony Liguori goto done; 690*019d6b8fSAnthony Liguori } 691*019d6b8fSAnthony Liguori if (s->crypt_method) { 692*019d6b8fSAnthony Liguori if (!acb->cluster_data) { 693*019d6b8fSAnthony Liguori acb->cluster_data = qemu_mallocz(s->cluster_size); 694*019d6b8fSAnthony Liguori if (!acb->cluster_data) { 695*019d6b8fSAnthony Liguori ret = -ENOMEM; 696*019d6b8fSAnthony Liguori goto done; 697*019d6b8fSAnthony Liguori } 698*019d6b8fSAnthony Liguori } 699*019d6b8fSAnthony Liguori encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf, 700*019d6b8fSAnthony Liguori acb->n, 1, &s->aes_encrypt_key); 701*019d6b8fSAnthony Liguori src_buf = acb->cluster_data; 702*019d6b8fSAnthony Liguori } else { 703*019d6b8fSAnthony Liguori src_buf = acb->buf; 704*019d6b8fSAnthony Liguori } 705*019d6b8fSAnthony Liguori 706*019d6b8fSAnthony Liguori acb->hd_iov.iov_base = (void *)src_buf; 707*019d6b8fSAnthony Liguori acb->hd_iov.iov_len = acb->n * 512; 708*019d6b8fSAnthony Liguori qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); 709*019d6b8fSAnthony Liguori acb->hd_aiocb = bdrv_aio_writev(s->hd, 710*019d6b8fSAnthony Liguori (cluster_offset >> 9) + index_in_cluster, 711*019d6b8fSAnthony Liguori &acb->hd_qiov, acb->n, 712*019d6b8fSAnthony Liguori qcow_aio_write_cb, acb); 713*019d6b8fSAnthony Liguori if (acb->hd_aiocb == NULL) 714*019d6b8fSAnthony Liguori goto done; 715*019d6b8fSAnthony Liguori return; 716*019d6b8fSAnthony Liguori 717*019d6b8fSAnthony Liguori done: 718*019d6b8fSAnthony Liguori if (acb->qiov->niov > 1) 719*019d6b8fSAnthony Liguori qemu_vfree(acb->orig_buf); 720*019d6b8fSAnthony Liguori acb->common.cb(acb->common.opaque, ret); 721*019d6b8fSAnthony Liguori qemu_aio_release(acb); 722*019d6b8fSAnthony Liguori } 723*019d6b8fSAnthony Liguori 724*019d6b8fSAnthony Liguori static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs, 725*019d6b8fSAnthony Liguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 726*019d6b8fSAnthony Liguori BlockDriverCompletionFunc *cb, void *opaque) 727*019d6b8fSAnthony Liguori { 728*019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 729*019d6b8fSAnthony Liguori QCowAIOCB *acb; 730*019d6b8fSAnthony Liguori 731*019d6b8fSAnthony Liguori s->cluster_cache_offset = -1; /* disable compressed cache */ 732*019d6b8fSAnthony Liguori 733*019d6b8fSAnthony Liguori acb = qemu_aio_get(bs, cb, opaque); 734*019d6b8fSAnthony Liguori if (!acb) 735*019d6b8fSAnthony Liguori return NULL; 736*019d6b8fSAnthony Liguori acb->hd_aiocb = NULL; 737*019d6b8fSAnthony Liguori acb->sector_num = sector_num; 738*019d6b8fSAnthony Liguori acb->qiov = qiov; 739*019d6b8fSAnthony Liguori if (qiov->niov > 1) { 740*019d6b8fSAnthony Liguori acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size); 741*019d6b8fSAnthony Liguori qemu_iovec_to_buffer(qiov, acb->buf); 742*019d6b8fSAnthony Liguori } else { 743*019d6b8fSAnthony Liguori acb->buf = (uint8_t *)qiov->iov->iov_base; 744*019d6b8fSAnthony Liguori } 745*019d6b8fSAnthony Liguori acb->nb_sectors = nb_sectors; 746*019d6b8fSAnthony Liguori acb->n = 0; 747*019d6b8fSAnthony Liguori 748*019d6b8fSAnthony Liguori qcow_aio_write_cb(acb, 0); 749*019d6b8fSAnthony Liguori return &acb->common; 750*019d6b8fSAnthony Liguori } 751*019d6b8fSAnthony Liguori 752*019d6b8fSAnthony Liguori static void qcow_aio_cancel(BlockDriverAIOCB *blockacb) 753*019d6b8fSAnthony Liguori { 754*019d6b8fSAnthony Liguori QCowAIOCB *acb = (QCowAIOCB *)blockacb; 755*019d6b8fSAnthony Liguori if (acb->hd_aiocb) 756*019d6b8fSAnthony Liguori bdrv_aio_cancel(acb->hd_aiocb); 757*019d6b8fSAnthony Liguori qemu_aio_release(acb); 758*019d6b8fSAnthony Liguori } 759*019d6b8fSAnthony Liguori 760*019d6b8fSAnthony Liguori static void qcow_close(BlockDriverState *bs) 761*019d6b8fSAnthony Liguori { 762*019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 763*019d6b8fSAnthony Liguori qemu_free(s->l1_table); 764*019d6b8fSAnthony Liguori qemu_free(s->l2_cache); 765*019d6b8fSAnthony Liguori qemu_free(s->cluster_cache); 766*019d6b8fSAnthony Liguori qemu_free(s->cluster_data); 767*019d6b8fSAnthony Liguori bdrv_delete(s->hd); 768*019d6b8fSAnthony Liguori } 769*019d6b8fSAnthony Liguori 770*019d6b8fSAnthony Liguori static int qcow_create(const char *filename, int64_t total_size, 771*019d6b8fSAnthony Liguori const char *backing_file, int flags) 772*019d6b8fSAnthony Liguori { 773*019d6b8fSAnthony Liguori int fd, header_size, backing_filename_len, l1_size, i, shift; 774*019d6b8fSAnthony Liguori QCowHeader header; 775*019d6b8fSAnthony Liguori uint64_t tmp; 776*019d6b8fSAnthony Liguori 777*019d6b8fSAnthony Liguori fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); 778*019d6b8fSAnthony Liguori if (fd < 0) 779*019d6b8fSAnthony Liguori return -1; 780*019d6b8fSAnthony Liguori memset(&header, 0, sizeof(header)); 781*019d6b8fSAnthony Liguori header.magic = cpu_to_be32(QCOW_MAGIC); 782*019d6b8fSAnthony Liguori header.version = cpu_to_be32(QCOW_VERSION); 783*019d6b8fSAnthony Liguori header.size = cpu_to_be64(total_size * 512); 784*019d6b8fSAnthony Liguori header_size = sizeof(header); 785*019d6b8fSAnthony Liguori backing_filename_len = 0; 786*019d6b8fSAnthony Liguori if (backing_file) { 787*019d6b8fSAnthony Liguori if (strcmp(backing_file, "fat:")) { 788*019d6b8fSAnthony Liguori header.backing_file_offset = cpu_to_be64(header_size); 789*019d6b8fSAnthony Liguori backing_filename_len = strlen(backing_file); 790*019d6b8fSAnthony Liguori header.backing_file_size = cpu_to_be32(backing_filename_len); 791*019d6b8fSAnthony Liguori header_size += backing_filename_len; 792*019d6b8fSAnthony Liguori } else { 793*019d6b8fSAnthony Liguori /* special backing file for vvfat */ 794*019d6b8fSAnthony Liguori backing_file = NULL; 795*019d6b8fSAnthony Liguori } 796*019d6b8fSAnthony Liguori header.cluster_bits = 9; /* 512 byte cluster to avoid copying 797*019d6b8fSAnthony Liguori unmodifyed sectors */ 798*019d6b8fSAnthony Liguori header.l2_bits = 12; /* 32 KB L2 tables */ 799*019d6b8fSAnthony Liguori } else { 800*019d6b8fSAnthony Liguori header.cluster_bits = 12; /* 4 KB clusters */ 801*019d6b8fSAnthony Liguori header.l2_bits = 9; /* 4 KB L2 tables */ 802*019d6b8fSAnthony Liguori } 803*019d6b8fSAnthony Liguori header_size = (header_size + 7) & ~7; 804*019d6b8fSAnthony Liguori shift = header.cluster_bits + header.l2_bits; 805*019d6b8fSAnthony Liguori l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift; 806*019d6b8fSAnthony Liguori 807*019d6b8fSAnthony Liguori header.l1_table_offset = cpu_to_be64(header_size); 808*019d6b8fSAnthony Liguori if (flags & BLOCK_FLAG_ENCRYPT) { 809*019d6b8fSAnthony Liguori header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); 810*019d6b8fSAnthony Liguori } else { 811*019d6b8fSAnthony Liguori header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); 812*019d6b8fSAnthony Liguori } 813*019d6b8fSAnthony Liguori 814*019d6b8fSAnthony Liguori /* write all the data */ 815*019d6b8fSAnthony Liguori write(fd, &header, sizeof(header)); 816*019d6b8fSAnthony Liguori if (backing_file) { 817*019d6b8fSAnthony Liguori write(fd, backing_file, backing_filename_len); 818*019d6b8fSAnthony Liguori } 819*019d6b8fSAnthony Liguori lseek(fd, header_size, SEEK_SET); 820*019d6b8fSAnthony Liguori tmp = 0; 821*019d6b8fSAnthony Liguori for(i = 0;i < l1_size; i++) { 822*019d6b8fSAnthony Liguori write(fd, &tmp, sizeof(tmp)); 823*019d6b8fSAnthony Liguori } 824*019d6b8fSAnthony Liguori close(fd); 825*019d6b8fSAnthony Liguori return 0; 826*019d6b8fSAnthony Liguori } 827*019d6b8fSAnthony Liguori 828*019d6b8fSAnthony Liguori static int qcow_make_empty(BlockDriverState *bs) 829*019d6b8fSAnthony Liguori { 830*019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 831*019d6b8fSAnthony Liguori uint32_t l1_length = s->l1_size * sizeof(uint64_t); 832*019d6b8fSAnthony Liguori int ret; 833*019d6b8fSAnthony Liguori 834*019d6b8fSAnthony Liguori memset(s->l1_table, 0, l1_length); 835*019d6b8fSAnthony Liguori if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0) 836*019d6b8fSAnthony Liguori return -1; 837*019d6b8fSAnthony Liguori ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length); 838*019d6b8fSAnthony Liguori if (ret < 0) 839*019d6b8fSAnthony Liguori return ret; 840*019d6b8fSAnthony Liguori 841*019d6b8fSAnthony Liguori memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); 842*019d6b8fSAnthony Liguori memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t)); 843*019d6b8fSAnthony Liguori memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t)); 844*019d6b8fSAnthony Liguori 845*019d6b8fSAnthony Liguori return 0; 846*019d6b8fSAnthony Liguori } 847*019d6b8fSAnthony Liguori 848*019d6b8fSAnthony Liguori /* XXX: put compressed sectors first, then all the cluster aligned 849*019d6b8fSAnthony Liguori tables to avoid losing bytes in alignment */ 850*019d6b8fSAnthony Liguori static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, 851*019d6b8fSAnthony Liguori const uint8_t *buf, int nb_sectors) 852*019d6b8fSAnthony Liguori { 853*019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 854*019d6b8fSAnthony Liguori z_stream strm; 855*019d6b8fSAnthony Liguori int ret, out_len; 856*019d6b8fSAnthony Liguori uint8_t *out_buf; 857*019d6b8fSAnthony Liguori uint64_t cluster_offset; 858*019d6b8fSAnthony Liguori 859*019d6b8fSAnthony Liguori if (nb_sectors != s->cluster_sectors) 860*019d6b8fSAnthony Liguori return -EINVAL; 861*019d6b8fSAnthony Liguori 862*019d6b8fSAnthony Liguori out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128); 863*019d6b8fSAnthony Liguori if (!out_buf) 864*019d6b8fSAnthony Liguori return -1; 865*019d6b8fSAnthony Liguori 866*019d6b8fSAnthony Liguori /* best compression, small window, no zlib header */ 867*019d6b8fSAnthony Liguori memset(&strm, 0, sizeof(strm)); 868*019d6b8fSAnthony Liguori ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, 869*019d6b8fSAnthony Liguori Z_DEFLATED, -12, 870*019d6b8fSAnthony Liguori 9, Z_DEFAULT_STRATEGY); 871*019d6b8fSAnthony Liguori if (ret != 0) { 872*019d6b8fSAnthony Liguori qemu_free(out_buf); 873*019d6b8fSAnthony Liguori return -1; 874*019d6b8fSAnthony Liguori } 875*019d6b8fSAnthony Liguori 876*019d6b8fSAnthony Liguori strm.avail_in = s->cluster_size; 877*019d6b8fSAnthony Liguori strm.next_in = (uint8_t *)buf; 878*019d6b8fSAnthony Liguori strm.avail_out = s->cluster_size; 879*019d6b8fSAnthony Liguori strm.next_out = out_buf; 880*019d6b8fSAnthony Liguori 881*019d6b8fSAnthony Liguori ret = deflate(&strm, Z_FINISH); 882*019d6b8fSAnthony Liguori if (ret != Z_STREAM_END && ret != Z_OK) { 883*019d6b8fSAnthony Liguori qemu_free(out_buf); 884*019d6b8fSAnthony Liguori deflateEnd(&strm); 885*019d6b8fSAnthony Liguori return -1; 886*019d6b8fSAnthony Liguori } 887*019d6b8fSAnthony Liguori out_len = strm.next_out - out_buf; 888*019d6b8fSAnthony Liguori 889*019d6b8fSAnthony Liguori deflateEnd(&strm); 890*019d6b8fSAnthony Liguori 891*019d6b8fSAnthony Liguori if (ret != Z_STREAM_END || out_len >= s->cluster_size) { 892*019d6b8fSAnthony Liguori /* could not compress: write normal cluster */ 893*019d6b8fSAnthony Liguori qcow_write(bs, sector_num, buf, s->cluster_sectors); 894*019d6b8fSAnthony Liguori } else { 895*019d6b8fSAnthony Liguori cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, 896*019d6b8fSAnthony Liguori out_len, 0, 0); 897*019d6b8fSAnthony Liguori cluster_offset &= s->cluster_offset_mask; 898*019d6b8fSAnthony Liguori if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len) { 899*019d6b8fSAnthony Liguori qemu_free(out_buf); 900*019d6b8fSAnthony Liguori return -1; 901*019d6b8fSAnthony Liguori } 902*019d6b8fSAnthony Liguori } 903*019d6b8fSAnthony Liguori 904*019d6b8fSAnthony Liguori qemu_free(out_buf); 905*019d6b8fSAnthony Liguori return 0; 906*019d6b8fSAnthony Liguori } 907*019d6b8fSAnthony Liguori 908*019d6b8fSAnthony Liguori static void qcow_flush(BlockDriverState *bs) 909*019d6b8fSAnthony Liguori { 910*019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 911*019d6b8fSAnthony Liguori bdrv_flush(s->hd); 912*019d6b8fSAnthony Liguori } 913*019d6b8fSAnthony Liguori 914*019d6b8fSAnthony Liguori static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 915*019d6b8fSAnthony Liguori { 916*019d6b8fSAnthony Liguori BDRVQcowState *s = bs->opaque; 917*019d6b8fSAnthony Liguori bdi->cluster_size = s->cluster_size; 918*019d6b8fSAnthony Liguori return 0; 919*019d6b8fSAnthony Liguori } 920*019d6b8fSAnthony Liguori 921*019d6b8fSAnthony Liguori static BlockDriver bdrv_qcow = { 922*019d6b8fSAnthony Liguori .format_name = "qcow", 923*019d6b8fSAnthony Liguori .instance_size = sizeof(BDRVQcowState), 924*019d6b8fSAnthony Liguori .bdrv_probe = qcow_probe, 925*019d6b8fSAnthony Liguori .bdrv_open = qcow_open, 926*019d6b8fSAnthony Liguori .bdrv_close = qcow_close, 927*019d6b8fSAnthony Liguori .bdrv_create = qcow_create, 928*019d6b8fSAnthony Liguori .bdrv_flush = qcow_flush, 929*019d6b8fSAnthony Liguori .bdrv_is_allocated = qcow_is_allocated, 930*019d6b8fSAnthony Liguori .bdrv_set_key = qcow_set_key, 931*019d6b8fSAnthony Liguori .bdrv_make_empty = qcow_make_empty, 932*019d6b8fSAnthony Liguori .bdrv_aio_readv = qcow_aio_readv, 933*019d6b8fSAnthony Liguori .bdrv_aio_writev = qcow_aio_writev, 934*019d6b8fSAnthony Liguori .bdrv_aio_cancel = qcow_aio_cancel, 935*019d6b8fSAnthony Liguori .aiocb_size = sizeof(QCowAIOCB), 936*019d6b8fSAnthony Liguori .bdrv_write_compressed = qcow_write_compressed, 937*019d6b8fSAnthony Liguori .bdrv_get_info = qcow_get_info, 938*019d6b8fSAnthony Liguori }; 939*019d6b8fSAnthony Liguori 940*019d6b8fSAnthony Liguori static void bdrv_qcow_init(void) 941*019d6b8fSAnthony Liguori { 942*019d6b8fSAnthony Liguori bdrv_register(&bdrv_qcow); 943*019d6b8fSAnthony Liguori } 944*019d6b8fSAnthony Liguori 945*019d6b8fSAnthony Liguori block_init(bdrv_qcow_init); 946