149381094SKevin Wolf /* 249381094SKevin Wolf * L2/refcount table cache for the QCOW2 format 349381094SKevin Wolf * 449381094SKevin Wolf * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com> 549381094SKevin Wolf * 649381094SKevin Wolf * Permission is hereby granted, free of charge, to any person obtaining a copy 749381094SKevin Wolf * of this software and associated documentation files (the "Software"), to deal 849381094SKevin Wolf * in the Software without restriction, including without limitation the rights 949381094SKevin Wolf * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 1049381094SKevin Wolf * copies of the Software, and to permit persons to whom the Software is 1149381094SKevin Wolf * furnished to do so, subject to the following conditions: 1249381094SKevin Wolf * 1349381094SKevin Wolf * The above copyright notice and this permission notice shall be included in 1449381094SKevin Wolf * all copies or substantial portions of the Software. 1549381094SKevin Wolf * 1649381094SKevin Wolf * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1749381094SKevin Wolf * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1849381094SKevin Wolf * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1949381094SKevin Wolf * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 2049381094SKevin Wolf * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 2149381094SKevin Wolf * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 2249381094SKevin Wolf * THE SOFTWARE. 2349381094SKevin Wolf */ 2449381094SKevin Wolf 25737e150eSPaolo Bonzini #include "block/block_int.h" 2649381094SKevin Wolf #include "qemu-common.h" 2749381094SKevin Wolf #include "qcow2.h" 283cce16f4SKevin Wolf #include "trace.h" 2949381094SKevin Wolf 3049381094SKevin Wolf typedef struct Qcow2CachedTable { 3149381094SKevin Wolf int64_t offset; 3249381094SKevin Wolf bool dirty; 332693310eSAlberto Garcia uint64_t lru_counter; 3449381094SKevin Wolf int ref; 3549381094SKevin Wolf } Qcow2CachedTable; 3649381094SKevin Wolf 3749381094SKevin Wolf struct Qcow2Cache { 3849381094SKevin Wolf Qcow2CachedTable* entries; 3949381094SKevin Wolf struct Qcow2Cache* depends; 40bf595021SJes Sorensen int size; 413de0a294SKevin Wolf bool depends_on_flush; 4272e80b89SAlberto Garcia void *table_array; 432693310eSAlberto Garcia uint64_t lru_counter; 4449381094SKevin Wolf }; 4549381094SKevin Wolf 4672e80b89SAlberto Garcia static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs, 4772e80b89SAlberto Garcia Qcow2Cache *c, int table) 4872e80b89SAlberto Garcia { 4972e80b89SAlberto Garcia BDRVQcowState *s = bs->opaque; 5072e80b89SAlberto Garcia return (uint8_t *) c->table_array + (size_t) table * s->cluster_size; 5172e80b89SAlberto Garcia } 5272e80b89SAlberto Garcia 53baf07d60SAlberto Garcia static inline int qcow2_cache_get_table_idx(BlockDriverState *bs, 54baf07d60SAlberto Garcia Qcow2Cache *c, void *table) 55baf07d60SAlberto Garcia { 56baf07d60SAlberto Garcia BDRVQcowState *s = bs->opaque; 57baf07d60SAlberto Garcia ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array; 58baf07d60SAlberto Garcia int idx = table_offset / s->cluster_size; 59baf07d60SAlberto Garcia assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0); 60baf07d60SAlberto Garcia return idx; 61baf07d60SAlberto Garcia } 62baf07d60SAlberto Garcia 636af4e9eaSPaolo Bonzini Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) 6449381094SKevin Wolf { 6549381094SKevin Wolf BDRVQcowState *s = bs->opaque; 6649381094SKevin Wolf Qcow2Cache *c; 6749381094SKevin Wolf 6802004bd4SMax Reitz c = g_new0(Qcow2Cache, 1); 6949381094SKevin Wolf c->size = num_tables; 7002004bd4SMax Reitz c->entries = g_try_new0(Qcow2CachedTable, num_tables); 7172e80b89SAlberto Garcia c->table_array = qemu_try_blockalign(bs->file, 7272e80b89SAlberto Garcia (size_t) num_tables * s->cluster_size); 7349381094SKevin Wolf 7472e80b89SAlberto Garcia if (!c->entries || !c->table_array) { 7572e80b89SAlberto Garcia qemu_vfree(c->table_array); 7672e80b89SAlberto Garcia g_free(c->entries); 7772e80b89SAlberto Garcia g_free(c); 7872e80b89SAlberto Garcia c = NULL; 7949381094SKevin Wolf } 8049381094SKevin Wolf 8149381094SKevin Wolf return c; 8249381094SKevin Wolf } 8349381094SKevin Wolf 8449381094SKevin Wolf int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c) 8549381094SKevin Wolf { 8649381094SKevin Wolf int i; 8749381094SKevin Wolf 8849381094SKevin Wolf for (i = 0; i < c->size; i++) { 8949381094SKevin Wolf assert(c->entries[i].ref == 0); 9049381094SKevin Wolf } 9149381094SKevin Wolf 9272e80b89SAlberto Garcia qemu_vfree(c->table_array); 937267c094SAnthony Liguori g_free(c->entries); 947267c094SAnthony Liguori g_free(c); 9549381094SKevin Wolf 9649381094SKevin Wolf return 0; 9749381094SKevin Wolf } 9849381094SKevin Wolf 9949381094SKevin Wolf static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c) 10049381094SKevin Wolf { 10149381094SKevin Wolf int ret; 10249381094SKevin Wolf 10349381094SKevin Wolf ret = qcow2_cache_flush(bs, c->depends); 10449381094SKevin Wolf if (ret < 0) { 10549381094SKevin Wolf return ret; 10649381094SKevin Wolf } 10749381094SKevin Wolf 10849381094SKevin Wolf c->depends = NULL; 1093de0a294SKevin Wolf c->depends_on_flush = false; 1103de0a294SKevin Wolf 11149381094SKevin Wolf return 0; 11249381094SKevin Wolf } 11349381094SKevin Wolf 11449381094SKevin Wolf static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) 11549381094SKevin Wolf { 11649381094SKevin Wolf BDRVQcowState *s = bs->opaque; 1173de0a294SKevin Wolf int ret = 0; 11849381094SKevin Wolf 11949381094SKevin Wolf if (!c->entries[i].dirty || !c->entries[i].offset) { 12049381094SKevin Wolf return 0; 12149381094SKevin Wolf } 12249381094SKevin Wolf 1233cce16f4SKevin Wolf trace_qcow2_cache_entry_flush(qemu_coroutine_self(), 1243cce16f4SKevin Wolf c == s->l2_table_cache, i); 1253cce16f4SKevin Wolf 12649381094SKevin Wolf if (c->depends) { 12749381094SKevin Wolf ret = qcow2_cache_flush_dependency(bs, c); 1283de0a294SKevin Wolf } else if (c->depends_on_flush) { 1293de0a294SKevin Wolf ret = bdrv_flush(bs->file); 1303de0a294SKevin Wolf if (ret >= 0) { 1313de0a294SKevin Wolf c->depends_on_flush = false; 1323de0a294SKevin Wolf } 1333de0a294SKevin Wolf } 1343de0a294SKevin Wolf 13549381094SKevin Wolf if (ret < 0) { 13649381094SKevin Wolf return ret; 13749381094SKevin Wolf } 13849381094SKevin Wolf 13929c1a730SKevin Wolf if (c == s->refcount_block_cache) { 140231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK, 141cf93980eSMax Reitz c->entries[i].offset, s->cluster_size); 142cf93980eSMax Reitz } else if (c == s->l2_table_cache) { 143231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2, 144cf93980eSMax Reitz c->entries[i].offset, s->cluster_size); 145cf93980eSMax Reitz } else { 146231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, 0, 147cf93980eSMax Reitz c->entries[i].offset, s->cluster_size); 148cf93980eSMax Reitz } 149cf93980eSMax Reitz 150cf93980eSMax Reitz if (ret < 0) { 151cf93980eSMax Reitz return ret; 152cf93980eSMax Reitz } 153cf93980eSMax Reitz 154cf93980eSMax Reitz if (c == s->refcount_block_cache) { 15529c1a730SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART); 15629c1a730SKevin Wolf } else if (c == s->l2_table_cache) { 15729c1a730SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); 15829c1a730SKevin Wolf } 15929c1a730SKevin Wolf 16072e80b89SAlberto Garcia ret = bdrv_pwrite(bs->file, c->entries[i].offset, 16172e80b89SAlberto Garcia qcow2_cache_get_table_addr(bs, c, i), s->cluster_size); 16249381094SKevin Wolf if (ret < 0) { 16349381094SKevin Wolf return ret; 16449381094SKevin Wolf } 16549381094SKevin Wolf 16649381094SKevin Wolf c->entries[i].dirty = false; 16749381094SKevin Wolf 16849381094SKevin Wolf return 0; 16949381094SKevin Wolf } 17049381094SKevin Wolf 17149381094SKevin Wolf int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c) 17249381094SKevin Wolf { 1733cce16f4SKevin Wolf BDRVQcowState *s = bs->opaque; 17449381094SKevin Wolf int result = 0; 17549381094SKevin Wolf int ret; 17649381094SKevin Wolf int i; 17749381094SKevin Wolf 1783cce16f4SKevin Wolf trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache); 1793cce16f4SKevin Wolf 18049381094SKevin Wolf for (i = 0; i < c->size; i++) { 18149381094SKevin Wolf ret = qcow2_cache_entry_flush(bs, c, i); 18249381094SKevin Wolf if (ret < 0 && result != -ENOSPC) { 18349381094SKevin Wolf result = ret; 18449381094SKevin Wolf } 18549381094SKevin Wolf } 18649381094SKevin Wolf 18749381094SKevin Wolf if (result == 0) { 18849381094SKevin Wolf ret = bdrv_flush(bs->file); 18949381094SKevin Wolf if (ret < 0) { 19049381094SKevin Wolf result = ret; 19149381094SKevin Wolf } 19249381094SKevin Wolf } 19349381094SKevin Wolf 19449381094SKevin Wolf return result; 19549381094SKevin Wolf } 19649381094SKevin Wolf 19749381094SKevin Wolf int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, 19849381094SKevin Wolf Qcow2Cache *dependency) 19949381094SKevin Wolf { 20049381094SKevin Wolf int ret; 20149381094SKevin Wolf 20249381094SKevin Wolf if (dependency->depends) { 20349381094SKevin Wolf ret = qcow2_cache_flush_dependency(bs, dependency); 20449381094SKevin Wolf if (ret < 0) { 20549381094SKevin Wolf return ret; 20649381094SKevin Wolf } 20749381094SKevin Wolf } 20849381094SKevin Wolf 20949381094SKevin Wolf if (c->depends && (c->depends != dependency)) { 21049381094SKevin Wolf ret = qcow2_cache_flush_dependency(bs, c); 21149381094SKevin Wolf if (ret < 0) { 21249381094SKevin Wolf return ret; 21349381094SKevin Wolf } 21449381094SKevin Wolf } 21549381094SKevin Wolf 21649381094SKevin Wolf c->depends = dependency; 21749381094SKevin Wolf return 0; 21849381094SKevin Wolf } 21949381094SKevin Wolf 2203de0a294SKevin Wolf void qcow2_cache_depends_on_flush(Qcow2Cache *c) 2213de0a294SKevin Wolf { 2223de0a294SKevin Wolf c->depends_on_flush = true; 2233de0a294SKevin Wolf } 2243de0a294SKevin Wolf 225e7108feaSMax Reitz int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c) 226e7108feaSMax Reitz { 227e7108feaSMax Reitz int ret, i; 228e7108feaSMax Reitz 229e7108feaSMax Reitz ret = qcow2_cache_flush(bs, c); 230e7108feaSMax Reitz if (ret < 0) { 231e7108feaSMax Reitz return ret; 232e7108feaSMax Reitz } 233e7108feaSMax Reitz 234e7108feaSMax Reitz for (i = 0; i < c->size; i++) { 235e7108feaSMax Reitz assert(c->entries[i].ref == 0); 236e7108feaSMax Reitz c->entries[i].offset = 0; 2372693310eSAlberto Garcia c->entries[i].lru_counter = 0; 238e7108feaSMax Reitz } 239e7108feaSMax Reitz 2402693310eSAlberto Garcia c->lru_counter = 0; 2412693310eSAlberto Garcia 242e7108feaSMax Reitz return 0; 243e7108feaSMax Reitz } 244e7108feaSMax Reitz 24549381094SKevin Wolf static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, 24649381094SKevin Wolf uint64_t offset, void **table, bool read_from_disk) 24749381094SKevin Wolf { 24849381094SKevin Wolf BDRVQcowState *s = bs->opaque; 24949381094SKevin Wolf int i; 25049381094SKevin Wolf int ret; 251812e4082SAlberto Garcia int lookup_index; 252fdfbca82SAlberto Garcia uint64_t min_lru_counter = UINT64_MAX; 253fdfbca82SAlberto Garcia int min_lru_index = -1; 25449381094SKevin Wolf 2553cce16f4SKevin Wolf trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache, 2563cce16f4SKevin Wolf offset, read_from_disk); 2573cce16f4SKevin Wolf 25849381094SKevin Wolf /* Check if the table is already cached */ 259812e4082SAlberto Garcia i = lookup_index = (offset / s->cluster_size * 4) % c->size; 260812e4082SAlberto Garcia do { 261fdfbca82SAlberto Garcia const Qcow2CachedTable *t = &c->entries[i]; 262fdfbca82SAlberto Garcia if (t->offset == offset) { 26349381094SKevin Wolf goto found; 26449381094SKevin Wolf } 265fdfbca82SAlberto Garcia if (t->ref == 0 && t->lru_counter < min_lru_counter) { 266fdfbca82SAlberto Garcia min_lru_counter = t->lru_counter; 267fdfbca82SAlberto Garcia min_lru_index = i; 268fdfbca82SAlberto Garcia } 269812e4082SAlberto Garcia if (++i == c->size) { 270812e4082SAlberto Garcia i = 0; 27149381094SKevin Wolf } 272812e4082SAlberto Garcia } while (i != lookup_index); 27349381094SKevin Wolf 274fdfbca82SAlberto Garcia if (min_lru_index == -1) { 275fdfbca82SAlberto Garcia /* This can't happen in current synchronous code, but leave the check 276fdfbca82SAlberto Garcia * here as a reminder for whoever starts using AIO with the cache */ 277fdfbca82SAlberto Garcia abort(); 278fdfbca82SAlberto Garcia } 279fdfbca82SAlberto Garcia 280fdfbca82SAlberto Garcia /* Cache miss: write a table back and replace it */ 281fdfbca82SAlberto Garcia i = min_lru_index; 2823cce16f4SKevin Wolf trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(), 2833cce16f4SKevin Wolf c == s->l2_table_cache, i); 28449381094SKevin Wolf if (i < 0) { 28549381094SKevin Wolf return i; 28649381094SKevin Wolf } 28749381094SKevin Wolf 28849381094SKevin Wolf ret = qcow2_cache_entry_flush(bs, c, i); 28949381094SKevin Wolf if (ret < 0) { 29049381094SKevin Wolf return ret; 29149381094SKevin Wolf } 29249381094SKevin Wolf 2933cce16f4SKevin Wolf trace_qcow2_cache_get_read(qemu_coroutine_self(), 2943cce16f4SKevin Wolf c == s->l2_table_cache, i); 29549381094SKevin Wolf c->entries[i].offset = 0; 29649381094SKevin Wolf if (read_from_disk) { 29729c1a730SKevin Wolf if (c == s->l2_table_cache) { 29829c1a730SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); 29929c1a730SKevin Wolf } 30029c1a730SKevin Wolf 30172e80b89SAlberto Garcia ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i), 30272e80b89SAlberto Garcia s->cluster_size); 30349381094SKevin Wolf if (ret < 0) { 30449381094SKevin Wolf return ret; 30549381094SKevin Wolf } 30649381094SKevin Wolf } 30749381094SKevin Wolf 30849381094SKevin Wolf c->entries[i].offset = offset; 30949381094SKevin Wolf 31049381094SKevin Wolf /* And return the right table */ 31149381094SKevin Wolf found: 31249381094SKevin Wolf c->entries[i].ref++; 31372e80b89SAlberto Garcia *table = qcow2_cache_get_table_addr(bs, c, i); 3143cce16f4SKevin Wolf 3153cce16f4SKevin Wolf trace_qcow2_cache_get_done(qemu_coroutine_self(), 3163cce16f4SKevin Wolf c == s->l2_table_cache, i); 3173cce16f4SKevin Wolf 31849381094SKevin Wolf return 0; 31949381094SKevin Wolf } 32049381094SKevin Wolf 32149381094SKevin Wolf int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 32249381094SKevin Wolf void **table) 32349381094SKevin Wolf { 32449381094SKevin Wolf return qcow2_cache_do_get(bs, c, offset, table, true); 32549381094SKevin Wolf } 32649381094SKevin Wolf 32749381094SKevin Wolf int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 32849381094SKevin Wolf void **table) 32949381094SKevin Wolf { 33049381094SKevin Wolf return qcow2_cache_do_get(bs, c, offset, table, false); 33149381094SKevin Wolf } 33249381094SKevin Wolf 333*a3f1afb4SAlberto Garcia void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table) 33449381094SKevin Wolf { 335baf07d60SAlberto Garcia int i = qcow2_cache_get_table_idx(bs, c, *table); 33649381094SKevin Wolf 33749381094SKevin Wolf c->entries[i].ref--; 33849381094SKevin Wolf *table = NULL; 33949381094SKevin Wolf 3402693310eSAlberto Garcia if (c->entries[i].ref == 0) { 3412693310eSAlberto Garcia c->entries[i].lru_counter = ++c->lru_counter; 3422693310eSAlberto Garcia } 3432693310eSAlberto Garcia 34449381094SKevin Wolf assert(c->entries[i].ref >= 0); 34549381094SKevin Wolf } 34649381094SKevin Wolf 34772e80b89SAlberto Garcia void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c, 34872e80b89SAlberto Garcia void *table) 34949381094SKevin Wolf { 350baf07d60SAlberto Garcia int i = qcow2_cache_get_table_idx(bs, c, table); 351baf07d60SAlberto Garcia assert(c->entries[i].offset != 0); 35249381094SKevin Wolf c->entries[i].dirty = true; 35349381094SKevin Wolf } 354