149381094SKevin Wolf /* 249381094SKevin Wolf * L2/refcount table cache for the QCOW2 format 349381094SKevin Wolf * 449381094SKevin Wolf * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com> 549381094SKevin Wolf * 649381094SKevin Wolf * Permission is hereby granted, free of charge, to any person obtaining a copy 749381094SKevin Wolf * of this software and associated documentation files (the "Software"), to deal 849381094SKevin Wolf * in the Software without restriction, including without limitation the rights 949381094SKevin Wolf * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 1049381094SKevin Wolf * copies of the Software, and to permit persons to whom the Software is 1149381094SKevin Wolf * furnished to do so, subject to the following conditions: 1249381094SKevin Wolf * 1349381094SKevin Wolf * The above copyright notice and this permission notice shall be included in 1449381094SKevin Wolf * all copies or substantial portions of the Software. 1549381094SKevin Wolf * 1649381094SKevin Wolf * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1749381094SKevin Wolf * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1849381094SKevin Wolf * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1949381094SKevin Wolf * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 2049381094SKevin Wolf * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 2149381094SKevin Wolf * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 2249381094SKevin Wolf * THE SOFTWARE. 2349381094SKevin Wolf */ 2449381094SKevin Wolf 2549381094SKevin Wolf #include "block_int.h" 2649381094SKevin Wolf #include "qemu-common.h" 2749381094SKevin Wolf #include "qcow2.h" 283cce16f4SKevin Wolf #include "trace.h" 2949381094SKevin Wolf 3049381094SKevin Wolf typedef struct Qcow2CachedTable { 3149381094SKevin Wolf void* table; 3249381094SKevin Wolf int64_t offset; 3349381094SKevin Wolf bool dirty; 3449381094SKevin Wolf int cache_hits; 3549381094SKevin Wolf int ref; 3649381094SKevin Wolf } Qcow2CachedTable; 3749381094SKevin Wolf 3849381094SKevin Wolf struct Qcow2Cache { 3949381094SKevin Wolf Qcow2CachedTable* entries; 4049381094SKevin Wolf struct Qcow2Cache* depends; 41bf595021SJes Sorensen int size; 423de0a294SKevin Wolf bool depends_on_flush; 4349381094SKevin Wolf }; 4449381094SKevin Wolf 45*6af4e9eaSPaolo Bonzini Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) 4649381094SKevin Wolf { 4749381094SKevin Wolf BDRVQcowState *s = bs->opaque; 4849381094SKevin Wolf Qcow2Cache *c; 4949381094SKevin Wolf int i; 5049381094SKevin Wolf 517267c094SAnthony Liguori c = g_malloc0(sizeof(*c)); 5249381094SKevin Wolf c->size = num_tables; 537267c094SAnthony Liguori c->entries = g_malloc0(sizeof(*c->entries) * num_tables); 5449381094SKevin Wolf 5549381094SKevin Wolf for (i = 0; i < c->size; i++) { 5649381094SKevin Wolf c->entries[i].table = qemu_blockalign(bs, s->cluster_size); 5749381094SKevin Wolf } 5849381094SKevin Wolf 5949381094SKevin Wolf return c; 6049381094SKevin Wolf } 6149381094SKevin Wolf 6249381094SKevin Wolf int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c) 6349381094SKevin Wolf { 6449381094SKevin Wolf int i; 6549381094SKevin Wolf 6649381094SKevin Wolf for (i = 0; i < c->size; i++) { 6749381094SKevin Wolf assert(c->entries[i].ref == 0); 6849381094SKevin Wolf qemu_vfree(c->entries[i].table); 6949381094SKevin Wolf } 7049381094SKevin Wolf 717267c094SAnthony Liguori g_free(c->entries); 727267c094SAnthony Liguori g_free(c); 7349381094SKevin Wolf 7449381094SKevin Wolf return 0; 7549381094SKevin Wolf } 7649381094SKevin Wolf 7749381094SKevin Wolf static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c) 7849381094SKevin Wolf { 7949381094SKevin Wolf int ret; 8049381094SKevin Wolf 8149381094SKevin Wolf ret = qcow2_cache_flush(bs, c->depends); 8249381094SKevin Wolf if (ret < 0) { 8349381094SKevin Wolf return ret; 8449381094SKevin Wolf } 8549381094SKevin Wolf 8649381094SKevin Wolf c->depends = NULL; 873de0a294SKevin Wolf c->depends_on_flush = false; 883de0a294SKevin Wolf 8949381094SKevin Wolf return 0; 9049381094SKevin Wolf } 9149381094SKevin Wolf 9249381094SKevin Wolf static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) 9349381094SKevin Wolf { 9449381094SKevin Wolf BDRVQcowState *s = bs->opaque; 953de0a294SKevin Wolf int ret = 0; 9649381094SKevin Wolf 9749381094SKevin Wolf if (!c->entries[i].dirty || !c->entries[i].offset) { 9849381094SKevin Wolf return 0; 9949381094SKevin Wolf } 10049381094SKevin Wolf 1013cce16f4SKevin Wolf trace_qcow2_cache_entry_flush(qemu_coroutine_self(), 1023cce16f4SKevin Wolf c == s->l2_table_cache, i); 1033cce16f4SKevin Wolf 10449381094SKevin Wolf if (c->depends) { 10549381094SKevin Wolf ret = qcow2_cache_flush_dependency(bs, c); 1063de0a294SKevin Wolf } else if (c->depends_on_flush) { 1073de0a294SKevin Wolf ret = bdrv_flush(bs->file); 1083de0a294SKevin Wolf if (ret >= 0) { 1093de0a294SKevin Wolf c->depends_on_flush = false; 1103de0a294SKevin Wolf } 1113de0a294SKevin Wolf } 1123de0a294SKevin Wolf 11349381094SKevin Wolf if (ret < 0) { 11449381094SKevin Wolf return ret; 11549381094SKevin Wolf } 11649381094SKevin Wolf 11729c1a730SKevin Wolf if (c == s->refcount_block_cache) { 11829c1a730SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART); 11929c1a730SKevin Wolf } else if (c == s->l2_table_cache) { 12029c1a730SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); 12129c1a730SKevin Wolf } 12229c1a730SKevin Wolf 12349381094SKevin Wolf ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table, 12449381094SKevin Wolf s->cluster_size); 12549381094SKevin Wolf if (ret < 0) { 12649381094SKevin Wolf return ret; 12749381094SKevin Wolf } 12849381094SKevin Wolf 12949381094SKevin Wolf c->entries[i].dirty = false; 13049381094SKevin Wolf 13149381094SKevin Wolf return 0; 13249381094SKevin Wolf } 13349381094SKevin Wolf 13449381094SKevin Wolf int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c) 13549381094SKevin Wolf { 1363cce16f4SKevin Wolf BDRVQcowState *s = bs->opaque; 13749381094SKevin Wolf int result = 0; 13849381094SKevin Wolf int ret; 13949381094SKevin Wolf int i; 14049381094SKevin Wolf 1413cce16f4SKevin Wolf trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache); 1423cce16f4SKevin Wolf 14349381094SKevin Wolf for (i = 0; i < c->size; i++) { 14449381094SKevin Wolf ret = qcow2_cache_entry_flush(bs, c, i); 14549381094SKevin Wolf if (ret < 0 && result != -ENOSPC) { 14649381094SKevin Wolf result = ret; 14749381094SKevin Wolf } 14849381094SKevin Wolf } 14949381094SKevin Wolf 15049381094SKevin Wolf if (result == 0) { 15149381094SKevin Wolf ret = bdrv_flush(bs->file); 15249381094SKevin Wolf if (ret < 0) { 15349381094SKevin Wolf result = ret; 15449381094SKevin Wolf } 15549381094SKevin Wolf } 15649381094SKevin Wolf 15749381094SKevin Wolf return result; 15849381094SKevin Wolf } 15949381094SKevin Wolf 16049381094SKevin Wolf int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, 16149381094SKevin Wolf Qcow2Cache *dependency) 16249381094SKevin Wolf { 16349381094SKevin Wolf int ret; 16449381094SKevin Wolf 16549381094SKevin Wolf if (dependency->depends) { 16649381094SKevin Wolf ret = qcow2_cache_flush_dependency(bs, dependency); 16749381094SKevin Wolf if (ret < 0) { 16849381094SKevin Wolf return ret; 16949381094SKevin Wolf } 17049381094SKevin Wolf } 17149381094SKevin Wolf 17249381094SKevin Wolf if (c->depends && (c->depends != dependency)) { 17349381094SKevin Wolf ret = qcow2_cache_flush_dependency(bs, c); 17449381094SKevin Wolf if (ret < 0) { 17549381094SKevin Wolf return ret; 17649381094SKevin Wolf } 17749381094SKevin Wolf } 17849381094SKevin Wolf 17949381094SKevin Wolf c->depends = dependency; 18049381094SKevin Wolf return 0; 18149381094SKevin Wolf } 18249381094SKevin Wolf 1833de0a294SKevin Wolf void qcow2_cache_depends_on_flush(Qcow2Cache *c) 1843de0a294SKevin Wolf { 1853de0a294SKevin Wolf c->depends_on_flush = true; 1863de0a294SKevin Wolf } 1873de0a294SKevin Wolf 18849381094SKevin Wolf static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c) 18949381094SKevin Wolf { 19049381094SKevin Wolf int i; 19149381094SKevin Wolf int min_count = INT_MAX; 19249381094SKevin Wolf int min_index = -1; 19349381094SKevin Wolf 19449381094SKevin Wolf 19549381094SKevin Wolf for (i = 0; i < c->size; i++) { 19649381094SKevin Wolf if (c->entries[i].ref) { 19749381094SKevin Wolf continue; 19849381094SKevin Wolf } 19949381094SKevin Wolf 20049381094SKevin Wolf if (c->entries[i].cache_hits < min_count) { 20149381094SKevin Wolf min_index = i; 20249381094SKevin Wolf min_count = c->entries[i].cache_hits; 20349381094SKevin Wolf } 20449381094SKevin Wolf 20549381094SKevin Wolf /* Give newer hits priority */ 20649381094SKevin Wolf /* TODO Check how to optimize the replacement strategy */ 20749381094SKevin Wolf c->entries[i].cache_hits /= 2; 20849381094SKevin Wolf } 20949381094SKevin Wolf 21049381094SKevin Wolf if (min_index == -1) { 21149381094SKevin Wolf /* This can't happen in current synchronous code, but leave the check 21249381094SKevin Wolf * here as a reminder for whoever starts using AIO with the cache */ 21349381094SKevin Wolf abort(); 21449381094SKevin Wolf } 21549381094SKevin Wolf return min_index; 21649381094SKevin Wolf } 21749381094SKevin Wolf 21849381094SKevin Wolf static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, 21949381094SKevin Wolf uint64_t offset, void **table, bool read_from_disk) 22049381094SKevin Wolf { 22149381094SKevin Wolf BDRVQcowState *s = bs->opaque; 22249381094SKevin Wolf int i; 22349381094SKevin Wolf int ret; 22449381094SKevin Wolf 2253cce16f4SKevin Wolf trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache, 2263cce16f4SKevin Wolf offset, read_from_disk); 2273cce16f4SKevin Wolf 22849381094SKevin Wolf /* Check if the table is already cached */ 22949381094SKevin Wolf for (i = 0; i < c->size; i++) { 23049381094SKevin Wolf if (c->entries[i].offset == offset) { 23149381094SKevin Wolf goto found; 23249381094SKevin Wolf } 23349381094SKevin Wolf } 23449381094SKevin Wolf 23549381094SKevin Wolf /* If not, write a table back and replace it */ 23649381094SKevin Wolf i = qcow2_cache_find_entry_to_replace(c); 2373cce16f4SKevin Wolf trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(), 2383cce16f4SKevin Wolf c == s->l2_table_cache, i); 23949381094SKevin Wolf if (i < 0) { 24049381094SKevin Wolf return i; 24149381094SKevin Wolf } 24249381094SKevin Wolf 24349381094SKevin Wolf ret = qcow2_cache_entry_flush(bs, c, i); 24449381094SKevin Wolf if (ret < 0) { 24549381094SKevin Wolf return ret; 24649381094SKevin Wolf } 24749381094SKevin Wolf 2483cce16f4SKevin Wolf trace_qcow2_cache_get_read(qemu_coroutine_self(), 2493cce16f4SKevin Wolf c == s->l2_table_cache, i); 25049381094SKevin Wolf c->entries[i].offset = 0; 25149381094SKevin Wolf if (read_from_disk) { 25229c1a730SKevin Wolf if (c == s->l2_table_cache) { 25329c1a730SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); 25429c1a730SKevin Wolf } 25529c1a730SKevin Wolf 25649381094SKevin Wolf ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size); 25749381094SKevin Wolf if (ret < 0) { 25849381094SKevin Wolf return ret; 25949381094SKevin Wolf } 26049381094SKevin Wolf } 26149381094SKevin Wolf 26249381094SKevin Wolf /* Give the table some hits for the start so that it won't be replaced 26349381094SKevin Wolf * immediately. The number 32 is completely arbitrary. */ 26449381094SKevin Wolf c->entries[i].cache_hits = 32; 26549381094SKevin Wolf c->entries[i].offset = offset; 26649381094SKevin Wolf 26749381094SKevin Wolf /* And return the right table */ 26849381094SKevin Wolf found: 26949381094SKevin Wolf c->entries[i].cache_hits++; 27049381094SKevin Wolf c->entries[i].ref++; 27149381094SKevin Wolf *table = c->entries[i].table; 2723cce16f4SKevin Wolf 2733cce16f4SKevin Wolf trace_qcow2_cache_get_done(qemu_coroutine_self(), 2743cce16f4SKevin Wolf c == s->l2_table_cache, i); 2753cce16f4SKevin Wolf 27649381094SKevin Wolf return 0; 27749381094SKevin Wolf } 27849381094SKevin Wolf 27949381094SKevin Wolf int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 28049381094SKevin Wolf void **table) 28149381094SKevin Wolf { 28249381094SKevin Wolf return qcow2_cache_do_get(bs, c, offset, table, true); 28349381094SKevin Wolf } 28449381094SKevin Wolf 28549381094SKevin Wolf int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 28649381094SKevin Wolf void **table) 28749381094SKevin Wolf { 28849381094SKevin Wolf return qcow2_cache_do_get(bs, c, offset, table, false); 28949381094SKevin Wolf } 29049381094SKevin Wolf 29149381094SKevin Wolf int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table) 29249381094SKevin Wolf { 29349381094SKevin Wolf int i; 29449381094SKevin Wolf 29549381094SKevin Wolf for (i = 0; i < c->size; i++) { 29649381094SKevin Wolf if (c->entries[i].table == *table) { 29749381094SKevin Wolf goto found; 29849381094SKevin Wolf } 29949381094SKevin Wolf } 30049381094SKevin Wolf return -ENOENT; 30149381094SKevin Wolf 30249381094SKevin Wolf found: 30349381094SKevin Wolf c->entries[i].ref--; 30449381094SKevin Wolf *table = NULL; 30549381094SKevin Wolf 30649381094SKevin Wolf assert(c->entries[i].ref >= 0); 30749381094SKevin Wolf return 0; 30849381094SKevin Wolf } 30949381094SKevin Wolf 31049381094SKevin Wolf void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table) 31149381094SKevin Wolf { 31249381094SKevin Wolf int i; 31349381094SKevin Wolf 31449381094SKevin Wolf for (i = 0; i < c->size; i++) { 31549381094SKevin Wolf if (c->entries[i].table == table) { 31649381094SKevin Wolf goto found; 31749381094SKevin Wolf } 31849381094SKevin Wolf } 31949381094SKevin Wolf abort(); 32049381094SKevin Wolf 32149381094SKevin Wolf found: 32249381094SKevin Wolf c->entries[i].dirty = true; 32349381094SKevin Wolf } 324