149381094SKevin Wolf /* 249381094SKevin Wolf * L2/refcount table cache for the QCOW2 format 349381094SKevin Wolf * 449381094SKevin Wolf * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com> 549381094SKevin Wolf * 649381094SKevin Wolf * Permission is hereby granted, free of charge, to any person obtaining a copy 749381094SKevin Wolf * of this software and associated documentation files (the "Software"), to deal 849381094SKevin Wolf * in the Software without restriction, including without limitation the rights 949381094SKevin Wolf * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 1049381094SKevin Wolf * copies of the Software, and to permit persons to whom the Software is 1149381094SKevin Wolf * furnished to do so, subject to the following conditions: 1249381094SKevin Wolf * 1349381094SKevin Wolf * The above copyright notice and this permission notice shall be included in 1449381094SKevin Wolf * all copies or substantial portions of the Software. 1549381094SKevin Wolf * 1649381094SKevin Wolf * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1749381094SKevin Wolf * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1849381094SKevin Wolf * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1949381094SKevin Wolf * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 2049381094SKevin Wolf * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 2149381094SKevin Wolf * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 2249381094SKevin Wolf * THE SOFTWARE. 2349381094SKevin Wolf */ 2449381094SKevin Wolf 25737e150eSPaolo Bonzini #include "block/block_int.h" 2649381094SKevin Wolf #include "qemu-common.h" 2749381094SKevin Wolf #include "qcow2.h" 283cce16f4SKevin Wolf #include "trace.h" 2949381094SKevin Wolf 3049381094SKevin Wolf typedef struct Qcow2CachedTable { 3149381094SKevin Wolf int64_t offset; 3249381094SKevin Wolf bool dirty; 3349381094SKevin Wolf int cache_hits; 3449381094SKevin Wolf int ref; 3549381094SKevin Wolf } Qcow2CachedTable; 3649381094SKevin Wolf 3749381094SKevin Wolf struct Qcow2Cache { 3849381094SKevin Wolf Qcow2CachedTable* entries; 3949381094SKevin Wolf struct Qcow2Cache* depends; 40bf595021SJes Sorensen int size; 413de0a294SKevin Wolf bool depends_on_flush; 4272e80b89SAlberto Garcia void *table_array; 4349381094SKevin Wolf }; 4449381094SKevin Wolf 4572e80b89SAlberto Garcia static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs, 4672e80b89SAlberto Garcia Qcow2Cache *c, int table) 4772e80b89SAlberto Garcia { 4872e80b89SAlberto Garcia BDRVQcowState *s = bs->opaque; 4972e80b89SAlberto Garcia return (uint8_t *) c->table_array + (size_t) table * s->cluster_size; 5072e80b89SAlberto Garcia } 5172e80b89SAlberto Garcia 52*baf07d60SAlberto Garcia static inline int qcow2_cache_get_table_idx(BlockDriverState *bs, 53*baf07d60SAlberto Garcia Qcow2Cache *c, void *table) 54*baf07d60SAlberto Garcia { 55*baf07d60SAlberto Garcia BDRVQcowState *s = bs->opaque; 56*baf07d60SAlberto Garcia ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array; 57*baf07d60SAlberto Garcia int idx = table_offset / s->cluster_size; 58*baf07d60SAlberto Garcia assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0); 59*baf07d60SAlberto Garcia return idx; 60*baf07d60SAlberto Garcia } 61*baf07d60SAlberto Garcia 626af4e9eaSPaolo Bonzini Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) 6349381094SKevin Wolf { 6449381094SKevin Wolf BDRVQcowState *s = bs->opaque; 6549381094SKevin Wolf Qcow2Cache *c; 6649381094SKevin Wolf 6702004bd4SMax Reitz c = g_new0(Qcow2Cache, 1); 6849381094SKevin Wolf c->size = num_tables; 6902004bd4SMax Reitz c->entries = g_try_new0(Qcow2CachedTable, num_tables); 7072e80b89SAlberto Garcia c->table_array = qemu_try_blockalign(bs->file, 7172e80b89SAlberto Garcia (size_t) num_tables * s->cluster_size); 7249381094SKevin Wolf 7372e80b89SAlberto Garcia if (!c->entries || !c->table_array) { 7472e80b89SAlberto Garcia qemu_vfree(c->table_array); 7572e80b89SAlberto Garcia g_free(c->entries); 7672e80b89SAlberto Garcia g_free(c); 7772e80b89SAlberto Garcia c = NULL; 7849381094SKevin Wolf } 7949381094SKevin Wolf 8049381094SKevin Wolf return c; 8149381094SKevin Wolf } 8249381094SKevin Wolf 8349381094SKevin Wolf int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c) 8449381094SKevin Wolf { 8549381094SKevin Wolf int i; 8649381094SKevin Wolf 8749381094SKevin Wolf for (i = 0; i < c->size; i++) { 8849381094SKevin Wolf assert(c->entries[i].ref == 0); 8949381094SKevin Wolf } 9049381094SKevin Wolf 9172e80b89SAlberto Garcia qemu_vfree(c->table_array); 927267c094SAnthony Liguori g_free(c->entries); 937267c094SAnthony Liguori g_free(c); 9449381094SKevin Wolf 9549381094SKevin Wolf return 0; 9649381094SKevin Wolf } 9749381094SKevin Wolf 9849381094SKevin Wolf static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c) 9949381094SKevin Wolf { 10049381094SKevin Wolf int ret; 10149381094SKevin Wolf 10249381094SKevin Wolf ret = qcow2_cache_flush(bs, c->depends); 10349381094SKevin Wolf if (ret < 0) { 10449381094SKevin Wolf return ret; 10549381094SKevin Wolf } 10649381094SKevin Wolf 10749381094SKevin Wolf c->depends = NULL; 1083de0a294SKevin Wolf c->depends_on_flush = false; 1093de0a294SKevin Wolf 11049381094SKevin Wolf return 0; 11149381094SKevin Wolf } 11249381094SKevin Wolf 11349381094SKevin Wolf static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) 11449381094SKevin Wolf { 11549381094SKevin Wolf BDRVQcowState *s = bs->opaque; 1163de0a294SKevin Wolf int ret = 0; 11749381094SKevin Wolf 11849381094SKevin Wolf if (!c->entries[i].dirty || !c->entries[i].offset) { 11949381094SKevin Wolf return 0; 12049381094SKevin Wolf } 12149381094SKevin Wolf 1223cce16f4SKevin Wolf trace_qcow2_cache_entry_flush(qemu_coroutine_self(), 1233cce16f4SKevin Wolf c == s->l2_table_cache, i); 1243cce16f4SKevin Wolf 12549381094SKevin Wolf if (c->depends) { 12649381094SKevin Wolf ret = qcow2_cache_flush_dependency(bs, c); 1273de0a294SKevin Wolf } else if (c->depends_on_flush) { 1283de0a294SKevin Wolf ret = bdrv_flush(bs->file); 1293de0a294SKevin Wolf if (ret >= 0) { 1303de0a294SKevin Wolf c->depends_on_flush = false; 1313de0a294SKevin Wolf } 1323de0a294SKevin Wolf } 1333de0a294SKevin Wolf 13449381094SKevin Wolf if (ret < 0) { 13549381094SKevin Wolf return ret; 13649381094SKevin Wolf } 13749381094SKevin Wolf 13829c1a730SKevin Wolf if (c == s->refcount_block_cache) { 139231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK, 140cf93980eSMax Reitz c->entries[i].offset, s->cluster_size); 141cf93980eSMax Reitz } else if (c == s->l2_table_cache) { 142231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2, 143cf93980eSMax Reitz c->entries[i].offset, s->cluster_size); 144cf93980eSMax Reitz } else { 145231bb267SMax Reitz ret = qcow2_pre_write_overlap_check(bs, 0, 146cf93980eSMax Reitz c->entries[i].offset, s->cluster_size); 147cf93980eSMax Reitz } 148cf93980eSMax Reitz 149cf93980eSMax Reitz if (ret < 0) { 150cf93980eSMax Reitz return ret; 151cf93980eSMax Reitz } 152cf93980eSMax Reitz 153cf93980eSMax Reitz if (c == s->refcount_block_cache) { 15429c1a730SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART); 15529c1a730SKevin Wolf } else if (c == s->l2_table_cache) { 15629c1a730SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); 15729c1a730SKevin Wolf } 15829c1a730SKevin Wolf 15972e80b89SAlberto Garcia ret = bdrv_pwrite(bs->file, c->entries[i].offset, 16072e80b89SAlberto Garcia qcow2_cache_get_table_addr(bs, c, i), s->cluster_size); 16149381094SKevin Wolf if (ret < 0) { 16249381094SKevin Wolf return ret; 16349381094SKevin Wolf } 16449381094SKevin Wolf 16549381094SKevin Wolf c->entries[i].dirty = false; 16649381094SKevin Wolf 16749381094SKevin Wolf return 0; 16849381094SKevin Wolf } 16949381094SKevin Wolf 17049381094SKevin Wolf int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c) 17149381094SKevin Wolf { 1723cce16f4SKevin Wolf BDRVQcowState *s = bs->opaque; 17349381094SKevin Wolf int result = 0; 17449381094SKevin Wolf int ret; 17549381094SKevin Wolf int i; 17649381094SKevin Wolf 1773cce16f4SKevin Wolf trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache); 1783cce16f4SKevin Wolf 17949381094SKevin Wolf for (i = 0; i < c->size; i++) { 18049381094SKevin Wolf ret = qcow2_cache_entry_flush(bs, c, i); 18149381094SKevin Wolf if (ret < 0 && result != -ENOSPC) { 18249381094SKevin Wolf result = ret; 18349381094SKevin Wolf } 18449381094SKevin Wolf } 18549381094SKevin Wolf 18649381094SKevin Wolf if (result == 0) { 18749381094SKevin Wolf ret = bdrv_flush(bs->file); 18849381094SKevin Wolf if (ret < 0) { 18949381094SKevin Wolf result = ret; 19049381094SKevin Wolf } 19149381094SKevin Wolf } 19249381094SKevin Wolf 19349381094SKevin Wolf return result; 19449381094SKevin Wolf } 19549381094SKevin Wolf 19649381094SKevin Wolf int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, 19749381094SKevin Wolf Qcow2Cache *dependency) 19849381094SKevin Wolf { 19949381094SKevin Wolf int ret; 20049381094SKevin Wolf 20149381094SKevin Wolf if (dependency->depends) { 20249381094SKevin Wolf ret = qcow2_cache_flush_dependency(bs, dependency); 20349381094SKevin Wolf if (ret < 0) { 20449381094SKevin Wolf return ret; 20549381094SKevin Wolf } 20649381094SKevin Wolf } 20749381094SKevin Wolf 20849381094SKevin Wolf if (c->depends && (c->depends != dependency)) { 20949381094SKevin Wolf ret = qcow2_cache_flush_dependency(bs, c); 21049381094SKevin Wolf if (ret < 0) { 21149381094SKevin Wolf return ret; 21249381094SKevin Wolf } 21349381094SKevin Wolf } 21449381094SKevin Wolf 21549381094SKevin Wolf c->depends = dependency; 21649381094SKevin Wolf return 0; 21749381094SKevin Wolf } 21849381094SKevin Wolf 2193de0a294SKevin Wolf void qcow2_cache_depends_on_flush(Qcow2Cache *c) 2203de0a294SKevin Wolf { 2213de0a294SKevin Wolf c->depends_on_flush = true; 2223de0a294SKevin Wolf } 2233de0a294SKevin Wolf 224e7108feaSMax Reitz int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c) 225e7108feaSMax Reitz { 226e7108feaSMax Reitz int ret, i; 227e7108feaSMax Reitz 228e7108feaSMax Reitz ret = qcow2_cache_flush(bs, c); 229e7108feaSMax Reitz if (ret < 0) { 230e7108feaSMax Reitz return ret; 231e7108feaSMax Reitz } 232e7108feaSMax Reitz 233e7108feaSMax Reitz for (i = 0; i < c->size; i++) { 234e7108feaSMax Reitz assert(c->entries[i].ref == 0); 235e7108feaSMax Reitz c->entries[i].offset = 0; 236e7108feaSMax Reitz c->entries[i].cache_hits = 0; 237e7108feaSMax Reitz } 238e7108feaSMax Reitz 239e7108feaSMax Reitz return 0; 240e7108feaSMax Reitz } 241e7108feaSMax Reitz 24249381094SKevin Wolf static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c) 24349381094SKevin Wolf { 24449381094SKevin Wolf int i; 24549381094SKevin Wolf int min_count = INT_MAX; 24649381094SKevin Wolf int min_index = -1; 24749381094SKevin Wolf 24849381094SKevin Wolf 24949381094SKevin Wolf for (i = 0; i < c->size; i++) { 25049381094SKevin Wolf if (c->entries[i].ref) { 25149381094SKevin Wolf continue; 25249381094SKevin Wolf } 25349381094SKevin Wolf 25449381094SKevin Wolf if (c->entries[i].cache_hits < min_count) { 25549381094SKevin Wolf min_index = i; 25649381094SKevin Wolf min_count = c->entries[i].cache_hits; 25749381094SKevin Wolf } 25849381094SKevin Wolf 25949381094SKevin Wolf /* Give newer hits priority */ 26049381094SKevin Wolf /* TODO Check how to optimize the replacement strategy */ 2618e8cb375SAlberto Garcia if (c->entries[i].cache_hits > 1) { 26249381094SKevin Wolf c->entries[i].cache_hits /= 2; 26349381094SKevin Wolf } 2648e8cb375SAlberto Garcia } 26549381094SKevin Wolf 26649381094SKevin Wolf if (min_index == -1) { 26749381094SKevin Wolf /* This can't happen in current synchronous code, but leave the check 26849381094SKevin Wolf * here as a reminder for whoever starts using AIO with the cache */ 26949381094SKevin Wolf abort(); 27049381094SKevin Wolf } 27149381094SKevin Wolf return min_index; 27249381094SKevin Wolf } 27349381094SKevin Wolf 27449381094SKevin Wolf static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, 27549381094SKevin Wolf uint64_t offset, void **table, bool read_from_disk) 27649381094SKevin Wolf { 27749381094SKevin Wolf BDRVQcowState *s = bs->opaque; 27849381094SKevin Wolf int i; 27949381094SKevin Wolf int ret; 28049381094SKevin Wolf 2813cce16f4SKevin Wolf trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache, 2823cce16f4SKevin Wolf offset, read_from_disk); 2833cce16f4SKevin Wolf 28449381094SKevin Wolf /* Check if the table is already cached */ 28549381094SKevin Wolf for (i = 0; i < c->size; i++) { 28649381094SKevin Wolf if (c->entries[i].offset == offset) { 28749381094SKevin Wolf goto found; 28849381094SKevin Wolf } 28949381094SKevin Wolf } 29049381094SKevin Wolf 29149381094SKevin Wolf /* If not, write a table back and replace it */ 29249381094SKevin Wolf i = qcow2_cache_find_entry_to_replace(c); 2933cce16f4SKevin Wolf trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(), 2943cce16f4SKevin Wolf c == s->l2_table_cache, i); 29549381094SKevin Wolf if (i < 0) { 29649381094SKevin Wolf return i; 29749381094SKevin Wolf } 29849381094SKevin Wolf 29949381094SKevin Wolf ret = qcow2_cache_entry_flush(bs, c, i); 30049381094SKevin Wolf if (ret < 0) { 30149381094SKevin Wolf return ret; 30249381094SKevin Wolf } 30349381094SKevin Wolf 3043cce16f4SKevin Wolf trace_qcow2_cache_get_read(qemu_coroutine_self(), 3053cce16f4SKevin Wolf c == s->l2_table_cache, i); 30649381094SKevin Wolf c->entries[i].offset = 0; 30749381094SKevin Wolf if (read_from_disk) { 30829c1a730SKevin Wolf if (c == s->l2_table_cache) { 30929c1a730SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); 31029c1a730SKevin Wolf } 31129c1a730SKevin Wolf 31272e80b89SAlberto Garcia ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i), 31372e80b89SAlberto Garcia s->cluster_size); 31449381094SKevin Wolf if (ret < 0) { 31549381094SKevin Wolf return ret; 31649381094SKevin Wolf } 31749381094SKevin Wolf } 31849381094SKevin Wolf 31949381094SKevin Wolf /* Give the table some hits for the start so that it won't be replaced 32049381094SKevin Wolf * immediately. The number 32 is completely arbitrary. */ 32149381094SKevin Wolf c->entries[i].cache_hits = 32; 32249381094SKevin Wolf c->entries[i].offset = offset; 32349381094SKevin Wolf 32449381094SKevin Wolf /* And return the right table */ 32549381094SKevin Wolf found: 32649381094SKevin Wolf c->entries[i].cache_hits++; 32749381094SKevin Wolf c->entries[i].ref++; 32872e80b89SAlberto Garcia *table = qcow2_cache_get_table_addr(bs, c, i); 3293cce16f4SKevin Wolf 3303cce16f4SKevin Wolf trace_qcow2_cache_get_done(qemu_coroutine_self(), 3313cce16f4SKevin Wolf c == s->l2_table_cache, i); 3323cce16f4SKevin Wolf 33349381094SKevin Wolf return 0; 33449381094SKevin Wolf } 33549381094SKevin Wolf 33649381094SKevin Wolf int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 33749381094SKevin Wolf void **table) 33849381094SKevin Wolf { 33949381094SKevin Wolf return qcow2_cache_do_get(bs, c, offset, table, true); 34049381094SKevin Wolf } 34149381094SKevin Wolf 34249381094SKevin Wolf int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 34349381094SKevin Wolf void **table) 34449381094SKevin Wolf { 34549381094SKevin Wolf return qcow2_cache_do_get(bs, c, offset, table, false); 34649381094SKevin Wolf } 34749381094SKevin Wolf 34849381094SKevin Wolf int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table) 34949381094SKevin Wolf { 350*baf07d60SAlberto Garcia int i = qcow2_cache_get_table_idx(bs, c, *table); 35149381094SKevin Wolf 352*baf07d60SAlberto Garcia if (c->entries[i].offset == 0) { 35349381094SKevin Wolf return -ENOENT; 354*baf07d60SAlberto Garcia } 35549381094SKevin Wolf 35649381094SKevin Wolf c->entries[i].ref--; 35749381094SKevin Wolf *table = NULL; 35849381094SKevin Wolf 35949381094SKevin Wolf assert(c->entries[i].ref >= 0); 36049381094SKevin Wolf return 0; 36149381094SKevin Wolf } 36249381094SKevin Wolf 36372e80b89SAlberto Garcia void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c, 36472e80b89SAlberto Garcia void *table) 36549381094SKevin Wolf { 366*baf07d60SAlberto Garcia int i = qcow2_cache_get_table_idx(bs, c, table); 367*baf07d60SAlberto Garcia assert(c->entries[i].offset != 0); 36849381094SKevin Wolf c->entries[i].dirty = true; 36949381094SKevin Wolf } 370