xref: /qemu/block/qcow2-cache.c (revision 6af4e9ea)
149381094SKevin Wolf /*
249381094SKevin Wolf  * L2/refcount table cache for the QCOW2 format
349381094SKevin Wolf  *
449381094SKevin Wolf  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
549381094SKevin Wolf  *
649381094SKevin Wolf  * Permission is hereby granted, free of charge, to any person obtaining a copy
749381094SKevin Wolf  * of this software and associated documentation files (the "Software"), to deal
849381094SKevin Wolf  * in the Software without restriction, including without limitation the rights
949381094SKevin Wolf  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1049381094SKevin Wolf  * copies of the Software, and to permit persons to whom the Software is
1149381094SKevin Wolf  * furnished to do so, subject to the following conditions:
1249381094SKevin Wolf  *
1349381094SKevin Wolf  * The above copyright notice and this permission notice shall be included in
1449381094SKevin Wolf  * all copies or substantial portions of the Software.
1549381094SKevin Wolf  *
1649381094SKevin Wolf  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1749381094SKevin Wolf  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1849381094SKevin Wolf  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
1949381094SKevin Wolf  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2049381094SKevin Wolf  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2149381094SKevin Wolf  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
2249381094SKevin Wolf  * THE SOFTWARE.
2349381094SKevin Wolf  */
2449381094SKevin Wolf 
2549381094SKevin Wolf #include "block_int.h"
2649381094SKevin Wolf #include "qemu-common.h"
2749381094SKevin Wolf #include "qcow2.h"
283cce16f4SKevin Wolf #include "trace.h"
2949381094SKevin Wolf 
3049381094SKevin Wolf typedef struct Qcow2CachedTable {
3149381094SKevin Wolf     void*   table;
3249381094SKevin Wolf     int64_t offset;
3349381094SKevin Wolf     bool    dirty;
3449381094SKevin Wolf     int     cache_hits;
3549381094SKevin Wolf     int     ref;
3649381094SKevin Wolf } Qcow2CachedTable;
3749381094SKevin Wolf 
3849381094SKevin Wolf struct Qcow2Cache {
3949381094SKevin Wolf     Qcow2CachedTable*       entries;
4049381094SKevin Wolf     struct Qcow2Cache*      depends;
41bf595021SJes Sorensen     int                     size;
423de0a294SKevin Wolf     bool                    depends_on_flush;
4349381094SKevin Wolf };
4449381094SKevin Wolf 
45*6af4e9eaSPaolo Bonzini Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
4649381094SKevin Wolf {
4749381094SKevin Wolf     BDRVQcowState *s = bs->opaque;
4849381094SKevin Wolf     Qcow2Cache *c;
4949381094SKevin Wolf     int i;
5049381094SKevin Wolf 
517267c094SAnthony Liguori     c = g_malloc0(sizeof(*c));
5249381094SKevin Wolf     c->size = num_tables;
537267c094SAnthony Liguori     c->entries = g_malloc0(sizeof(*c->entries) * num_tables);
5449381094SKevin Wolf 
5549381094SKevin Wolf     for (i = 0; i < c->size; i++) {
5649381094SKevin Wolf         c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
5749381094SKevin Wolf     }
5849381094SKevin Wolf 
5949381094SKevin Wolf     return c;
6049381094SKevin Wolf }
6149381094SKevin Wolf 
6249381094SKevin Wolf int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
6349381094SKevin Wolf {
6449381094SKevin Wolf     int i;
6549381094SKevin Wolf 
6649381094SKevin Wolf     for (i = 0; i < c->size; i++) {
6749381094SKevin Wolf         assert(c->entries[i].ref == 0);
6849381094SKevin Wolf         qemu_vfree(c->entries[i].table);
6949381094SKevin Wolf     }
7049381094SKevin Wolf 
717267c094SAnthony Liguori     g_free(c->entries);
727267c094SAnthony Liguori     g_free(c);
7349381094SKevin Wolf 
7449381094SKevin Wolf     return 0;
7549381094SKevin Wolf }
7649381094SKevin Wolf 
7749381094SKevin Wolf static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
7849381094SKevin Wolf {
7949381094SKevin Wolf     int ret;
8049381094SKevin Wolf 
8149381094SKevin Wolf     ret = qcow2_cache_flush(bs, c->depends);
8249381094SKevin Wolf     if (ret < 0) {
8349381094SKevin Wolf         return ret;
8449381094SKevin Wolf     }
8549381094SKevin Wolf 
8649381094SKevin Wolf     c->depends = NULL;
873de0a294SKevin Wolf     c->depends_on_flush = false;
883de0a294SKevin Wolf 
8949381094SKevin Wolf     return 0;
9049381094SKevin Wolf }
9149381094SKevin Wolf 
9249381094SKevin Wolf static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
9349381094SKevin Wolf {
9449381094SKevin Wolf     BDRVQcowState *s = bs->opaque;
953de0a294SKevin Wolf     int ret = 0;
9649381094SKevin Wolf 
9749381094SKevin Wolf     if (!c->entries[i].dirty || !c->entries[i].offset) {
9849381094SKevin Wolf         return 0;
9949381094SKevin Wolf     }
10049381094SKevin Wolf 
1013cce16f4SKevin Wolf     trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
1023cce16f4SKevin Wolf                                   c == s->l2_table_cache, i);
1033cce16f4SKevin Wolf 
10449381094SKevin Wolf     if (c->depends) {
10549381094SKevin Wolf         ret = qcow2_cache_flush_dependency(bs, c);
1063de0a294SKevin Wolf     } else if (c->depends_on_flush) {
1073de0a294SKevin Wolf         ret = bdrv_flush(bs->file);
1083de0a294SKevin Wolf         if (ret >= 0) {
1093de0a294SKevin Wolf             c->depends_on_flush = false;
1103de0a294SKevin Wolf         }
1113de0a294SKevin Wolf     }
1123de0a294SKevin Wolf 
11349381094SKevin Wolf     if (ret < 0) {
11449381094SKevin Wolf         return ret;
11549381094SKevin Wolf     }
11649381094SKevin Wolf 
11729c1a730SKevin Wolf     if (c == s->refcount_block_cache) {
11829c1a730SKevin Wolf         BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
11929c1a730SKevin Wolf     } else if (c == s->l2_table_cache) {
12029c1a730SKevin Wolf         BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
12129c1a730SKevin Wolf     }
12229c1a730SKevin Wolf 
12349381094SKevin Wolf     ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
12449381094SKevin Wolf         s->cluster_size);
12549381094SKevin Wolf     if (ret < 0) {
12649381094SKevin Wolf         return ret;
12749381094SKevin Wolf     }
12849381094SKevin Wolf 
12949381094SKevin Wolf     c->entries[i].dirty = false;
13049381094SKevin Wolf 
13149381094SKevin Wolf     return 0;
13249381094SKevin Wolf }
13349381094SKevin Wolf 
13449381094SKevin Wolf int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
13549381094SKevin Wolf {
1363cce16f4SKevin Wolf     BDRVQcowState *s = bs->opaque;
13749381094SKevin Wolf     int result = 0;
13849381094SKevin Wolf     int ret;
13949381094SKevin Wolf     int i;
14049381094SKevin Wolf 
1413cce16f4SKevin Wolf     trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
1423cce16f4SKevin Wolf 
14349381094SKevin Wolf     for (i = 0; i < c->size; i++) {
14449381094SKevin Wolf         ret = qcow2_cache_entry_flush(bs, c, i);
14549381094SKevin Wolf         if (ret < 0 && result != -ENOSPC) {
14649381094SKevin Wolf             result = ret;
14749381094SKevin Wolf         }
14849381094SKevin Wolf     }
14949381094SKevin Wolf 
15049381094SKevin Wolf     if (result == 0) {
15149381094SKevin Wolf         ret = bdrv_flush(bs->file);
15249381094SKevin Wolf         if (ret < 0) {
15349381094SKevin Wolf             result = ret;
15449381094SKevin Wolf         }
15549381094SKevin Wolf     }
15649381094SKevin Wolf 
15749381094SKevin Wolf     return result;
15849381094SKevin Wolf }
15949381094SKevin Wolf 
16049381094SKevin Wolf int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
16149381094SKevin Wolf     Qcow2Cache *dependency)
16249381094SKevin Wolf {
16349381094SKevin Wolf     int ret;
16449381094SKevin Wolf 
16549381094SKevin Wolf     if (dependency->depends) {
16649381094SKevin Wolf         ret = qcow2_cache_flush_dependency(bs, dependency);
16749381094SKevin Wolf         if (ret < 0) {
16849381094SKevin Wolf             return ret;
16949381094SKevin Wolf         }
17049381094SKevin Wolf     }
17149381094SKevin Wolf 
17249381094SKevin Wolf     if (c->depends && (c->depends != dependency)) {
17349381094SKevin Wolf         ret = qcow2_cache_flush_dependency(bs, c);
17449381094SKevin Wolf         if (ret < 0) {
17549381094SKevin Wolf             return ret;
17649381094SKevin Wolf         }
17749381094SKevin Wolf     }
17849381094SKevin Wolf 
17949381094SKevin Wolf     c->depends = dependency;
18049381094SKevin Wolf     return 0;
18149381094SKevin Wolf }
18249381094SKevin Wolf 
1833de0a294SKevin Wolf void qcow2_cache_depends_on_flush(Qcow2Cache *c)
1843de0a294SKevin Wolf {
1853de0a294SKevin Wolf     c->depends_on_flush = true;
1863de0a294SKevin Wolf }
1873de0a294SKevin Wolf 
18849381094SKevin Wolf static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
18949381094SKevin Wolf {
19049381094SKevin Wolf     int i;
19149381094SKevin Wolf     int min_count = INT_MAX;
19249381094SKevin Wolf     int min_index = -1;
19349381094SKevin Wolf 
19449381094SKevin Wolf 
19549381094SKevin Wolf     for (i = 0; i < c->size; i++) {
19649381094SKevin Wolf         if (c->entries[i].ref) {
19749381094SKevin Wolf             continue;
19849381094SKevin Wolf         }
19949381094SKevin Wolf 
20049381094SKevin Wolf         if (c->entries[i].cache_hits < min_count) {
20149381094SKevin Wolf             min_index = i;
20249381094SKevin Wolf             min_count = c->entries[i].cache_hits;
20349381094SKevin Wolf         }
20449381094SKevin Wolf 
20549381094SKevin Wolf         /* Give newer hits priority */
20649381094SKevin Wolf         /* TODO Check how to optimize the replacement strategy */
20749381094SKevin Wolf         c->entries[i].cache_hits /= 2;
20849381094SKevin Wolf     }
20949381094SKevin Wolf 
21049381094SKevin Wolf     if (min_index == -1) {
21149381094SKevin Wolf         /* This can't happen in current synchronous code, but leave the check
21249381094SKevin Wolf          * here as a reminder for whoever starts using AIO with the cache */
21349381094SKevin Wolf         abort();
21449381094SKevin Wolf     }
21549381094SKevin Wolf     return min_index;
21649381094SKevin Wolf }
21749381094SKevin Wolf 
21849381094SKevin Wolf static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
21949381094SKevin Wolf     uint64_t offset, void **table, bool read_from_disk)
22049381094SKevin Wolf {
22149381094SKevin Wolf     BDRVQcowState *s = bs->opaque;
22249381094SKevin Wolf     int i;
22349381094SKevin Wolf     int ret;
22449381094SKevin Wolf 
2253cce16f4SKevin Wolf     trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
2263cce16f4SKevin Wolf                           offset, read_from_disk);
2273cce16f4SKevin Wolf 
22849381094SKevin Wolf     /* Check if the table is already cached */
22949381094SKevin Wolf     for (i = 0; i < c->size; i++) {
23049381094SKevin Wolf         if (c->entries[i].offset == offset) {
23149381094SKevin Wolf             goto found;
23249381094SKevin Wolf         }
23349381094SKevin Wolf     }
23449381094SKevin Wolf 
23549381094SKevin Wolf     /* If not, write a table back and replace it */
23649381094SKevin Wolf     i = qcow2_cache_find_entry_to_replace(c);
2373cce16f4SKevin Wolf     trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
2383cce16f4SKevin Wolf                                         c == s->l2_table_cache, i);
23949381094SKevin Wolf     if (i < 0) {
24049381094SKevin Wolf         return i;
24149381094SKevin Wolf     }
24249381094SKevin Wolf 
24349381094SKevin Wolf     ret = qcow2_cache_entry_flush(bs, c, i);
24449381094SKevin Wolf     if (ret < 0) {
24549381094SKevin Wolf         return ret;
24649381094SKevin Wolf     }
24749381094SKevin Wolf 
2483cce16f4SKevin Wolf     trace_qcow2_cache_get_read(qemu_coroutine_self(),
2493cce16f4SKevin Wolf                                c == s->l2_table_cache, i);
25049381094SKevin Wolf     c->entries[i].offset = 0;
25149381094SKevin Wolf     if (read_from_disk) {
25229c1a730SKevin Wolf         if (c == s->l2_table_cache) {
25329c1a730SKevin Wolf             BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
25429c1a730SKevin Wolf         }
25529c1a730SKevin Wolf 
25649381094SKevin Wolf         ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
25749381094SKevin Wolf         if (ret < 0) {
25849381094SKevin Wolf             return ret;
25949381094SKevin Wolf         }
26049381094SKevin Wolf     }
26149381094SKevin Wolf 
26249381094SKevin Wolf     /* Give the table some hits for the start so that it won't be replaced
26349381094SKevin Wolf      * immediately. The number 32 is completely arbitrary. */
26449381094SKevin Wolf     c->entries[i].cache_hits = 32;
26549381094SKevin Wolf     c->entries[i].offset = offset;
26649381094SKevin Wolf 
26749381094SKevin Wolf     /* And return the right table */
26849381094SKevin Wolf found:
26949381094SKevin Wolf     c->entries[i].cache_hits++;
27049381094SKevin Wolf     c->entries[i].ref++;
27149381094SKevin Wolf     *table = c->entries[i].table;
2723cce16f4SKevin Wolf 
2733cce16f4SKevin Wolf     trace_qcow2_cache_get_done(qemu_coroutine_self(),
2743cce16f4SKevin Wolf                                c == s->l2_table_cache, i);
2753cce16f4SKevin Wolf 
27649381094SKevin Wolf     return 0;
27749381094SKevin Wolf }
27849381094SKevin Wolf 
27949381094SKevin Wolf int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
28049381094SKevin Wolf     void **table)
28149381094SKevin Wolf {
28249381094SKevin Wolf     return qcow2_cache_do_get(bs, c, offset, table, true);
28349381094SKevin Wolf }
28449381094SKevin Wolf 
28549381094SKevin Wolf int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
28649381094SKevin Wolf     void **table)
28749381094SKevin Wolf {
28849381094SKevin Wolf     return qcow2_cache_do_get(bs, c, offset, table, false);
28949381094SKevin Wolf }
29049381094SKevin Wolf 
29149381094SKevin Wolf int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
29249381094SKevin Wolf {
29349381094SKevin Wolf     int i;
29449381094SKevin Wolf 
29549381094SKevin Wolf     for (i = 0; i < c->size; i++) {
29649381094SKevin Wolf         if (c->entries[i].table == *table) {
29749381094SKevin Wolf             goto found;
29849381094SKevin Wolf         }
29949381094SKevin Wolf     }
30049381094SKevin Wolf     return -ENOENT;
30149381094SKevin Wolf 
30249381094SKevin Wolf found:
30349381094SKevin Wolf     c->entries[i].ref--;
30449381094SKevin Wolf     *table = NULL;
30549381094SKevin Wolf 
30649381094SKevin Wolf     assert(c->entries[i].ref >= 0);
30749381094SKevin Wolf     return 0;
30849381094SKevin Wolf }
30949381094SKevin Wolf 
31049381094SKevin Wolf void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
31149381094SKevin Wolf {
31249381094SKevin Wolf     int i;
31349381094SKevin Wolf 
31449381094SKevin Wolf     for (i = 0; i < c->size; i++) {
31549381094SKevin Wolf         if (c->entries[i].table == table) {
31649381094SKevin Wolf             goto found;
31749381094SKevin Wolf         }
31849381094SKevin Wolf     }
31949381094SKevin Wolf     abort();
32049381094SKevin Wolf 
32149381094SKevin Wolf found:
32249381094SKevin Wolf     c->entries[i].dirty = true;
32349381094SKevin Wolf }
324