xref: /qemu/block/qcow2-cache.c (revision baf07d60)
149381094SKevin Wolf /*
249381094SKevin Wolf  * L2/refcount table cache for the QCOW2 format
349381094SKevin Wolf  *
449381094SKevin Wolf  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
549381094SKevin Wolf  *
649381094SKevin Wolf  * Permission is hereby granted, free of charge, to any person obtaining a copy
749381094SKevin Wolf  * of this software and associated documentation files (the "Software"), to deal
849381094SKevin Wolf  * in the Software without restriction, including without limitation the rights
949381094SKevin Wolf  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1049381094SKevin Wolf  * copies of the Software, and to permit persons to whom the Software is
1149381094SKevin Wolf  * furnished to do so, subject to the following conditions:
1249381094SKevin Wolf  *
1349381094SKevin Wolf  * The above copyright notice and this permission notice shall be included in
1449381094SKevin Wolf  * all copies or substantial portions of the Software.
1549381094SKevin Wolf  *
1649381094SKevin Wolf  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1749381094SKevin Wolf  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1849381094SKevin Wolf  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
1949381094SKevin Wolf  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2049381094SKevin Wolf  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2149381094SKevin Wolf  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
2249381094SKevin Wolf  * THE SOFTWARE.
2349381094SKevin Wolf  */
2449381094SKevin Wolf 
25737e150eSPaolo Bonzini #include "block/block_int.h"
2649381094SKevin Wolf #include "qemu-common.h"
2749381094SKevin Wolf #include "qcow2.h"
283cce16f4SKevin Wolf #include "trace.h"
2949381094SKevin Wolf 
3049381094SKevin Wolf typedef struct Qcow2CachedTable {
3149381094SKevin Wolf     int64_t offset;
3249381094SKevin Wolf     bool    dirty;
3349381094SKevin Wolf     int     cache_hits;
3449381094SKevin Wolf     int     ref;
3549381094SKevin Wolf } Qcow2CachedTable;
3649381094SKevin Wolf 
3749381094SKevin Wolf struct Qcow2Cache {
3849381094SKevin Wolf     Qcow2CachedTable*       entries;
3949381094SKevin Wolf     struct Qcow2Cache*      depends;
40bf595021SJes Sorensen     int                     size;
413de0a294SKevin Wolf     bool                    depends_on_flush;
4272e80b89SAlberto Garcia     void                   *table_array;
4349381094SKevin Wolf };
4449381094SKevin Wolf 
4572e80b89SAlberto Garcia static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
4672e80b89SAlberto Garcia                     Qcow2Cache *c, int table)
4772e80b89SAlberto Garcia {
4872e80b89SAlberto Garcia     BDRVQcowState *s = bs->opaque;
4972e80b89SAlberto Garcia     return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
5072e80b89SAlberto Garcia }
5172e80b89SAlberto Garcia 
52*baf07d60SAlberto Garcia static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
53*baf07d60SAlberto Garcia                   Qcow2Cache *c, void *table)
54*baf07d60SAlberto Garcia {
55*baf07d60SAlberto Garcia     BDRVQcowState *s = bs->opaque;
56*baf07d60SAlberto Garcia     ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
57*baf07d60SAlberto Garcia     int idx = table_offset / s->cluster_size;
58*baf07d60SAlberto Garcia     assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
59*baf07d60SAlberto Garcia     return idx;
60*baf07d60SAlberto Garcia }
61*baf07d60SAlberto Garcia 
626af4e9eaSPaolo Bonzini Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
6349381094SKevin Wolf {
6449381094SKevin Wolf     BDRVQcowState *s = bs->opaque;
6549381094SKevin Wolf     Qcow2Cache *c;
6649381094SKevin Wolf 
6702004bd4SMax Reitz     c = g_new0(Qcow2Cache, 1);
6849381094SKevin Wolf     c->size = num_tables;
6902004bd4SMax Reitz     c->entries = g_try_new0(Qcow2CachedTable, num_tables);
7072e80b89SAlberto Garcia     c->table_array = qemu_try_blockalign(bs->file,
7172e80b89SAlberto Garcia                                          (size_t) num_tables * s->cluster_size);
7249381094SKevin Wolf 
7372e80b89SAlberto Garcia     if (!c->entries || !c->table_array) {
7472e80b89SAlberto Garcia         qemu_vfree(c->table_array);
7572e80b89SAlberto Garcia         g_free(c->entries);
7672e80b89SAlberto Garcia         g_free(c);
7772e80b89SAlberto Garcia         c = NULL;
7849381094SKevin Wolf     }
7949381094SKevin Wolf 
8049381094SKevin Wolf     return c;
8149381094SKevin Wolf }
8249381094SKevin Wolf 
8349381094SKevin Wolf int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
8449381094SKevin Wolf {
8549381094SKevin Wolf     int i;
8649381094SKevin Wolf 
8749381094SKevin Wolf     for (i = 0; i < c->size; i++) {
8849381094SKevin Wolf         assert(c->entries[i].ref == 0);
8949381094SKevin Wolf     }
9049381094SKevin Wolf 
9172e80b89SAlberto Garcia     qemu_vfree(c->table_array);
927267c094SAnthony Liguori     g_free(c->entries);
937267c094SAnthony Liguori     g_free(c);
9449381094SKevin Wolf 
9549381094SKevin Wolf     return 0;
9649381094SKevin Wolf }
9749381094SKevin Wolf 
9849381094SKevin Wolf static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
9949381094SKevin Wolf {
10049381094SKevin Wolf     int ret;
10149381094SKevin Wolf 
10249381094SKevin Wolf     ret = qcow2_cache_flush(bs, c->depends);
10349381094SKevin Wolf     if (ret < 0) {
10449381094SKevin Wolf         return ret;
10549381094SKevin Wolf     }
10649381094SKevin Wolf 
10749381094SKevin Wolf     c->depends = NULL;
1083de0a294SKevin Wolf     c->depends_on_flush = false;
1093de0a294SKevin Wolf 
11049381094SKevin Wolf     return 0;
11149381094SKevin Wolf }
11249381094SKevin Wolf 
11349381094SKevin Wolf static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
11449381094SKevin Wolf {
11549381094SKevin Wolf     BDRVQcowState *s = bs->opaque;
1163de0a294SKevin Wolf     int ret = 0;
11749381094SKevin Wolf 
11849381094SKevin Wolf     if (!c->entries[i].dirty || !c->entries[i].offset) {
11949381094SKevin Wolf         return 0;
12049381094SKevin Wolf     }
12149381094SKevin Wolf 
1223cce16f4SKevin Wolf     trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
1233cce16f4SKevin Wolf                                   c == s->l2_table_cache, i);
1243cce16f4SKevin Wolf 
12549381094SKevin Wolf     if (c->depends) {
12649381094SKevin Wolf         ret = qcow2_cache_flush_dependency(bs, c);
1273de0a294SKevin Wolf     } else if (c->depends_on_flush) {
1283de0a294SKevin Wolf         ret = bdrv_flush(bs->file);
1293de0a294SKevin Wolf         if (ret >= 0) {
1303de0a294SKevin Wolf             c->depends_on_flush = false;
1313de0a294SKevin Wolf         }
1323de0a294SKevin Wolf     }
1333de0a294SKevin Wolf 
13449381094SKevin Wolf     if (ret < 0) {
13549381094SKevin Wolf         return ret;
13649381094SKevin Wolf     }
13749381094SKevin Wolf 
13829c1a730SKevin Wolf     if (c == s->refcount_block_cache) {
139231bb267SMax Reitz         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
140cf93980eSMax Reitz                 c->entries[i].offset, s->cluster_size);
141cf93980eSMax Reitz     } else if (c == s->l2_table_cache) {
142231bb267SMax Reitz         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
143cf93980eSMax Reitz                 c->entries[i].offset, s->cluster_size);
144cf93980eSMax Reitz     } else {
145231bb267SMax Reitz         ret = qcow2_pre_write_overlap_check(bs, 0,
146cf93980eSMax Reitz                 c->entries[i].offset, s->cluster_size);
147cf93980eSMax Reitz     }
148cf93980eSMax Reitz 
149cf93980eSMax Reitz     if (ret < 0) {
150cf93980eSMax Reitz         return ret;
151cf93980eSMax Reitz     }
152cf93980eSMax Reitz 
153cf93980eSMax Reitz     if (c == s->refcount_block_cache) {
15429c1a730SKevin Wolf         BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
15529c1a730SKevin Wolf     } else if (c == s->l2_table_cache) {
15629c1a730SKevin Wolf         BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
15729c1a730SKevin Wolf     }
15829c1a730SKevin Wolf 
15972e80b89SAlberto Garcia     ret = bdrv_pwrite(bs->file, c->entries[i].offset,
16072e80b89SAlberto Garcia                       qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
16149381094SKevin Wolf     if (ret < 0) {
16249381094SKevin Wolf         return ret;
16349381094SKevin Wolf     }
16449381094SKevin Wolf 
16549381094SKevin Wolf     c->entries[i].dirty = false;
16649381094SKevin Wolf 
16749381094SKevin Wolf     return 0;
16849381094SKevin Wolf }
16949381094SKevin Wolf 
17049381094SKevin Wolf int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
17149381094SKevin Wolf {
1723cce16f4SKevin Wolf     BDRVQcowState *s = bs->opaque;
17349381094SKevin Wolf     int result = 0;
17449381094SKevin Wolf     int ret;
17549381094SKevin Wolf     int i;
17649381094SKevin Wolf 
1773cce16f4SKevin Wolf     trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
1783cce16f4SKevin Wolf 
17949381094SKevin Wolf     for (i = 0; i < c->size; i++) {
18049381094SKevin Wolf         ret = qcow2_cache_entry_flush(bs, c, i);
18149381094SKevin Wolf         if (ret < 0 && result != -ENOSPC) {
18249381094SKevin Wolf             result = ret;
18349381094SKevin Wolf         }
18449381094SKevin Wolf     }
18549381094SKevin Wolf 
18649381094SKevin Wolf     if (result == 0) {
18749381094SKevin Wolf         ret = bdrv_flush(bs->file);
18849381094SKevin Wolf         if (ret < 0) {
18949381094SKevin Wolf             result = ret;
19049381094SKevin Wolf         }
19149381094SKevin Wolf     }
19249381094SKevin Wolf 
19349381094SKevin Wolf     return result;
19449381094SKevin Wolf }
19549381094SKevin Wolf 
19649381094SKevin Wolf int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
19749381094SKevin Wolf     Qcow2Cache *dependency)
19849381094SKevin Wolf {
19949381094SKevin Wolf     int ret;
20049381094SKevin Wolf 
20149381094SKevin Wolf     if (dependency->depends) {
20249381094SKevin Wolf         ret = qcow2_cache_flush_dependency(bs, dependency);
20349381094SKevin Wolf         if (ret < 0) {
20449381094SKevin Wolf             return ret;
20549381094SKevin Wolf         }
20649381094SKevin Wolf     }
20749381094SKevin Wolf 
20849381094SKevin Wolf     if (c->depends && (c->depends != dependency)) {
20949381094SKevin Wolf         ret = qcow2_cache_flush_dependency(bs, c);
21049381094SKevin Wolf         if (ret < 0) {
21149381094SKevin Wolf             return ret;
21249381094SKevin Wolf         }
21349381094SKevin Wolf     }
21449381094SKevin Wolf 
21549381094SKevin Wolf     c->depends = dependency;
21649381094SKevin Wolf     return 0;
21749381094SKevin Wolf }
21849381094SKevin Wolf 
2193de0a294SKevin Wolf void qcow2_cache_depends_on_flush(Qcow2Cache *c)
2203de0a294SKevin Wolf {
2213de0a294SKevin Wolf     c->depends_on_flush = true;
2223de0a294SKevin Wolf }
2233de0a294SKevin Wolf 
224e7108feaSMax Reitz int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
225e7108feaSMax Reitz {
226e7108feaSMax Reitz     int ret, i;
227e7108feaSMax Reitz 
228e7108feaSMax Reitz     ret = qcow2_cache_flush(bs, c);
229e7108feaSMax Reitz     if (ret < 0) {
230e7108feaSMax Reitz         return ret;
231e7108feaSMax Reitz     }
232e7108feaSMax Reitz 
233e7108feaSMax Reitz     for (i = 0; i < c->size; i++) {
234e7108feaSMax Reitz         assert(c->entries[i].ref == 0);
235e7108feaSMax Reitz         c->entries[i].offset = 0;
236e7108feaSMax Reitz         c->entries[i].cache_hits = 0;
237e7108feaSMax Reitz     }
238e7108feaSMax Reitz 
239e7108feaSMax Reitz     return 0;
240e7108feaSMax Reitz }
241e7108feaSMax Reitz 
24249381094SKevin Wolf static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
24349381094SKevin Wolf {
24449381094SKevin Wolf     int i;
24549381094SKevin Wolf     int min_count = INT_MAX;
24649381094SKevin Wolf     int min_index = -1;
24749381094SKevin Wolf 
24849381094SKevin Wolf 
24949381094SKevin Wolf     for (i = 0; i < c->size; i++) {
25049381094SKevin Wolf         if (c->entries[i].ref) {
25149381094SKevin Wolf             continue;
25249381094SKevin Wolf         }
25349381094SKevin Wolf 
25449381094SKevin Wolf         if (c->entries[i].cache_hits < min_count) {
25549381094SKevin Wolf             min_index = i;
25649381094SKevin Wolf             min_count = c->entries[i].cache_hits;
25749381094SKevin Wolf         }
25849381094SKevin Wolf 
25949381094SKevin Wolf         /* Give newer hits priority */
26049381094SKevin Wolf         /* TODO Check how to optimize the replacement strategy */
2618e8cb375SAlberto Garcia         if (c->entries[i].cache_hits > 1) {
26249381094SKevin Wolf             c->entries[i].cache_hits /= 2;
26349381094SKevin Wolf         }
2648e8cb375SAlberto Garcia     }
26549381094SKevin Wolf 
26649381094SKevin Wolf     if (min_index == -1) {
26749381094SKevin Wolf         /* This can't happen in current synchronous code, but leave the check
26849381094SKevin Wolf          * here as a reminder for whoever starts using AIO with the cache */
26949381094SKevin Wolf         abort();
27049381094SKevin Wolf     }
27149381094SKevin Wolf     return min_index;
27249381094SKevin Wolf }
27349381094SKevin Wolf 
27449381094SKevin Wolf static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
27549381094SKevin Wolf     uint64_t offset, void **table, bool read_from_disk)
27649381094SKevin Wolf {
27749381094SKevin Wolf     BDRVQcowState *s = bs->opaque;
27849381094SKevin Wolf     int i;
27949381094SKevin Wolf     int ret;
28049381094SKevin Wolf 
2813cce16f4SKevin Wolf     trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
2823cce16f4SKevin Wolf                           offset, read_from_disk);
2833cce16f4SKevin Wolf 
28449381094SKevin Wolf     /* Check if the table is already cached */
28549381094SKevin Wolf     for (i = 0; i < c->size; i++) {
28649381094SKevin Wolf         if (c->entries[i].offset == offset) {
28749381094SKevin Wolf             goto found;
28849381094SKevin Wolf         }
28949381094SKevin Wolf     }
29049381094SKevin Wolf 
29149381094SKevin Wolf     /* If not, write a table back and replace it */
29249381094SKevin Wolf     i = qcow2_cache_find_entry_to_replace(c);
2933cce16f4SKevin Wolf     trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
2943cce16f4SKevin Wolf                                         c == s->l2_table_cache, i);
29549381094SKevin Wolf     if (i < 0) {
29649381094SKevin Wolf         return i;
29749381094SKevin Wolf     }
29849381094SKevin Wolf 
29949381094SKevin Wolf     ret = qcow2_cache_entry_flush(bs, c, i);
30049381094SKevin Wolf     if (ret < 0) {
30149381094SKevin Wolf         return ret;
30249381094SKevin Wolf     }
30349381094SKevin Wolf 
3043cce16f4SKevin Wolf     trace_qcow2_cache_get_read(qemu_coroutine_self(),
3053cce16f4SKevin Wolf                                c == s->l2_table_cache, i);
30649381094SKevin Wolf     c->entries[i].offset = 0;
30749381094SKevin Wolf     if (read_from_disk) {
30829c1a730SKevin Wolf         if (c == s->l2_table_cache) {
30929c1a730SKevin Wolf             BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
31029c1a730SKevin Wolf         }
31129c1a730SKevin Wolf 
31272e80b89SAlberto Garcia         ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i),
31372e80b89SAlberto Garcia                          s->cluster_size);
31449381094SKevin Wolf         if (ret < 0) {
31549381094SKevin Wolf             return ret;
31649381094SKevin Wolf         }
31749381094SKevin Wolf     }
31849381094SKevin Wolf 
31949381094SKevin Wolf     /* Give the table some hits for the start so that it won't be replaced
32049381094SKevin Wolf      * immediately. The number 32 is completely arbitrary. */
32149381094SKevin Wolf     c->entries[i].cache_hits = 32;
32249381094SKevin Wolf     c->entries[i].offset = offset;
32349381094SKevin Wolf 
32449381094SKevin Wolf     /* And return the right table */
32549381094SKevin Wolf found:
32649381094SKevin Wolf     c->entries[i].cache_hits++;
32749381094SKevin Wolf     c->entries[i].ref++;
32872e80b89SAlberto Garcia     *table = qcow2_cache_get_table_addr(bs, c, i);
3293cce16f4SKevin Wolf 
3303cce16f4SKevin Wolf     trace_qcow2_cache_get_done(qemu_coroutine_self(),
3313cce16f4SKevin Wolf                                c == s->l2_table_cache, i);
3323cce16f4SKevin Wolf 
33349381094SKevin Wolf     return 0;
33449381094SKevin Wolf }
33549381094SKevin Wolf 
33649381094SKevin Wolf int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
33749381094SKevin Wolf     void **table)
33849381094SKevin Wolf {
33949381094SKevin Wolf     return qcow2_cache_do_get(bs, c, offset, table, true);
34049381094SKevin Wolf }
34149381094SKevin Wolf 
34249381094SKevin Wolf int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
34349381094SKevin Wolf     void **table)
34449381094SKevin Wolf {
34549381094SKevin Wolf     return qcow2_cache_do_get(bs, c, offset, table, false);
34649381094SKevin Wolf }
34749381094SKevin Wolf 
34849381094SKevin Wolf int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
34949381094SKevin Wolf {
350*baf07d60SAlberto Garcia     int i = qcow2_cache_get_table_idx(bs, c, *table);
35149381094SKevin Wolf 
352*baf07d60SAlberto Garcia     if (c->entries[i].offset == 0) {
35349381094SKevin Wolf         return -ENOENT;
354*baf07d60SAlberto Garcia     }
35549381094SKevin Wolf 
35649381094SKevin Wolf     c->entries[i].ref--;
35749381094SKevin Wolf     *table = NULL;
35849381094SKevin Wolf 
35949381094SKevin Wolf     assert(c->entries[i].ref >= 0);
36049381094SKevin Wolf     return 0;
36149381094SKevin Wolf }
36249381094SKevin Wolf 
36372e80b89SAlberto Garcia void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
36472e80b89SAlberto Garcia      void *table)
36549381094SKevin Wolf {
366*baf07d60SAlberto Garcia     int i = qcow2_cache_get_table_idx(bs, c, table);
367*baf07d60SAlberto Garcia     assert(c->entries[i].offset != 0);
36849381094SKevin Wolf     c->entries[i].dirty = true;
36949381094SKevin Wolf }
370