xref: /qemu/block/qcow2-cache.c (revision a3f1afb4)
149381094SKevin Wolf /*
249381094SKevin Wolf  * L2/refcount table cache for the QCOW2 format
349381094SKevin Wolf  *
449381094SKevin Wolf  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
549381094SKevin Wolf  *
649381094SKevin Wolf  * Permission is hereby granted, free of charge, to any person obtaining a copy
749381094SKevin Wolf  * of this software and associated documentation files (the "Software"), to deal
849381094SKevin Wolf  * in the Software without restriction, including without limitation the rights
949381094SKevin Wolf  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1049381094SKevin Wolf  * copies of the Software, and to permit persons to whom the Software is
1149381094SKevin Wolf  * furnished to do so, subject to the following conditions:
1249381094SKevin Wolf  *
1349381094SKevin Wolf  * The above copyright notice and this permission notice shall be included in
1449381094SKevin Wolf  * all copies or substantial portions of the Software.
1549381094SKevin Wolf  *
1649381094SKevin Wolf  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1749381094SKevin Wolf  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1849381094SKevin Wolf  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
1949381094SKevin Wolf  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2049381094SKevin Wolf  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2149381094SKevin Wolf  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
2249381094SKevin Wolf  * THE SOFTWARE.
2349381094SKevin Wolf  */
2449381094SKevin Wolf 
25737e150eSPaolo Bonzini #include "block/block_int.h"
2649381094SKevin Wolf #include "qemu-common.h"
2749381094SKevin Wolf #include "qcow2.h"
283cce16f4SKevin Wolf #include "trace.h"
2949381094SKevin Wolf 
/*
 * Bookkeeping for a single cache slot.  The table contents are not stored
 * here; they live in Qcow2Cache.table_array at the slot's index.
 */
typedef struct Qcow2CachedTable {
    /* Image file offset of the cached table; 0 means the slot holds none */
    int64_t offset;

    /* Set when the cached copy was modified and still has to be written */
    bool dirty;

    /*
     * Snapshot of the cache-wide LRU counter taken when the last reference
     * was dropped; the unreferenced slot with the lowest value is replaced
     * first.
     */
    uint64_t lru_counter;

    /* Number of references handed out and not yet returned */
    int ref;
} Qcow2CachedTable;
3649381094SKevin Wolf 
3749381094SKevin Wolf struct Qcow2Cache {
3849381094SKevin Wolf     Qcow2CachedTable*       entries;
3949381094SKevin Wolf     struct Qcow2Cache*      depends;
40bf595021SJes Sorensen     int                     size;
413de0a294SKevin Wolf     bool                    depends_on_flush;
4272e80b89SAlberto Garcia     void                   *table_array;
432693310eSAlberto Garcia     uint64_t                lru_counter;
4449381094SKevin Wolf };
4549381094SKevin Wolf 
4672e80b89SAlberto Garcia static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
4772e80b89SAlberto Garcia                     Qcow2Cache *c, int table)
4872e80b89SAlberto Garcia {
4972e80b89SAlberto Garcia     BDRVQcowState *s = bs->opaque;
5072e80b89SAlberto Garcia     return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
5172e80b89SAlberto Garcia }
5272e80b89SAlberto Garcia 
53baf07d60SAlberto Garcia static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
54baf07d60SAlberto Garcia                   Qcow2Cache *c, void *table)
55baf07d60SAlberto Garcia {
56baf07d60SAlberto Garcia     BDRVQcowState *s = bs->opaque;
57baf07d60SAlberto Garcia     ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
58baf07d60SAlberto Garcia     int idx = table_offset / s->cluster_size;
59baf07d60SAlberto Garcia     assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
60baf07d60SAlberto Garcia     return idx;
61baf07d60SAlberto Garcia }
62baf07d60SAlberto Garcia 
636af4e9eaSPaolo Bonzini Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
6449381094SKevin Wolf {
6549381094SKevin Wolf     BDRVQcowState *s = bs->opaque;
6649381094SKevin Wolf     Qcow2Cache *c;
6749381094SKevin Wolf 
6802004bd4SMax Reitz     c = g_new0(Qcow2Cache, 1);
6949381094SKevin Wolf     c->size = num_tables;
7002004bd4SMax Reitz     c->entries = g_try_new0(Qcow2CachedTable, num_tables);
7172e80b89SAlberto Garcia     c->table_array = qemu_try_blockalign(bs->file,
7272e80b89SAlberto Garcia                                          (size_t) num_tables * s->cluster_size);
7349381094SKevin Wolf 
7472e80b89SAlberto Garcia     if (!c->entries || !c->table_array) {
7572e80b89SAlberto Garcia         qemu_vfree(c->table_array);
7672e80b89SAlberto Garcia         g_free(c->entries);
7772e80b89SAlberto Garcia         g_free(c);
7872e80b89SAlberto Garcia         c = NULL;
7949381094SKevin Wolf     }
8049381094SKevin Wolf 
8149381094SKevin Wolf     return c;
8249381094SKevin Wolf }
8349381094SKevin Wolf 
8449381094SKevin Wolf int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
8549381094SKevin Wolf {
8649381094SKevin Wolf     int i;
8749381094SKevin Wolf 
8849381094SKevin Wolf     for (i = 0; i < c->size; i++) {
8949381094SKevin Wolf         assert(c->entries[i].ref == 0);
9049381094SKevin Wolf     }
9149381094SKevin Wolf 
9272e80b89SAlberto Garcia     qemu_vfree(c->table_array);
937267c094SAnthony Liguori     g_free(c->entries);
947267c094SAnthony Liguori     g_free(c);
9549381094SKevin Wolf 
9649381094SKevin Wolf     return 0;
9749381094SKevin Wolf }
9849381094SKevin Wolf 
9949381094SKevin Wolf static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
10049381094SKevin Wolf {
10149381094SKevin Wolf     int ret;
10249381094SKevin Wolf 
10349381094SKevin Wolf     ret = qcow2_cache_flush(bs, c->depends);
10449381094SKevin Wolf     if (ret < 0) {
10549381094SKevin Wolf         return ret;
10649381094SKevin Wolf     }
10749381094SKevin Wolf 
10849381094SKevin Wolf     c->depends = NULL;
1093de0a294SKevin Wolf     c->depends_on_flush = false;
1103de0a294SKevin Wolf 
11149381094SKevin Wolf     return 0;
11249381094SKevin Wolf }
11349381094SKevin Wolf 
11449381094SKevin Wolf static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
11549381094SKevin Wolf {
11649381094SKevin Wolf     BDRVQcowState *s = bs->opaque;
1173de0a294SKevin Wolf     int ret = 0;
11849381094SKevin Wolf 
11949381094SKevin Wolf     if (!c->entries[i].dirty || !c->entries[i].offset) {
12049381094SKevin Wolf         return 0;
12149381094SKevin Wolf     }
12249381094SKevin Wolf 
1233cce16f4SKevin Wolf     trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
1243cce16f4SKevin Wolf                                   c == s->l2_table_cache, i);
1253cce16f4SKevin Wolf 
12649381094SKevin Wolf     if (c->depends) {
12749381094SKevin Wolf         ret = qcow2_cache_flush_dependency(bs, c);
1283de0a294SKevin Wolf     } else if (c->depends_on_flush) {
1293de0a294SKevin Wolf         ret = bdrv_flush(bs->file);
1303de0a294SKevin Wolf         if (ret >= 0) {
1313de0a294SKevin Wolf             c->depends_on_flush = false;
1323de0a294SKevin Wolf         }
1333de0a294SKevin Wolf     }
1343de0a294SKevin Wolf 
13549381094SKevin Wolf     if (ret < 0) {
13649381094SKevin Wolf         return ret;
13749381094SKevin Wolf     }
13849381094SKevin Wolf 
13929c1a730SKevin Wolf     if (c == s->refcount_block_cache) {
140231bb267SMax Reitz         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
141cf93980eSMax Reitz                 c->entries[i].offset, s->cluster_size);
142cf93980eSMax Reitz     } else if (c == s->l2_table_cache) {
143231bb267SMax Reitz         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
144cf93980eSMax Reitz                 c->entries[i].offset, s->cluster_size);
145cf93980eSMax Reitz     } else {
146231bb267SMax Reitz         ret = qcow2_pre_write_overlap_check(bs, 0,
147cf93980eSMax Reitz                 c->entries[i].offset, s->cluster_size);
148cf93980eSMax Reitz     }
149cf93980eSMax Reitz 
150cf93980eSMax Reitz     if (ret < 0) {
151cf93980eSMax Reitz         return ret;
152cf93980eSMax Reitz     }
153cf93980eSMax Reitz 
154cf93980eSMax Reitz     if (c == s->refcount_block_cache) {
15529c1a730SKevin Wolf         BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
15629c1a730SKevin Wolf     } else if (c == s->l2_table_cache) {
15729c1a730SKevin Wolf         BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
15829c1a730SKevin Wolf     }
15929c1a730SKevin Wolf 
16072e80b89SAlberto Garcia     ret = bdrv_pwrite(bs->file, c->entries[i].offset,
16172e80b89SAlberto Garcia                       qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
16249381094SKevin Wolf     if (ret < 0) {
16349381094SKevin Wolf         return ret;
16449381094SKevin Wolf     }
16549381094SKevin Wolf 
16649381094SKevin Wolf     c->entries[i].dirty = false;
16749381094SKevin Wolf 
16849381094SKevin Wolf     return 0;
16949381094SKevin Wolf }
17049381094SKevin Wolf 
17149381094SKevin Wolf int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
17249381094SKevin Wolf {
1733cce16f4SKevin Wolf     BDRVQcowState *s = bs->opaque;
17449381094SKevin Wolf     int result = 0;
17549381094SKevin Wolf     int ret;
17649381094SKevin Wolf     int i;
17749381094SKevin Wolf 
1783cce16f4SKevin Wolf     trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
1793cce16f4SKevin Wolf 
18049381094SKevin Wolf     for (i = 0; i < c->size; i++) {
18149381094SKevin Wolf         ret = qcow2_cache_entry_flush(bs, c, i);
18249381094SKevin Wolf         if (ret < 0 && result != -ENOSPC) {
18349381094SKevin Wolf             result = ret;
18449381094SKevin Wolf         }
18549381094SKevin Wolf     }
18649381094SKevin Wolf 
18749381094SKevin Wolf     if (result == 0) {
18849381094SKevin Wolf         ret = bdrv_flush(bs->file);
18949381094SKevin Wolf         if (ret < 0) {
19049381094SKevin Wolf             result = ret;
19149381094SKevin Wolf         }
19249381094SKevin Wolf     }
19349381094SKevin Wolf 
19449381094SKevin Wolf     return result;
19549381094SKevin Wolf }
19649381094SKevin Wolf 
19749381094SKevin Wolf int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
19849381094SKevin Wolf     Qcow2Cache *dependency)
19949381094SKevin Wolf {
20049381094SKevin Wolf     int ret;
20149381094SKevin Wolf 
20249381094SKevin Wolf     if (dependency->depends) {
20349381094SKevin Wolf         ret = qcow2_cache_flush_dependency(bs, dependency);
20449381094SKevin Wolf         if (ret < 0) {
20549381094SKevin Wolf             return ret;
20649381094SKevin Wolf         }
20749381094SKevin Wolf     }
20849381094SKevin Wolf 
20949381094SKevin Wolf     if (c->depends && (c->depends != dependency)) {
21049381094SKevin Wolf         ret = qcow2_cache_flush_dependency(bs, c);
21149381094SKevin Wolf         if (ret < 0) {
21249381094SKevin Wolf             return ret;
21349381094SKevin Wolf         }
21449381094SKevin Wolf     }
21549381094SKevin Wolf 
21649381094SKevin Wolf     c->depends = dependency;
21749381094SKevin Wolf     return 0;
21849381094SKevin Wolf }
21949381094SKevin Wolf 
2203de0a294SKevin Wolf void qcow2_cache_depends_on_flush(Qcow2Cache *c)
2213de0a294SKevin Wolf {
2223de0a294SKevin Wolf     c->depends_on_flush = true;
2233de0a294SKevin Wolf }
2243de0a294SKevin Wolf 
225e7108feaSMax Reitz int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
226e7108feaSMax Reitz {
227e7108feaSMax Reitz     int ret, i;
228e7108feaSMax Reitz 
229e7108feaSMax Reitz     ret = qcow2_cache_flush(bs, c);
230e7108feaSMax Reitz     if (ret < 0) {
231e7108feaSMax Reitz         return ret;
232e7108feaSMax Reitz     }
233e7108feaSMax Reitz 
234e7108feaSMax Reitz     for (i = 0; i < c->size; i++) {
235e7108feaSMax Reitz         assert(c->entries[i].ref == 0);
236e7108feaSMax Reitz         c->entries[i].offset = 0;
2372693310eSAlberto Garcia         c->entries[i].lru_counter = 0;
238e7108feaSMax Reitz     }
239e7108feaSMax Reitz 
2402693310eSAlberto Garcia     c->lru_counter = 0;
2412693310eSAlberto Garcia 
242e7108feaSMax Reitz     return 0;
243e7108feaSMax Reitz }
244e7108feaSMax Reitz 
24549381094SKevin Wolf static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
24649381094SKevin Wolf     uint64_t offset, void **table, bool read_from_disk)
24749381094SKevin Wolf {
24849381094SKevin Wolf     BDRVQcowState *s = bs->opaque;
24949381094SKevin Wolf     int i;
25049381094SKevin Wolf     int ret;
251812e4082SAlberto Garcia     int lookup_index;
252fdfbca82SAlberto Garcia     uint64_t min_lru_counter = UINT64_MAX;
253fdfbca82SAlberto Garcia     int min_lru_index = -1;
25449381094SKevin Wolf 
2553cce16f4SKevin Wolf     trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
2563cce16f4SKevin Wolf                           offset, read_from_disk);
2573cce16f4SKevin Wolf 
25849381094SKevin Wolf     /* Check if the table is already cached */
259812e4082SAlberto Garcia     i = lookup_index = (offset / s->cluster_size * 4) % c->size;
260812e4082SAlberto Garcia     do {
261fdfbca82SAlberto Garcia         const Qcow2CachedTable *t = &c->entries[i];
262fdfbca82SAlberto Garcia         if (t->offset == offset) {
26349381094SKevin Wolf             goto found;
26449381094SKevin Wolf         }
265fdfbca82SAlberto Garcia         if (t->ref == 0 && t->lru_counter < min_lru_counter) {
266fdfbca82SAlberto Garcia             min_lru_counter = t->lru_counter;
267fdfbca82SAlberto Garcia             min_lru_index = i;
268fdfbca82SAlberto Garcia         }
269812e4082SAlberto Garcia         if (++i == c->size) {
270812e4082SAlberto Garcia             i = 0;
27149381094SKevin Wolf         }
272812e4082SAlberto Garcia     } while (i != lookup_index);
27349381094SKevin Wolf 
274fdfbca82SAlberto Garcia     if (min_lru_index == -1) {
275fdfbca82SAlberto Garcia         /* This can't happen in current synchronous code, but leave the check
276fdfbca82SAlberto Garcia          * here as a reminder for whoever starts using AIO with the cache */
277fdfbca82SAlberto Garcia         abort();
278fdfbca82SAlberto Garcia     }
279fdfbca82SAlberto Garcia 
280fdfbca82SAlberto Garcia     /* Cache miss: write a table back and replace it */
281fdfbca82SAlberto Garcia     i = min_lru_index;
2823cce16f4SKevin Wolf     trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
2833cce16f4SKevin Wolf                                         c == s->l2_table_cache, i);
28449381094SKevin Wolf     if (i < 0) {
28549381094SKevin Wolf         return i;
28649381094SKevin Wolf     }
28749381094SKevin Wolf 
28849381094SKevin Wolf     ret = qcow2_cache_entry_flush(bs, c, i);
28949381094SKevin Wolf     if (ret < 0) {
29049381094SKevin Wolf         return ret;
29149381094SKevin Wolf     }
29249381094SKevin Wolf 
2933cce16f4SKevin Wolf     trace_qcow2_cache_get_read(qemu_coroutine_self(),
2943cce16f4SKevin Wolf                                c == s->l2_table_cache, i);
29549381094SKevin Wolf     c->entries[i].offset = 0;
29649381094SKevin Wolf     if (read_from_disk) {
29729c1a730SKevin Wolf         if (c == s->l2_table_cache) {
29829c1a730SKevin Wolf             BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
29929c1a730SKevin Wolf         }
30029c1a730SKevin Wolf 
30172e80b89SAlberto Garcia         ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i),
30272e80b89SAlberto Garcia                          s->cluster_size);
30349381094SKevin Wolf         if (ret < 0) {
30449381094SKevin Wolf             return ret;
30549381094SKevin Wolf         }
30649381094SKevin Wolf     }
30749381094SKevin Wolf 
30849381094SKevin Wolf     c->entries[i].offset = offset;
30949381094SKevin Wolf 
31049381094SKevin Wolf     /* And return the right table */
31149381094SKevin Wolf found:
31249381094SKevin Wolf     c->entries[i].ref++;
31372e80b89SAlberto Garcia     *table = qcow2_cache_get_table_addr(bs, c, i);
3143cce16f4SKevin Wolf 
3153cce16f4SKevin Wolf     trace_qcow2_cache_get_done(qemu_coroutine_self(),
3163cce16f4SKevin Wolf                                c == s->l2_table_cache, i);
3173cce16f4SKevin Wolf 
31849381094SKevin Wolf     return 0;
31949381094SKevin Wolf }
32049381094SKevin Wolf 
32149381094SKevin Wolf int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
32249381094SKevin Wolf     void **table)
32349381094SKevin Wolf {
32449381094SKevin Wolf     return qcow2_cache_do_get(bs, c, offset, table, true);
32549381094SKevin Wolf }
32649381094SKevin Wolf 
32749381094SKevin Wolf int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
32849381094SKevin Wolf     void **table)
32949381094SKevin Wolf {
33049381094SKevin Wolf     return qcow2_cache_do_get(bs, c, offset, table, false);
33149381094SKevin Wolf }
33249381094SKevin Wolf 
333*a3f1afb4SAlberto Garcia void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
33449381094SKevin Wolf {
335baf07d60SAlberto Garcia     int i = qcow2_cache_get_table_idx(bs, c, *table);
33649381094SKevin Wolf 
33749381094SKevin Wolf     c->entries[i].ref--;
33849381094SKevin Wolf     *table = NULL;
33949381094SKevin Wolf 
3402693310eSAlberto Garcia     if (c->entries[i].ref == 0) {
3412693310eSAlberto Garcia         c->entries[i].lru_counter = ++c->lru_counter;
3422693310eSAlberto Garcia     }
3432693310eSAlberto Garcia 
34449381094SKevin Wolf     assert(c->entries[i].ref >= 0);
34549381094SKevin Wolf }
34649381094SKevin Wolf 
34772e80b89SAlberto Garcia void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
34872e80b89SAlberto Garcia      void *table)
34949381094SKevin Wolf {
350baf07d60SAlberto Garcia     int i = qcow2_cache_get_table_idx(bs, c, table);
351baf07d60SAlberto Garcia     assert(c->entries[i].offset != 0);
35249381094SKevin Wolf     c->entries[i].dirty = true;
35349381094SKevin Wolf }
354