xref: /qemu/block/qcow2-cache.c (revision de82815d)
1 /*
2  * L2/refcount table cache for the QCOW2 format
3  *
4  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "block/block_int.h"
26 #include "qemu-common.h"
27 #include "qcow2.h"
28 #include "trace.h"
29 
30 typedef struct Qcow2CachedTable {
31     void*   table;
32     int64_t offset;
33     bool    dirty;
34     int     cache_hits;
35     int     ref;
36 } Qcow2CachedTable;
37 
38 struct Qcow2Cache {
39     Qcow2CachedTable*       entries;
40     struct Qcow2Cache*      depends;
41     int                     size;
42     bool                    depends_on_flush;
43 };
44 
45 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
46 {
47     BDRVQcowState *s = bs->opaque;
48     Qcow2Cache *c;
49     int i;
50 
51     c = g_malloc0(sizeof(*c));
52     c->size = num_tables;
53     c->entries = g_malloc0(sizeof(*c->entries) * num_tables);
54 
55     for (i = 0; i < c->size; i++) {
56         c->entries[i].table = qemu_try_blockalign(bs->file, s->cluster_size);
57         if (c->entries[i].table == NULL) {
58             goto fail;
59         }
60     }
61 
62     return c;
63 
64 fail:
65     for (i = 0; i < c->size; i++) {
66         qemu_vfree(c->entries[i].table);
67     }
68     g_free(c->entries);
69     g_free(c);
70     return NULL;
71 }
72 
73 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
74 {
75     int i;
76 
77     for (i = 0; i < c->size; i++) {
78         assert(c->entries[i].ref == 0);
79         qemu_vfree(c->entries[i].table);
80     }
81 
82     g_free(c->entries);
83     g_free(c);
84 
85     return 0;
86 }
87 
88 static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
89 {
90     int ret;
91 
92     ret = qcow2_cache_flush(bs, c->depends);
93     if (ret < 0) {
94         return ret;
95     }
96 
97     c->depends = NULL;
98     c->depends_on_flush = false;
99 
100     return 0;
101 }
102 
103 static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
104 {
105     BDRVQcowState *s = bs->opaque;
106     int ret = 0;
107 
108     if (!c->entries[i].dirty || !c->entries[i].offset) {
109         return 0;
110     }
111 
112     trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
113                                   c == s->l2_table_cache, i);
114 
115     if (c->depends) {
116         ret = qcow2_cache_flush_dependency(bs, c);
117     } else if (c->depends_on_flush) {
118         ret = bdrv_flush(bs->file);
119         if (ret >= 0) {
120             c->depends_on_flush = false;
121         }
122     }
123 
124     if (ret < 0) {
125         return ret;
126     }
127 
128     if (c == s->refcount_block_cache) {
129         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
130                 c->entries[i].offset, s->cluster_size);
131     } else if (c == s->l2_table_cache) {
132         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
133                 c->entries[i].offset, s->cluster_size);
134     } else {
135         ret = qcow2_pre_write_overlap_check(bs, 0,
136                 c->entries[i].offset, s->cluster_size);
137     }
138 
139     if (ret < 0) {
140         return ret;
141     }
142 
143     if (c == s->refcount_block_cache) {
144         BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
145     } else if (c == s->l2_table_cache) {
146         BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
147     }
148 
149     ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
150         s->cluster_size);
151     if (ret < 0) {
152         return ret;
153     }
154 
155     c->entries[i].dirty = false;
156 
157     return 0;
158 }
159 
160 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
161 {
162     BDRVQcowState *s = bs->opaque;
163     int result = 0;
164     int ret;
165     int i;
166 
167     trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
168 
169     for (i = 0; i < c->size; i++) {
170         ret = qcow2_cache_entry_flush(bs, c, i);
171         if (ret < 0 && result != -ENOSPC) {
172             result = ret;
173         }
174     }
175 
176     if (result == 0) {
177         ret = bdrv_flush(bs->file);
178         if (ret < 0) {
179             result = ret;
180         }
181     }
182 
183     return result;
184 }
185 
186 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
187     Qcow2Cache *dependency)
188 {
189     int ret;
190 
191     if (dependency->depends) {
192         ret = qcow2_cache_flush_dependency(bs, dependency);
193         if (ret < 0) {
194             return ret;
195         }
196     }
197 
198     if (c->depends && (c->depends != dependency)) {
199         ret = qcow2_cache_flush_dependency(bs, c);
200         if (ret < 0) {
201             return ret;
202         }
203     }
204 
205     c->depends = dependency;
206     return 0;
207 }
208 
209 void qcow2_cache_depends_on_flush(Qcow2Cache *c)
210 {
211     c->depends_on_flush = true;
212 }
213 
214 int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
215 {
216     int ret, i;
217 
218     ret = qcow2_cache_flush(bs, c);
219     if (ret < 0) {
220         return ret;
221     }
222 
223     for (i = 0; i < c->size; i++) {
224         assert(c->entries[i].ref == 0);
225         c->entries[i].offset = 0;
226         c->entries[i].cache_hits = 0;
227     }
228 
229     return 0;
230 }
231 
232 static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
233 {
234     int i;
235     int min_count = INT_MAX;
236     int min_index = -1;
237 
238 
239     for (i = 0; i < c->size; i++) {
240         if (c->entries[i].ref) {
241             continue;
242         }
243 
244         if (c->entries[i].cache_hits < min_count) {
245             min_index = i;
246             min_count = c->entries[i].cache_hits;
247         }
248 
249         /* Give newer hits priority */
250         /* TODO Check how to optimize the replacement strategy */
251         c->entries[i].cache_hits /= 2;
252     }
253 
254     if (min_index == -1) {
255         /* This can't happen in current synchronous code, but leave the check
256          * here as a reminder for whoever starts using AIO with the cache */
257         abort();
258     }
259     return min_index;
260 }
261 
262 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
263     uint64_t offset, void **table, bool read_from_disk)
264 {
265     BDRVQcowState *s = bs->opaque;
266     int i;
267     int ret;
268 
269     trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
270                           offset, read_from_disk);
271 
272     /* Check if the table is already cached */
273     for (i = 0; i < c->size; i++) {
274         if (c->entries[i].offset == offset) {
275             goto found;
276         }
277     }
278 
279     /* If not, write a table back and replace it */
280     i = qcow2_cache_find_entry_to_replace(c);
281     trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
282                                         c == s->l2_table_cache, i);
283     if (i < 0) {
284         return i;
285     }
286 
287     ret = qcow2_cache_entry_flush(bs, c, i);
288     if (ret < 0) {
289         return ret;
290     }
291 
292     trace_qcow2_cache_get_read(qemu_coroutine_self(),
293                                c == s->l2_table_cache, i);
294     c->entries[i].offset = 0;
295     if (read_from_disk) {
296         if (c == s->l2_table_cache) {
297             BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
298         }
299 
300         ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
301         if (ret < 0) {
302             return ret;
303         }
304     }
305 
306     /* Give the table some hits for the start so that it won't be replaced
307      * immediately. The number 32 is completely arbitrary. */
308     c->entries[i].cache_hits = 32;
309     c->entries[i].offset = offset;
310 
311     /* And return the right table */
312 found:
313     c->entries[i].cache_hits++;
314     c->entries[i].ref++;
315     *table = c->entries[i].table;
316 
317     trace_qcow2_cache_get_done(qemu_coroutine_self(),
318                                c == s->l2_table_cache, i);
319 
320     return 0;
321 }
322 
323 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
324     void **table)
325 {
326     return qcow2_cache_do_get(bs, c, offset, table, true);
327 }
328 
329 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
330     void **table)
331 {
332     return qcow2_cache_do_get(bs, c, offset, table, false);
333 }
334 
335 int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
336 {
337     int i;
338 
339     for (i = 0; i < c->size; i++) {
340         if (c->entries[i].table == *table) {
341             goto found;
342         }
343     }
344     return -ENOENT;
345 
346 found:
347     c->entries[i].ref--;
348     *table = NULL;
349 
350     assert(c->entries[i].ref >= 0);
351     return 0;
352 }
353 
354 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
355 {
356     int i;
357 
358     for (i = 0; i < c->size; i++) {
359         if (c->entries[i].table == table) {
360             goto found;
361         }
362     }
363     abort();
364 
365 found:
366     c->entries[i].dirty = true;
367 }
368