xref: /qemu/block/qcow2-cache.c (revision 7a4e543d)
1 /*
2  * L2/refcount table cache for the QCOW2 format
3  *
4  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* Needed for CONFIG_MADVISE */
26 #include "qemu/osdep.h"
27 
28 #if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
29 #include <sys/mman.h>
30 #endif
31 
32 #include "block/block_int.h"
33 #include "qemu-common.h"
34 #include "qcow2.h"
35 #include "trace.h"
36 
/*
 * Bookkeeping for a single slot of the cache.  The table data itself lives
 * in Qcow2Cache.table_array; this structure only describes the slot.
 */
typedef struct Qcow2CachedTable {
    /* Offset in the image file that this slot caches; 0 means unused */
    int64_t offset;
    /* Value of the cache-wide LRU counter when the last reference was put */
    uint64_t lru_counter;
    /* Number of users currently holding the table (get/put balance) */
    int ref;
    /* True if the slot was marked dirty and not yet written back */
    bool dirty;
} Qcow2CachedTable;
43 
44 struct Qcow2Cache {
45     Qcow2CachedTable       *entries;
46     struct Qcow2Cache      *depends;
47     int                     size;
48     bool                    depends_on_flush;
49     void                   *table_array;
50     uint64_t                lru_counter;
51     uint64_t                cache_clean_lru_counter;
52 };
53 
54 static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
55                     Qcow2Cache *c, int table)
56 {
57     BDRVQcow2State *s = bs->opaque;
58     return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
59 }
60 
61 static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
62                   Qcow2Cache *c, void *table)
63 {
64     BDRVQcow2State *s = bs->opaque;
65     ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
66     int idx = table_offset / s->cluster_size;
67     assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
68     return idx;
69 }
70 
71 static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
72                                       int i, int num_tables)
73 {
74 #if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID
75     BDRVQcow2State *s = bs->opaque;
76     void *t = qcow2_cache_get_table_addr(bs, c, i);
77     int align = getpagesize();
78     size_t mem_size = (size_t) s->cluster_size * num_tables;
79     size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
80     size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
81     if (length > 0) {
82         qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED);
83     }
84 #endif
85 }
86 
87 static inline bool can_clean_entry(Qcow2Cache *c, int i)
88 {
89     Qcow2CachedTable *t = &c->entries[i];
90     return t->ref == 0 && !t->dirty && t->offset != 0 &&
91         t->lru_counter <= c->cache_clean_lru_counter;
92 }
93 
94 void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
95 {
96     int i = 0;
97     while (i < c->size) {
98         int to_clean = 0;
99 
100         /* Skip the entries that we don't need to clean */
101         while (i < c->size && !can_clean_entry(c, i)) {
102             i++;
103         }
104 
105         /* And count how many we can clean in a row */
106         while (i < c->size && can_clean_entry(c, i)) {
107             c->entries[i].offset = 0;
108             c->entries[i].lru_counter = 0;
109             i++;
110             to_clean++;
111         }
112 
113         if (to_clean > 0) {
114             qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
115         }
116     }
117 
118     c->cache_clean_lru_counter = c->lru_counter;
119 }
120 
121 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
122 {
123     BDRVQcow2State *s = bs->opaque;
124     Qcow2Cache *c;
125 
126     c = g_new0(Qcow2Cache, 1);
127     c->size = num_tables;
128     c->entries = g_try_new0(Qcow2CachedTable, num_tables);
129     c->table_array = qemu_try_blockalign(bs->file->bs,
130                                          (size_t) num_tables * s->cluster_size);
131 
132     if (!c->entries || !c->table_array) {
133         qemu_vfree(c->table_array);
134         g_free(c->entries);
135         g_free(c);
136         c = NULL;
137     }
138 
139     return c;
140 }
141 
142 int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
143 {
144     int i;
145 
146     for (i = 0; i < c->size; i++) {
147         assert(c->entries[i].ref == 0);
148     }
149 
150     qemu_vfree(c->table_array);
151     g_free(c->entries);
152     g_free(c);
153 
154     return 0;
155 }
156 
157 static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
158 {
159     int ret;
160 
161     ret = qcow2_cache_flush(bs, c->depends);
162     if (ret < 0) {
163         return ret;
164     }
165 
166     c->depends = NULL;
167     c->depends_on_flush = false;
168 
169     return 0;
170 }
171 
172 static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
173 {
174     BDRVQcow2State *s = bs->opaque;
175     int ret = 0;
176 
177     if (!c->entries[i].dirty || !c->entries[i].offset) {
178         return 0;
179     }
180 
181     trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
182                                   c == s->l2_table_cache, i);
183 
184     if (c->depends) {
185         ret = qcow2_cache_flush_dependency(bs, c);
186     } else if (c->depends_on_flush) {
187         ret = bdrv_flush(bs->file->bs);
188         if (ret >= 0) {
189             c->depends_on_flush = false;
190         }
191     }
192 
193     if (ret < 0) {
194         return ret;
195     }
196 
197     if (c == s->refcount_block_cache) {
198         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
199                 c->entries[i].offset, s->cluster_size);
200     } else if (c == s->l2_table_cache) {
201         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
202                 c->entries[i].offset, s->cluster_size);
203     } else {
204         ret = qcow2_pre_write_overlap_check(bs, 0,
205                 c->entries[i].offset, s->cluster_size);
206     }
207 
208     if (ret < 0) {
209         return ret;
210     }
211 
212     if (c == s->refcount_block_cache) {
213         BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
214     } else if (c == s->l2_table_cache) {
215         BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
216     }
217 
218     ret = bdrv_pwrite(bs->file->bs, c->entries[i].offset,
219                       qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
220     if (ret < 0) {
221         return ret;
222     }
223 
224     c->entries[i].dirty = false;
225 
226     return 0;
227 }
228 
229 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
230 {
231     BDRVQcow2State *s = bs->opaque;
232     int result = 0;
233     int ret;
234     int i;
235 
236     trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
237 
238     for (i = 0; i < c->size; i++) {
239         ret = qcow2_cache_entry_flush(bs, c, i);
240         if (ret < 0 && result != -ENOSPC) {
241             result = ret;
242         }
243     }
244 
245     if (result == 0) {
246         ret = bdrv_flush(bs->file->bs);
247         if (ret < 0) {
248             result = ret;
249         }
250     }
251 
252     return result;
253 }
254 
255 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
256     Qcow2Cache *dependency)
257 {
258     int ret;
259 
260     if (dependency->depends) {
261         ret = qcow2_cache_flush_dependency(bs, dependency);
262         if (ret < 0) {
263             return ret;
264         }
265     }
266 
267     if (c->depends && (c->depends != dependency)) {
268         ret = qcow2_cache_flush_dependency(bs, c);
269         if (ret < 0) {
270             return ret;
271         }
272     }
273 
274     c->depends = dependency;
275     return 0;
276 }
277 
278 void qcow2_cache_depends_on_flush(Qcow2Cache *c)
279 {
280     c->depends_on_flush = true;
281 }
282 
283 int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
284 {
285     int ret, i;
286 
287     ret = qcow2_cache_flush(bs, c);
288     if (ret < 0) {
289         return ret;
290     }
291 
292     for (i = 0; i < c->size; i++) {
293         assert(c->entries[i].ref == 0);
294         c->entries[i].offset = 0;
295         c->entries[i].lru_counter = 0;
296     }
297 
298     qcow2_cache_table_release(bs, c, 0, c->size);
299 
300     c->lru_counter = 0;
301 
302     return 0;
303 }
304 
305 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
306     uint64_t offset, void **table, bool read_from_disk)
307 {
308     BDRVQcow2State *s = bs->opaque;
309     int i;
310     int ret;
311     int lookup_index;
312     uint64_t min_lru_counter = UINT64_MAX;
313     int min_lru_index = -1;
314 
315     trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
316                           offset, read_from_disk);
317 
318     /* Check if the table is already cached */
319     i = lookup_index = (offset / s->cluster_size * 4) % c->size;
320     do {
321         const Qcow2CachedTable *t = &c->entries[i];
322         if (t->offset == offset) {
323             goto found;
324         }
325         if (t->ref == 0 && t->lru_counter < min_lru_counter) {
326             min_lru_counter = t->lru_counter;
327             min_lru_index = i;
328         }
329         if (++i == c->size) {
330             i = 0;
331         }
332     } while (i != lookup_index);
333 
334     if (min_lru_index == -1) {
335         /* This can't happen in current synchronous code, but leave the check
336          * here as a reminder for whoever starts using AIO with the cache */
337         abort();
338     }
339 
340     /* Cache miss: write a table back and replace it */
341     i = min_lru_index;
342     trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
343                                         c == s->l2_table_cache, i);
344 
345     ret = qcow2_cache_entry_flush(bs, c, i);
346     if (ret < 0) {
347         return ret;
348     }
349 
350     trace_qcow2_cache_get_read(qemu_coroutine_self(),
351                                c == s->l2_table_cache, i);
352     c->entries[i].offset = 0;
353     if (read_from_disk) {
354         if (c == s->l2_table_cache) {
355             BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
356         }
357 
358         ret = bdrv_pread(bs->file->bs, offset,
359                          qcow2_cache_get_table_addr(bs, c, i),
360                          s->cluster_size);
361         if (ret < 0) {
362             return ret;
363         }
364     }
365 
366     c->entries[i].offset = offset;
367 
368     /* And return the right table */
369 found:
370     c->entries[i].ref++;
371     *table = qcow2_cache_get_table_addr(bs, c, i);
372 
373     trace_qcow2_cache_get_done(qemu_coroutine_self(),
374                                c == s->l2_table_cache, i);
375 
376     return 0;
377 }
378 
379 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
380     void **table)
381 {
382     return qcow2_cache_do_get(bs, c, offset, table, true);
383 }
384 
385 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
386     void **table)
387 {
388     return qcow2_cache_do_get(bs, c, offset, table, false);
389 }
390 
391 void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
392 {
393     int i = qcow2_cache_get_table_idx(bs, c, *table);
394 
395     c->entries[i].ref--;
396     *table = NULL;
397 
398     if (c->entries[i].ref == 0) {
399         c->entries[i].lru_counter = ++c->lru_counter;
400     }
401 
402     assert(c->entries[i].ref >= 0);
403 }
404 
405 void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
406      void *table)
407 {
408     int i = qcow2_cache_get_table_idx(bs, c, table);
409     assert(c->entries[i].offset != 0);
410     c->entries[i].dirty = true;
411 }
412