xref: /qemu/block/qcow2-cache.c (revision 29c1a730)
1 /*
2  * L2/refcount table cache for the QCOW2 format
3  *
4  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "block_int.h"
26 #include "qemu-common.h"
27 #include "qcow2.h"
28 
29 typedef struct Qcow2CachedTable {
30     void*   table;
31     int64_t offset;
32     bool    dirty;
33     int     cache_hits;
34     int     ref;
35 } Qcow2CachedTable;
36 
37 struct Qcow2Cache {
38     int                     size;
39     Qcow2CachedTable*       entries;
40     struct Qcow2Cache*      depends;
41     bool                    writethrough;
42 };
43 
44 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
45     bool writethrough)
46 {
47     BDRVQcowState *s = bs->opaque;
48     Qcow2Cache *c;
49     int i;
50 
51     c = qemu_mallocz(sizeof(*c));
52     c->size = num_tables;
53     c->entries = qemu_mallocz(sizeof(*c->entries) * num_tables);
54     c->writethrough = writethrough;
55 
56     for (i = 0; i < c->size; i++) {
57         c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
58     }
59 
60     return c;
61 }
62 
63 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
64 {
65     int i;
66 
67     for (i = 0; i < c->size; i++) {
68         assert(c->entries[i].ref == 0);
69         qemu_vfree(c->entries[i].table);
70     }
71 
72     qemu_free(c->entries);
73     qemu_free(c);
74 
75     return 0;
76 }
77 
78 static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
79 {
80     int ret;
81 
82     ret = qcow2_cache_flush(bs, c->depends);
83     if (ret < 0) {
84         return ret;
85     }
86 
87     c->depends = NULL;
88     return 0;
89 }
90 
91 static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
92 {
93     BDRVQcowState *s = bs->opaque;
94     int ret;
95 
96     if (!c->entries[i].dirty || !c->entries[i].offset) {
97         return 0;
98     }
99 
100     if (c->depends) {
101         ret = qcow2_cache_flush_dependency(bs, c);
102         if (ret < 0) {
103             return ret;
104         }
105     }
106 
107     if (c == s->refcount_block_cache) {
108         BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
109     } else if (c == s->l2_table_cache) {
110         BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
111     }
112 
113     ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
114         s->cluster_size);
115     if (ret < 0) {
116         return ret;
117     }
118 
119     c->entries[i].dirty = false;
120 
121     return 0;
122 }
123 
124 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
125 {
126     int result = 0;
127     int ret;
128     int i;
129 
130     for (i = 0; i < c->size; i++) {
131         ret = qcow2_cache_entry_flush(bs, c, i);
132         if (ret < 0 && result != -ENOSPC) {
133             result = ret;
134         }
135     }
136 
137     if (result == 0) {
138         ret = bdrv_flush(bs->file);
139         if (ret < 0) {
140             result = ret;
141         }
142     }
143 
144     return result;
145 }
146 
147 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
148     Qcow2Cache *dependency)
149 {
150     int ret;
151 
152     if (dependency->depends) {
153         ret = qcow2_cache_flush_dependency(bs, dependency);
154         if (ret < 0) {
155             return ret;
156         }
157     }
158 
159     if (c->depends && (c->depends != dependency)) {
160         ret = qcow2_cache_flush_dependency(bs, c);
161         if (ret < 0) {
162             return ret;
163         }
164     }
165 
166     c->depends = dependency;
167     return 0;
168 }
169 
170 static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
171 {
172     int i;
173     int min_count = INT_MAX;
174     int min_index = -1;
175 
176 
177     for (i = 0; i < c->size; i++) {
178         if (c->entries[i].ref) {
179             continue;
180         }
181 
182         if (c->entries[i].cache_hits < min_count) {
183             min_index = i;
184             min_count = c->entries[i].cache_hits;
185         }
186 
187         /* Give newer hits priority */
188         /* TODO Check how to optimize the replacement strategy */
189         c->entries[i].cache_hits /= 2;
190     }
191 
192     if (min_index == -1) {
193         /* This can't happen in current synchronous code, but leave the check
194          * here as a reminder for whoever starts using AIO with the cache */
195         abort();
196     }
197     return min_index;
198 }
199 
200 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
201     uint64_t offset, void **table, bool read_from_disk)
202 {
203     BDRVQcowState *s = bs->opaque;
204     int i;
205     int ret;
206 
207     /* Check if the table is already cached */
208     for (i = 0; i < c->size; i++) {
209         if (c->entries[i].offset == offset) {
210             goto found;
211         }
212     }
213 
214     /* If not, write a table back and replace it */
215     i = qcow2_cache_find_entry_to_replace(c);
216     if (i < 0) {
217         return i;
218     }
219 
220     ret = qcow2_cache_entry_flush(bs, c, i);
221     if (ret < 0) {
222         return ret;
223     }
224 
225     c->entries[i].offset = 0;
226     if (read_from_disk) {
227         if (c == s->l2_table_cache) {
228             BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
229         }
230 
231         ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
232         if (ret < 0) {
233             return ret;
234         }
235     }
236 
237     /* Give the table some hits for the start so that it won't be replaced
238      * immediately. The number 32 is completely arbitrary. */
239     c->entries[i].cache_hits = 32;
240     c->entries[i].offset = offset;
241 
242     /* And return the right table */
243 found:
244     c->entries[i].cache_hits++;
245     c->entries[i].ref++;
246     *table = c->entries[i].table;
247     return 0;
248 }
249 
250 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
251     void **table)
252 {
253     return qcow2_cache_do_get(bs, c, offset, table, true);
254 }
255 
256 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
257     void **table)
258 {
259     return qcow2_cache_do_get(bs, c, offset, table, false);
260 }
261 
262 int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
263 {
264     int i;
265 
266     for (i = 0; i < c->size; i++) {
267         if (c->entries[i].table == *table) {
268             goto found;
269         }
270     }
271     return -ENOENT;
272 
273 found:
274     c->entries[i].ref--;
275     *table = NULL;
276 
277     assert(c->entries[i].ref >= 0);
278 
279     if (c->writethrough) {
280         return qcow2_cache_entry_flush(bs, c, i);
281     } else {
282         return 0;
283     }
284 }
285 
286 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
287 {
288     int i;
289 
290     for (i = 0; i < c->size; i++) {
291         if (c->entries[i].table == table) {
292             goto found;
293         }
294     }
295     abort();
296 
297 found:
298     c->entries[i].dirty = true;
299 }
300 
301