1 /* 2 * L2/refcount table cache for the QCOW2 format 3 * 4 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "block/block_int.h" 26 #include "qemu-common.h" 27 #include "qcow2.h" 28 #include "trace.h" 29 30 typedef struct Qcow2CachedTable { 31 void* table; 32 int64_t offset; 33 bool dirty; 34 int cache_hits; 35 int ref; 36 } Qcow2CachedTable; 37 38 struct Qcow2Cache { 39 Qcow2CachedTable* entries; 40 struct Qcow2Cache* depends; 41 int size; 42 bool depends_on_flush; 43 }; 44 45 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) 46 { 47 BDRVQcowState *s = bs->opaque; 48 Qcow2Cache *c; 49 int i; 50 51 c = g_malloc0(sizeof(*c)); 52 c->size = num_tables; 53 c->entries = g_malloc0(sizeof(*c->entries) * num_tables); 54 55 for (i = 0; i < c->size; i++) { 56 c->entries[i].table = qemu_blockalign(bs, s->cluster_size); 57 } 58 59 return c; 60 } 61 62 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c) 63 { 64 int i; 65 66 for (i = 0; i < c->size; i++) { 67 assert(c->entries[i].ref == 0); 68 qemu_vfree(c->entries[i].table); 69 } 70 71 g_free(c->entries); 72 g_free(c); 73 74 return 0; 75 } 76 77 static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c) 78 { 79 int ret; 80 81 ret = qcow2_cache_flush(bs, c->depends); 82 if (ret < 0) { 83 return ret; 84 } 85 86 c->depends = NULL; 87 c->depends_on_flush = false; 88 89 return 0; 90 } 91 92 static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) 93 { 94 BDRVQcowState *s = bs->opaque; 95 int ret = 0; 96 97 if (!c->entries[i].dirty || !c->entries[i].offset) { 98 return 0; 99 } 100 101 trace_qcow2_cache_entry_flush(qemu_coroutine_self(), 102 c == s->l2_table_cache, i); 103 104 if (c->depends) { 105 ret = qcow2_cache_flush_dependency(bs, c); 106 } else if (c->depends_on_flush) { 107 ret = bdrv_flush(bs->file); 108 if (ret >= 0) { 109 c->depends_on_flush = false; 110 } 111 } 112 113 if (ret < 0) { 114 return ret; 115 } 116 117 if (c == s->refcount_block_cache) { 118 ret = qcow2_pre_write_overlap_check(bs, 119 QCOW2_OL_DEFAULT & ~QCOW2_OL_REFCOUNT_BLOCK, 120 c->entries[i].offset, s->cluster_size); 121 } else if (c == s->l2_table_cache) { 122 ret = qcow2_pre_write_overlap_check(bs, 123 QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L2, 124 c->entries[i].offset, s->cluster_size); 125 } else { 126 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, 127 c->entries[i].offset, s->cluster_size); 128 } 129 130 if (ret < 0) { 131 return ret; 132 } 133 134 if (c == s->refcount_block_cache) { 135 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART); 136 } else if (c == s->l2_table_cache) { 137 BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); 138 } 139 140 ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table, 141 s->cluster_size); 142 if (ret < 0) { 143 return ret; 144 } 145 146 c->entries[i].dirty = false; 147 148 return 0; 149 } 150 151 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c) 152 { 153 BDRVQcowState *s = bs->opaque; 154 int result = 0; 155 int ret; 156 int i; 157 158 trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache); 159 160 for (i = 0; i < c->size; i++) { 161 ret = qcow2_cache_entry_flush(bs, c, i); 162 if (ret < 0 && result != -ENOSPC) { 163 result = ret; 164 } 165 } 166 167 if (result == 0) { 168 ret = bdrv_flush(bs->file); 169 if (ret < 0) { 170 result = ret; 171 } 172 } 173 174 return result; 175 } 176 177 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, 178 Qcow2Cache *dependency) 179 { 180 int ret; 181 182 if (dependency->depends) { 183 ret = qcow2_cache_flush_dependency(bs, dependency); 184 if (ret < 0) { 185 return ret; 186 } 187 } 188 189 if (c->depends && (c->depends != dependency)) { 190 ret = qcow2_cache_flush_dependency(bs, c); 191 if (ret < 0) { 192 return ret; 193 } 194 } 195 196 c->depends = dependency; 197 return 0; 198 } 199 200 void qcow2_cache_depends_on_flush(Qcow2Cache *c) 201 { 202 c->depends_on_flush = true; 203 } 204 205 int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c) 206 { 207 int ret, i; 208 209 ret = qcow2_cache_flush(bs, c); 210 if (ret < 0) { 211 return ret; 212 } 213 214 for (i = 0; i < c->size; i++) { 215 assert(c->entries[i].ref == 0); 216 c->entries[i].offset = 0; 217 c->entries[i].cache_hits = 0; 218 } 219 220 return 0; 221 } 222 223 static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c) 224 { 225 int i; 226 int min_count = INT_MAX; 227 int min_index = -1; 228 229 230 for (i = 0; i < c->size; i++) { 231 if (c->entries[i].ref) { 232 continue; 233 } 234 235 if (c->entries[i].cache_hits < min_count) { 236 min_index = i; 237 min_count = c->entries[i].cache_hits; 238 } 239 240 /* Give newer hits priority */ 241 /* TODO Check how to optimize the replacement strategy */ 242 c->entries[i].cache_hits /= 2; 243 } 244 245 if (min_index == -1) { 246 /* This can't happen in current synchronous code, but leave the check 247 * here as a reminder for whoever starts using AIO with the cache */ 248 abort(); 249 } 250 return min_index; 251 } 252 253 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, 254 uint64_t offset, void **table, bool read_from_disk) 255 { 256 BDRVQcowState *s = bs->opaque; 257 int i; 258 int ret; 259 260 trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache, 261 offset, read_from_disk); 262 263 /* Check if the table is already cached */ 264 for (i = 0; i < c->size; i++) { 265 if (c->entries[i].offset == offset) { 266 goto found; 267 } 268 } 269 270 /* If not, write a table back and replace it */ 271 i = qcow2_cache_find_entry_to_replace(c); 272 trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(), 273 c == s->l2_table_cache, i); 274 if (i < 0) { 275 return i; 276 } 277 278 ret = qcow2_cache_entry_flush(bs, c, i); 279 if (ret < 0) { 280 return ret; 281 } 282 283 trace_qcow2_cache_get_read(qemu_coroutine_self(), 284 c == s->l2_table_cache, i); 285 c->entries[i].offset = 0; 286 if (read_from_disk) { 287 if (c == s->l2_table_cache) { 288 BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); 289 } 290 291 ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size); 292 if (ret < 0) { 293 return ret; 294 } 295 } 296 297 /* Give the table some hits for the start so that it won't be replaced 298 * immediately. The number 32 is completely arbitrary. */ 299 c->entries[i].cache_hits = 32; 300 c->entries[i].offset = offset; 301 302 /* And return the right table */ 303 found: 304 c->entries[i].cache_hits++; 305 c->entries[i].ref++; 306 *table = c->entries[i].table; 307 308 trace_qcow2_cache_get_done(qemu_coroutine_self(), 309 c == s->l2_table_cache, i); 310 311 return 0; 312 } 313 314 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 315 void **table) 316 { 317 return qcow2_cache_do_get(bs, c, offset, table, true); 318 } 319 320 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 321 void **table) 322 { 323 return qcow2_cache_do_get(bs, c, offset, table, false); 324 } 325 326 int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table) 327 { 328 int i; 329 330 for (i = 0; i < c->size; i++) { 331 if (c->entries[i].table == *table) { 332 goto found; 333 } 334 } 335 return -ENOENT; 336 337 found: 338 c->entries[i].ref--; 339 *table = NULL; 340 341 assert(c->entries[i].ref >= 0); 342 return 0; 343 } 344 345 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table) 346 { 347 int i; 348 349 for (i = 0; i < c->size; i++) { 350 if (c->entries[i].table == table) { 351 goto found; 352 } 353 } 354 abort(); 355 356 found: 357 c->entries[i].dirty = true; 358 } 359