1 #include <limits.h> 2 #include <sys/types.h> 3 #include <sys/stat.h> 4 #include "basefs_allocator.h" 5 #include "block_range.h" 6 #include "hashmap.h" 7 #include "base_fs.h" 8 9 struct base_fs_allocator { 10 struct ext2fs_hashmap *entries; 11 struct basefs_entry *cur_entry; basefs_open(const char * file)12 /* The next expected logical block to allocate for cur_entry. */ 13 blk64_t next_lblk; 14 /* Blocks which are definitely owned by a single inode in BaseFS. */ 15 ext2fs_block_bitmap exclusive_block_map; 16 /* Blocks which are available to the first inode that requests it. */ 17 ext2fs_block_bitmap dedup_block_map; 18 }; 19 20 static errcode_t basefs_block_allocator(ext2_filsys, blk64_t, blk64_t *, 21 struct blk_alloc_ctx *ctx); 22 23 /* 24 * Free any reserved, but unconsumed block ranges in the allocator. This both 25 * frees the block_range_list data structure and unreserves exclusive blocks 26 * from the block map. 27 */ 28 static void fs_free_blocks_range(ext2_filsys fs, 29 struct base_fs_allocator *allocator, 30 struct block_range_list *list) 31 { 32 ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map; 33 34 blk64_t block; 35 while (list->head) { basefs_readline(FILE * f,const char * mountpoint,int * err)36 block = consume_next_block(list); 37 if (ext2fs_test_block_bitmap2(exclusive_map, block)) { 38 ext2fs_unmark_block_bitmap2(fs->block_map, block); 39 ext2fs_unmark_block_bitmap2(exclusive_map, block); 40 } 41 } 42 } 43 44 /* 45 * Free any blocks in the bitmap that were reserved but never used. This is 46 * needed to free dedup_block_map and ensure the free block bitmap is 47 * internally consistent. 48 */ 49 static void fs_free_blocks_bitmap(ext2_filsys fs, ext2fs_block_bitmap bitmap) 50 { 51 blk64_t block = 0; 52 blk64_t start = fs->super->s_first_data_block; 53 blk64_t end = ext2fs_blocks_count(fs->super) - 1; 54 errcode_t retval; 55 56 for (;;) { 57 retval = ext2fs_find_first_set_block_bitmap2(bitmap, start, end, 58 &block); 59 if (retval) 60 break; 61 ext2fs_unmark_block_bitmap2(fs->block_map, block); 62 start = block + 1; 63 } 64 } 65 66 static void basefs_allocator_free(ext2_filsys fs, 67 struct base_fs_allocator *allocator) 68 { 69 struct basefs_entry *e; 70 struct ext2fs_hashmap_entry *it = NULL; 71 struct ext2fs_hashmap *entries = allocator->entries; 72 73 if (entries) { 74 while ((e = ext2fs_hashmap_iter_in_order(entries, &it))) { 75 fs_free_blocks_range(fs, allocator, &e->blocks); 76 delete_block_ranges(&e->blocks); 77 } 78 ext2fs_hashmap_free(entries); 79 } 80 fs_free_blocks_bitmap(fs, allocator->dedup_block_map); 81 ext2fs_free_block_bitmap(allocator->exclusive_block_map); 82 ext2fs_free_block_bitmap(allocator->dedup_block_map); 83 free(allocator); 84 } 85 86 /* 87 * Build a bitmap of which blocks are definitely owned by exactly one file in 88 * Base FS. Blocks which are not valid or are de-duplicated are skipped. This 89 * is called during allocator initialization, to ensure that libext2fs does 90 * not allocate which we want to re-use. 91 * free_base_fs_entry(void * e)92 * If a block was allocated in the initial filesystem, it can never be re-used, 93 * so it will appear in neither the exclusive or dedup set. If a block is used 94 * by multiple files, it will be removed from the owned set and instead added 95 * to the dedup set. 96 * 97 * The dedup set is not removed from fs->block_map. This allows us to re-use 98 * dedup blocks separately and not have them be allocated outside of file data. 99 * 100 * This function returns non-zero if the block was owned, and 0 otherwise. 101 */ 102 static int fs_reserve_block(ext2_filsys fs, 103 struct base_fs_allocator *allocator, 104 blk64_t block) 105 { 106 ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map; 107 ext2fs_block_bitmap dedup_map = allocator->dedup_block_map; 108 109 if (block >= ext2fs_blocks_count(fs->super)) 110 return 0; 111 112 if (ext2fs_test_block_bitmap2(fs->block_map, block)) { 113 if (!ext2fs_test_block_bitmap2(exclusive_map, block)) 114 return 0; 115 ext2fs_unmark_block_bitmap2(exclusive_map, block); 116 ext2fs_mark_block_bitmap2(dedup_map, block); 117 return 0; 118 } else { 119 ext2fs_mark_block_bitmap2(fs->block_map, block); 120 ext2fs_mark_block_bitmap2(exclusive_map, block); 121 return 1; 122 } 123 } 124 125 /* 126 * Walk the requested block list and reserve blocks, either into the owned 127 * pool or the dedup pool as appropriate. We stop once the file has enough 128 * owned blocks to satisfy |file_size|. This allows any extra blocks to be 129 * re-used, since otherwise a large block movement between files could 130 * trigger block allocation errors. 131 */ 132 static void fs_reserve_blocks_range(ext2_filsys fs, init(const char * file,const char * mountpoint)133 struct base_fs_allocator *allocator, 134 struct block_range_list *list, 135 off_t file_size) 136 { 137 blk64_t block; 138 off_t blocks_needed; 139 off_t blocks_acquired = 0; 140 struct block_range *blocks = list->head; 141 142 blocks_needed = file_size + (fs->blocksize - 1); 143 blocks_needed /= fs->blocksize; 144 145 while (blocks) { 146 for (block = blocks->start; block <= blocks->end; block++) { 147 if (fs_reserve_block(fs, allocator, block)) 148 blocks_acquired++; 149 if (blocks_acquired >= blocks_needed) 150 return; 151 } 152 blocks = blocks->next; 153 } 154 } 155 156 /* 157 * For each file in the base FS map, ensure that its blocks are reserved in 158 * the actual block map. This prevents libext2fs from allocating them for 159 * general purpose use, and ensures that if the file needs data blocks, they 160 * can be re-acquired exclusively for that file. 161 * 162 * If a file in the base map is missing, or not a regular file in the new add_block(ext2_filsys fs EXT2FS_ATTR ((unused)),blk64_t blocknr,int metadata,void * data)163 * filesystem, then it's skipped to ensure that its blocks are reusable. 164 */ 165 static errcode_t fs_reserve_blocks(ext2_filsys fs, 166 struct base_fs_allocator *allocator, 167 const char *src_dir) 168 { 169 int nbytes; 170 char full_path[PATH_MAX]; 171 const char *sep = "/"; 172 struct stat st; 173 struct basefs_entry *e; 174 struct ext2fs_hashmap_entry *it = NULL; 175 struct ext2fs_hashmap *entries = allocator->entries; 176 177 if (strlen(src_dir) && src_dir[strlen(src_dir) - 1] == '/') 178 sep = ""; 179 180 while ((e = ext2fs_hashmap_iter_in_order(entries, &it))) { 181 nbytes = snprintf(full_path, sizeof(full_path), "%s%s%s", 182 src_dir, sep, e->path); 183 if (nbytes >= sizeof(full_path)) 184 return ENAMETOOLONG; 185 if (lstat(full_path, &st) || !S_ISREG(st.st_mode)) 186 continue; 187 fs_reserve_blocks_range(fs, allocator, &e->blocks, st.st_size); 188 } 189 return 0; 190 } 191 192 errcode_t base_fs_alloc_load(ext2_filsys fs, const char *file, 193 const char *mountpoint, const char *src_dir) 194 { cleanup(void * data)195 errcode_t retval = 0; 196 struct base_fs_allocator *allocator; 197 198 allocator = calloc(1, sizeof(*allocator)); 199 if (!allocator) { 200 retval = ENOMEM; 201 goto out; 202 } 203 204 retval = ext2fs_read_bitmaps(fs); 205 if (retval) 206 goto err_load; 207 208 allocator->cur_entry = NULL; 209 allocator->entries = basefs_parse(file, mountpoint); 210 if (!allocator->entries) { 211 retval = EIO; 212 goto err_load; 213 } 214 retval = ext2fs_allocate_block_bitmap(fs, "exclusive map", 215 &allocator->exclusive_block_map); 216 if (retval) 217 goto err_load; 218 retval = ext2fs_allocate_block_bitmap(fs, "dedup map", 219 &allocator->dedup_block_map); 220 if (retval) 221 goto err_load; 222 223 retval = fs_reserve_blocks(fs, allocator, src_dir); 224 if (retval) 225 goto err_load; 226 227 /* Override the default allocator */ 228 fs->get_alloc_block2 = basefs_block_allocator; 229 fs->priv_data = allocator; 230 231 goto out; 232 233 err_load: 234 basefs_allocator_free(fs, allocator); 235 out: 236 return retval; 237 } 238 239 /* Try and acquire the next usable block from the Base FS map. */ 240 static errcode_t get_next_block(ext2_filsys fs, struct base_fs_allocator *allocator, 241 struct block_range_list* list, blk64_t *ret) 242 { 243 blk64_t block; 244 ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map; 245 ext2fs_block_bitmap dedup_map = allocator->dedup_block_map; 246 247 if (!list->head) 248 return EXT2_ET_BLOCK_ALLOC_FAIL; 249 250 block = consume_next_block(list); 251 if (block >= ext2fs_blocks_count(fs->super)) 252 return EXT2_ET_BLOCK_ALLOC_FAIL; 253 if (ext2fs_test_block_bitmap2(exclusive_map, block)) { 254 ext2fs_unmark_block_bitmap2(exclusive_map, block); 255 *ret = block; 256 return 0; 257 } 258 if (ext2fs_test_block_bitmap2(dedup_map, block)) { 259 ext2fs_unmark_block_bitmap2(dedup_map, block); 260 *ret = block; 261 return 0; 262 } 263 return EXT2_ET_BLOCK_ALLOC_FAIL; 264 } 265 266 /* 267 * BaseFS lists blocks in logical block order. However, the allocator hook is 268 * only called if a block needs to be allocated. In the case of a deduplicated 269 * block, or a hole, the hook is not invoked. This means the next block 270 * allocation request will be out of sequence. For example, consider if BaseFS 271 * specifies the following (0 being a hole): 272 * 1 2 3 0 4 5 273 * 274 * If the new file has a hole at logical block 0, we could accidentally 275 * shift the entire expected block list as follows: 276 * 0 1 2 0 3 4 277 * 278 * To account for this, we track the next expected logical block in the 279 * allocator. If the current request is for a later logical block, we skip and 280 * free the intermediate physical blocks that would have been allocated. This 281 * ensures the original block assignment is respected. 282 */ 283 static void skip_blocks(ext2_filsys fs, struct base_fs_allocator *allocator, 284 struct blk_alloc_ctx *ctx) 285 { 286 blk64_t block; 287 struct block_range_list *list = &allocator->cur_entry->blocks; 288 ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map; 289 290 while (list->head && allocator->next_lblk < ctx->lblk) { 291 block = consume_next_block(list); 292 if (block >= ext2fs_blocks_count(fs->super)) 293 continue; 294 if (ext2fs_test_block_bitmap2(exclusive_map, block)) { 295 ext2fs_unmark_block_bitmap2(exclusive_map, block); 296 ext2fs_unmark_block_bitmap2(fs->block_map, block); 297 } 298 allocator->next_lblk++; 299 } 300 } 301 302 static errcode_t basefs_block_allocator(ext2_filsys fs, blk64_t goal, 303 blk64_t *ret, struct blk_alloc_ctx *ctx) 304 { 305 errcode_t retval; 306 struct base_fs_allocator *allocator = fs->priv_data; 307 struct basefs_entry *e = allocator->cur_entry; 308 ext2fs_block_bitmap dedup_map = allocator->dedup_block_map; 309 310 if (e && ctx && (ctx->flags & BLOCK_ALLOC_DATA)) { 311 if (allocator->next_lblk < ctx->lblk) 312 skip_blocks(fs, allocator, ctx); 313 allocator->next_lblk = ctx->lblk + 1; 314 315 if (!get_next_block(fs, allocator, &e->blocks, ret)) 316 return 0; 317 } 318 319 retval = ext2fs_new_block2(fs, goal, fs->block_map, ret); 320 if (!retval) { 321 ext2fs_mark_block_bitmap2(fs->block_map, *ret); 322 return 0; 323 } 324 if (retval != EXT2_ET_BLOCK_ALLOC_FAIL) 325 return retval; 326 327 /* Try to steal a block from the dedup pool. */ 328 retval = ext2fs_find_first_set_block_bitmap2(dedup_map, 329 fs->super->s_first_data_block, 330 ext2fs_blocks_count(fs->super) - 1, ret); 331 if (!retval) { 332 ext2fs_unmark_block_bitmap2(dedup_map, *ret); 333 return 0; 334 } 335 336 /* 337 * As a last resort, take any block from our file's list. This 338 * risks bloating the diff, but means we are more likely to 339 * successfully build an image. 340 */ 341 while (e->blocks.head) { 342 if (!get_next_block(fs, allocator, &e->blocks, ret)) 343 return 0; 344 } 345 return EXT2_ET_BLOCK_ALLOC_FAIL; 346 } 347 348 void base_fs_alloc_cleanup(ext2_filsys fs) 349 { 350 basefs_allocator_free(fs, fs->priv_data); 351 fs->priv_data = NULL; 352 fs->get_alloc_block2 = NULL; 353 } 354 355 errcode_t base_fs_alloc_set_target(ext2_filsys fs, const char *target_path, 356 const char *name EXT2FS_ATTR((unused)), 357 ext2_ino_t parent_ino EXT2FS_ATTR((unused)), 358 ext2_ino_t root EXT2FS_ATTR((unused)), mode_t mode) 359 { 360 struct base_fs_allocator *allocator = fs->priv_data; 361 362 if (mode != S_IFREG) 363 return 0; 364 365 if (allocator) { 366 allocator->cur_entry = ext2fs_hashmap_lookup(allocator->entries, 367 target_path, 368 strlen(target_path)); 369 allocator->next_lblk = 0; 370 } 371 return 0; 372 } 373 374 errcode_t base_fs_alloc_unset_target(ext2_filsys fs, 375 const char *target_path EXT2FS_ATTR((unused)), 376 const char *name EXT2FS_ATTR((unused)), 377 ext2_ino_t parent_ino EXT2FS_ATTR((unused)), 378 ext2_ino_t root EXT2FS_ATTR((unused)), mode_t mode) 379 { 380 struct base_fs_allocator *allocator = fs->priv_data; 381 382 if (!allocator || !allocator->cur_entry || mode != S_IFREG) 383 return 0; 384 385 fs_free_blocks_range(fs, allocator, &allocator->cur_entry->blocks); 386 delete_block_ranges(&allocator->cur_entry->blocks); 387 return 0; 388 } 389