#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "basefs_allocator.h"
#include "block_range.h"
#include "hashmap.h"
#include "base_fs.h"

struct base_fs_allocator {
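	/* Map from file path to its basefs_entry, built by basefs_parse(). */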
	struct ext2fs_hashmap *entries;
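	/* Entry for the file currently being written, or NULL if the file
	 * has no entry in the base FS map. */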
	struct basefs_entry *cur_entry;
	/* The next expected logical block to allocate for cur_entry. */
	blk64_t next_lblk;
	/* Blocks which are definitely owned by a single inode in BaseFS. */
	ext2fs_block_bitmap exclusive_block_map;
	/* Blocks which are available to the first inode that requests them. */
	ext2fs_block_bitmap dedup_block_map;
};

static errcode_t basefs_block_allocator(ext2_filsys, blk64_t, blk64_t *,
					struct blk_alloc_ctx *ctx);

/*
 * Free any reserved but unconsumed block ranges in the allocator. This both
 * frees the block_range_list data structure and unreserves exclusive blocks
 * from the block map.
 */
static void fs_free_blocks_range(ext2_filsys fs,
				 struct base_fs_allocator *allocator,
				 struct block_range_list *list)
{
	ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;

	blk64_t block;
	while (list->head) {
		block = consume_next_block(list);
		if (ext2fs_test_block_bitmap2(exclusive_map, block)) {
			ext2fs_unmark_block_bitmap2(fs->block_map, block);
			ext2fs_unmark_block_bitmap2(exclusive_map, block);
		}
	}
}

/*
 * Free any blocks in the bitmap that were reserved but never used. This is
 * needed to free dedup_block_map and ensure the free block bitmap is
 * internally consistent.
 */
static void fs_free_blocks_bitmap(ext2_filsys fs, ext2fs_block_bitmap bitmap)
{
	blk64_t block = 0;
	blk64_t start = fs->super->s_first_data_block;
	blk64_t end = ext2fs_blocks_count(fs->super) - 1;
	errcode_t retval;

	for (;;) {
		retval = ext2fs_find_first_set_block_bitmap2(bitmap, start, end,
			&block);
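		/* A non-zero return means no set blocks remain in [start, end]. */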
		if (retval)
			break;
		ext2fs_unmark_block_bitmap2(fs->block_map, block);
		start = block + 1;
	}
}

static void basefs_allocator_free(ext2_filsys fs,
				  struct base_fs_allocator *allocator)
{
	struct basefs_entry *e;
	struct ext2fs_hashmap_entry *it = NULL;
	struct ext2fs_hashmap *entries = allocator->entries;

	if (entries) {
		while ((e = ext2fs_hashmap_iter_in_order(entries, &it))) {
			fs_free_blocks_range(fs, allocator, &e->blocks);
			delete_block_ranges(&e->blocks);
		}
		ext2fs_hashmap_free(entries);
	}
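	/* Release any unclaimed dedup blocks back to the free block bitmap. */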
	if (allocator->dedup_block_map)
		fs_free_blocks_bitmap(fs, allocator->dedup_block_map);
	ext2fs_free_block_bitmap(allocator->exclusive_block_map);
	ext2fs_free_block_bitmap(allocator->dedup_block_map);
	free(allocator);
}

/*
 * Build a bitmap of which blocks are definitely owned by exactly one file in
 * Base FS. Blocks which are not valid or are de-duplicated are skipped. This
 * is called during allocator initialization, to ensure that libext2fs does
 * not allocate blocks which we want to re-use.
 *
 * If a block was allocated in the initial filesystem, it can never be re-used,
 * so it will appear in neither the exclusive nor the dedup set. If a block is
 * used by multiple files, it will be removed from the owned set and instead
 * added to the dedup set.
 *
 * The dedup set is not removed from fs->block_map. This allows us to re-use
 * dedup blocks separately and not have them be allocated outside of file data.
 *
 * This function returns non-zero if the block was owned, and 0 otherwise.
 */
static int fs_reserve_block(ext2_filsys fs,
			    struct base_fs_allocator *allocator,
			    blk64_t block)
{
	ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
	ext2fs_block_bitmap dedup_map = allocator->dedup_block_map;

	if (block >= ext2fs_blocks_count(fs->super))
		return 0;

	if (ext2fs_test_block_bitmap2(fs->block_map, block)) {
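		/*
		 * The block is already marked. Either it belongs to the
		 * initial filesystem (not in our exclusive map, so it can
		 * never be re-used), or another base FS file owns it and it
		 * is demoted to the dedup pool.
		 */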
		if (!ext2fs_test_block_bitmap2(exclusive_map, block))
			return 0;
		ext2fs_unmark_block_bitmap2(exclusive_map, block);
		ext2fs_mark_block_bitmap2(dedup_map, block);
		return 0;
	} else {
		ext2fs_mark_block_bitmap2(fs->block_map, block);
		ext2fs_mark_block_bitmap2(exclusive_map, block);
		return 1;
	}
}

/*
 * Walk the requested block list and reserve blocks, either into the owned
 * pool or the dedup pool as appropriate. We stop once the file has enough
 * owned blocks to satisfy |file_size|. This allows any extra blocks to be
 * re-used, since otherwise a large block movement between files could
 * trigger block allocation errors.
 */
static void fs_reserve_blocks_range(ext2_filsys fs,
				    struct base_fs_allocator *allocator,
				    struct block_range_list *list,
				    off_t file_size)
{
	blk64_t block;
	off_t blocks_needed;
	off_t blocks_acquired = 0;
	struct block_range *blocks = list->head;

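	/* Round file_size up to a whole number of filesystem blocks. */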
	blocks_needed = file_size + (fs->blocksize - 1);
	blocks_needed /= fs->blocksize;

	while (blocks) {
		for (block = blocks->start; block <= blocks->end; block++) {
			if (fs_reserve_block(fs, allocator, block))
				blocks_acquired++;
			if (blocks_acquired >= blocks_needed)
				return;
		}
		blocks = blocks->next;
	}
}

/*
 * For each file in the base FS map, ensure that its blocks are reserved in
 * the actual block map. This prevents libext2fs from allocating them for
 * general purpose use, and ensures that if the file needs data blocks, they
 * can be re-acquired exclusively for that file.
 *
 * If a file in the base map is missing, or is not a regular file in the new
 * filesystem, then it's skipped to ensure that its blocks are reusable.
 */
static errcode_t fs_reserve_blocks(ext2_filsys fs,
				   struct base_fs_allocator *allocator,
				   const char *src_dir)
{
	int nbytes;
	char full_path[PATH_MAX];
	const char *sep = "/";
	struct stat st;
	struct basefs_entry *e;
	struct ext2fs_hashmap_entry *it = NULL;
	struct ext2fs_hashmap *entries = allocator->entries;

	if (strlen(src_dir) && src_dir[strlen(src_dir) - 1] == '/')
		sep = "";

	while ((e = ext2fs_hashmap_iter_in_order(entries, &it))) {
		nbytes = snprintf(full_path, sizeof(full_path), "%s%s%s",
			src_dir, sep, e->path);
		if (nbytes < 0 || (size_t) nbytes >= sizeof(full_path))
			return ENAMETOOLONG;
		if (lstat(full_path, &st) || !S_ISREG(st.st_mode))
			continue;
		fs_reserve_blocks_range(fs, allocator, &e->blocks, st.st_size);
	}
	return 0;
}

errcode_t base_fs_alloc_load(ext2_filsys fs, const char *file,
			     const char *mountpoint, const char *src_dir)
{
	errcode_t retval = 0;
	struct base_fs_allocator *allocator;

	allocator = calloc(1, sizeof(*allocator));
	if (!allocator) {
		retval = ENOMEM;
		goto out;
	}

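	/* Load the on-disk bitmaps so reservations are recorded in
	 * fs->block_map. */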
	retval = ext2fs_read_bitmaps(fs);
	if (retval)
		goto err_load;

	allocator->cur_entry = NULL;
	allocator->entries = basefs_parse(file, mountpoint);
	if (!allocator->entries) {
		retval = EIO;
		goto err_load;
	}
	retval = ext2fs_allocate_block_bitmap(fs, "exclusive map",
		&allocator->exclusive_block_map);
	if (retval)
		goto err_load;
	retval = ext2fs_allocate_block_bitmap(fs, "dedup map",
		&allocator->dedup_block_map);
	if (retval)
		goto err_load;

	retval = fs_reserve_blocks(fs, allocator, src_dir);
	if (retval)
		goto err_load;

	/* Override the default allocator */
	fs->get_alloc_block2 = basefs_block_allocator;
	fs->priv_data = allocator;

	goto out;

err_load:
	basefs_allocator_free(fs, allocator);
out:
	return retval;
}


/* Try to acquire the next usable block from the Base FS map. */
static errcode_t get_next_block(ext2_filsys fs, struct base_fs_allocator *allocator,
				struct block_range_list *list, blk64_t *ret)
{
	blk64_t block;
	ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
	ext2fs_block_bitmap dedup_map = allocator->dedup_block_map;

	if (!list->head)
		return EXT2_ET_BLOCK_ALLOC_FAIL;

	block = consume_next_block(list);
	if (block >= ext2fs_blocks_count(fs->super))
		return EXT2_ET_BLOCK_ALLOC_FAIL;
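	/* Prefer a block we own exclusively; otherwise claim it from the
	 * dedup pool before any other file does. */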
	if (ext2fs_test_block_bitmap2(exclusive_map, block)) {
		ext2fs_unmark_block_bitmap2(exclusive_map, block);
		*ret = block;
		return 0;
	}
	if (ext2fs_test_block_bitmap2(dedup_map, block)) {
		ext2fs_unmark_block_bitmap2(dedup_map, block);
		*ret = block;
		return 0;
	}
	return EXT2_ET_BLOCK_ALLOC_FAIL;
}

/*
 * BaseFS lists blocks in logical block order. However, the allocator hook is
 * only called if a block needs to be allocated. In the case of a deduplicated
 * block, or a hole, the hook is not invoked. This means the next block
 * allocation request will be out of sequence. For example, consider if BaseFS
 * specifies the following (0 being a hole):
 *     1 2 3 0 4 5
 *
 * If the new file has a hole at logical block 0, we could accidentally
 * shift the entire expected block list as follows:
 *     0 1 2 0 3 4
 *
 * To account for this, we track the next expected logical block in the
 * allocator. If the current request is for a later logical block, we skip and
 * free the intermediate physical blocks that would have been allocated. This
 * ensures the original block assignment is respected.
 */
static void skip_blocks(ext2_filsys fs, struct base_fs_allocator *allocator,
			struct blk_alloc_ctx *ctx)
{
	blk64_t block;
	struct block_range_list *list = &allocator->cur_entry->blocks;
	ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;

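	/* Consume list entries for logical blocks [next_lblk, ctx->lblk),
	 * returning exclusively reserved physical blocks to the free pool. */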
	while (list->head && allocator->next_lblk < ctx->lblk) {
		block = consume_next_block(list);
		if (block >= ext2fs_blocks_count(fs->super))
			continue;
		if (ext2fs_test_block_bitmap2(exclusive_map, block)) {
			ext2fs_unmark_block_bitmap2(exclusive_map, block);
			ext2fs_unmark_block_bitmap2(fs->block_map, block);
		}
		allocator->next_lblk++;
	}
}

static errcode_t basefs_block_allocator(ext2_filsys fs, blk64_t goal,
					blk64_t *ret, struct blk_alloc_ctx *ctx)
{
	errcode_t retval;
	struct base_fs_allocator *allocator = fs->priv_data;
	struct basefs_entry *e = allocator->cur_entry;
	ext2fs_block_bitmap dedup_map = allocator->dedup_block_map;

	if (e && ctx && (ctx->flags & BLOCK_ALLOC_DATA)) {
		if (allocator->next_lblk < ctx->lblk)
			skip_blocks(fs, allocator, ctx);
		allocator->next_lblk = ctx->lblk + 1;

		if (!get_next_block(fs, allocator, &e->blocks, ret))
			return 0;
	}

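	/* Metadata allocation, or no usable block in the file's base FS
	 * list: fall back to the default free-block search. */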
	retval = ext2fs_new_block2(fs, goal, fs->block_map, ret);
	if (!retval) {
		ext2fs_mark_block_bitmap2(fs->block_map, *ret);
		return 0;
	}
	if (retval != EXT2_ET_BLOCK_ALLOC_FAIL)
		return retval;

	/* Try to steal a block from the dedup pool. */
	retval = ext2fs_find_first_set_block_bitmap2(dedup_map,
		fs->super->s_first_data_block,
		ext2fs_blocks_count(fs->super) - 1, ret);
	if (!retval) {
		ext2fs_unmark_block_bitmap2(dedup_map, *ret);
		return 0;
	}

	/*
	 * As a last resort, take any block from our file's list. This
	 * risks bloating the diff, but means we are more likely to
	 * successfully build an image. Guard against e being NULL, since
	 * metadata allocations reach this point without a current entry.
	 */
	while (e && e->blocks.head) {
		if (!get_next_block(fs, allocator, &e->blocks, ret))
			return 0;
	}
	return EXT2_ET_BLOCK_ALLOC_FAIL;
}

void base_fs_alloc_cleanup(ext2_filsys fs)
{
	basefs_allocator_free(fs, fs->priv_data);
	fs->priv_data = NULL;
	fs->get_alloc_block2 = NULL;
}

errcode_t base_fs_alloc_set_target(ext2_filsys fs, const char *target_path,
	const char *name EXT2FS_ATTR((unused)),
	ext2_ino_t parent_ino EXT2FS_ATTR((unused)),
	ext2_ino_t root EXT2FS_ATTR((unused)), mode_t mode)
{
	struct base_fs_allocator *allocator = fs->priv_data;

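	/* Only regular files carry block lists in the base FS map. */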
	if (mode != S_IFREG)
		return 0;

	if (allocator) {
		allocator->cur_entry = ext2fs_hashmap_lookup(allocator->entries,
						      target_path,
						      strlen(target_path));
		allocator->next_lblk = 0;
	}
	return 0;
}

errcode_t base_fs_alloc_unset_target(ext2_filsys fs,
	const char *target_path EXT2FS_ATTR((unused)),
	const char *name EXT2FS_ATTR((unused)),
	ext2_ino_t parent_ino EXT2FS_ATTR((unused)),
	ext2_ino_t root EXT2FS_ATTR((unused)), mode_t mode)
{
	struct base_fs_allocator *allocator = fs->priv_data;

	if (!allocator || !allocator->cur_entry || mode != S_IFREG)
		return 0;

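	/* Return any blocks the file reserved but never allocated. */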
	fs_free_blocks_range(fs, allocator, &allocator->cur_entry->blocks);
	delete_block_ranges(&allocator->cur_entry->blocks);
	return 0;
}