1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * BTRFS filesystem implementation for U-Boot
4  *
5  * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
6  */
7 
8 #include <linux/kernel.h>
9 #include <malloc.h>
10 #include <memalign.h>
11 #include "btrfs.h"
12 #include "disk-io.h"
13 #include "volumes.h"
14 
15 /*
16  * Read the content of symlink inode @ino of @root, into @target.
17  * NOTE: @target will not be \0 termiated, caller should handle it properly.
18  *
19  * Return the number of read data.
20  * Return <0 for error.
21  */
btrfs_readlink(struct btrfs_root * root,u64 ino,char * target)22 int btrfs_readlink(struct btrfs_root *root, u64 ino, char *target)
23 {
24 	struct btrfs_path path;
25 	struct btrfs_key key;
26 	struct btrfs_file_extent_item *fi;
27 	int ret;
28 
29 	key.objectid = ino;
30 	key.type = BTRFS_EXTENT_DATA_KEY;
31 	key.offset = 0;
32 	btrfs_init_path(&path);
33 
34 	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
35 	if (ret < 0)
36 		return ret;
37 	if (ret > 0) {
38 		ret = -ENOENT;
39 		goto out;
40 	}
41 	fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
42 			    struct btrfs_file_extent_item);
43 	if (btrfs_file_extent_type(path.nodes[0], fi) !=
44 	    BTRFS_FILE_EXTENT_INLINE) {
45 		ret = -EUCLEAN;
46 		error("Extent for symlink %llu must be INLINE type!", ino);
47 		goto out;
48 	}
49 	if (btrfs_file_extent_compression(path.nodes[0], fi) !=
50 	    BTRFS_COMPRESS_NONE) {
51 		ret = -EUCLEAN;
52 		error("Extent for symlink %llu must not be compressed!", ino);
53 		goto out;
54 	}
55 	if (btrfs_file_extent_ram_bytes(path.nodes[0], fi) >=
56 	    root->fs_info->sectorsize) {
57 		ret = -EUCLEAN;
58 		error("Symlink %llu extent data too large (%llu)!\n",
59 			ino, btrfs_file_extent_ram_bytes(path.nodes[0], fi));
60 		goto out;
61 	}
62 	read_extent_buffer(path.nodes[0], target,
63 			btrfs_file_extent_inline_start(fi),
64 			btrfs_file_extent_ram_bytes(path.nodes[0], fi));
65 	ret = btrfs_file_extent_ram_bytes(path.nodes[0], fi);
66 out:
67 	btrfs_release_path(&path);
68 	return ret;
69 }
70 
lookup_root_ref(struct btrfs_fs_info * fs_info,u64 rootid,u64 * root_ret,u64 * dir_ret)71 static int lookup_root_ref(struct btrfs_fs_info *fs_info,
72 			   u64 rootid, u64 *root_ret, u64 *dir_ret)
73 {
74 	struct btrfs_root *root = fs_info->tree_root;
75 	struct btrfs_root_ref *root_ref;
76 	struct btrfs_path path;
77 	struct btrfs_key key;
78 	int ret;
79 
80 	btrfs_init_path(&path);
81 	key.objectid = rootid;
82 	key.type = BTRFS_ROOT_BACKREF_KEY;
83 	key.offset = (u64)-1;
84 
85 	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
86 	if (ret < 0)
87 		return ret;
88 	/* Should not happen */
89 	if (ret == 0) {
90 		ret = -EUCLEAN;
91 		goto out;
92 	}
93 	ret = btrfs_previous_item(root, &path, rootid, BTRFS_ROOT_BACKREF_KEY);
94 	if (ret < 0)
95 		goto out;
96 	if (ret > 0) {
97 		ret = -ENOENT;
98 		goto out;
99 	}
100 	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
101 	root_ref = btrfs_item_ptr(path.nodes[0], path.slots[0],
102 				  struct btrfs_root_ref);
103 	*root_ret = key.offset;
104 	*dir_ret = btrfs_root_ref_dirid(path.nodes[0], root_ref);
105 out:
106 	btrfs_release_path(&path);
107 	return ret;
108 }
109 
110 /*
111  * To get the parent inode of @ino of @root.
112  *
113  * @root_ret and @ino_ret will be filled.
114  *
115  * NOTE: This function is not reliable. It can only get one parent inode.
116  * The get the proper parent inode, we need a full VFS inodes stack to
117  * resolve properly.
118  */
get_parent_inode(struct btrfs_root * root,u64 ino,struct btrfs_root ** root_ret,u64 * ino_ret)119 static int get_parent_inode(struct btrfs_root *root, u64 ino,
120 			    struct btrfs_root **root_ret, u64 *ino_ret)
121 {
122 	struct btrfs_fs_info *fs_info = root->fs_info;
123 	struct btrfs_path path;
124 	struct btrfs_key key;
125 	int ret;
126 
127 	if (ino == BTRFS_FIRST_FREE_OBJECTID) {
128 		u64 parent_root = -1;
129 
130 		/* It's top level already, no more parent */
131 		if (root->root_key.objectid == BTRFS_FS_TREE_OBJECTID) {
132 			*root_ret = fs_info->fs_root;
133 			*ino_ret = BTRFS_FIRST_FREE_OBJECTID;
134 			return 0;
135 		}
136 
137 		ret = lookup_root_ref(fs_info, root->root_key.objectid,
138 				      &parent_root, ino_ret);
139 		if (ret < 0)
140 			return ret;
141 
142 		key.objectid = parent_root;
143 		key.type = BTRFS_ROOT_ITEM_KEY;
144 		key.offset = (u64)-1;
145 		*root_ret = btrfs_read_fs_root(fs_info, &key);
146 		if (IS_ERR(*root_ret))
147 			return PTR_ERR(*root_ret);
148 
149 		return 0;
150 	}
151 
152 	btrfs_init_path(&path);
153 	key.objectid = ino;
154 	key.type = BTRFS_INODE_REF_KEY;
155 	key.offset = (u64)-1;
156 
157 	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
158 	if (ret < 0)
159 		return ret;
160 	/* Should not happen */
161 	if (ret == 0) {
162 		ret = -EUCLEAN;
163 		goto out;
164 	}
165 	ret = btrfs_previous_item(root, &path, ino, BTRFS_INODE_REF_KEY);
166 	if (ret < 0)
167 		goto out;
168 	if (ret > 0) {
169 		ret = -ENOENT;
170 		goto out;
171 	}
172 	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
173 	*root_ret = root;
174 	*ino_ret = key.offset;
175 out:
176 	btrfs_release_path(&path);
177 	return ret;
178 }
179 
next_length(const char * path)180 static inline int next_length(const char *path)
181 {
182 	int res = 0;
183 	while (*path != '\0' && *path != '/') {
184 		++res;
185 		++path;
186 		if (res > BTRFS_NAME_LEN)
187 			break;
188 	}
189 	return res;
190 }
191 
skip_current_directories(const char * cur)192 static inline const char *skip_current_directories(const char *cur)
193 {
194 	while (1) {
195 		if (cur[0] == '/')
196 			++cur;
197 		else if (cur[0] == '.' && cur[1] == '/')
198 			cur += 2;
199 		else
200 			break;
201 	}
202 
203 	return cur;
204 }
205 
206 /*
207  * Resolve one filename of @ino of @root.
208  *
209  * key_ret:	The child key (either INODE_ITEM or ROOT_ITEM type)
210  * type_ret:	BTRFS_FT_* of the child inode.
211  *
212  * Return 0 with above members filled.
213  * Return <0 for error.
214  */
resolve_one_filename(struct btrfs_root * root,u64 ino,const char * name,int namelen,struct btrfs_key * key_ret,u8 * type_ret)215 static int resolve_one_filename(struct btrfs_root *root, u64 ino,
216 				const char *name, int namelen,
217 				struct btrfs_key *key_ret, u8 *type_ret)
218 {
219 	struct btrfs_dir_item *dir_item;
220 	struct btrfs_path path;
221 	int ret = 0;
222 
223 	btrfs_init_path(&path);
224 
225 	dir_item = btrfs_lookup_dir_item(NULL, root, &path, ino, name,
226 					 namelen, 0);
227 	if (IS_ERR(dir_item)) {
228 		ret = PTR_ERR(dir_item);
229 		goto out;
230 	}
231 
232 	btrfs_dir_item_key_to_cpu(path.nodes[0], dir_item, key_ret);
233 	*type_ret = btrfs_dir_type(path.nodes[0], dir_item);
234 out:
235 	btrfs_release_path(&path);
236 	return ret;
237 }
238 
239 /*
240  * Resolve a full path @filename. The start point is @ino of @root.
241  *
242  * The result will be filled into @root_ret, @ino_ret and @type_ret.
243  */
btrfs_lookup_path(struct btrfs_root * root,u64 ino,const char * filename,struct btrfs_root ** root_ret,u64 * ino_ret,u8 * type_ret,int symlink_limit)244 int btrfs_lookup_path(struct btrfs_root *root, u64 ino, const char *filename,
245 			struct btrfs_root **root_ret, u64 *ino_ret,
246 			u8 *type_ret, int symlink_limit)
247 {
248 	struct btrfs_fs_info *fs_info = root->fs_info;
249 	struct btrfs_root *next_root;
250 	struct btrfs_key key;
251 	const char *cur = filename;
252 	u64 next_ino;
253 	u8 next_type;
254 	u8 type = BTRFS_FT_UNKNOWN;
255 	int len;
256 	int ret = 0;
257 
258 	/* If the path is absolute path, also search from fs root */
259 	if (*cur == '/') {
260 		root = fs_info->fs_root;
261 		ino = btrfs_root_dirid(&root->root_item);
262 		type = BTRFS_FT_DIR;
263 	}
264 
265 	while (*cur != '\0') {
266 		cur = skip_current_directories(cur);
267 
268 		len = next_length(cur);
269 		if (len > BTRFS_NAME_LEN) {
270 			error("%s: Name too long at \"%.*s\"", __func__,
271 			       BTRFS_NAME_LEN, cur);
272 			return -ENAMETOOLONG;
273 		}
274 
275 		if (len == 1 && cur[0] == '.')
276 			break;
277 
278 		if (len == 2 && cur[0] == '.' && cur[1] == '.') {
279 			/* Go one level up */
280 			ret = get_parent_inode(root, ino, &next_root, &next_ino);
281 			if (ret < 0)
282 				return ret;
283 			root = next_root;
284 			ino = next_ino;
285 			goto next;
286 		}
287 
288 		if (!*cur)
289 			break;
290 
291 		ret = resolve_one_filename(root, ino, cur, len, &key, &type);
292 		if (ret < 0)
293 			return ret;
294 
295 		if (key.type == BTRFS_ROOT_ITEM_KEY) {
296 			/* Child inode is a subvolume */
297 
298 			next_root = btrfs_read_fs_root(fs_info, &key);
299 			if (IS_ERR(next_root))
300 				return PTR_ERR(next_root);
301 			root = next_root;
302 			ino = btrfs_root_dirid(&root->root_item);
303 		} else if (type == BTRFS_FT_SYMLINK && symlink_limit >= 0) {
304 			/* Child inode is a symlink */
305 
306 			char *target;
307 
308 			if (symlink_limit == 0) {
309 				error("%s: Too much symlinks!", __func__);
310 				return -EMLINK;
311 			}
312 			target = malloc(fs_info->sectorsize);
313 			if (!target)
314 				return -ENOMEM;
315 			ret = btrfs_readlink(root, key.objectid, target);
316 			if (ret < 0) {
317 				free(target);
318 				return ret;
319 			}
320 			target[ret] = '\0';
321 
322 			ret = btrfs_lookup_path(root, ino, target, &next_root,
323 						&next_ino, &next_type,
324 						symlink_limit);
325 			if (ret < 0)
326 				return ret;
327 			root = next_root;
328 			ino = next_ino;
329 			type = next_type;
330 		} else {
331 			/* Child inode is an inode */
332 			ino = key.objectid;
333 		}
334 next:
335 		cur += len;
336 	}
337 
338 	/* We haven't found anything, but still get no error? */
339 	if (type == BTRFS_FT_UNKNOWN && !ret)
340 		ret = -EUCLEAN;
341 
342 	if (!ret) {
343 		*root_ret = root;
344 		*ino_ret = ino;
345 		*type_ret = type;
346 	}
347 
348 	return ret;
349 }
350 
351 /*
352  * Read out inline extent.
353  *
354  * Since inline extent should only exist for offset 0, no need for extra
355  * parameters.
356  * Truncating should be handled by the caller.
357  *
358  * Return the number of bytes read.
359  * Return <0 for error.
360  */
btrfs_read_extent_inline(struct btrfs_path * path,struct btrfs_file_extent_item * fi,char * dest)361 int btrfs_read_extent_inline(struct btrfs_path *path,
362 			     struct btrfs_file_extent_item *fi, char *dest)
363 {
364 	struct extent_buffer *leaf = path->nodes[0];
365 	int slot = path->slots[0];
366 	char *cbuf = NULL;
367 	char *dbuf = NULL;
368 	u32 csize;
369 	u32 dsize;
370 	int ret;
371 
372 	csize = btrfs_file_extent_inline_item_len(leaf, btrfs_item_nr(slot));
373 	if (btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE) {
374 		/* Uncompressed, just read it out */
375 		read_extent_buffer(leaf, dest,
376 				btrfs_file_extent_inline_start(fi),
377 				csize);
378 		return csize;
379 	}
380 
381 	/* Compressed extent, prepare the compressed and data buffer */
382 	dsize = btrfs_file_extent_ram_bytes(leaf, fi);
383 	cbuf = malloc(csize);
384 	dbuf = malloc(dsize);
385 	if (!cbuf || !dbuf) {
386 		ret = -ENOMEM;
387 		goto out;
388 	}
389 	read_extent_buffer(leaf, cbuf, btrfs_file_extent_inline_start(fi),
390 			   csize);
391 	ret = btrfs_decompress(btrfs_file_extent_compression(leaf, fi),
392 			       cbuf, csize, dbuf, dsize);
393 	if (ret == (u32)-1) {
394 		ret = -EIO;
395 		goto out;
396 	}
397 	/*
398 	 * The compressed part ends before sector boundary, the remaining needs
399 	 * to be zeroed out.
400 	 */
401 	if (ret < dsize)
402 		memset(dbuf + ret, 0, dsize - ret);
403 	memcpy(dest, dbuf, dsize);
404 	ret = dsize;
405 out:
406 	free(cbuf);
407 	free(dbuf);
408 	return ret;
409 }
410 
411 /*
412  * Read out regular extent.
413  *
414  * Truncating should be handled by the caller.
415  *
416  * @offset and @len should not cross the extent boundary.
417  * Return the number of bytes read.
418  * Return <0 for error.
419  */
btrfs_read_extent_reg(struct btrfs_path * path,struct btrfs_file_extent_item * fi,u64 offset,int len,char * dest)420 int btrfs_read_extent_reg(struct btrfs_path *path,
421 			  struct btrfs_file_extent_item *fi, u64 offset,
422 			  int len, char *dest)
423 {
424 	struct extent_buffer *leaf = path->nodes[0];
425 	struct btrfs_fs_info *fs_info = leaf->fs_info;
426 	struct btrfs_key key;
427 	u64 extent_num_bytes;
428 	u64 disk_bytenr;
429 	u64 read;
430 	char *cbuf = NULL;
431 	char *dbuf = NULL;
432 	u32 csize;
433 	u32 dsize;
434 	bool finished = false;
435 	int num_copies;
436 	int i;
437 	int slot = path->slots[0];
438 	int ret;
439 
440 	btrfs_item_key_to_cpu(leaf, &key, slot);
441 	extent_num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
442 	ASSERT(IS_ALIGNED(offset, fs_info->sectorsize) &&
443 	       IS_ALIGNED(len, fs_info->sectorsize));
444 	ASSERT(offset >= key.offset &&
445 	       offset + len <= key.offset + extent_num_bytes);
446 
447 	/* Preallocated or hole , fill @dest with zero */
448 	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_PREALLOC ||
449 	    btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
450 		memset(dest, 0, len);
451 		return len;
452 	}
453 
454 	if (btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE) {
455 		u64 logical;
456 
457 		logical = btrfs_file_extent_disk_bytenr(leaf, fi) +
458 			  btrfs_file_extent_offset(leaf, fi) +
459 			  offset - key.offset;
460 		read = len;
461 
462 		num_copies = btrfs_num_copies(fs_info, logical, len);
463 		for (i = 1; i <= num_copies; i++) {
464 			ret = read_extent_data(fs_info, dest, logical, &read, i);
465 			if (ret < 0 || read != len)
466 				continue;
467 			finished = true;
468 			break;
469 		}
470 		if (!finished)
471 			return -EIO;
472 		return len;
473 	}
474 
475 	csize = btrfs_file_extent_disk_num_bytes(leaf, fi);
476 	dsize = btrfs_file_extent_ram_bytes(leaf, fi);
477 	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
478 	num_copies = btrfs_num_copies(fs_info, disk_bytenr, csize);
479 
480 	cbuf = malloc_cache_aligned(csize);
481 	dbuf = malloc_cache_aligned(dsize);
482 	if (!cbuf || !dbuf) {
483 		ret = -ENOMEM;
484 		goto out;
485 	}
486 	/* For compressed extent, we must read the whole on-disk extent */
487 	for (i = 1; i <= num_copies; i++) {
488 		read = csize;
489 		ret = read_extent_data(fs_info, cbuf, disk_bytenr,
490 				       &read, i);
491 		if (ret < 0 || read != csize)
492 			continue;
493 		finished = true;
494 		break;
495 	}
496 	if (!finished) {
497 		ret = -EIO;
498 		goto out;
499 	}
500 
501 	ret = btrfs_decompress(btrfs_file_extent_compression(leaf, fi), cbuf,
502 			       csize, dbuf, dsize);
503 	if (ret == (u32)-1) {
504 		ret = -EIO;
505 		goto out;
506 	}
507 	/*
508 	 * The compressed part ends before sector boundary, the remaining needs
509 	 * to be zeroed out.
510 	 */
511 	if (ret < dsize)
512 		memset(dbuf + ret, 0, dsize - ret);
513 	/* Then copy the needed part */
514 	memcpy(dest, dbuf + btrfs_file_extent_offset(leaf, fi), len);
515 	ret = len;
516 out:
517 	free(cbuf);
518 	free(dbuf);
519 	return ret;
520 }
521 
522 /*
523  * Get the first file extent that covers bytenr @file_offset.
524  *
525  * @file_offset must be aligned to sectorsize.
526  *
527  * return 0 for found, and path points to the file extent.
528  * return >0 for not found, and fill @next_offset.
529  * @next_offset can be 0 if there is no next file extent.
530  * return <0 for error.
531  */
lookup_data_extent(struct btrfs_root * root,struct btrfs_path * path,u64 ino,u64 file_offset,u64 * next_offset)532 static int lookup_data_extent(struct btrfs_root *root, struct btrfs_path *path,
533 			      u64 ino, u64 file_offset, u64 *next_offset)
534 {
535 	struct btrfs_key key;
536 	struct btrfs_file_extent_item *fi;
537 	u8 extent_type;
538 	int ret = 0;
539 
540 	ASSERT(IS_ALIGNED(file_offset, root->fs_info->sectorsize));
541 	key.objectid = ino;
542 	key.type = BTRFS_EXTENT_DATA_KEY;
543 	key.offset = file_offset;
544 
545 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
546 	/* Error or we're already at the file extent */
547 	if (ret <= 0)
548 		return ret;
549 	if (ret > 0) {
550 		/* Check previous file extent */
551 		ret = btrfs_previous_item(root, path, ino,
552 					  BTRFS_EXTENT_DATA_KEY);
553 		if (ret < 0)
554 			return ret;
555 		if (ret > 0)
556 			goto check_next;
557 	}
558 	/* Now the key.offset must be smaller than @file_offset */
559 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
560 	if (key.objectid != ino ||
561 	    key.type != BTRFS_EXTENT_DATA_KEY)
562 		goto check_next;
563 
564 	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
565 			    struct btrfs_file_extent_item);
566 	extent_type = btrfs_file_extent_type(path->nodes[0], fi);
567 	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
568 		if (file_offset == 0)
569 			return 0;
570 		/* Inline extent should be the only extent, no next extent. */
571 		*next_offset = 0;
572 		return 1;
573 	}
574 
575 	/* This file extent covers @file_offset */
576 	if (key.offset <= file_offset && key.offset +
577 	    btrfs_file_extent_num_bytes(path->nodes[0], fi) > file_offset)
578 		return 0;
579 check_next:
580 	ret = btrfs_next_item(root, path);
581 	if (ret < 0)
582 		return ret;
583 	if (ret > 0) {
584 		*next_offset = 0;
585 		return 1;
586 	}
587 
588 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
589 	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
590 			    struct btrfs_file_extent_item);
591 	/* Next next data extent */
592 	if (key.objectid != ino ||
593 	    key.type != BTRFS_EXTENT_DATA_KEY) {
594 		*next_offset = 0;
595 		return 1;
596 	}
597 	/* Current file extent already beyond @file_offset */
598 	if (key.offset > file_offset) {
599 		*next_offset = key.offset;
600 		return 1;
601 	}
602 	/* This file extent covers @file_offset */
603 	if (key.offset <= file_offset && key.offset +
604 	    btrfs_file_extent_num_bytes(path->nodes[0], fi) > file_offset)
605 		return 0;
606 	/* This file extent ends before @file_offset, check next */
607 	ret = btrfs_next_item(root, path);
608 	if (ret < 0)
609 		return ret;
610 	if (ret > 0) {
611 		*next_offset = 0;
612 		return 1;
613 	}
614 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
615 	if (key.type != BTRFS_EXTENT_DATA_KEY || key.objectid != ino) {
616 		*next_offset = 0;
617 		return 1;
618 	}
619 	*next_offset = key.offset;
620 	return 1;
621 }
622 
read_and_truncate_page(struct btrfs_path * path,struct btrfs_file_extent_item * fi,int start,int len,char * dest)623 static int read_and_truncate_page(struct btrfs_path *path,
624 				  struct btrfs_file_extent_item *fi,
625 				  int start, int len, char *dest)
626 {
627 	struct extent_buffer *leaf = path->nodes[0];
628 	struct btrfs_fs_info *fs_info = leaf->fs_info;
629 	u64 aligned_start = round_down(start, fs_info->sectorsize);
630 	u8 extent_type;
631 	char *buf;
632 	int page_off = start - aligned_start;
633 	int page_len = fs_info->sectorsize - page_off;
634 	int ret;
635 
636 	ASSERT(start + len <= aligned_start + fs_info->sectorsize);
637 	buf = malloc_cache_aligned(fs_info->sectorsize);
638 	if (!buf)
639 		return -ENOMEM;
640 
641 	extent_type = btrfs_file_extent_type(leaf, fi);
642 	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
643 		ret = btrfs_read_extent_inline(path, fi, buf);
644 		memcpy(dest, buf + page_off, min(page_len, ret));
645 		free(buf);
646 		return len;
647 	}
648 
649 	ret = btrfs_read_extent_reg(path, fi,
650 			round_down(start, fs_info->sectorsize),
651 			fs_info->sectorsize, buf);
652 	if (ret < 0) {
653 		free(buf);
654 		return ret;
655 	}
656 	memcpy(dest, buf + page_off, page_len);
657 	free(buf);
658 	return len;
659 }
660 
btrfs_file_read(struct btrfs_root * root,u64 ino,u64 file_offset,u64 len,char * dest)661 int btrfs_file_read(struct btrfs_root *root, u64 ino, u64 file_offset, u64 len,
662 		    char *dest)
663 {
664 	struct btrfs_fs_info *fs_info = root->fs_info;
665 	struct btrfs_file_extent_item *fi;
666 	struct btrfs_path path;
667 	struct btrfs_key key;
668 	u64 aligned_start = round_down(file_offset, fs_info->sectorsize);
669 	u64 aligned_end = round_down(file_offset + len, fs_info->sectorsize);
670 	u64 next_offset;
671 	u64 cur = aligned_start;
672 	int ret = 0;
673 
674 	btrfs_init_path(&path);
675 
676 	/* Set the whole dest all zero, so we won't need to bother holes */
677 	memset(dest, 0, len);
678 
679 	/* Read out the leading unaligned part */
680 	if (aligned_start != file_offset) {
681 		ret = lookup_data_extent(root, &path, ino, aligned_start,
682 					 &next_offset);
683 		if (ret < 0)
684 			goto out;
685 		if (ret == 0) {
686 			/* Read the unaligned part out*/
687 			fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
688 					struct btrfs_file_extent_item);
689 			ret = read_and_truncate_page(&path, fi, file_offset,
690 					round_up(file_offset, fs_info->sectorsize) -
691 					file_offset, dest);
692 			if (ret < 0)
693 				goto out;
694 			cur += fs_info->sectorsize;
695 		} else {
696 			/* The whole file is a hole */
697 			if (!next_offset) {
698 				memset(dest, 0, len);
699 				return len;
700 			}
701 			cur = next_offset;
702 		}
703 	}
704 
705 	/* Read the aligned part */
706 	while (cur < aligned_end) {
707 		u64 extent_num_bytes;
708 		u8 type;
709 
710 		btrfs_release_path(&path);
711 		ret = lookup_data_extent(root, &path, ino, cur, &next_offset);
712 		if (ret < 0)
713 			goto out;
714 		if (ret > 0) {
715 			/* No next, direct exit */
716 			if (!next_offset) {
717 				ret = 0;
718 				goto out;
719 			}
720 		}
721 		fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
722 				    struct btrfs_file_extent_item);
723 		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
724 		type = btrfs_file_extent_type(path.nodes[0], fi);
725 		if (type == BTRFS_FILE_EXTENT_INLINE) {
726 			ret = btrfs_read_extent_inline(&path, fi, dest);
727 			goto out;
728 		}
729 		/* Skip holes, as we have zeroed the dest */
730 		if (type == BTRFS_FILE_EXTENT_PREALLOC ||
731 		    btrfs_file_extent_disk_bytenr(path.nodes[0], fi) == 0) {
732 			cur = key.offset + btrfs_file_extent_num_bytes(
733 					path.nodes[0], fi);
734 			continue;
735 		}
736 
737 		/* Read the remaining part of the extent */
738 		extent_num_bytes = btrfs_file_extent_num_bytes(path.nodes[0],
739 							       fi);
740 		ret = btrfs_read_extent_reg(&path, fi, cur,
741 				min(extent_num_bytes, aligned_end - cur),
742 				dest + cur - file_offset);
743 		if (ret < 0)
744 			goto out;
745 		cur += min(extent_num_bytes, aligned_end - cur);
746 	}
747 
748 	/* Read the tailing unaligned part*/
749 	if (file_offset + len != aligned_end) {
750 		btrfs_release_path(&path);
751 		ret = lookup_data_extent(root, &path, ino, aligned_end,
752 					 &next_offset);
753 		/* <0 is error, >0 means no extent */
754 		if (ret)
755 			goto out;
756 		fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
757 				    struct btrfs_file_extent_item);
758 		ret = read_and_truncate_page(&path, fi, aligned_end,
759 				file_offset + len - aligned_end,
760 				dest + aligned_end - file_offset);
761 	}
762 out:
763 	btrfs_release_path(&path);
764 	if (ret < 0)
765 		return ret;
766 	return len;
767 }
768