1 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2 #ifndef _BTRFS_CTREE_H_
3 #define _BTRFS_CTREE_H_
4
5 #include <linux/btrfs.h>
6 #include <linux/types.h>
7 #ifdef __KERNEL__
8 #include <linux/stddef.h>
9 #else
10 #include <stddef.h>
11 #endif
12
/* The eight ASCII bytes "_BHRfS_M", stored without a terminating NUL. */
#define BTRFS_MAGIC 0x4D5F53665248425FULL

#define BTRFS_MAX_LEVEL 8

/*
 * Much longer names could actually be stored, but the limit is kept here so
 * the rest of Linux is not confused.
 */
#define BTRFS_NAME_LEN 255

/*
 * The theoretical limit is larger; keeping it at this sane value should
 * greatly limit the possibility of collisions on inode ref items.
 */
#define BTRFS_LINK_MAX 65535U
29
30 /*
31 * This header contains the structure definitions and constants used
32 * by file system objects that can be retrieved using
33 * the BTRFS_IOC_SEARCH_TREE ioctl. That means basically anything that
34 * is needed to describe a leaf node's key or item contents.
35 */
36
/* Tree holding pointers to all of the tree roots. */
#define BTRFS_ROOT_TREE_OBJECTID 1ULL

/* Tree recording which extents are in use, together with reference counts. */
#define BTRFS_EXTENT_TREE_OBJECTID 2ULL

/*
 * The chunk tree stores the logical -> physical block number translations.
 * The super block points to the chunk tree.
 */
#define BTRFS_CHUNK_TREE_OBJECTID 3ULL

/*
 * The device tree records which areas of a given device are in use, one per
 * device.  The tree of tree roots points to the device tree.
 */
#define BTRFS_DEV_TREE_OBJECTID 4ULL

/* One per subvolume; stores files and directories. */
#define BTRFS_FS_TREE_OBJECTID 5ULL

/* Objectid of the directory inside the root tree. */
#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL

/* Tree holding the checksums of all the data extents. */
#define BTRFS_CSUM_TREE_OBJECTID 7ULL

/* Tree holding quota configuration and tracking. */
#define BTRFS_QUOTA_TREE_OBJECTID 8ULL

/* Tree storing the items that use the BTRFS_UUID_KEY* types. */
#define BTRFS_UUID_TREE_OBJECTID 9ULL

/* Tree tracking free space in block groups. */
#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL

/* Tree holding the block group items for extent tree v2. */
#define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL

/* Tree tracking RAID stripes in block groups. */
#define BTRFS_RAID_STRIPE_TREE_OBJECTID 12ULL
78
/* Device stats, kept in the device tree. */
#define BTRFS_DEV_STATS_OBJECTID 0ULL

/* Balance parameters, kept in the root tree. */
#define BTRFS_BALANCE_OBJECTID -4ULL

/* Orphan objectid, used to track unlinked/truncated files. */
#define BTRFS_ORPHAN_OBJECTID -5ULL

/* Write-ahead logging that speeds up fsyncs. */
#define BTRFS_TREE_LOG_OBJECTID -6ULL
#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL

/* Used for space balancing. */
#define BTRFS_TREE_RELOC_OBJECTID -8ULL
#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL

/*
 * Every extent checksum uses this objectid, which lets the checksums share
 * the logging tree for fsyncs.
 */
#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL

/* Used for storing the free space cache. */
#define BTRFS_FREE_SPACE_OBJECTID -11ULL

/*
 * Inode number assigned to the special inode that stores the free ino
 * cache.
 */
#define BTRFS_FREE_INO_OBJECTID -12ULL

/* Dummy objectid that stands for multiple objectids. */
#define BTRFS_MULTIPLE_OBJECTIDS -255ULL

/* Objectids of all files lie within this range. */
#define BTRFS_FIRST_FREE_OBJECTID 256ULL
#define BTRFS_LAST_FREE_OBJECTID -256ULL
#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL


/*
 * Device items live in the chunk tree.  Their key has the form
 * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
 */
#define BTRFS_DEV_ITEMS_OBJECTID 1ULL

#define BTRFS_BTREE_INODE_OBJECTID 1

#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2

#define BTRFS_DEV_REPLACE_DEVID 0ULL
134
/*
 * Inode items carry the data typically returned by stat, plus other
 * information about object characteristics.  Every file and directory in the
 * FS has one.
 */
#define BTRFS_INODE_ITEM_KEY 1
#define BTRFS_INODE_REF_KEY 12
#define BTRFS_INODE_EXTREF_KEY 13
#define BTRFS_XATTR_ITEM_KEY 24

/*
 * fs-verity items are stored on disk under two different key types.
 *
 * Descriptor items:
 *   [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ]
 *
 * Offset 0 holds a btrfs_verity_descriptor_item, which tracks the size of the
 * descriptor item and some extra data for encryption.  The items from offset
 * 1 onwards hold the generic fs-verity descriptor; they are opaque to btrfs,
 * which simply reads and writes them as a blob on behalf of the higher level
 * verity code.  The most common descriptor size is 256 bytes.
 *
 * Merkle tree items:
 *   [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ]
 *
 * These start at offset 0 as well and correspond to the merkle tree bytes:
 * when fsverity asks for page 0 of the merkle tree, one page starting at
 * offset 0 is pulled up for this key type.  They are opaque to btrfs too,
 * which blindly stores whatever fsverity sends down.
 */
#define BTRFS_VERITY_DESC_ITEM_KEY 36
#define BTRFS_VERITY_MERKLE_ITEM_KEY 37

#define BTRFS_ORPHAN_ITEM_KEY 48
/* Values 2-15, close to the inode, are reserved for later flexibility. */

/*
 * Dir items are the name -> inode pointers of a directory, one for every
 * name in it.  BTRFS_DIR_LOG_ITEM_KEY is no longer used; it stays defined for
 * documentation purposes and to help keep its numerical value from being
 * reused in the future.
 */
#define BTRFS_DIR_LOG_ITEM_KEY 60
#define BTRFS_DIR_LOG_INDEX_KEY 72
#define BTRFS_DIR_ITEM_KEY 84
#define BTRFS_DIR_INDEX_KEY 96

/* Extent data is for file data. */
#define BTRFS_EXTENT_DATA_KEY 108
184
/*
 * Extent csums live in a separate tree and hold the csums for an entire
 * extent on disk.
 */
#define BTRFS_EXTENT_CSUM_KEY 128

/*
 * Root items point to tree roots.  They are typically found in the root tree
 * that the super block uses to locate all the other trees.
 */
#define BTRFS_ROOT_ITEM_KEY 132

/*
 * Root backrefs tie subvols and snapshots to the directory entries that
 * reference them.
 */
#define BTRFS_ROOT_BACKREF_KEY 144

/*
 * Root refs form a fast index for listing all of the snapshots and subvolumes
 * referenced by a given root.  They point directly to the directory item in
 * the root that references the subvol.
 */
#define BTRFS_ROOT_REF_KEY 156

/*
 * Extent items live in the extent map tree.  They record which blocks are
 * used and how many references each block has.
 */
#define BTRFS_EXTENT_ITEM_KEY 168

/*
 * Same as BTRFS_EXTENT_ITEM_KEY, except that for metadata the length is
 * already known, so key->offset stores the level instead of the length.
 */
#define BTRFS_METADATA_ITEM_KEY 169

/*
 * Special inline ref key that stores the id of the subvolume which originally
 * created the extent.  From the perspective of simple quotas that subvolume
 * owns the extent permanently; the id is needed to know which subvolume to
 * free quota usage from when the extent is deleted.
 *
 * It is stored as an inline ref rather than as a separate item to avoid
 * wasting space on top of the existing extent item.  However, unlike the
 * other inline refs, there is only one owner ref per extent.
 *
 * Because of this it goes at the front of the list of inline refs, and thus
 * must have a lower type value than any other inline ref type (to satisfy the
 * disk format rule that inline refs have non-decreasing type).
 */
#define BTRFS_EXTENT_OWNER_REF_KEY 172

#define BTRFS_TREE_BLOCK_REF_KEY 176

#define BTRFS_EXTENT_DATA_REF_KEY 178

/*
 * Obsolete key.  Definition removed in 6.6, value may be reused in the future.
 *
 * #define BTRFS_EXTENT_REF_V0_KEY 180
 */

#define BTRFS_SHARED_BLOCK_REF_KEY 182

#define BTRFS_SHARED_DATA_REF_KEY 184
251
/*
 * Block groups provide hints into the extent allocation trees, such as which
 * blocks are free.
 */
#define BTRFS_BLOCK_GROUP_ITEM_KEY 192

/*
 * The free space tree represents every block group with a free space info
 * item that stores some accounting information.  The key is
 * (block_group_start, FREE_SPACE_INFO, block_group_length).
 */
#define BTRFS_FREE_SPACE_INFO_KEY 198

/*
 * A free space extent tracks one extent of free space within a block group.
 * The key is (start, FREE_SPACE_EXTENT, length).
 */
#define BTRFS_FREE_SPACE_EXTENT_KEY 199

/*
 * A block group that becomes very fragmented is converted to use bitmaps
 * instead of extents.  A free space bitmap is keyed on
 * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with
 * (length / sectorsize) bits.
 */
#define BTRFS_FREE_SPACE_BITMAP_KEY 200

#define BTRFS_DEV_EXTENT_KEY 204
#define BTRFS_DEV_ITEM_KEY 216
#define BTRFS_CHUNK_ITEM_KEY 228

#define BTRFS_RAID_STRIPE_KEY 230
284
/*
 * Overall state of the qgroups.  Only a single instance of this key is
 * present: (0, BTRFS_QGROUP_STATUS_KEY, 0)
 */
#define BTRFS_QGROUP_STATUS_KEY 240

/*
 * Space currently used by a qgroup.
 * One key per qgroup: (0, BTRFS_QGROUP_INFO_KEY, qgroupid).
 */
#define BTRFS_QGROUP_INFO_KEY 242

/*
 * User configured limits of a qgroup.
 * One key per qgroup: (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid).
 */
#define BTRFS_QGROUP_LIMIT_KEY 244

/*
 * Child-parent relationship between qgroups.  Each relation is recorded with
 * 2 keys:
 *   (childid, BTRFS_QGROUP_RELATION_KEY, parentid)
 *   (parentid, BTRFS_QGROUP_RELATION_KEY, childid)
 */
#define BTRFS_QGROUP_RELATION_KEY 246

/* Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY. */
#define BTRFS_BALANCE_ITEM_KEY 248

/*
 * Key type for tree items that are stored persistently but do not need to
 * exist for an extended period of time.  Such items can exist in any tree.
 *
 *   [subtype, BTRFS_TEMPORARY_ITEM_KEY, data]
 *
 * Existing items:
 *
 * - balance status item
 *   (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0)
 */
#define BTRFS_TEMPORARY_ITEM_KEY 248

/* Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY. */
#define BTRFS_DEV_STATS_KEY 249

/*
 * Key type for tree items that are stored persistently and usually exist for
 * a long period, e.g. the filesystem lifetime.  The item kinds can be status
 * information, stats or preference values.  Such items can exist in any tree.
 *
 *   [subtype, BTRFS_PERSISTENT_ITEM_KEY, data]
 *
 * Existing items:
 *
 * - device statistics: IO stats stored in the device tree, one key for all
 *   stats
 *   (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)
 */
#define BTRFS_PERSISTENT_ITEM_KEY 249

/*
 * Persistently stores the device replace state in the device tree.
 * The key is built as (0, BTRFS_DEV_REPLACE_KEY, 0).
 */
#define BTRFS_DEV_REPLACE_KEY 250
352
353 /*
354 * Stores items that allow to quickly map UUIDs to something else.
355 * These items are part of the filesystem UUID tree.
356 * The key is built like this:
357 * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
358 */
359 #if BTRFS_UUID_SIZE != 16
360 #error "UUID items require BTRFS_UUID_SIZE == 16!"
361 #endif
362 #define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */
363 #define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to
364 * received subvols */
365
/*
 * String items exist for debugging; they just store a short string of data
 * in the FS.
 */
#define BTRFS_STRING_ITEM_KEY 253

/* Largest allowed metadata block size (nodesize). */
#define BTRFS_MAX_METADATA_BLOCKSIZE 65536

/* The various csum fields are 32 bytes wide. */
#define BTRFS_CSUM_SIZE 32

/* Checksum types. */
enum btrfs_csum_type {
	BTRFS_CSUM_TYPE_CRC32 = 0,
	BTRFS_CSUM_TYPE_XXHASH = 1,
	BTRFS_CSUM_TYPE_SHA256 = 2,
	BTRFS_CSUM_TYPE_BLAKE2 = 3,
};
385
/*
 * Flag definitions for the directory entry item type.
 *
 * Used by:
 *   struct btrfs_dir_item.type
 *
 * The values 0..7 must match the common file type values in fs_types.h.
 */
#define BTRFS_FT_UNKNOWN 0
#define BTRFS_FT_REG_FILE 1
#define BTRFS_FT_DIR 2
#define BTRFS_FT_CHRDEV 3
#define BTRFS_FT_BLKDEV 4
#define BTRFS_FT_FIFO 5
#define BTRFS_FT_SOCK 6
#define BTRFS_FT_SYMLINK 7
#define BTRFS_FT_XATTR 8
#define BTRFS_FT_MAX 9
/* The directory contains encrypted data. */
#define BTRFS_FT_ENCRYPTED 0x80
406
/*
 * Strip the BTRFS_FT_ENCRYPTED bit from a directory entry flags byte.
 *
 * @flags: raw flags byte (the BTRFS_FT_* values are documented as being used
 *         by struct btrfs_dir_item.type)
 *
 * Returns @flags with BTRFS_FT_ENCRYPTED cleared; every other bit is passed
 * through unchanged.
 */
static inline __u8 btrfs_dir_flags_to_ftype(__u8 flags)
{
	/* The operands are promoted to int; narrow back explicitly. */
	return (__u8)(flags & ~BTRFS_FT_ENCRYPTED);
}
411
/* Inode flags, one bit each. */
#define BTRFS_INODE_NODATASUM (1U << 0)
#define BTRFS_INODE_NODATACOW (1U << 1)
#define BTRFS_INODE_READONLY (1U << 2)
#define BTRFS_INODE_NOCOMPRESS (1U << 3)
#define BTRFS_INODE_PREALLOC (1U << 4)
#define BTRFS_INODE_SYNC (1U << 5)
#define BTRFS_INODE_IMMUTABLE (1U << 6)
#define BTRFS_INODE_APPEND (1U << 7)
#define BTRFS_INODE_NODUMP (1U << 8)
#define BTRFS_INODE_NOATIME (1U << 9)
#define BTRFS_INODE_DIRSYNC (1U << 10)
#define BTRFS_INODE_COMPRESS (1U << 11)

#define BTRFS_INODE_ROOT_ITEM_INIT (1U << 31)

/* Union of every inode flag defined above. */
#define BTRFS_INODE_FLAG_MASK						\
	(BTRFS_INODE_NODATASUM |					\
	 BTRFS_INODE_NODATACOW |					\
	 BTRFS_INODE_READONLY |						\
	 BTRFS_INODE_NOCOMPRESS |					\
	 BTRFS_INODE_PREALLOC |						\
	 BTRFS_INODE_SYNC |						\
	 BTRFS_INODE_IMMUTABLE |					\
	 BTRFS_INODE_APPEND |						\
	 BTRFS_INODE_NODUMP |						\
	 BTRFS_INODE_NOATIME |						\
	 BTRFS_INODE_DIRSYNC |						\
	 BTRFS_INODE_COMPRESS |						\
	 BTRFS_INODE_ROOT_ITEM_INIT)

#define BTRFS_INODE_RO_VERITY (1U << 0)

/* Union of every BTRFS_INODE_RO_* flag. */
#define BTRFS_INODE_RO_FLAG_MASK (BTRFS_INODE_RO_VERITY)
448
449 /*
450 * The key defines the order in the tree, and so it also defines (optimal)
451 * block layout.
452 *
453 * objectid corresponds to the inode number.
454 *
455 * type tells us things about the object, and is a kind of stream selector.
456 * so for a given inode, keys with type of 1 might refer to the inode data,
457 * type of 2 may point to file data in the btree and type == 3 may point to
458 * extents.
459 *
460 * offset is the starting byte offset for this key in the stream.
461 *
462 * btrfs_disk_key is in disk byte order. struct btrfs_key is always
463 * in cpu native order. Otherwise they are identical and their sizes
464 * should be the same (ie both packed)
465 */
466 struct btrfs_disk_key {
467 __le64 objectid;
468 __u8 type;
469 __le64 offset;
470 } __attribute__ ((__packed__));
471
472 struct btrfs_key {
473 __u64 objectid;
474 __u8 type;
475 __u64 offset;
476 } __attribute__ ((__packed__));
477
478 /*
479 * Every tree block (leaf or node) starts with this header.
480 */
481 struct btrfs_header {
482 /* These first four must match the super block */
483 __u8 csum[BTRFS_CSUM_SIZE];
484 /* FS specific uuid */
485 __u8 fsid[BTRFS_FSID_SIZE];
486 /* Which block this node is supposed to live in */
487 __le64 bytenr;
488 __le64 flags;
489
490 /* Allowed to be different from the super from here on down */
491 __u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
492 __le64 generation;
493 __le64 owner;
494 __le32 nritems;
495 __u8 level;
496 } __attribute__ ((__packed__));
497
/*
 * A very generous portion of the super block, leaving room to translate 14
 * chunks with 3 stripes each.
 */
#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048

/*
 * In case the roots are somehow lost and mounting is not possible, the super
 * block stores an array of the roots from previous transactions.
 */
#define BTRFS_NUM_BACKUP_ROOTS 4
509 struct btrfs_root_backup {
510 __le64 tree_root;
511 __le64 tree_root_gen;
512
513 __le64 chunk_root;
514 __le64 chunk_root_gen;
515
516 __le64 extent_root;
517 __le64 extent_root_gen;
518
519 __le64 fs_root;
520 __le64 fs_root_gen;
521
522 __le64 dev_root;
523 __le64 dev_root_gen;
524
525 __le64 csum_root;
526 __le64 csum_root_gen;
527
528 __le64 total_bytes;
529 __le64 bytes_used;
530 __le64 num_devices;
531 /* future */
532 __le64 unused_64[4];
533
534 __u8 tree_root_level;
535 __u8 chunk_root_level;
536 __u8 extent_root_level;
537 __u8 fs_root_level;
538 __u8 dev_root_level;
539 __u8 csum_root_level;
540 /* future and to align */
541 __u8 unused_8[10];
542 } __attribute__ ((__packed__));
543
544 /*
545 * A leaf is full of items. offset and size tell us where to find the item in
546 * the leaf (relative to the start of the data area)
547 */
548 struct btrfs_item {
549 struct btrfs_disk_key key;
550 __le32 offset;
551 __le32 size;
552 } __attribute__ ((__packed__));
553
554 /*
555 * Leaves have an item area and a data area:
556 * [item0, item1....itemN] [free space] [dataN...data1, data0]
557 *
558 * The data is separate from the items to get the keys closer together during
559 * searches.
560 */
561 struct btrfs_leaf {
562 struct btrfs_header header;
563 struct btrfs_item items[];
564 } __attribute__ ((__packed__));
565
566 /*
567 * All non-leaf blocks are nodes, they hold only keys and pointers to other
568 * blocks.
569 */
570 struct btrfs_key_ptr {
571 struct btrfs_disk_key key;
572 __le64 blockptr;
573 __le64 generation;
574 } __attribute__ ((__packed__));
575
576 struct btrfs_node {
577 struct btrfs_header header;
578 struct btrfs_key_ptr ptrs[];
579 } __attribute__ ((__packed__));
580
581 struct btrfs_dev_item {
582 /* the internal btrfs device id */
583 __le64 devid;
584
585 /* size of the device */
586 __le64 total_bytes;
587
588 /* bytes used */
589 __le64 bytes_used;
590
591 /* optimal io alignment for this device */
592 __le32 io_align;
593
594 /* optimal io width for this device */
595 __le32 io_width;
596
597 /* minimal io size for this device */
598 __le32 sector_size;
599
600 /* type and info about this device */
601 __le64 type;
602
603 /* expected generation for this device */
604 __le64 generation;
605
606 /*
607 * starting byte of this partition on the device,
608 * to allow for stripe alignment in the future
609 */
610 __le64 start_offset;
611
612 /* grouping information for allocation decisions */
613 __le32 dev_group;
614
615 /* seek speed 0-100 where 100 is fastest */
616 __u8 seek_speed;
617
618 /* bandwidth 0-100 where 100 is fastest */
619 __u8 bandwidth;
620
621 /* btrfs generated uuid for this device */
622 __u8 uuid[BTRFS_UUID_SIZE];
623
624 /* uuid of FS who owns this device */
625 __u8 fsid[BTRFS_UUID_SIZE];
626 } __attribute__ ((__packed__));
627
628 struct btrfs_stripe {
629 __le64 devid;
630 __le64 offset;
631 __u8 dev_uuid[BTRFS_UUID_SIZE];
632 } __attribute__ ((__packed__));
633
634 struct btrfs_chunk {
635 /* size of this chunk in bytes */
636 __le64 length;
637
638 /* objectid of the root referencing this chunk */
639 __le64 owner;
640
641 __le64 stripe_len;
642 __le64 type;
643
644 /* optimal io alignment for this chunk */
645 __le32 io_align;
646
647 /* optimal io width for this chunk */
648 __le32 io_width;
649
650 /* minimal io size for this chunk */
651 __le32 sector_size;
652
653 /* 2^16 stripes is quite a lot, a second limit is the size of a single
654 * item in the btree
655 */
656 __le16 num_stripes;
657
658 /* sub stripes only matter for raid10 */
659 __le16 sub_stripes;
660 struct btrfs_stripe stripe;
661 /* additional stripes go here */
662 } __attribute__ ((__packed__));
663
664 /*
665 * The super block basically lists the main trees of the FS.
666 */
667 struct btrfs_super_block {
668 /* The first 4 fields must match struct btrfs_header */
669 __u8 csum[BTRFS_CSUM_SIZE];
670 /* FS specific UUID, visible to user */
671 __u8 fsid[BTRFS_FSID_SIZE];
672 /* This block number */
673 __le64 bytenr;
674 __le64 flags;
675
676 /* Allowed to be different from the btrfs_header from here own down */
677 __le64 magic;
678 __le64 generation;
679 __le64 root;
680 __le64 chunk_root;
681 __le64 log_root;
682
683 /*
684 * This member has never been utilized since the very beginning, thus
685 * it's always 0 regardless of kernel version. We always use
686 * generation + 1 to read log tree root. So here we mark it deprecated.
687 */
688 __le64 __unused_log_root_transid;
689 __le64 total_bytes;
690 __le64 bytes_used;
691 __le64 root_dir_objectid;
692 __le64 num_devices;
693 __le32 sectorsize;
694 __le32 nodesize;
695 __le32 __unused_leafsize;
696 __le32 stripesize;
697 __le32 sys_chunk_array_size;
698 __le64 chunk_root_generation;
699 __le64 compat_flags;
700 __le64 compat_ro_flags;
701 __le64 incompat_flags;
702 __le16 csum_type;
703 __u8 root_level;
704 __u8 chunk_root_level;
705 __u8 log_root_level;
706 struct btrfs_dev_item dev_item;
707
708 char label[BTRFS_LABEL_SIZE];
709
710 __le64 cache_generation;
711 __le64 uuid_tree_generation;
712
713 /* The UUID written into btree blocks */
714 __u8 metadata_uuid[BTRFS_FSID_SIZE];
715
716 __u64 nr_global_roots;
717
718 /* Future expansion */
719 __le64 reserved[27];
720 __u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
721 struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
722
723 /* Padded to 4096 bytes */
724 __u8 padding[565];
725 } __attribute__ ((__packed__));
726
/* Free space entry kinds. */
#define BTRFS_FREE_SPACE_EXTENT 1
#define BTRFS_FREE_SPACE_BITMAP 2
729
730 struct btrfs_free_space_entry {
731 __le64 offset;
732 __le64 bytes;
733 __u8 type;
734 } __attribute__ ((__packed__));
735
736 struct btrfs_free_space_header {
737 struct btrfs_disk_key location;
738 __le64 generation;
739 __le64 num_entries;
740 __le64 num_bitmaps;
741 } __attribute__ ((__packed__));
742
743 struct btrfs_raid_stride {
744 /* The id of device this raid extent lives on. */
745 __le64 devid;
746 /* The physical location on disk. */
747 __le64 physical;
748 } __attribute__ ((__packed__));
749
750 struct btrfs_stripe_extent {
751 /* An array of raid strides this stripe is composed of. */
752 __DECLARE_FLEX_ARRAY(struct btrfs_raid_stride, strides);
753 } __attribute__ ((__packed__));
754
/* Header flags. */
#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)

/* Super block flags. */

/* Errors were detected. */
#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2)

#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
#define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34)
#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35)
#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36)

/*
 * Temporary flags that btrfs-progs uses to do offline conversion.  The
 * kernel rejects them, but they are all kept here to avoid conflicts.
 */
#define BTRFS_SUPER_FLAG_CHANGING_BG_TREE (1ULL << 38)
#define BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM (1ULL << 39)
#define BTRFS_SUPER_FLAG_CHANGING_META_CSUM (1ULL << 40)
776
777 /*
778 * items in the extent btree are used to record the objectid of the
779 * owner of the block and the number of references
780 */
781
782 struct btrfs_extent_item {
783 __le64 refs;
784 __le64 generation;
785 __le64 flags;
786 } __attribute__ ((__packed__));
787
788 struct btrfs_extent_item_v0 {
789 __le32 refs;
790 } __attribute__ ((__packed__));
791
792
#define BTRFS_EXTENT_FLAG_DATA (1ULL << 0)
#define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1)

/* The following flags only apply to tree blocks. */

/* Use full backrefs for extent pointers in the block. */
#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8)

#define BTRFS_BACKREF_REV_MAX 256
#define BTRFS_BACKREF_REV_SHIFT 56
/*
 * Mask selecting the bits from BTRFS_BACKREF_REV_SHIFT upwards, i.e. the top
 * byte of a 64-bit value.  A ULL constant forces the 64-bit arithmetic
 * instead of a cast to the kernel-internal u64 type, so the macro also
 * expands correctly when this header is used outside the kernel.
 */
#define BTRFS_BACKREF_REV_MASK \
	((BTRFS_BACKREF_REV_MAX - 1ULL) << BTRFS_BACKREF_REV_SHIFT)

#define BTRFS_OLD_BACKREF_REV 0
#define BTRFS_MIXED_BACKREF_REV 1

/*
 * This flag is only used internally by scrub and may be changed at any
 * time.  It is declared here solely to avoid collisions.
 */
#define BTRFS_EXTENT_FLAG_SUPER (1ULL << 48)
814
815 struct btrfs_tree_block_info {
816 struct btrfs_disk_key key;
817 __u8 level;
818 } __attribute__ ((__packed__));
819
820 struct btrfs_extent_data_ref {
821 __le64 root;
822 __le64 objectid;
823 __le64 offset;
824 __le32 count;
825 } __attribute__ ((__packed__));
826
827 struct btrfs_shared_data_ref {
828 __le32 count;
829 } __attribute__ ((__packed__));
830
831 struct btrfs_extent_owner_ref {
832 __le64 root_id;
833 } __attribute__ ((__packed__));
834
835 struct btrfs_extent_inline_ref {
836 __u8 type;
837 __le64 offset;
838 } __attribute__ ((__packed__));
839
840 /* dev extents record free space on individual devices. The owner
841 * field points back to the chunk allocation mapping tree that allocated
842 * the extent. The chunk tree uuid field is a way to double check the owner
843 */
844 struct btrfs_dev_extent {
845 __le64 chunk_tree;
846 __le64 chunk_objectid;
847 __le64 chunk_offset;
848 __le64 length;
849 __u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
850 } __attribute__ ((__packed__));
851
852 struct btrfs_inode_ref {
853 __le64 index;
854 __le16 name_len;
855 /* name goes here */
856 } __attribute__ ((__packed__));
857
858 struct btrfs_inode_extref {
859 __le64 parent_objectid;
860 __le64 index;
861 __le16 name_len;
862 __u8 name[];
863 /* name goes here */
864 } __attribute__ ((__packed__));
865
866 struct btrfs_timespec {
867 __le64 sec;
868 __le32 nsec;
869 } __attribute__ ((__packed__));
870
871 struct btrfs_inode_item {
872 /* nfs style generation number */
873 __le64 generation;
874 /* transid that last touched this inode */
875 __le64 transid;
876 __le64 size;
877 __le64 nbytes;
878 __le64 block_group;
879 __le32 nlink;
880 __le32 uid;
881 __le32 gid;
882 __le32 mode;
883 __le64 rdev;
884 __le64 flags;
885
886 /* modification sequence number for NFS */
887 __le64 sequence;
888
889 /*
890 * a little future expansion, for more than this we can
891 * just grow the inode item and version it
892 */
893 __le64 reserved[4];
894 struct btrfs_timespec atime;
895 struct btrfs_timespec ctime;
896 struct btrfs_timespec mtime;
897 struct btrfs_timespec otime;
898 } __attribute__ ((__packed__));
899
900 struct btrfs_dir_log_item {
901 __le64 end;
902 } __attribute__ ((__packed__));
903
904 struct btrfs_dir_item {
905 struct btrfs_disk_key location;
906 __le64 transid;
907 __le16 data_len;
908 __le16 name_len;
909 __u8 type;
910 } __attribute__ ((__packed__));
911
#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)

/*
 * Internal, in-memory flag: the subvolume has been marked for deletion but
 * is still visible as a directory.
 */
#define BTRFS_ROOT_SUBVOL_DEAD (1ULL << 48)
919
920 struct btrfs_root_item {
921 struct btrfs_inode_item inode;
922 __le64 generation;
923 __le64 root_dirid;
924 __le64 bytenr;
925 __le64 byte_limit;
926 __le64 bytes_used;
927 __le64 last_snapshot;
928 __le64 flags;
929 __le32 refs;
930 struct btrfs_disk_key drop_progress;
931 __u8 drop_level;
932 __u8 level;
933
934 /*
935 * The following fields appear after subvol_uuids+subvol_times
936 * were introduced.
937 */
938
939 /*
940 * This generation number is used to test if the new fields are valid
941 * and up to date while reading the root item. Every time the root item
942 * is written out, the "generation" field is copied into this field. If
943 * anyone ever mounted the fs with an older kernel, we will have
944 * mismatching generation values here and thus must invalidate the
945 * new fields. See btrfs_update_root and btrfs_find_last_root for
946 * details.
947 * the offset of generation_v2 is also used as the start for the memset
948 * when invalidating the fields.
949 */
950 __le64 generation_v2;
951 __u8 uuid[BTRFS_UUID_SIZE];
952 __u8 parent_uuid[BTRFS_UUID_SIZE];
953 __u8 received_uuid[BTRFS_UUID_SIZE];
954 __le64 ctransid; /* updated when an inode changes */
955 __le64 otransid; /* trans when created */
956 __le64 stransid; /* trans when sent. non-zero for received subvol */
957 __le64 rtransid; /* trans when received. non-zero for received subvol */
958 struct btrfs_timespec ctime;
959 struct btrfs_timespec otime;
960 struct btrfs_timespec stime;
961 struct btrfs_timespec rtime;
962 __le64 reserved[8]; /* for future */
963 } __attribute__ ((__packed__));
964
965 /*
966 * Btrfs root item used to be smaller than current size. The old format ends
967 * at where member generation_v2 is.
968 */
btrfs_legacy_root_item_size(void)969 static inline __u32 btrfs_legacy_root_item_size(void)
970 {
971 return offsetof(struct btrfs_root_item, generation_v2);
972 }
973
974 /*
975 * this is used for both forward and backward root refs
976 */
977 struct btrfs_root_ref {
978 __le64 dirid;
979 __le64 sequence;
980 __le16 name_len;
981 } __attribute__ ((__packed__));
982
983 struct btrfs_disk_balance_args {
984 /*
985 * profiles to operate on, single is denoted by
986 * BTRFS_AVAIL_ALLOC_BIT_SINGLE
987 */
988 __le64 profiles;
989
990 /*
991 * usage filter
992 * BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N'
993 * BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max
994 */
995 union {
996 __le64 usage;
997 struct {
998 __le32 usage_min;
999 __le32 usage_max;
1000 };
1001 };
1002
1003 /* devid filter */
1004 __le64 devid;
1005
1006 /* devid subset filter [pstart..pend) */
1007 __le64 pstart;
1008 __le64 pend;
1009
1010 /* btrfs virtual address space subset filter [vstart..vend) */
1011 __le64 vstart;
1012 __le64 vend;
1013
1014 /*
1015 * profile to convert to, single is denoted by
1016 * BTRFS_AVAIL_ALLOC_BIT_SINGLE
1017 */
1018 __le64 target;
1019
1020 /* BTRFS_BALANCE_ARGS_* */
1021 __le64 flags;
1022
1023 /*
1024 * BTRFS_BALANCE_ARGS_LIMIT with value 'limit'
1025 * BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extend version can use minimum
1026 * and maximum
1027 */
1028 union {
1029 __le64 limit;
1030 struct {
1031 __le32 limit_min;
1032 __le32 limit_max;
1033 };
1034 };
1035
1036 /*
1037 * Process chunks that cross stripes_min..stripes_max devices,
1038 * BTRFS_BALANCE_ARGS_STRIPES_RANGE
1039 */
1040 __le32 stripes_min;
1041 __le32 stripes_max;
1042
1043 __le64 unused[6];
1044 } __attribute__ ((__packed__));
1045
1046 /*
1047 * store balance parameters to disk so that balance can be properly
1048 * resumed after crash or unmount
1049 */
1050 struct btrfs_balance_item {
1051 /* BTRFS_BALANCE_* */
1052 __le64 flags;
1053
1054 struct btrfs_disk_balance_args data;
1055 struct btrfs_disk_balance_args meta;
1056 struct btrfs_disk_balance_args sys;
1057
1058 __le64 unused[4];
1059 } __attribute__ ((__packed__));
1060
/* File extent types; BTRFS_NR_FILE_EXTENT_TYPES is the number of types. */
enum {
	BTRFS_FILE_EXTENT_INLINE = 0,
	BTRFS_FILE_EXTENT_REG = 1,
	BTRFS_FILE_EXTENT_PREALLOC = 2,
	BTRFS_NR_FILE_EXTENT_TYPES = 3,
};
1067
/*
 * Item describing one file extent.  The struct is packed, so the fields
 * sit back to back with no padding; field order and widths are part of
 * the format.
 */
struct btrfs_file_extent_item {
	/*
	 * transaction id that created this extent
	 */
	__le64 generation;
	/*
	 * max number of bytes to hold this extent in ram
	 * when we split a compressed extent we can't know how big
	 * each of the resulting pieces will be.  So, this is
	 * an upper limit on the size of the extent in ram instead of
	 * an exact limit.
	 */
	__le64 ram_bytes;

	/*
	 * 32 bits for the various ways we might encode the data,
	 * including compression and encryption.  If any of these
	 * are set to something a given disk format doesn't understand
	 * it is treated like an incompat flag for reading and writing,
	 * but not for stat.
	 */
	__u8 compression;
	__u8 encryption;
	__le16 other_encoding; /* spare for later use */

	/* are we inline data or a real extent? */
	__u8 type;

	/*
	 * disk space consumed by the extent, checksum blocks are included
	 * in these numbers
	 *
	 * At this offset in the structure, the inline extent data start
	 * (byte 21 of the packed layout: 8 + 8 + 1 + 1 + 2 + 1).
	 */
	__le64 disk_bytenr;
	__le64 disk_num_bytes;
	/*
	 * the logical offset in file blocks (no csums)
	 * this extent record is for.  This allows a file extent to point
	 * into the middle of an existing extent on disk, sharing it
	 * between two snapshots (useful if some bytes in the middle of the
	 * extent have changed)
	 */
	__le64 offset;
	/*
	 * the logical number of file blocks (no csums included).  This
	 * always reflects the size uncompressed and without encoding.
	 */
	__le64 num_bytes;

} __attribute__ ((__packed__));
1119
/*
 * Checksum item.  Only a single byte member is declared here.
 * NOTE(review): presumably this marks the start of variable-length checksum
 * data rather than describing a one-byte checksum - confirm against the
 * checksum tree code.
 */
struct btrfs_csum_item {
	__u8 csum;
} __attribute__ ((__packed__));
1123
/*
 * Device statistics item: an array of BTRFS_DEV_STAT_VALUES_MAX 64-bit
 * little-endian counters.  BTRFS_DEV_STAT_VALUES_MAX is not defined in this
 * part of the header (presumably it comes from <linux/btrfs.h>).
 */
struct btrfs_dev_stats_item {
	/*
	 * grow this item struct at the end for future enhancements and keep
	 * the existing values unchanged
	 */
	__le64 values[BTRFS_DEV_STAT_VALUES_MAX];
} __attribute__ ((__packed__));
1131
/*
 * Mode values; going by the shared name these are presumably what
 * btrfs_dev_replace_item.cont_reading_from_srcdev_mode holds - confirm
 * against the dev-replace code.
 */
#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS	0
#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID	1
1134
/*
 * Device replace item: nine 64-bit little-endian fields, packed.
 * New fields may only be appended (see the comment inside the struct).
 */
struct btrfs_dev_replace_item {
	/*
	 * grow this item struct at the end for future enhancements and keep
	 * the existing values unchanged
	 */
	__le64 src_devid;
	__le64 cursor_left;
	__le64 cursor_right;
	/* presumably one of BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_* */
	__le64 cont_reading_from_srcdev_mode;

	__le64 replace_state;
	__le64 time_started;
	__le64 time_stopped;
	__le64 num_write_errors;
	__le64 num_uncorrectable_read_errors;
} __attribute__ ((__packed__));
1151
/*
 * different types of block groups (and chunks)
 *
 * Bits 0-2 are the type bits collected in BTRFS_BLOCK_GROUP_TYPE_MASK,
 * bits 3-10 are the profile bits collected in BTRFS_BLOCK_GROUP_PROFILE_MASK.
 */
#define BTRFS_BLOCK_GROUP_DATA		(1ULL << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM	(1ULL << 1)
#define BTRFS_BLOCK_GROUP_METADATA	(1ULL << 2)
#define BTRFS_BLOCK_GROUP_RAID0		(1ULL << 3)
#define BTRFS_BLOCK_GROUP_RAID1		(1ULL << 4)
#define BTRFS_BLOCK_GROUP_DUP		(1ULL << 5)
#define BTRFS_BLOCK_GROUP_RAID10	(1ULL << 6)
#define BTRFS_BLOCK_GROUP_RAID5         (1ULL << 7)
#define BTRFS_BLOCK_GROUP_RAID6         (1ULL << 8)
#define BTRFS_BLOCK_GROUP_RAID1C3       (1ULL << 9)
#define BTRFS_BLOCK_GROUP_RAID1C4       (1ULL << 10)
/*
 * The two high bits (48 and 49) that do not name a real type or profile.
 * Both operands are defined further down in this header; that is fine
 * because the macro is only expanded where it is used.
 */
#define BTRFS_BLOCK_GROUP_RESERVED	(BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
					 BTRFS_SPACE_INFO_GLOBAL_RSV)

/* all type bits: DATA, SYSTEM and METADATA */
#define BTRFS_BLOCK_GROUP_TYPE_MASK	(BTRFS_BLOCK_GROUP_DATA |    \
					 BTRFS_BLOCK_GROUP_SYSTEM |  \
					 BTRFS_BLOCK_GROUP_METADATA)

/* all profile bits: RAID0, RAID1, RAID1C3, RAID1C4, RAID5, RAID6, DUP, RAID10 */
#define BTRFS_BLOCK_GROUP_PROFILE_MASK	(BTRFS_BLOCK_GROUP_RAID0 |   \
					 BTRFS_BLOCK_GROUP_RAID1 |   \
					 BTRFS_BLOCK_GROUP_RAID1C3 | \
					 BTRFS_BLOCK_GROUP_RAID1C4 | \
					 BTRFS_BLOCK_GROUP_RAID5 |   \
					 BTRFS_BLOCK_GROUP_RAID6 |   \
					 BTRFS_BLOCK_GROUP_DUP |     \
					 BTRFS_BLOCK_GROUP_RAID10)
/* the RAID5 and RAID6 bits */
#define BTRFS_BLOCK_GROUP_RAID56_MASK	(BTRFS_BLOCK_GROUP_RAID5 |   \
					 BTRFS_BLOCK_GROUP_RAID6)

/* the RAID1, RAID1C3 and RAID1C4 bits */
#define BTRFS_BLOCK_GROUP_RAID1_MASK	(BTRFS_BLOCK_GROUP_RAID1 |   \
					 BTRFS_BLOCK_GROUP_RAID1C3 | \
					 BTRFS_BLOCK_GROUP_RAID1C4)
1185
/*
 * We need a bit for restriper to be able to tell when chunks of type
 * SINGLE are available.  This "extended" profile format is used in
 * fs_info->avail_*_alloc_bits (in-memory) and balance item fields
 * (on-disk).  The corresponding on-disk bit in chunk.type is reserved
 * to avoid remappings between two formats in future.
 */
#define BTRFS_AVAIL_ALLOC_BIT_SINGLE	(1ULL << 48)

/*
 * A fake block group type that is used to communicate global block reserve
 * size to userspace via the SPACE_INFO ioctl.
 */
#define BTRFS_SPACE_INFO_GLOBAL_RSV	(1ULL << 49)

/* every profile bit plus the extended-format SINGLE bit */
#define BTRFS_EXTENDED_PROFILE_MASK	(BTRFS_BLOCK_GROUP_PROFILE_MASK | \
					 BTRFS_AVAIL_ALLOC_BIT_SINGLE)
1203
chunk_to_extended(__u64 flags)1204 static inline __u64 chunk_to_extended(__u64 flags)
1205 {
1206 if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)
1207 flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE;
1208
1209 return flags;
1210 }
extended_to_chunk(__u64 flags)1211 static inline __u64 extended_to_chunk(__u64 flags)
1212 {
1213 return flags & ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
1214 }
1215
/*
 * Block group item: three 64-bit little-endian fields, packed.
 */
struct btrfs_block_group_item {
	__le64 used;
	__le64 chunk_objectid;
	/* presumably BTRFS_BLOCK_GROUP_* bits - confirm against the users */
	__le64 flags;
} __attribute__ ((__packed__));
1221
/*
 * Free space info item: two 32-bit little-endian fields, packed.
 */
struct btrfs_free_space_info {
	__le32 extent_count;
	__le32 flags;
} __attribute__ ((__packed__));

/*
 * Presumably a bit for btrfs_free_space_info.flags - confirm against the
 * free space tree code.  Note the constant is 64-bit (ULL) while that field
 * is 32 bits wide.
 */
#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
1228
/* bit position at which the level starts inside a 64-bit qgroup id */
#define BTRFS_QGROUP_LEVEL_SHIFT 48

/*
 * Extract the level from a qgroup id.
 *
 * @qgroupid: 64-bit qgroup id.
 *
 * Returns the top 16 bits of @qgroupid (bits 48-63); the low 48 bits are
 * discarded.
 */
static inline __u16 btrfs_qgroup_level(__u64 qgroupid)
{
	return (__u16)(qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT);
}
1234
/*
 * is subvolume quota turned on?
 */
#define BTRFS_QGROUP_STATUS_FLAG_ON		(1ULL << 0)
/*
 * RESCAN is set during the initialization phase
 */
#define BTRFS_QGROUP_STATUS_FLAG_RESCAN		(1ULL << 1)
/*
 * Some qgroup entries are known to be out of date,
 * either because the configuration has changed in a way that
 * makes a rescan necessary, or because the fs has been mounted
 * with a non-qgroup-aware version.
 * Turning quota off and on again makes it inconsistent, too.
 */
#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT	(1ULL << 2)

/*
 * Whether or not this filesystem is using simple quotas.  Not exactly the
 * incompat bit, because we support using simple quotas, disabling it, then
 * going back to full qgroup quotas.
 */
#define BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE	(1ULL << 3)

/* all four status flag bits defined above */
#define BTRFS_QGROUP_STATUS_FLAGS_MASK	(BTRFS_QGROUP_STATUS_FLAG_ON |		\
					 BTRFS_QGROUP_STATUS_FLAG_RESCAN |	\
					 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT | \
					 BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE)

/* presumably the value stored in btrfs_qgroup_status_item.version - confirm */
#define BTRFS_QGROUP_STATUS_VERSION        1
1265
/*
 * Qgroup status item: five 64-bit little-endian fields, packed.
 */
struct btrfs_qgroup_status_item {
	__le64 version;
	/*
	 * the generation is updated during every commit.  As older
	 * versions of btrfs are not aware of qgroups, it will be
	 * possible to detect inconsistencies by checking the
	 * generation on mount time
	 */
	__le64 generation;

	/* BTRFS_QGROUP_STATUS_FLAG_* bits */
	__le64 flags;

	/*
	 * only used during scanning to record the progress
	 * of the scan.  It contains a logical address
	 */
	__le64 rescan;

	/*
	 * The generation when quotas were last enabled.  Used by simple quotas to
	 * avoid decrementing when freeing an extent that was written before
	 * enable.
	 *
	 * Set only if flags contain BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE.
	 */
	__le64 enable_gen;
} __attribute__ ((__packed__));
1294
/*
 * Qgroup info item: five 64-bit little-endian fields, packed.
 * NOTE(review): rfer/excl presumably stand for referenced and exclusive
 * byte counts and the *_cmpr fields for their compressed counterparts -
 * confirm against the qgroup accounting code.
 */
struct btrfs_qgroup_info_item {
	__le64 generation;
	__le64 rfer;
	__le64 rfer_cmpr;
	__le64 excl;
	__le64 excl_cmpr;
} __attribute__ ((__packed__));
1302
/*
 * Qgroup limit item: five 64-bit little-endian fields, packed.
 * NOTE(review): max_* and rsv_* presumably are limits and reservations for
 * referenced (rfer) and exclusive (excl) space - confirm against the qgroup
 * code.
 */
struct btrfs_qgroup_limit_item {
	/*
	 * only updated when any of the other values change
	 */
	__le64 flags;
	__le64 max_rfer;
	__le64 max_excl;
	__le64 rsv_rfer;
	__le64 rsv_excl;
} __attribute__ ((__packed__));
1313
/*
 * Verity descriptor item: a 64-bit size, 128 reserved bits and a one-byte
 * encryption field, packed (25 bytes in total).
 */
struct btrfs_verity_descriptor_item {
	/* Size of the verity descriptor in bytes */
	__le64 size;
	/*
	 * When we implement support for fscrypt, we will need to encrypt the
	 * Merkle tree for encrypted verity files.  These 128 bits are for the
	 * eventual storage of an fscrypt initialization vector.
	 */
	__le64 reserved[2];
	__u8 encryption;
} __attribute__ ((__packed__));
1325
1326 #endif /* _BTRFS_CTREE_H_ */
1327