xref: /freebsd/sys/contrib/openzfs/include/sys/ddt.h (revision a91a2465)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2016 by Delphix. All rights reserved.
24  * Copyright (c) 2023, Klara Inc.
25  */
26 
27 #ifndef _SYS_DDT_H
28 #define	_SYS_DDT_H
29 
30 #include <sys/sysmacros.h>
31 #include <sys/types.h>
32 #include <sys/fs/zfs.h>
33 #include <sys/zio.h>
34 #include <sys/dmu.h>
35 
36 #ifdef	__cplusplus
37 extern "C" {
38 #endif
39 
40 struct abd;	/* forward declaration; only used by pointer (dde_repair_abd) */
41 
42 /*
43  * DDT on-disk storage object types. Each one corresponds to a specific
44  * implementation, see ddt_ops_t. The value itself is not stored on disk.
45  *
46  * When searching for an entry, object types will be searched in this order.
47  *
48  * Note that DDT_TYPES is used as the "no type" for new entries that have not
49  * yet been written to a storage object.
50  */
51 typedef enum {
52 	DDT_TYPE_ZAP = 0,	/* ZAP storage object, ddt_zap */
53 	DDT_TYPES		/* number of types; also the "no type" value */
54 } ddt_type_t;
55 
56 _Static_assert(DDT_TYPES <= UINT8_MAX,
57 	"ddt_type_t must fit in a uint8_t");
58 
59 /* New and updated entries receive this type, see ddt_sync_entry() */
60 #define	DDT_TYPE_DEFAULT	(DDT_TYPE_ZAP)
61 
/*
 * DDT storage classes. Each class has a separate storage object for each type.
 * The value itself is not stored on disk.
 *
 * When searching for an entry, object classes will be searched in this order.
 *
 * Note that DDT_CLASSES is used as the "no class" for new entries that have not
 * yet been written to a storage object.
 */
typedef enum {
	DDT_CLASS_DITTO = 0,	/* entry has ditto blocks (obsolete) */
	DDT_CLASS_DUPLICATE,	/* entry has multiple references */
	DDT_CLASS_UNIQUE,	/* entry has a single reference */
	DDT_CLASSES		/* number of classes; also the "no class" value */
} ddt_class_t;

/*
 * Every value, including the DDT_CLASSES "no class" value, must be
 * representable in a uint8_t. The bound is therefore inclusive, which also
 * keeps it consistent with the equivalent assertion on ddt_type_t.
 */
_Static_assert(DDT_CLASSES <= UINT8_MAX,
	"ddt_class_t must fit in a uint8_t");
80 
81 /*
82  * The "key" part of an on-disk entry. This is the unique "name" for a block,
83  * that is, those parts of the block pointer that will always be the same for
84  * the same data.
85  */
86 typedef struct {
87 	zio_cksum_t	ddk_cksum;	/* 256-bit block checksum */
88 	/*
89 	 * Encoded with logical & physical size, encryption, and compression,
90 	 * as follows:
91 	 *   +-------+-------+-------+-------+-------+-------+-------+-------+
92 	 *   |   0   |   0   |   0   |X| comp|     PSIZE     |     LSIZE     |
93 	 *   +-------+-------+-------+-------+-------+-------+-------+-------+
94 	 */
95 	uint64_t	ddk_prop;	/* packed fields; use the DDK_* accessor macros */
96 } ddt_key_t;
97 
98 /*
99  * Macros for accessing the fields packed into ddt_key_t's ddk_prop (LSIZE,
100  * PSIZE, compression, crypt bit). These are similar to their BP_* counterparts.
101  */
102 #define	DDK_GET_LSIZE(ddk)	\
103 	BF64_GET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
104 #define	DDK_SET_LSIZE(ddk, x)	\
105 	BF64_SET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
106 
107 #define	DDK_GET_PSIZE(ddk)	\
108 	BF64_GET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
109 #define	DDK_SET_PSIZE(ddk, x)	\
110 	BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
111 
112 #define	DDK_GET_COMPRESS(ddk)		BF64_GET((ddk)->ddk_prop, 32, 7)
113 #define	DDK_SET_COMPRESS(ddk, x)	BF64_SET((ddk)->ddk_prop, 32, 7, x)
114 
115 #define	DDK_GET_CRYPT(ddk)		BF64_GET((ddk)->ddk_prop, 39, 1)
116 #define	DDK_SET_CRYPT(ddk, x)	BF64_SET((ddk)->ddk_prop, 39, 1, x)
117 
118 /*
119  * The "value" part of an on-disk entry. These are the "physical"
120  * characteristics of the stored block, such as its location on disk (DVAs),
121  * birth txg and ref count.
122  *
123  * Note that an entry has an array of four ddt_phys_t, one for each number of
124  * DVAs (copies= property) and another for additional "ditto" copies. Most
125  * users of ddt_phys_t will handle indexing into or counting the phys they
126  * want.
127  */
128 typedef struct {
129 	dva_t		ddp_dva[SPA_DVAS_PER_BP];	/* on-disk locations */
130 	uint64_t	ddp_refcnt;		/* reference count */
131 	uint64_t	ddp_phys_birth;		/* birth txg */
132 } ddt_phys_t;
133 
134 /*
135  * Named indexes into the ddt_phys_t array in each entry.
136  *
137  * Note, we no longer generate new DDT_PHYS_DITTO-type blocks.  However,
138  * we maintain the ability to free existing dedup-ditto blocks.
139  */
140 enum ddt_phys_type {
141 	DDT_PHYS_DITTO = 0,	/* obsolete; kept to free existing blocks */
142 	DDT_PHYS_SINGLE = 1,	/* presumably the copies=1 slot; TODO confirm */
143 	DDT_PHYS_DOUBLE = 2,	/* presumably the copies=2 slot; TODO confirm */
144 	DDT_PHYS_TRIPLE = 3,	/* presumably the copies=3 slot; TODO confirm */
145 	DDT_PHYS_TYPES		/* array length for dde_phys and dde_lead_zio */
146 };
147 
148 /*
149  * A "live" entry, holding changes to an entry made this txg, and other data to
150  * support loading, updating and repairing the entry.
151  */
152 
153 /* State flags for the dde_flags field of ddt_entry_t */
154 #define	DDE_FLAG_LOADED (1 << 0)	/* entry ready for use */
155 
156 typedef struct {
157 	/* key must be first for ddt_key_compare */
158 	ddt_key_t	dde_key;			/* ddt_tree key */
159 	ddt_phys_t	dde_phys[DDT_PHYS_TYPES];	/* on-disk data */
160 
161 	/* in-flight update IOs; same DDT_PHYS_TYPES slots as dde_phys */
162 	zio_t		*dde_lead_zio[DDT_PHYS_TYPES];
163 
164 	/* copy of data after a repair read, to be rewritten */
165 	struct abd	*dde_repair_abd;
166 
167 	/* storage type and class the entry was loaded from */
168 	ddt_type_t	dde_type;	/* DDT_TYPES if not yet stored */
169 	ddt_class_t	dde_class;	/* DDT_CLASSES if not yet stored */
170 
171 	uint8_t		dde_flags;	/* load state flags, see DDE_FLAG_LOADED */
172 	kcondvar_t	dde_cv;		/* signaled when load completes */
173 
174 	avl_node_t	dde_node;	/* ddt_tree node */
175 } ddt_entry_t;
176 
177 /*
178  * In-core DDT object. This covers all entries and stats for the whole pool
179  * for a given checksum type.
180  */
181 typedef struct {
182 	kmutex_t	ddt_lock;	/* protects changes to all fields */
183 
184 	avl_tree_t	ddt_tree;	/* "live" (changed) entries this txg */
185 
186 	avl_tree_t	ddt_repair_tree;	/* entries being repaired */
187 
188 	enum zio_checksum ddt_checksum;		/* checksum algorithm in use */
189 	spa_t		*ddt_spa;		/* pool this ddt is on */
190 	objset_t	*ddt_os;		/* ddt objset (always MOS) */
191 
192 	/* per-type/per-class entry store objects */
193 	uint64_t	ddt_object[DDT_TYPES][DDT_CLASSES];
194 
195 	/* object ids for whole-ddt and per-type/per-class stats */
196 	uint64_t	ddt_stat_object;
197 	ddt_object_t	ddt_object_stats[DDT_TYPES][DDT_CLASSES];
198 
199 	/* type/class stats by power-2-sized referenced blocks */
200 	ddt_histogram_t	ddt_histogram[DDT_TYPES][DDT_CLASSES];
201 	ddt_histogram_t	ddt_histogram_cache[DDT_TYPES][DDT_CLASSES];
202 } ddt_t;
203 
204 /*
205  * In-core and on-disk bookmark for DDT walks. This is a cursor for ddt_walk(),
206  * and is stable across calls, even if the DDT is updated, the pool is
207  * restarted or loaded on another system, or OpenZFS is upgraded.
208  */
209 typedef struct {
210 	uint64_t	ddb_class;	/* presumably a ddt_class_t value */
211 	uint64_t	ddb_type;	/* presumably a ddt_type_t value */
212 	uint64_t	ddb_checksum;	/* presumably an enum zio_checksum */
213 	uint64_t	ddb_cursor;	/* position within the walk; TODO confirm */
214 } ddt_bookmark_t;
215 /* Build a blkptr_t (output: bp) from const DDT phys/key data */
216 extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp,
217     uint64_t txg);
218 extern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk,
219     const ddt_phys_t *ddp, blkptr_t *bp);
220 /* ddt_phys_t helpers; ddt_phys_select returns a phys for (dde, bp) */
221 extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp);
222 extern void ddt_phys_clear(ddt_phys_t *ddp);
223 extern void ddt_phys_addref(ddt_phys_t *ddp);
224 extern void ddt_phys_decref(ddt_phys_t *ddp);
225 extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp);
226 /* Histogram and statistics helpers; results are written via the non-const arg */
227 extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
228 extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
229 extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh);
230 extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo);
231 extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh);
232 extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total);
233 /* Pool-wide dedup figures, returned as uint64_t */
234 extern uint64_t ddt_get_dedup_dspace(spa_t *spa);
235 extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
236 /* DDT selection, enter/exit, global init/fini, and entry lookup/removal */
237 extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
238 extern void ddt_enter(ddt_t *ddt);
239 extern void ddt_exit(ddt_t *ddt);
240 extern void ddt_init(void);
241 extern void ddt_fini(void);
242 extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
243 extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
244 extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
245 /* Class membership query for a bp; max_class semantics: TODO confirm */
246 extern boolean_t ddt_class_contains(spa_t *spa, ddt_class_t max_class,
247     const blkptr_t *bp);
248 /* Entry repair start/done pair (cf. ddt_repair_tree in ddt_t) */
249 extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp);
250 extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde);
251 /* Key comparator; ddt_entry_t keeps dde_key first for this function */
252 extern int ddt_key_compare(const void *x1, const void *x2);
253 /* Pool-level create/load/unload, per-txg sync, and bookmark-driven walk */
254 extern void ddt_create(spa_t *spa);
255 extern int ddt_load(spa_t *spa);
256 extern void ddt_unload(spa_t *spa);
257 extern void ddt_sync(spa_t *spa, uint64_t txg);
258 extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
259 /* Presumably adds a reference for bp; result semantics: TODO confirm */
260 extern boolean_t ddt_addref(spa_t *spa, const blkptr_t *bp);
261 
262 #ifdef	__cplusplus
263 }
264 #endif
265 
266 #endif	/* _SYS_DDT_H */
267