1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2016 by Delphix. All rights reserved. 24 * Copyright (c) 2023, Klara Inc. 25 */ 26 27 #ifndef _SYS_DDT_H 28 #define _SYS_DDT_H 29 30 #include <sys/sysmacros.h> 31 #include <sys/types.h> 32 #include <sys/fs/zfs.h> 33 #include <sys/zio.h> 34 #include <sys/dmu.h> 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 40 struct abd; 41 42 /* 43 * DDT on-disk storage object types. Each one corresponds to specific 44 * implementation, see ddt_ops_t. The value itself is not stored on disk. 45 * 46 * When searching for an entry, objects types will be searched in this order. 47 * 48 * Note that DDT_TYPES is used as the "no type" for new entries that have not 49 * yet been written to a storage object. 50 */ 51 typedef enum { 52 DDT_TYPE_ZAP = 0, /* ZAP storage object, ddt_zap */ 53 DDT_TYPES 54 } ddt_type_t; 55 56 _Static_assert(DDT_TYPES <= UINT8_MAX, 57 "ddt_type_t must fit in a uint8_t"); 58 59 /* New and updated entries recieve this type, see ddt_sync_entry() */ 60 #define DDT_TYPE_DEFAULT (DDT_TYPE_ZAP) 61 62 /* 63 * DDT storage classes. Each class has a separate storage object for each type. 64 * The value itself is not stored on disk. 65 * 66 * When search for an entry, object classes will be searched in this order. 67 * 68 * Note that DDT_CLASSES is used as the "no class" for new entries that have not 69 * yet been written to a storage object. 70 */ 71 typedef enum { 72 DDT_CLASS_DITTO = 0, /* entry has ditto blocks (obsolete) */ 73 DDT_CLASS_DUPLICATE, /* entry has multiple references */ 74 DDT_CLASS_UNIQUE, /* entry has a single reference */ 75 DDT_CLASSES 76 } ddt_class_t; 77 78 _Static_assert(DDT_CLASSES < UINT8_MAX, 79 "ddt_class_t must fit in a uint8_t"); 80 81 /* 82 * The "key" part of an on-disk entry. This is the unique "name" for a block, 83 * that is, that parts of the block pointer that will always be the same for 84 * the same data. 85 */ 86 typedef struct { 87 zio_cksum_t ddk_cksum; /* 256-bit block checksum */ 88 /* 89 * Encoded with logical & physical size, encryption, and compression, 90 * as follows: 91 * +-------+-------+-------+-------+-------+-------+-------+-------+ 92 * | 0 | 0 | 0 |X| comp| PSIZE | LSIZE | 93 * +-------+-------+-------+-------+-------+-------+-------+-------+ 94 */ 95 uint64_t ddk_prop; 96 } ddt_key_t; 97 98 /* 99 * Macros for accessing parts of a ddt_key_t. These are similar to their BP_* 100 * counterparts. 101 */ 102 #define DDK_GET_LSIZE(ddk) \ 103 BF64_GET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1) 104 #define DDK_SET_LSIZE(ddk, x) \ 105 BF64_SET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x) 106 107 #define DDK_GET_PSIZE(ddk) \ 108 BF64_GET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1) 109 #define DDK_SET_PSIZE(ddk, x) \ 110 BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) 111 112 #define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 7) 113 #define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 7, x) 114 115 #define DDK_GET_CRYPT(ddk) BF64_GET((ddk)->ddk_prop, 39, 1) 116 #define DDK_SET_CRYPT(ddk, x) BF64_SET((ddk)->ddk_prop, 39, 1, x) 117 118 /* 119 * The "value" part for an on-disk entry. These are the "physical" 120 * characteristics of the stored block, such as its location on disk (DVAs), 121 * birth txg and ref count. 122 * 123 * Note that an entry has an array of four ddt_phys_t, one for each number of 124 * DVAs (copies= property) and another for additional "ditto" copies. Most 125 * users of ddt_phys_t will handle indexing into or counting the phys they 126 * want. 127 */ 128 typedef struct { 129 dva_t ddp_dva[SPA_DVAS_PER_BP]; 130 uint64_t ddp_refcnt; 131 uint64_t ddp_phys_birth; 132 } ddt_phys_t; 133 134 /* 135 * Named indexes into the ddt_phys_t array in each entry. 136 * 137 * Note, we no longer generate new DDT_PHYS_DITTO-type blocks. However, 138 * we maintain the ability to free existing dedup-ditto blocks. 139 */ 140 enum ddt_phys_type { 141 DDT_PHYS_DITTO = 0, 142 DDT_PHYS_SINGLE = 1, 143 DDT_PHYS_DOUBLE = 2, 144 DDT_PHYS_TRIPLE = 3, 145 DDT_PHYS_TYPES 146 }; 147 148 /* 149 * A "live" entry, holding changes to an entry made this txg, and other data to 150 * support loading, updating and repairing the entry. 151 */ 152 153 /* State flags for dde_flags */ 154 #define DDE_FLAG_LOADED (1 << 0) /* entry ready for use */ 155 156 typedef struct { 157 /* key must be first for ddt_key_compare */ 158 ddt_key_t dde_key; /* ddt_tree key */ 159 ddt_phys_t dde_phys[DDT_PHYS_TYPES]; /* on-disk data */ 160 161 /* in-flight update IOs */ 162 zio_t *dde_lead_zio[DDT_PHYS_TYPES]; 163 164 /* copy of data after a repair read, to be rewritten */ 165 struct abd *dde_repair_abd; 166 167 /* storage type and class the entry was loaded from */ 168 ddt_type_t dde_type; 169 ddt_class_t dde_class; 170 171 uint8_t dde_flags; /* load state flags */ 172 kcondvar_t dde_cv; /* signaled when load completes */ 173 174 avl_node_t dde_node; /* ddt_tree node */ 175 } ddt_entry_t; 176 177 /* 178 * In-core DDT object. This covers all entries and stats for a the whole pool 179 * for a given checksum type. 180 */ 181 typedef struct { 182 kmutex_t ddt_lock; /* protects changes to all fields */ 183 184 avl_tree_t ddt_tree; /* "live" (changed) entries this txg */ 185 186 avl_tree_t ddt_repair_tree; /* entries being repaired */ 187 188 enum zio_checksum ddt_checksum; /* checksum algorithm in use */ 189 spa_t *ddt_spa; /* pool this ddt is on */ 190 objset_t *ddt_os; /* ddt objset (always MOS) */ 191 192 /* per-type/per-class entry store objects */ 193 uint64_t ddt_object[DDT_TYPES][DDT_CLASSES]; 194 195 /* object ids for whole-ddt and per-type/per-class stats */ 196 uint64_t ddt_stat_object; 197 ddt_object_t ddt_object_stats[DDT_TYPES][DDT_CLASSES]; 198 199 /* type/class stats by power-2-sized referenced blocks */ 200 ddt_histogram_t ddt_histogram[DDT_TYPES][DDT_CLASSES]; 201 ddt_histogram_t ddt_histogram_cache[DDT_TYPES][DDT_CLASSES]; 202 } ddt_t; 203 204 /* 205 * In-core and on-disk bookmark for DDT walks. This is a cursor for ddt_walk(), 206 * and is stable across calls, even if the DDT is updated, the pool is 207 * restarted or loaded on another system, or OpenZFS is upgraded. 208 */ 209 typedef struct { 210 uint64_t ddb_class; 211 uint64_t ddb_type; 212 uint64_t ddb_checksum; 213 uint64_t ddb_cursor; 214 } ddt_bookmark_t; 215 216 extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, 217 uint64_t txg); 218 extern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk, 219 const ddt_phys_t *ddp, blkptr_t *bp); 220 221 extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp); 222 extern void ddt_phys_clear(ddt_phys_t *ddp); 223 extern void ddt_phys_addref(ddt_phys_t *ddp); 224 extern void ddt_phys_decref(ddt_phys_t *ddp); 225 extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp); 226 227 extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src); 228 extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh); 229 extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh); 230 extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo); 231 extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh); 232 extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total); 233 234 extern uint64_t ddt_get_dedup_dspace(spa_t *spa); 235 extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa); 236 237 extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp); 238 extern void ddt_enter(ddt_t *ddt); 239 extern void ddt_exit(ddt_t *ddt); 240 extern void ddt_init(void); 241 extern void ddt_fini(void); 242 extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add); 243 extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp); 244 extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde); 245 246 extern boolean_t ddt_class_contains(spa_t *spa, ddt_class_t max_class, 247 const blkptr_t *bp); 248 249 extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp); 250 extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde); 251 252 extern int ddt_key_compare(const void *x1, const void *x2); 253 254 extern void ddt_create(spa_t *spa); 255 extern int ddt_load(spa_t *spa); 256 extern void ddt_unload(spa_t *spa); 257 extern void ddt_sync(spa_t *spa, uint64_t txg); 258 extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde); 259 260 extern boolean_t ddt_addref(spa_t *spa, const blkptr_t *bp); 261 262 #ifdef __cplusplus 263 } 264 #endif 265 266 #endif /* _SYS_DDT_H */ 267