1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, 2016 by Delphix. All rights reserved. 24 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. 25 * Copyright (c) 2019, Allan Jude 26 * Copyright (c) 2019, Klara Inc. 27 */ 28 29 #ifndef _SYS_ARC_H 30 #define _SYS_ARC_H 31 32 #include <sys/zfs_context.h> 33 34 #ifdef __cplusplus 35 extern "C" { 36 #endif 37 38 #include <sys/zio.h> 39 #include <sys/dmu.h> 40 #include <sys/spa.h> 41 #include <sys/zfs_refcount.h> 42 43 /* 44 * Used by arc_flush() to inform arc_evict_state() that it should evict 45 * all available buffers from the arc state being passed in. 46 */ 47 #define ARC_EVICT_ALL -1ULL 48 49 #define HDR_SET_LSIZE(hdr, x) do { \ 50 ASSERT(IS_P2ALIGNED(x, 1U << SPA_MINBLOCKSHIFT)); \ 51 (hdr)->b_lsize = ((x) >> SPA_MINBLOCKSHIFT); \ 52 _NOTE(CONSTCOND) } while (0) 53 54 #define HDR_SET_PSIZE(hdr, x) do { \ 55 ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \ 56 (hdr)->b_psize = ((x) >> SPA_MINBLOCKSHIFT); \ 57 _NOTE(CONSTCOND) } while (0) 58 59 #define HDR_GET_LSIZE(hdr) ((hdr)->b_lsize << SPA_MINBLOCKSHIFT) 60 #define HDR_GET_PSIZE(hdr) ((hdr)->b_psize << SPA_MINBLOCKSHIFT) 61 62 typedef struct arc_buf_hdr arc_buf_hdr_t; 63 typedef struct arc_buf arc_buf_t; 64 typedef struct arc_prune arc_prune_t; 65 66 /* 67 * Because the ARC can store encrypted data, errors (not due to bugs) may arise 68 * while transforming data into its desired format - specifically, when 69 * decrypting, the key may not be present, or the HMAC may not be correct 70 * which signifies deliberate tampering with the on-disk state 71 * (assuming that the checksum was correct). If any error occurs, the "buf" 72 * parameter will be NULL. 73 */ 74 typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb, 75 const blkptr_t *bp, arc_buf_t *buf, void *priv); 76 typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv); 77 typedef void arc_prune_func_t(int64_t bytes, void *priv); 78 79 /* Shared module parameters */ 80 extern int zfs_arc_average_blocksize; 81 82 /* generic arc_done_func_t's which you can use */ 83 arc_read_done_func_t arc_bcopy_func; 84 arc_read_done_func_t arc_getbuf_func; 85 86 /* generic arc_prune_func_t wrapper for callbacks */ 87 struct arc_prune { 88 arc_prune_func_t *p_pfunc; 89 void *p_private; 90 uint64_t p_adjust; 91 list_node_t p_node; 92 zfs_refcount_t p_refcnt; 93 }; 94 95 typedef enum arc_strategy { 96 ARC_STRATEGY_META_ONLY = 0, /* Evict only meta data buffers */ 97 ARC_STRATEGY_META_BALANCED = 1, /* Evict data buffers if needed */ 98 } arc_strategy_t; 99 100 typedef enum arc_flags 101 { 102 /* 103 * Public flags that can be passed into the ARC by external consumers. 104 */ 105 ARC_FLAG_WAIT = 1 << 0, /* perform sync I/O */ 106 ARC_FLAG_NOWAIT = 1 << 1, /* perform async I/O */ 107 ARC_FLAG_PREFETCH = 1 << 2, /* I/O is a prefetch */ 108 ARC_FLAG_CACHED = 1 << 3, /* I/O was in cache */ 109 ARC_FLAG_L2CACHE = 1 << 4, /* cache in L2ARC */ 110 ARC_FLAG_PREDICTIVE_PREFETCH = 1 << 5, /* I/O from zfetch */ 111 ARC_FLAG_PRESCIENT_PREFETCH = 1 << 6, /* long min lifespan */ 112 113 /* 114 * Private ARC flags. These flags are private ARC only flags that 115 * will show up in b_flags in the arc_hdr_buf_t. These flags should 116 * only be set by ARC code. 117 */ 118 ARC_FLAG_IN_HASH_TABLE = 1 << 7, /* buffer is hashed */ 119 ARC_FLAG_IO_IN_PROGRESS = 1 << 8, /* I/O in progress */ 120 ARC_FLAG_IO_ERROR = 1 << 9, /* I/O failed for buf */ 121 ARC_FLAG_INDIRECT = 1 << 10, /* indirect block */ 122 /* Indicates that block was read with ASYNC priority. */ 123 ARC_FLAG_PRIO_ASYNC_READ = 1 << 11, 124 ARC_FLAG_L2_WRITING = 1 << 12, /* write in progress */ 125 ARC_FLAG_L2_EVICTED = 1 << 13, /* evicted during I/O */ 126 ARC_FLAG_L2_WRITE_HEAD = 1 << 14, /* head of write list */ 127 /* 128 * Encrypted or authenticated on disk (may be plaintext in memory). 129 * This header has b_crypt_hdr allocated. Does not include indirect 130 * blocks with checksums of MACs which will also have their X 131 * (encrypted) bit set in the bp. 132 */ 133 ARC_FLAG_PROTECTED = 1 << 15, 134 /* data has not been authenticated yet */ 135 ARC_FLAG_NOAUTH = 1 << 16, 136 /* indicates that the buffer contains metadata (otherwise, data) */ 137 ARC_FLAG_BUFC_METADATA = 1 << 17, 138 139 /* Flags specifying whether optional hdr struct fields are defined */ 140 ARC_FLAG_HAS_L1HDR = 1 << 18, 141 ARC_FLAG_HAS_L2HDR = 1 << 19, 142 143 /* 144 * Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data. 145 * This allows the l2arc to use the blkptr's checksum to verify 146 * the data without having to store the checksum in the hdr. 147 */ 148 ARC_FLAG_COMPRESSED_ARC = 1 << 20, 149 ARC_FLAG_SHARED_DATA = 1 << 21, 150 151 /* 152 * Fail this arc_read() (with ENOENT) if the data is not already present 153 * in cache. 154 */ 155 ARC_FLAG_CACHED_ONLY = 1 << 22, 156 157 /* 158 * The arc buffer's compression mode is stored in the top 7 bits of the 159 * flags field, so these dummy flags are included so that MDB can 160 * interpret the enum properly. 161 */ 162 ARC_FLAG_COMPRESS_0 = 1 << 24, 163 ARC_FLAG_COMPRESS_1 = 1 << 25, 164 ARC_FLAG_COMPRESS_2 = 1 << 26, 165 ARC_FLAG_COMPRESS_3 = 1 << 27, 166 ARC_FLAG_COMPRESS_4 = 1 << 28, 167 ARC_FLAG_COMPRESS_5 = 1 << 29, 168 ARC_FLAG_COMPRESS_6 = 1 << 30 169 170 } arc_flags_t; 171 172 typedef enum arc_buf_flags { 173 ARC_BUF_FLAG_SHARED = 1 << 0, 174 ARC_BUF_FLAG_COMPRESSED = 1 << 1, 175 /* 176 * indicates whether this arc_buf_t is encrypted, regardless of 177 * state on-disk 178 */ 179 ARC_BUF_FLAG_ENCRYPTED = 1 << 2 180 } arc_buf_flags_t; 181 182 struct arc_buf { 183 arc_buf_hdr_t *b_hdr; 184 arc_buf_t *b_next; 185 kmutex_t b_evict_lock; 186 void *b_data; 187 arc_buf_flags_t b_flags; 188 }; 189 190 typedef enum arc_buf_contents { 191 ARC_BUFC_INVALID, /* invalid type */ 192 ARC_BUFC_DATA, /* buffer contains data */ 193 ARC_BUFC_METADATA, /* buffer contains metadata */ 194 ARC_BUFC_NUMTYPES 195 } arc_buf_contents_t; 196 197 /* 198 * The following breakdowns of arc_size exist for kstat only. 199 */ 200 typedef enum arc_space_type { 201 ARC_SPACE_DATA, 202 ARC_SPACE_META, 203 ARC_SPACE_HDRS, 204 ARC_SPACE_L2HDRS, 205 ARC_SPACE_DBUF, 206 ARC_SPACE_DNODE, 207 ARC_SPACE_BONUS, 208 ARC_SPACE_ABD_CHUNK_WASTE, 209 ARC_SPACE_NUMTYPES 210 } arc_space_type_t; 211 212 typedef enum arc_state_type { 213 ARC_STATE_ANON, 214 ARC_STATE_MRU, 215 ARC_STATE_MRU_GHOST, 216 ARC_STATE_MFU, 217 ARC_STATE_MFU_GHOST, 218 ARC_STATE_L2C_ONLY, 219 ARC_STATE_NUMTYPES 220 } arc_state_type_t; 221 222 typedef struct arc_buf_info { 223 arc_state_type_t abi_state_type; 224 arc_buf_contents_t abi_state_contents; 225 uint32_t abi_flags; 226 uint32_t abi_bufcnt; 227 uint64_t abi_size; 228 uint64_t abi_spa; 229 uint64_t abi_access; 230 uint32_t abi_mru_hits; 231 uint32_t abi_mru_ghost_hits; 232 uint32_t abi_mfu_hits; 233 uint32_t abi_mfu_ghost_hits; 234 uint32_t abi_l2arc_hits; 235 uint32_t abi_holds; 236 uint64_t abi_l2arc_dattr; 237 uint64_t abi_l2arc_asize; 238 enum zio_compress abi_l2arc_compress; 239 } arc_buf_info_t; 240 241 void arc_space_consume(uint64_t space, arc_space_type_t type); 242 void arc_space_return(uint64_t space, arc_space_type_t type); 243 boolean_t arc_is_metadata(arc_buf_t *buf); 244 boolean_t arc_is_encrypted(arc_buf_t *buf); 245 boolean_t arc_is_unauthenticated(arc_buf_t *buf); 246 enum zio_compress arc_get_compression(arc_buf_t *buf); 247 void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt, 248 uint8_t *iv, uint8_t *mac); 249 int arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb, 250 boolean_t in_place); 251 void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder, 252 dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv, 253 const uint8_t *mac); 254 arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, 255 int32_t size); 256 arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag, 257 uint64_t psize, uint64_t lsize, enum zio_compress compression_type, 258 uint8_t complevel); 259 arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, 260 boolean_t byteorder, const uint8_t *salt, const uint8_t *iv, 261 const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize, 262 enum zio_compress compression_type, uint8_t complevel); 263 uint8_t arc_get_complevel(arc_buf_t *buf); 264 arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size); 265 arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize, 266 enum zio_compress compression_type, uint8_t complevel); 267 arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder, 268 const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, 269 dmu_object_type_t ot, uint64_t psize, uint64_t lsize, 270 enum zio_compress compression_type, uint8_t complevel); 271 void arc_return_buf(arc_buf_t *buf, void *tag); 272 void arc_loan_inuse_buf(arc_buf_t *buf, void *tag); 273 void arc_buf_destroy(arc_buf_t *buf, void *tag); 274 void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index); 275 uint64_t arc_buf_size(arc_buf_t *buf); 276 uint64_t arc_buf_lsize(arc_buf_t *buf); 277 void arc_buf_access(arc_buf_t *buf); 278 void arc_release(arc_buf_t *buf, void *tag); 279 int arc_released(arc_buf_t *buf); 280 void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused); 281 void arc_buf_freeze(arc_buf_t *buf); 282 void arc_buf_thaw(arc_buf_t *buf); 283 #ifdef ZFS_DEBUG 284 int arc_referenced(arc_buf_t *buf); 285 #endif 286 287 int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, 288 arc_read_done_func_t *done, void *priv, zio_priority_t priority, 289 int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb); 290 zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, 291 blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp, 292 arc_write_done_func_t *ready, arc_write_done_func_t *child_ready, 293 arc_write_done_func_t *physdone, arc_write_done_func_t *done, 294 void *priv, zio_priority_t priority, int zio_flags, 295 const zbookmark_phys_t *zb); 296 297 arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *priv); 298 void arc_remove_prune_callback(arc_prune_t *p); 299 void arc_freed(spa_t *spa, const blkptr_t *bp); 300 301 void arc_flush(spa_t *spa, boolean_t retry); 302 void arc_tempreserve_clear(uint64_t reserve); 303 int arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg); 304 305 uint64_t arc_all_memory(void); 306 uint64_t arc_default_max(uint64_t min, uint64_t allmem); 307 uint64_t arc_target_bytes(void); 308 void arc_init(void); 309 void arc_fini(void); 310 311 /* 312 * Level 2 ARC 313 */ 314 315 void l2arc_add_vdev(spa_t *spa, vdev_t *vd); 316 void l2arc_remove_vdev(vdev_t *vd); 317 boolean_t l2arc_vdev_present(vdev_t *vd); 318 void l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen); 319 boolean_t l2arc_range_check_overlap(uint64_t bottom, uint64_t top, 320 uint64_t check); 321 void l2arc_init(void); 322 void l2arc_fini(void); 323 void l2arc_start(void); 324 void l2arc_stop(void); 325 void l2arc_spa_rebuild_start(spa_t *spa); 326 327 #ifndef _KERNEL 328 extern boolean_t arc_watch; 329 #endif 330 331 #ifdef __cplusplus 332 } 333 #endif 334 335 #endif /* _SYS_ARC_H */ 336