1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy 22eda14cbcSMatt Macy /* 23eda14cbcSMatt Macy * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24eda14cbcSMatt Macy * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 25eda14cbcSMatt Macy * Copyright 2017 Nexenta Systems, Inc. 26eda14cbcSMatt Macy */ 27eda14cbcSMatt Macy 28eda14cbcSMatt Macy #ifndef _SYS_ZAP_H 29eda14cbcSMatt Macy #define _SYS_ZAP_H 30eda14cbcSMatt Macy 31eda14cbcSMatt Macy /* 32eda14cbcSMatt Macy * ZAP - ZFS Attribute Processor 33eda14cbcSMatt Macy * 34eda14cbcSMatt Macy * The ZAP is a module which sits on top of the DMU (Data Management 35eda14cbcSMatt Macy * Unit) and implements a higher-level storage primitive using DMU 36eda14cbcSMatt Macy * objects. Its primary consumer is the ZPL (ZFS Posix Layer). 37eda14cbcSMatt Macy * 38eda14cbcSMatt Macy * A "zapobj" is a DMU object which the ZAP uses to stores attributes. 39eda14cbcSMatt Macy * Users should use only zap routines to access a zapobj - they should 40eda14cbcSMatt Macy * not access the DMU object directly using DMU routines. 41eda14cbcSMatt Macy * 42eda14cbcSMatt Macy * The attributes stored in a zapobj are name-value pairs. The name is 43eda14cbcSMatt Macy * a zero-terminated string of up to ZAP_MAXNAMELEN bytes (including 44eda14cbcSMatt Macy * terminating NULL). The value is an array of integers, which may be 45eda14cbcSMatt Macy * 1, 2, 4, or 8 bytes long. The total space used by the array (number 46eda14cbcSMatt Macy * of integers * integer length) can be up to ZAP_MAXVALUELEN bytes. 47eda14cbcSMatt Macy * Note that an 8-byte integer value can be used to store the location 48eda14cbcSMatt Macy * (object number) of another dmu object (which may be itself a zapobj). 49eda14cbcSMatt Macy * Note that you can use a zero-length attribute to store a single bit 50eda14cbcSMatt Macy * of information - the attribute is present or not. 51eda14cbcSMatt Macy * 52eda14cbcSMatt Macy * The ZAP routines are thread-safe. However, you must observe the 53eda14cbcSMatt Macy * DMU's restriction that a transaction may not be operated on 54eda14cbcSMatt Macy * concurrently. 55eda14cbcSMatt Macy * 56eda14cbcSMatt Macy * Any of the routines that return an int may return an I/O error (EIO 57eda14cbcSMatt Macy * or ECHECKSUM). 58eda14cbcSMatt Macy * 59eda14cbcSMatt Macy * 60eda14cbcSMatt Macy * Implementation / Performance Notes: 61eda14cbcSMatt Macy * 62eda14cbcSMatt Macy * The ZAP is intended to operate most efficiently on attributes with 63eda14cbcSMatt Macy * short (49 bytes or less) names and single 8-byte values, for which 64eda14cbcSMatt Macy * the microzap will be used. The ZAP should be efficient enough so 65eda14cbcSMatt Macy * that the user does not need to cache these attributes. 66eda14cbcSMatt Macy * 67eda14cbcSMatt Macy * The ZAP's locking scheme makes its routines thread-safe. Operations 68eda14cbcSMatt Macy * on different zapobjs will be processed concurrently. Operations on 69eda14cbcSMatt Macy * the same zapobj which only read data will be processed concurrently. 70eda14cbcSMatt Macy * Operations on the same zapobj which modify data will be processed 71eda14cbcSMatt Macy * concurrently when there are many attributes in the zapobj (because 72eda14cbcSMatt Macy * the ZAP uses per-block locking - more than 128 * (number of cpus) 73eda14cbcSMatt Macy * small attributes will suffice). 74eda14cbcSMatt Macy */ 75eda14cbcSMatt Macy 76eda14cbcSMatt Macy /* 77eda14cbcSMatt Macy * We're using zero-terminated byte strings (ie. ASCII or UTF-8 C 78eda14cbcSMatt Macy * strings) for the names of attributes, rather than a byte string 79eda14cbcSMatt Macy * bounded by an explicit length. If some day we want to support names 80eda14cbcSMatt Macy * in character sets which have embedded zeros (eg. UTF-16, UTF-32), 81eda14cbcSMatt Macy * we'll have to add routines for using length-bounded strings. 82eda14cbcSMatt Macy */ 83eda14cbcSMatt Macy 84eda14cbcSMatt Macy #include <sys/dmu.h> 85eda14cbcSMatt Macy 86eda14cbcSMatt Macy #ifdef __cplusplus 87eda14cbcSMatt Macy extern "C" { 88eda14cbcSMatt Macy #endif 89eda14cbcSMatt Macy 90eda14cbcSMatt Macy /* 91eda14cbcSMatt Macy * Specifies matching criteria for ZAP lookups. 92eda14cbcSMatt Macy * MT_NORMALIZE Use ZAP normalization flags, which can include both 93eda14cbcSMatt Macy * unicode normalization and case-insensitivity. 94eda14cbcSMatt Macy * MT_MATCH_CASE Do case-sensitive lookups even if MT_NORMALIZE is 95eda14cbcSMatt Macy * specified and ZAP normalization flags include 96eda14cbcSMatt Macy * U8_TEXTPREP_TOUPPER. 97eda14cbcSMatt Macy */ 98eda14cbcSMatt Macy typedef enum matchtype { 99eda14cbcSMatt Macy MT_NORMALIZE = 1 << 0, 100eda14cbcSMatt Macy MT_MATCH_CASE = 1 << 1, 101eda14cbcSMatt Macy } matchtype_t; 102eda14cbcSMatt Macy 103eda14cbcSMatt Macy typedef enum zap_flags { 104eda14cbcSMatt Macy /* Use 64-bit hash value (serialized cursors will always use 64-bits) */ 105eda14cbcSMatt Macy ZAP_FLAG_HASH64 = 1 << 0, 106eda14cbcSMatt Macy /* Key is binary, not string (zap_add_uint64() can be used) */ 107eda14cbcSMatt Macy ZAP_FLAG_UINT64_KEY = 1 << 1, 108eda14cbcSMatt Macy /* 109eda14cbcSMatt Macy * First word of key (which must be an array of uint64) is 110eda14cbcSMatt Macy * already randomly distributed. 111eda14cbcSMatt Macy */ 112eda14cbcSMatt Macy ZAP_FLAG_PRE_HASHED_KEY = 1 << 2, 113e3aa18adSMartin Matuska #if defined(__linux__) && defined(_KERNEL) 114e3aa18adSMartin Matuska } zfs_zap_flags_t; 115e3aa18adSMartin Matuska #define zap_flags_t zfs_zap_flags_t 116e3aa18adSMartin Matuska #else 117eda14cbcSMatt Macy } zap_flags_t; 118e3aa18adSMartin Matuska #endif 119eda14cbcSMatt Macy 120eda14cbcSMatt Macy /* 121eda14cbcSMatt Macy * Create a new zapobj with no attributes and return its object number. 122eda14cbcSMatt Macy */ 123eda14cbcSMatt Macy uint64_t zap_create(objset_t *ds, dmu_object_type_t ot, 124eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); 125eda14cbcSMatt Macy uint64_t zap_create_dnsize(objset_t *ds, dmu_object_type_t ot, 126eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx); 127eda14cbcSMatt Macy uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot, 128eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); 129eda14cbcSMatt Macy uint64_t zap_create_norm_dnsize(objset_t *ds, int normflags, 130eda14cbcSMatt Macy dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, 131eda14cbcSMatt Macy int dnodesize, dmu_tx_t *tx); 132eda14cbcSMatt Macy uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, 133eda14cbcSMatt Macy dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, 134eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); 135eda14cbcSMatt Macy uint64_t zap_create_flags_dnsize(objset_t *os, int normflags, 136eda14cbcSMatt Macy zap_flags_t flags, dmu_object_type_t ot, int leaf_blockshift, 137eda14cbcSMatt Macy int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen, 138eda14cbcSMatt Macy int dnodesize, dmu_tx_t *tx); 139eda14cbcSMatt Macy uint64_t zap_create_hold(objset_t *os, int normflags, zap_flags_t flags, 140eda14cbcSMatt Macy dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, 141eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, int dnodesize, 142a0b956f5SMartin Matuska dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx); 143eda14cbcSMatt Macy 144eda14cbcSMatt Macy uint64_t zap_create_link(objset_t *os, dmu_object_type_t ot, 145eda14cbcSMatt Macy uint64_t parent_obj, const char *name, dmu_tx_t *tx); 146eda14cbcSMatt Macy uint64_t zap_create_link_dnsize(objset_t *os, dmu_object_type_t ot, 147eda14cbcSMatt Macy uint64_t parent_obj, const char *name, int dnodesize, dmu_tx_t *tx); 148eda14cbcSMatt Macy 149eda14cbcSMatt Macy /* 150eda14cbcSMatt Macy * Initialize an already-allocated object. 151eda14cbcSMatt Macy */ 152eda14cbcSMatt Macy void mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags, 153eda14cbcSMatt Macy dmu_tx_t *tx); 154eda14cbcSMatt Macy 155eda14cbcSMatt Macy /* 156eda14cbcSMatt Macy * Create a new zapobj with no attributes from the given (unallocated) 157eda14cbcSMatt Macy * object number. 158eda14cbcSMatt Macy */ 159eda14cbcSMatt Macy int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot, 160eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); 161eda14cbcSMatt Macy int zap_create_claim_dnsize(objset_t *ds, uint64_t obj, dmu_object_type_t ot, 162eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx); 163eda14cbcSMatt Macy int zap_create_claim_norm(objset_t *ds, uint64_t obj, 164eda14cbcSMatt Macy int normflags, dmu_object_type_t ot, 165eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); 166eda14cbcSMatt Macy int zap_create_claim_norm_dnsize(objset_t *ds, uint64_t obj, 167eda14cbcSMatt Macy int normflags, dmu_object_type_t ot, 168eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx); 169eda14cbcSMatt Macy 170eda14cbcSMatt Macy /* 171eda14cbcSMatt Macy * The zapobj passed in must be a valid ZAP object for all of the 172eda14cbcSMatt Macy * following routines. 173eda14cbcSMatt Macy */ 174eda14cbcSMatt Macy 175eda14cbcSMatt Macy /* 176eda14cbcSMatt Macy * Destroy this zapobj and all its attributes. 177eda14cbcSMatt Macy * 178eda14cbcSMatt Macy * Frees the object number using dmu_object_free. 179eda14cbcSMatt Macy */ 180eda14cbcSMatt Macy int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx); 181eda14cbcSMatt Macy 182eda14cbcSMatt Macy /* 183eda14cbcSMatt Macy * Manipulate attributes. 184eda14cbcSMatt Macy * 185eda14cbcSMatt Macy * 'integer_size' is in bytes, and must be 1, 2, 4, or 8. 186eda14cbcSMatt Macy */ 187eda14cbcSMatt Macy 188eda14cbcSMatt Macy /* 189eda14cbcSMatt Macy * Retrieve the contents of the attribute with the given name. 190eda14cbcSMatt Macy * 191eda14cbcSMatt Macy * If the requested attribute does not exist, the call will fail and 192eda14cbcSMatt Macy * return ENOENT. 193eda14cbcSMatt Macy * 194eda14cbcSMatt Macy * If 'integer_size' is smaller than the attribute's integer size, the 195eda14cbcSMatt Macy * call will fail and return EINVAL. 196eda14cbcSMatt Macy * 197eda14cbcSMatt Macy * If 'integer_size' is equal to or larger than the attribute's integer 198eda14cbcSMatt Macy * size, the call will succeed and return 0. 199eda14cbcSMatt Macy * 200eda14cbcSMatt Macy * When converting to a larger integer size, the integers will be treated as 201eda14cbcSMatt Macy * unsigned (ie. no sign-extension will be performed). 202eda14cbcSMatt Macy * 203eda14cbcSMatt Macy * 'num_integers' is the length (in integers) of 'buf'. 204eda14cbcSMatt Macy * 205eda14cbcSMatt Macy * If the attribute is longer than the buffer, as many integers as will 206eda14cbcSMatt Macy * fit will be transferred to 'buf'. If the entire attribute was not 207eda14cbcSMatt Macy * transferred, the call will return EOVERFLOW. 208eda14cbcSMatt Macy */ 209eda14cbcSMatt Macy int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name, 210eda14cbcSMatt Macy uint64_t integer_size, uint64_t num_integers, void *buf); 211eda14cbcSMatt Macy 212eda14cbcSMatt Macy /* 213eda14cbcSMatt Macy * If rn_len is nonzero, realname will be set to the name of the found 214eda14cbcSMatt Macy * entry (which may be different from the requested name if matchtype is 215eda14cbcSMatt Macy * not MT_EXACT). 216eda14cbcSMatt Macy * 217eda14cbcSMatt Macy * If normalization_conflictp is not NULL, it will be set if there is 218eda14cbcSMatt Macy * another name with the same case/unicode normalized form. 219eda14cbcSMatt Macy */ 220eda14cbcSMatt Macy int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name, 221eda14cbcSMatt Macy uint64_t integer_size, uint64_t num_integers, void *buf, 222eda14cbcSMatt Macy matchtype_t mt, char *realname, int rn_len, 223eda14cbcSMatt Macy boolean_t *normalization_conflictp); 224eda14cbcSMatt Macy int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 225eda14cbcSMatt Macy int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf); 226eda14cbcSMatt Macy int zap_contains(objset_t *ds, uint64_t zapobj, const char *name); 227eda14cbcSMatt Macy int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name); 228eda14cbcSMatt Macy int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 229eda14cbcSMatt Macy int key_numints); 230eda14cbcSMatt Macy 231eda14cbcSMatt Macy int zap_lookup_by_dnode(dnode_t *dn, const char *name, 232eda14cbcSMatt Macy uint64_t integer_size, uint64_t num_integers, void *buf); 233eda14cbcSMatt Macy int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name, 234eda14cbcSMatt Macy uint64_t integer_size, uint64_t num_integers, void *buf, 235eda14cbcSMatt Macy matchtype_t mt, char *realname, int rn_len, 236eda14cbcSMatt Macy boolean_t *ncp); 237eda14cbcSMatt Macy 238eda14cbcSMatt Macy int zap_count_write_by_dnode(dnode_t *dn, const char *name, 239eda14cbcSMatt Macy int add, zfs_refcount_t *towrite, zfs_refcount_t *tooverwrite); 240eda14cbcSMatt Macy 241eda14cbcSMatt Macy /* 242eda14cbcSMatt Macy * Create an attribute with the given name and value. 243eda14cbcSMatt Macy * 244eda14cbcSMatt Macy * If an attribute with the given name already exists, the call will 245eda14cbcSMatt Macy * fail and return EEXIST. 246eda14cbcSMatt Macy */ 247eda14cbcSMatt Macy int zap_add(objset_t *ds, uint64_t zapobj, const char *key, 248eda14cbcSMatt Macy int integer_size, uint64_t num_integers, 249eda14cbcSMatt Macy const void *val, dmu_tx_t *tx); 250eda14cbcSMatt Macy int zap_add_by_dnode(dnode_t *dn, const char *key, 251eda14cbcSMatt Macy int integer_size, uint64_t num_integers, 252eda14cbcSMatt Macy const void *val, dmu_tx_t *tx); 253eda14cbcSMatt Macy int zap_add_uint64(objset_t *ds, uint64_t zapobj, const uint64_t *key, 254eda14cbcSMatt Macy int key_numints, int integer_size, uint64_t num_integers, 255eda14cbcSMatt Macy const void *val, dmu_tx_t *tx); 256*783d3ff6SMartin Matuska int zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key, 257*783d3ff6SMartin Matuska int key_numints, int integer_size, uint64_t num_integers, 258*783d3ff6SMartin Matuska const void *val, dmu_tx_t *tx); 259eda14cbcSMatt Macy 260eda14cbcSMatt Macy /* 261eda14cbcSMatt Macy * Set the attribute with the given name to the given value. If an 262eda14cbcSMatt Macy * attribute with the given name does not exist, it will be created. If 263eda14cbcSMatt Macy * an attribute with the given name already exists, the previous value 264eda14cbcSMatt Macy * will be overwritten. The integer_size may be different from the 265eda14cbcSMatt Macy * existing attribute's integer size, in which case the attribute's 266eda14cbcSMatt Macy * integer size will be updated to the new value. 267eda14cbcSMatt Macy */ 268eda14cbcSMatt Macy int zap_update(objset_t *ds, uint64_t zapobj, const char *name, 269eda14cbcSMatt Macy int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx); 270eda14cbcSMatt Macy int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 271eda14cbcSMatt Macy int key_numints, 272eda14cbcSMatt Macy int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx); 273*783d3ff6SMartin Matuska int zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key, 274*783d3ff6SMartin Matuska int key_numints, 275*783d3ff6SMartin Matuska int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx); 276eda14cbcSMatt Macy 277eda14cbcSMatt Macy /* 278eda14cbcSMatt Macy * Get the length (in integers) and the integer size of the specified 279eda14cbcSMatt Macy * attribute. 280eda14cbcSMatt Macy * 281eda14cbcSMatt Macy * If the requested attribute does not exist, the call will fail and 282eda14cbcSMatt Macy * return ENOENT. 283eda14cbcSMatt Macy */ 284eda14cbcSMatt Macy int zap_length(objset_t *ds, uint64_t zapobj, const char *name, 285eda14cbcSMatt Macy uint64_t *integer_size, uint64_t *num_integers); 286eda14cbcSMatt Macy int zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 287eda14cbcSMatt Macy int key_numints, uint64_t *integer_size, uint64_t *num_integers); 288eda14cbcSMatt Macy 289eda14cbcSMatt Macy /* 290eda14cbcSMatt Macy * Remove the specified attribute. 291eda14cbcSMatt Macy * 292eda14cbcSMatt Macy * If the specified attribute does not exist, the call will fail and 293eda14cbcSMatt Macy * return ENOENT. 294eda14cbcSMatt Macy */ 295eda14cbcSMatt Macy int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx); 296eda14cbcSMatt Macy int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name, 297eda14cbcSMatt Macy matchtype_t mt, dmu_tx_t *tx); 298eda14cbcSMatt Macy int zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx); 299eda14cbcSMatt Macy int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 300eda14cbcSMatt Macy int key_numints, dmu_tx_t *tx); 301*783d3ff6SMartin Matuska int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key, 302*783d3ff6SMartin Matuska int key_numints, dmu_tx_t *tx); 303eda14cbcSMatt Macy 304eda14cbcSMatt Macy /* 305eda14cbcSMatt Macy * Returns (in *count) the number of attributes in the specified zap 306eda14cbcSMatt Macy * object. 307eda14cbcSMatt Macy */ 308eda14cbcSMatt Macy int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count); 309eda14cbcSMatt Macy 310eda14cbcSMatt Macy /* 311eda14cbcSMatt Macy * Returns (in name) the name of the entry whose (value & mask) 312eda14cbcSMatt Macy * (za_first_integer) is value, or ENOENT if not found. The string 313eda14cbcSMatt Macy * pointed to by name must be at least 256 bytes long. If mask==0, the 314eda14cbcSMatt Macy * match must be exact (ie, same as mask=-1ULL). 315eda14cbcSMatt Macy */ 316eda14cbcSMatt Macy int zap_value_search(objset_t *os, uint64_t zapobj, 317eda14cbcSMatt Macy uint64_t value, uint64_t mask, char *name); 318eda14cbcSMatt Macy 319eda14cbcSMatt Macy /* 320eda14cbcSMatt Macy * Transfer all the entries from fromobj into intoobj. Only works on 321eda14cbcSMatt Macy * int_size=8 num_integers=1 values. Fails if there are any duplicated 322eda14cbcSMatt Macy * entries. 323eda14cbcSMatt Macy */ 324eda14cbcSMatt Macy int zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx); 325eda14cbcSMatt Macy 326eda14cbcSMatt Macy /* Same as zap_join, but set the values to 'value'. */ 327eda14cbcSMatt Macy int zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj, 328eda14cbcSMatt Macy uint64_t value, dmu_tx_t *tx); 329eda14cbcSMatt Macy 330eda14cbcSMatt Macy /* Same as zap_join, but add together any duplicated entries. */ 331eda14cbcSMatt Macy int zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj, 332eda14cbcSMatt Macy dmu_tx_t *tx); 333eda14cbcSMatt Macy 334eda14cbcSMatt Macy /* 335eda14cbcSMatt Macy * Manipulate entries where the name + value are the "same" (the name is 336eda14cbcSMatt Macy * a stringified version of the value). 337eda14cbcSMatt Macy */ 338eda14cbcSMatt Macy int zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx); 339eda14cbcSMatt Macy int zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx); 340eda14cbcSMatt Macy int zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value); 341eda14cbcSMatt Macy int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta, 342eda14cbcSMatt Macy dmu_tx_t *tx); 343eda14cbcSMatt Macy 344eda14cbcSMatt Macy /* Here the key is an int and the value is a different int. */ 345eda14cbcSMatt Macy int zap_add_int_key(objset_t *os, uint64_t obj, 346eda14cbcSMatt Macy uint64_t key, uint64_t value, dmu_tx_t *tx); 347eda14cbcSMatt Macy int zap_update_int_key(objset_t *os, uint64_t obj, 348eda14cbcSMatt Macy uint64_t key, uint64_t value, dmu_tx_t *tx); 349eda14cbcSMatt Macy int zap_lookup_int_key(objset_t *os, uint64_t obj, 350eda14cbcSMatt Macy uint64_t key, uint64_t *valuep); 351eda14cbcSMatt Macy 352eda14cbcSMatt Macy int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta, 353eda14cbcSMatt Macy dmu_tx_t *tx); 354eda14cbcSMatt Macy 355eda14cbcSMatt Macy struct zap; 356eda14cbcSMatt Macy struct zap_leaf; 357eda14cbcSMatt Macy typedef struct zap_cursor { 358eda14cbcSMatt Macy /* This structure is opaque! */ 359eda14cbcSMatt Macy objset_t *zc_objset; 360eda14cbcSMatt Macy struct zap *zc_zap; 361eda14cbcSMatt Macy struct zap_leaf *zc_leaf; 362eda14cbcSMatt Macy uint64_t zc_zapobj; 363eda14cbcSMatt Macy uint64_t zc_serialized; 364eda14cbcSMatt Macy uint64_t zc_hash; 365eda14cbcSMatt Macy uint32_t zc_cd; 366eda14cbcSMatt Macy boolean_t zc_prefetch; 367eda14cbcSMatt Macy } zap_cursor_t; 368eda14cbcSMatt Macy 369eda14cbcSMatt Macy typedef struct { 370eda14cbcSMatt Macy int za_integer_length; 371eda14cbcSMatt Macy /* 372eda14cbcSMatt Macy * za_normalization_conflict will be set if there are additional 373eda14cbcSMatt Macy * entries with this normalized form (eg, "foo" and "Foo"). 374eda14cbcSMatt Macy */ 375eda14cbcSMatt Macy boolean_t za_normalization_conflict; 376eda14cbcSMatt Macy uint64_t za_num_integers; 377eda14cbcSMatt Macy uint64_t za_first_integer; /* no sign extension for <8byte ints */ 378eda14cbcSMatt Macy char za_name[ZAP_MAXNAMELEN]; 379eda14cbcSMatt Macy } zap_attribute_t; 380eda14cbcSMatt Macy 381eda14cbcSMatt Macy /* 382eda14cbcSMatt Macy * The interface for listing all the attributes of a zapobj can be 383eda14cbcSMatt Macy * thought of as cursor moving down a list of the attributes one by 384eda14cbcSMatt Macy * one. The cookie returned by the zap_cursor_serialize routine is 385eda14cbcSMatt Macy * persistent across system calls (and across reboot, even). 386eda14cbcSMatt Macy */ 387eda14cbcSMatt Macy 388eda14cbcSMatt Macy /* 389eda14cbcSMatt Macy * Initialize a zap cursor, pointing to the "first" attribute of the 390eda14cbcSMatt Macy * zapobj. You must _fini the cursor when you are done with it. 391eda14cbcSMatt Macy */ 392eda14cbcSMatt Macy void zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj); 393eda14cbcSMatt Macy void zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, 394eda14cbcSMatt Macy uint64_t zapobj); 395eda14cbcSMatt Macy void zap_cursor_fini(zap_cursor_t *zc); 396eda14cbcSMatt Macy 397eda14cbcSMatt Macy /* 398eda14cbcSMatt Macy * Get the attribute currently pointed to by the cursor. Returns 399eda14cbcSMatt Macy * ENOENT if at the end of the attributes. 400eda14cbcSMatt Macy */ 401eda14cbcSMatt Macy int zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za); 402eda14cbcSMatt Macy 403eda14cbcSMatt Macy /* 404eda14cbcSMatt Macy * Advance the cursor to the next attribute. 405eda14cbcSMatt Macy */ 406eda14cbcSMatt Macy void zap_cursor_advance(zap_cursor_t *zc); 407eda14cbcSMatt Macy 408eda14cbcSMatt Macy /* 409eda14cbcSMatt Macy * Get a persistent cookie pointing to the current position of the zap 410eda14cbcSMatt Macy * cursor. The low 4 bits in the cookie are always zero, and thus can 411eda14cbcSMatt Macy * be used as to differentiate a serialized cookie from a different type 412eda14cbcSMatt Macy * of value. The cookie will be less than 2^32 as long as there are 413eda14cbcSMatt Macy * fewer than 2^22 (4.2 million) entries in the zap object. 414eda14cbcSMatt Macy */ 415eda14cbcSMatt Macy uint64_t zap_cursor_serialize(zap_cursor_t *zc); 416eda14cbcSMatt Macy 417eda14cbcSMatt Macy /* 418eda14cbcSMatt Macy * Initialize a zap cursor pointing to the position recorded by 419eda14cbcSMatt Macy * zap_cursor_serialize (in the "serialized" argument). You can also 420eda14cbcSMatt Macy * use a "serialized" argument of 0 to start at the beginning of the 421eda14cbcSMatt Macy * zapobj (ie. zap_cursor_init_serialized(..., 0) is equivalent to 422eda14cbcSMatt Macy * zap_cursor_init(...).) 423eda14cbcSMatt Macy */ 424eda14cbcSMatt Macy void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *ds, 425eda14cbcSMatt Macy uint64_t zapobj, uint64_t serialized); 426eda14cbcSMatt Macy 427eda14cbcSMatt Macy 428eda14cbcSMatt Macy #define ZAP_HISTOGRAM_SIZE 10 429eda14cbcSMatt Macy 430eda14cbcSMatt Macy typedef struct zap_stats { 431eda14cbcSMatt Macy /* 432eda14cbcSMatt Macy * Size of the pointer table (in number of entries). 433eda14cbcSMatt Macy * This is always a power of 2, or zero if it's a microzap. 434eda14cbcSMatt Macy * In general, it should be considerably greater than zs_num_leafs. 435eda14cbcSMatt Macy */ 436eda14cbcSMatt Macy uint64_t zs_ptrtbl_len; 437eda14cbcSMatt Macy 438eda14cbcSMatt Macy uint64_t zs_blocksize; /* size of zap blocks */ 439eda14cbcSMatt Macy 440eda14cbcSMatt Macy /* 441eda14cbcSMatt Macy * The number of blocks used. Note that some blocks may be 442eda14cbcSMatt Macy * wasted because old ptrtbl's and large name/value blocks are 443eda14cbcSMatt Macy * not reused. (Although their space is reclaimed, we don't 444eda14cbcSMatt Macy * reuse those offsets in the object.) 445eda14cbcSMatt Macy */ 446eda14cbcSMatt Macy uint64_t zs_num_blocks; 447eda14cbcSMatt Macy 448eda14cbcSMatt Macy /* 449eda14cbcSMatt Macy * Pointer table values from zap_ptrtbl in the zap_phys_t 450eda14cbcSMatt Macy */ 451eda14cbcSMatt Macy uint64_t zs_ptrtbl_nextblk; /* next (larger) copy start block */ 452eda14cbcSMatt Macy uint64_t zs_ptrtbl_blks_copied; /* number source blocks copied */ 453eda14cbcSMatt Macy uint64_t zs_ptrtbl_zt_blk; /* starting block number */ 454eda14cbcSMatt Macy uint64_t zs_ptrtbl_zt_numblks; /* number of blocks */ 455eda14cbcSMatt Macy uint64_t zs_ptrtbl_zt_shift; /* bits to index it */ 456eda14cbcSMatt Macy 457eda14cbcSMatt Macy /* 458eda14cbcSMatt Macy * Values of the other members of the zap_phys_t 459eda14cbcSMatt Macy */ 460eda14cbcSMatt Macy uint64_t zs_block_type; /* ZBT_HEADER */ 461eda14cbcSMatt Macy uint64_t zs_magic; /* ZAP_MAGIC */ 462eda14cbcSMatt Macy uint64_t zs_num_leafs; /* The number of leaf blocks */ 463eda14cbcSMatt Macy uint64_t zs_num_entries; /* The number of zap entries */ 464eda14cbcSMatt Macy uint64_t zs_salt; /* salt to stir into hash function */ 465eda14cbcSMatt Macy 466eda14cbcSMatt Macy /* 467eda14cbcSMatt Macy * Histograms. For all histograms, the last index 468eda14cbcSMatt Macy * (ZAP_HISTOGRAM_SIZE-1) includes any values which are greater 469eda14cbcSMatt Macy * than what can be represented. For example 470eda14cbcSMatt Macy * zs_leafs_with_n5_entries[ZAP_HISTOGRAM_SIZE-1] is the number 471eda14cbcSMatt Macy * of leafs with more than 45 entries. 472eda14cbcSMatt Macy */ 473eda14cbcSMatt Macy 474eda14cbcSMatt Macy /* 475eda14cbcSMatt Macy * zs_leafs_with_n_pointers[n] is the number of leafs with 476eda14cbcSMatt Macy * 2^n pointers to it. 477eda14cbcSMatt Macy */ 478eda14cbcSMatt Macy uint64_t zs_leafs_with_2n_pointers[ZAP_HISTOGRAM_SIZE]; 479eda14cbcSMatt Macy 480eda14cbcSMatt Macy /* 481eda14cbcSMatt Macy * zs_leafs_with_n_entries[n] is the number of leafs with 482eda14cbcSMatt Macy * [n*5, (n+1)*5) entries. In the current implementation, there 483eda14cbcSMatt Macy * can be at most 55 entries in any block, but there may be 484eda14cbcSMatt Macy * fewer if the name or value is large, or the block is not 485eda14cbcSMatt Macy * completely full. 486eda14cbcSMatt Macy */ 487eda14cbcSMatt Macy uint64_t zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE]; 488eda14cbcSMatt Macy 489eda14cbcSMatt Macy /* 490eda14cbcSMatt Macy * zs_leafs_n_tenths_full[n] is the number of leafs whose 491eda14cbcSMatt Macy * fullness is in the range [n/10, (n+1)/10). 492eda14cbcSMatt Macy */ 493eda14cbcSMatt Macy uint64_t zs_blocks_n_tenths_full[ZAP_HISTOGRAM_SIZE]; 494eda14cbcSMatt Macy 495eda14cbcSMatt Macy /* 496eda14cbcSMatt Macy * zs_entries_using_n_chunks[n] is the number of entries which 497eda14cbcSMatt Macy * consume n 24-byte chunks. (Note, large names/values only use 498eda14cbcSMatt Macy * one chunk, but contribute to zs_num_blocks_large.) 499eda14cbcSMatt Macy */ 500eda14cbcSMatt Macy uint64_t zs_entries_using_n_chunks[ZAP_HISTOGRAM_SIZE]; 501eda14cbcSMatt Macy 502eda14cbcSMatt Macy /* 503eda14cbcSMatt Macy * zs_buckets_with_n_entries[n] is the number of buckets (each 504eda14cbcSMatt Macy * leaf has 64 buckets) with n entries. 505eda14cbcSMatt Macy * zs_buckets_with_n_entries[1] should be very close to 506eda14cbcSMatt Macy * zs_num_entries. 507eda14cbcSMatt Macy */ 508eda14cbcSMatt Macy uint64_t zs_buckets_with_n_entries[ZAP_HISTOGRAM_SIZE]; 509eda14cbcSMatt Macy } zap_stats_t; 510eda14cbcSMatt Macy 511eda14cbcSMatt Macy /* 512eda14cbcSMatt Macy * Get statistics about a ZAP object. Note: you need to be aware of the 513eda14cbcSMatt Macy * internal implementation of the ZAP to correctly interpret some of the 514eda14cbcSMatt Macy * statistics. This interface shouldn't be relied on unless you really 515eda14cbcSMatt Macy * know what you're doing. 516eda14cbcSMatt Macy */ 517eda14cbcSMatt Macy int zap_get_stats(objset_t *ds, uint64_t zapobj, zap_stats_t *zs); 518eda14cbcSMatt Macy 519eda14cbcSMatt Macy #ifdef __cplusplus 520eda14cbcSMatt Macy } 521eda14cbcSMatt Macy #endif 522eda14cbcSMatt Macy 523eda14cbcSMatt Macy #endif /* _SYS_ZAP_H */ 524