/* xref: /freebsd/sys/contrib/openzfs/include/sys/zap.h (revision 783d3ff6) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 * Copyright 2017 Nexenta Systems, Inc.
 */

28eda14cbcSMatt Macy #ifndef	_SYS_ZAP_H
29eda14cbcSMatt Macy #define	_SYS_ZAP_H
30eda14cbcSMatt Macy 
/*
 * ZAP - ZFS Attribute Processor
 *
 * The ZAP is a module which sits on top of the DMU (Data Management
 * Unit) and implements a higher-level storage primitive using DMU
 * objects.  Its primary consumer is the ZPL (ZFS Posix Layer).
 *
 * A "zapobj" is a DMU object which the ZAP uses to store attributes.
 * Users should use only zap routines to access a zapobj - they should
 * not access the DMU object directly using DMU routines.
 *
 * The attributes stored in a zapobj are name-value pairs.  The name is
 * a zero-terminated string of up to ZAP_MAXNAMELEN bytes (including
 * terminating NUL).  The value is an array of integers, which may be
 * 1, 2, 4, or 8 bytes long.  The total space used by the array (number
 * of integers * integer length) can be up to ZAP_MAXVALUELEN bytes.
 * Note that an 8-byte integer value can be used to store the location
 * (object number) of another dmu object (which may be itself a zapobj).
 * Note that you can use a zero-length attribute to store a single bit
 * of information - the attribute is present or not.
 *
 * The ZAP routines are thread-safe.  However, you must observe the
 * DMU's restriction that a transaction may not be operated on
 * concurrently.
 *
 * Any of the routines that return an int may return an I/O error (EIO
 * or ECHECKSUM).
 *
 *
 * Implementation / Performance Notes:
 *
 * The ZAP is intended to operate most efficiently on attributes with
 * short (49 bytes or less) names and single 8-byte values, for which
 * the microzap will be used.  The ZAP should be efficient enough so
 * that the user does not need to cache these attributes.
 *
 * The ZAP's locking scheme makes its routines thread-safe.  Operations
 * on different zapobjs will be processed concurrently.  Operations on
 * the same zapobj which only read data will be processed concurrently.
 * Operations on the same zapobj which modify data will be processed
 * concurrently when there are many attributes in the zapobj (because
 * the ZAP uses per-block locking - more than 128 * (number of cpus)
 * small attributes will suffice).
 */

/*
 * We're using zero-terminated byte strings (ie. ASCII or UTF-8 C
 * strings) for the names of attributes, rather than a byte string
 * bounded by an explicit length.  If some day we want to support names
 * in character sets which have embedded zeros (eg. UTF-16, UTF-32),
 * we'll have to add routines for using length-bounded strings.
 */

#include <sys/dmu.h>

/* Give the declarations below C linkage when included from C++. */
#ifdef	__cplusplus
extern "C" {
#endif

/*
 * Specifies matching criteria for ZAP lookups.  The values are distinct
 * bits, so they may be combined.
 * MT_NORMALIZE		Use ZAP normalization flags, which can include both
 *			unicode normalization and case-insensitivity.
 * MT_MATCH_CASE	Do case-sensitive lookups even if MT_NORMALIZE is
 *			specified and ZAP normalization flags include
 *			U8_TEXTPREP_TOUPPER.
 */
typedef enum matchtype {
	MT_NORMALIZE = 1 << 0,
	MT_MATCH_CASE = 1 << 1,
} matchtype_t;

/*
 * Flags accepted by the zap_create_flags*() routines and by
 * zap_create_hold().  The values are distinct bits.
 */
typedef enum zap_flags {
	/* Use 64-bit hash value (serialized cursors will always use 64-bits) */
	ZAP_FLAG_HASH64 = 1 << 0,
	/* Key is binary, not string (zap_add_uint64() can be used) */
	ZAP_FLAG_UINT64_KEY = 1 << 1,
	/*
	 * First word of key (which must be an array of uint64) is
	 * already randomly distributed.
	 */
	ZAP_FLAG_PRE_HASHED_KEY = 1 << 2,
/*
 * In a Linux kernel build the type is named zfs_zap_flags_t and
 * zap_flags_t is a macro alias for it; everywhere else the type is
 * simply zap_flags_t.
 * NOTE(review): presumably this avoids a clash with a kernel-provided
 * name -- confirm.
 */
#if defined(__linux__) && defined(_KERNEL)
} zfs_zap_flags_t;
#define	zap_flags_t	zfs_zap_flags_t
#else
} zap_flags_t;
#endif

120eda14cbcSMatt Macy /*
121eda14cbcSMatt Macy  * Create a new zapobj with no attributes and return its object number.
122eda14cbcSMatt Macy  */
123eda14cbcSMatt Macy uint64_t zap_create(objset_t *ds, dmu_object_type_t ot,
124eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
125eda14cbcSMatt Macy uint64_t zap_create_dnsize(objset_t *ds, dmu_object_type_t ot,
126eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
127eda14cbcSMatt Macy uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
128eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
129eda14cbcSMatt Macy uint64_t zap_create_norm_dnsize(objset_t *ds, int normflags,
130eda14cbcSMatt Macy     dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen,
131eda14cbcSMatt Macy     int dnodesize, dmu_tx_t *tx);
132eda14cbcSMatt Macy uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
133eda14cbcSMatt Macy     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
134eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
135eda14cbcSMatt Macy uint64_t zap_create_flags_dnsize(objset_t *os, int normflags,
136eda14cbcSMatt Macy     zap_flags_t flags, dmu_object_type_t ot, int leaf_blockshift,
137eda14cbcSMatt Macy     int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
138eda14cbcSMatt Macy     int dnodesize, dmu_tx_t *tx);
139eda14cbcSMatt Macy uint64_t zap_create_hold(objset_t *os, int normflags, zap_flags_t flags,
140eda14cbcSMatt Macy     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
141eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, int dnodesize,
142a0b956f5SMartin Matuska     dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx);
143eda14cbcSMatt Macy 
144eda14cbcSMatt Macy uint64_t zap_create_link(objset_t *os, dmu_object_type_t ot,
145eda14cbcSMatt Macy     uint64_t parent_obj, const char *name, dmu_tx_t *tx);
146eda14cbcSMatt Macy uint64_t zap_create_link_dnsize(objset_t *os, dmu_object_type_t ot,
147eda14cbcSMatt Macy     uint64_t parent_obj, const char *name, int dnodesize, dmu_tx_t *tx);
148eda14cbcSMatt Macy 
149eda14cbcSMatt Macy /*
150eda14cbcSMatt Macy  * Initialize an already-allocated object.
151eda14cbcSMatt Macy  */
152eda14cbcSMatt Macy void mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags,
153eda14cbcSMatt Macy     dmu_tx_t *tx);
154eda14cbcSMatt Macy 
155eda14cbcSMatt Macy /*
156eda14cbcSMatt Macy  * Create a new zapobj with no attributes from the given (unallocated)
157eda14cbcSMatt Macy  * object number.
158eda14cbcSMatt Macy  */
159eda14cbcSMatt Macy int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
160eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
161eda14cbcSMatt Macy int zap_create_claim_dnsize(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
162eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
163eda14cbcSMatt Macy int zap_create_claim_norm(objset_t *ds, uint64_t obj,
164eda14cbcSMatt Macy     int normflags, dmu_object_type_t ot,
165eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
166eda14cbcSMatt Macy int zap_create_claim_norm_dnsize(objset_t *ds, uint64_t obj,
167eda14cbcSMatt Macy     int normflags, dmu_object_type_t ot,
168eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
169eda14cbcSMatt Macy 
/*
 * The zapobj passed in must be a valid ZAP object for all of the
 * following routines.
 */

175eda14cbcSMatt Macy /*
176eda14cbcSMatt Macy  * Destroy this zapobj and all its attributes.
177eda14cbcSMatt Macy  *
178eda14cbcSMatt Macy  * Frees the object number using dmu_object_free.
179eda14cbcSMatt Macy  */
180eda14cbcSMatt Macy int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx);
181eda14cbcSMatt Macy 
/*
 * Manipulate attributes.
 *
 * 'integer_size' is in bytes, and must be 1, 2, 4, or 8.
 */

188eda14cbcSMatt Macy /*
189eda14cbcSMatt Macy  * Retrieve the contents of the attribute with the given name.
190eda14cbcSMatt Macy  *
191eda14cbcSMatt Macy  * If the requested attribute does not exist, the call will fail and
192eda14cbcSMatt Macy  * return ENOENT.
193eda14cbcSMatt Macy  *
194eda14cbcSMatt Macy  * If 'integer_size' is smaller than the attribute's integer size, the
195eda14cbcSMatt Macy  * call will fail and return EINVAL.
196eda14cbcSMatt Macy  *
197eda14cbcSMatt Macy  * If 'integer_size' is equal to or larger than the attribute's integer
198eda14cbcSMatt Macy  * size, the call will succeed and return 0.
199eda14cbcSMatt Macy  *
200eda14cbcSMatt Macy  * When converting to a larger integer size, the integers will be treated as
201eda14cbcSMatt Macy  * unsigned (ie. no sign-extension will be performed).
202eda14cbcSMatt Macy  *
203eda14cbcSMatt Macy  * 'num_integers' is the length (in integers) of 'buf'.
204eda14cbcSMatt Macy  *
205eda14cbcSMatt Macy  * If the attribute is longer than the buffer, as many integers as will
206eda14cbcSMatt Macy  * fit will be transferred to 'buf'.  If the entire attribute was not
207eda14cbcSMatt Macy  * transferred, the call will return EOVERFLOW.
208eda14cbcSMatt Macy  */
209eda14cbcSMatt Macy int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name,
210eda14cbcSMatt Macy     uint64_t integer_size, uint64_t num_integers, void *buf);
211eda14cbcSMatt Macy 
212eda14cbcSMatt Macy /*
213eda14cbcSMatt Macy  * If rn_len is nonzero, realname will be set to the name of the found
214eda14cbcSMatt Macy  * entry (which may be different from the requested name if matchtype is
215eda14cbcSMatt Macy  * not MT_EXACT).
216eda14cbcSMatt Macy  *
217eda14cbcSMatt Macy  * If normalization_conflictp is not NULL, it will be set if there is
218eda14cbcSMatt Macy  * another name with the same case/unicode normalized form.
219eda14cbcSMatt Macy  */
220eda14cbcSMatt Macy int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,
221eda14cbcSMatt Macy     uint64_t integer_size, uint64_t num_integers, void *buf,
222eda14cbcSMatt Macy     matchtype_t mt, char *realname, int rn_len,
223eda14cbcSMatt Macy     boolean_t *normalization_conflictp);
224eda14cbcSMatt Macy int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
225eda14cbcSMatt Macy     int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
226eda14cbcSMatt Macy int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);
227eda14cbcSMatt Macy int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name);
228eda14cbcSMatt Macy int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
229eda14cbcSMatt Macy     int key_numints);
230eda14cbcSMatt Macy 
231eda14cbcSMatt Macy int zap_lookup_by_dnode(dnode_t *dn, const char *name,
232eda14cbcSMatt Macy     uint64_t integer_size, uint64_t num_integers, void *buf);
233eda14cbcSMatt Macy int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
234eda14cbcSMatt Macy     uint64_t integer_size, uint64_t num_integers, void *buf,
235eda14cbcSMatt Macy     matchtype_t mt, char *realname, int rn_len,
236eda14cbcSMatt Macy     boolean_t *ncp);
237eda14cbcSMatt Macy 
238eda14cbcSMatt Macy int zap_count_write_by_dnode(dnode_t *dn, const char *name,
239eda14cbcSMatt Macy     int add, zfs_refcount_t *towrite, zfs_refcount_t *tooverwrite);
240eda14cbcSMatt Macy 
241eda14cbcSMatt Macy /*
242eda14cbcSMatt Macy  * Create an attribute with the given name and value.
243eda14cbcSMatt Macy  *
244eda14cbcSMatt Macy  * If an attribute with the given name already exists, the call will
245eda14cbcSMatt Macy  * fail and return EEXIST.
246eda14cbcSMatt Macy  */
247eda14cbcSMatt Macy int zap_add(objset_t *ds, uint64_t zapobj, const char *key,
248eda14cbcSMatt Macy     int integer_size, uint64_t num_integers,
249eda14cbcSMatt Macy     const void *val, dmu_tx_t *tx);
250eda14cbcSMatt Macy int zap_add_by_dnode(dnode_t *dn, const char *key,
251eda14cbcSMatt Macy     int integer_size, uint64_t num_integers,
252eda14cbcSMatt Macy     const void *val, dmu_tx_t *tx);
253eda14cbcSMatt Macy int zap_add_uint64(objset_t *ds, uint64_t zapobj, const uint64_t *key,
254eda14cbcSMatt Macy     int key_numints, int integer_size, uint64_t num_integers,
255eda14cbcSMatt Macy     const void *val, dmu_tx_t *tx);
256*783d3ff6SMartin Matuska int zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
257*783d3ff6SMartin Matuska     int key_numints, int integer_size, uint64_t num_integers,
258*783d3ff6SMartin Matuska     const void *val, dmu_tx_t *tx);
259eda14cbcSMatt Macy 
260eda14cbcSMatt Macy /*
261eda14cbcSMatt Macy  * Set the attribute with the given name to the given value.  If an
262eda14cbcSMatt Macy  * attribute with the given name does not exist, it will be created.  If
263eda14cbcSMatt Macy  * an attribute with the given name already exists, the previous value
264eda14cbcSMatt Macy  * will be overwritten.  The integer_size may be different from the
265eda14cbcSMatt Macy  * existing attribute's integer size, in which case the attribute's
266eda14cbcSMatt Macy  * integer size will be updated to the new value.
267eda14cbcSMatt Macy  */
268eda14cbcSMatt Macy int zap_update(objset_t *ds, uint64_t zapobj, const char *name,
269eda14cbcSMatt Macy     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
270eda14cbcSMatt Macy int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
271eda14cbcSMatt Macy     int key_numints,
272eda14cbcSMatt Macy     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
273*783d3ff6SMartin Matuska int zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
274*783d3ff6SMartin Matuska     int key_numints,
275*783d3ff6SMartin Matuska     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
276eda14cbcSMatt Macy 
277eda14cbcSMatt Macy /*
278eda14cbcSMatt Macy  * Get the length (in integers) and the integer size of the specified
279eda14cbcSMatt Macy  * attribute.
280eda14cbcSMatt Macy  *
281eda14cbcSMatt Macy  * If the requested attribute does not exist, the call will fail and
282eda14cbcSMatt Macy  * return ENOENT.
283eda14cbcSMatt Macy  */
284eda14cbcSMatt Macy int zap_length(objset_t *ds, uint64_t zapobj, const char *name,
285eda14cbcSMatt Macy     uint64_t *integer_size, uint64_t *num_integers);
286eda14cbcSMatt Macy int zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
287eda14cbcSMatt Macy     int key_numints, uint64_t *integer_size, uint64_t *num_integers);
288eda14cbcSMatt Macy 
289eda14cbcSMatt Macy /*
290eda14cbcSMatt Macy  * Remove the specified attribute.
291eda14cbcSMatt Macy  *
292eda14cbcSMatt Macy  * If the specified attribute does not exist, the call will fail and
293eda14cbcSMatt Macy  * return ENOENT.
294eda14cbcSMatt Macy  */
295eda14cbcSMatt Macy int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx);
296eda14cbcSMatt Macy int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
297eda14cbcSMatt Macy     matchtype_t mt, dmu_tx_t *tx);
298eda14cbcSMatt Macy int zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx);
299eda14cbcSMatt Macy int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
300eda14cbcSMatt Macy     int key_numints, dmu_tx_t *tx);
301*783d3ff6SMartin Matuska int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
302*783d3ff6SMartin Matuska     int key_numints, dmu_tx_t *tx);
303eda14cbcSMatt Macy 
304eda14cbcSMatt Macy /*
305eda14cbcSMatt Macy  * Returns (in *count) the number of attributes in the specified zap
306eda14cbcSMatt Macy  * object.
307eda14cbcSMatt Macy  */
308eda14cbcSMatt Macy int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);
309eda14cbcSMatt Macy 
310eda14cbcSMatt Macy /*
311eda14cbcSMatt Macy  * Returns (in name) the name of the entry whose (value & mask)
312eda14cbcSMatt Macy  * (za_first_integer) is value, or ENOENT if not found.  The string
313eda14cbcSMatt Macy  * pointed to by name must be at least 256 bytes long.  If mask==0, the
314eda14cbcSMatt Macy  * match must be exact (ie, same as mask=-1ULL).
315eda14cbcSMatt Macy  */
316eda14cbcSMatt Macy int zap_value_search(objset_t *os, uint64_t zapobj,
317eda14cbcSMatt Macy     uint64_t value, uint64_t mask, char *name);
318eda14cbcSMatt Macy 
319eda14cbcSMatt Macy /*
320eda14cbcSMatt Macy  * Transfer all the entries from fromobj into intoobj.  Only works on
321eda14cbcSMatt Macy  * int_size=8 num_integers=1 values.  Fails if there are any duplicated
322eda14cbcSMatt Macy  * entries.
323eda14cbcSMatt Macy  */
324eda14cbcSMatt Macy int zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx);
325eda14cbcSMatt Macy 
326eda14cbcSMatt Macy /* Same as zap_join, but set the values to 'value'. */
327eda14cbcSMatt Macy int zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj,
328eda14cbcSMatt Macy     uint64_t value, dmu_tx_t *tx);
329eda14cbcSMatt Macy 
330eda14cbcSMatt Macy /* Same as zap_join, but add together any duplicated entries. */
331eda14cbcSMatt Macy int zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj,
332eda14cbcSMatt Macy     dmu_tx_t *tx);
333eda14cbcSMatt Macy 
334eda14cbcSMatt Macy /*
335eda14cbcSMatt Macy  * Manipulate entries where the name + value are the "same" (the name is
336eda14cbcSMatt Macy  * a stringified version of the value).
337eda14cbcSMatt Macy  */
338eda14cbcSMatt Macy int zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
339eda14cbcSMatt Macy int zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
340eda14cbcSMatt Macy int zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value);
341eda14cbcSMatt Macy int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
342eda14cbcSMatt Macy     dmu_tx_t *tx);
343eda14cbcSMatt Macy 
344eda14cbcSMatt Macy /* Here the key is an int and the value is a different int. */
345eda14cbcSMatt Macy int zap_add_int_key(objset_t *os, uint64_t obj,
346eda14cbcSMatt Macy     uint64_t key, uint64_t value, dmu_tx_t *tx);
347eda14cbcSMatt Macy int zap_update_int_key(objset_t *os, uint64_t obj,
348eda14cbcSMatt Macy     uint64_t key, uint64_t value, dmu_tx_t *tx);
349eda14cbcSMatt Macy int zap_lookup_int_key(objset_t *os, uint64_t obj,
350eda14cbcSMatt Macy     uint64_t key, uint64_t *valuep);
351eda14cbcSMatt Macy 
352eda14cbcSMatt Macy int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
353eda14cbcSMatt Macy     dmu_tx_t *tx);
354eda14cbcSMatt Macy 
355eda14cbcSMatt Macy struct zap;
356eda14cbcSMatt Macy struct zap_leaf;
357eda14cbcSMatt Macy typedef struct zap_cursor {
358eda14cbcSMatt Macy 	/* This structure is opaque! */
359eda14cbcSMatt Macy 	objset_t *zc_objset;
360eda14cbcSMatt Macy 	struct zap *zc_zap;
361eda14cbcSMatt Macy 	struct zap_leaf *zc_leaf;
362eda14cbcSMatt Macy 	uint64_t zc_zapobj;
363eda14cbcSMatt Macy 	uint64_t zc_serialized;
364eda14cbcSMatt Macy 	uint64_t zc_hash;
365eda14cbcSMatt Macy 	uint32_t zc_cd;
366eda14cbcSMatt Macy 	boolean_t zc_prefetch;
367eda14cbcSMatt Macy } zap_cursor_t;
368eda14cbcSMatt Macy 
369eda14cbcSMatt Macy typedef struct {
370eda14cbcSMatt Macy 	int za_integer_length;
371eda14cbcSMatt Macy 	/*
372eda14cbcSMatt Macy 	 * za_normalization_conflict will be set if there are additional
373eda14cbcSMatt Macy 	 * entries with this normalized form (eg, "foo" and "Foo").
374eda14cbcSMatt Macy 	 */
375eda14cbcSMatt Macy 	boolean_t za_normalization_conflict;
376eda14cbcSMatt Macy 	uint64_t za_num_integers;
377eda14cbcSMatt Macy 	uint64_t za_first_integer;	/* no sign extension for <8byte ints */
378eda14cbcSMatt Macy 	char za_name[ZAP_MAXNAMELEN];
379eda14cbcSMatt Macy } zap_attribute_t;
380eda14cbcSMatt Macy 
/*
 * The interface for listing all the attributes of a zapobj can be
 * thought of as a cursor moving down a list of the attributes one by
 * one.  The cookie returned by the zap_cursor_serialize routine is
 * persistent across system calls (and across reboot, even).
 */

388eda14cbcSMatt Macy /*
389eda14cbcSMatt Macy  * Initialize a zap cursor, pointing to the "first" attribute of the
390eda14cbcSMatt Macy  * zapobj.  You must _fini the cursor when you are done with it.
391eda14cbcSMatt Macy  */
392eda14cbcSMatt Macy void zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj);
393eda14cbcSMatt Macy void zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os,
394eda14cbcSMatt Macy     uint64_t zapobj);
395eda14cbcSMatt Macy void zap_cursor_fini(zap_cursor_t *zc);
396eda14cbcSMatt Macy 
397eda14cbcSMatt Macy /*
398eda14cbcSMatt Macy  * Get the attribute currently pointed to by the cursor.  Returns
399eda14cbcSMatt Macy  * ENOENT if at the end of the attributes.
400eda14cbcSMatt Macy  */
401eda14cbcSMatt Macy int zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za);
402eda14cbcSMatt Macy 
403eda14cbcSMatt Macy /*
404eda14cbcSMatt Macy  * Advance the cursor to the next attribute.
405eda14cbcSMatt Macy  */
406eda14cbcSMatt Macy void zap_cursor_advance(zap_cursor_t *zc);
407eda14cbcSMatt Macy 
408eda14cbcSMatt Macy /*
409eda14cbcSMatt Macy  * Get a persistent cookie pointing to the current position of the zap
410eda14cbcSMatt Macy  * cursor.  The low 4 bits in the cookie are always zero, and thus can
411eda14cbcSMatt Macy  * be used as to differentiate a serialized cookie from a different type
412eda14cbcSMatt Macy  * of value.  The cookie will be less than 2^32 as long as there are
413eda14cbcSMatt Macy  * fewer than 2^22 (4.2 million) entries in the zap object.
414eda14cbcSMatt Macy  */
415eda14cbcSMatt Macy uint64_t zap_cursor_serialize(zap_cursor_t *zc);
416eda14cbcSMatt Macy 
417eda14cbcSMatt Macy /*
418eda14cbcSMatt Macy  * Initialize a zap cursor pointing to the position recorded by
419eda14cbcSMatt Macy  * zap_cursor_serialize (in the "serialized" argument).  You can also
420eda14cbcSMatt Macy  * use a "serialized" argument of 0 to start at the beginning of the
421eda14cbcSMatt Macy  * zapobj (ie.  zap_cursor_init_serialized(..., 0) is equivalent to
422eda14cbcSMatt Macy  * zap_cursor_init(...).)
423eda14cbcSMatt Macy  */
424eda14cbcSMatt Macy void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *ds,
425eda14cbcSMatt Macy     uint64_t zapobj, uint64_t serialized);
426eda14cbcSMatt Macy 
427eda14cbcSMatt Macy 
/* Number of buckets in each of the zap_stats_t histograms. */
#define	ZAP_HISTOGRAM_SIZE 10

/*
 * Statistics about a ZAP object, as filled in by zap_get_stats().
 */
typedef struct zap_stats {
	/*
	 * Size of the pointer table (in number of entries).
	 * This is always a power of 2, or zero if it's a microzap.
	 * In general, it should be considerably greater than zs_num_leafs.
	 */
	uint64_t zs_ptrtbl_len;

	uint64_t zs_blocksize;		/* size of zap blocks */

	/*
	 * The number of blocks used.  Note that some blocks may be
	 * wasted because old ptrtbl's and large name/value blocks are
	 * not reused.  (Although their space is reclaimed, we don't
	 * reuse those offsets in the object.)
	 */
	uint64_t zs_num_blocks;

	/*
	 * Pointer table values from zap_ptrtbl in the zap_phys_t
	 */
	uint64_t zs_ptrtbl_nextblk;	  /* next (larger) copy start block */
	uint64_t zs_ptrtbl_blks_copied;   /* number source blocks copied */
	uint64_t zs_ptrtbl_zt_blk;	  /* starting block number */
	uint64_t zs_ptrtbl_zt_numblks;    /* number of blocks */
	uint64_t zs_ptrtbl_zt_shift;	  /* bits to index it */

	/*
	 * Values of the other members of the zap_phys_t
	 */
	uint64_t zs_block_type;		/* ZBT_HEADER */
	uint64_t zs_magic;		/* ZAP_MAGIC */
	uint64_t zs_num_leafs;		/* The number of leaf blocks */
	uint64_t zs_num_entries;	/* The number of zap entries */
	uint64_t zs_salt;		/* salt to stir into hash function */

	/*
	 * Histograms.  For all histograms, the last index
	 * (ZAP_HISTOGRAM_SIZE-1) includes any values which are greater
	 * than what can be represented.  For example
	 * zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE-1] is the number
	 * of leafs with 45 or more entries.
	 */

	/*
	 * zs_leafs_with_2n_pointers[n] is the number of leafs with
	 * 2^n pointers to them.
	 */
	uint64_t zs_leafs_with_2n_pointers[ZAP_HISTOGRAM_SIZE];

	/*
	 * zs_blocks_with_n5_entries[n] is the number of leafs with
	 * [n*5, (n+1)*5) entries.  In the current implementation, there
	 * can be at most 55 entries in any block, but there may be
	 * fewer if the name or value is large, or the block is not
	 * completely full.
	 */
	uint64_t zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE];

	/*
	 * zs_blocks_n_tenths_full[n] is the number of leafs whose
	 * fullness is in the range [n/10, (n+1)/10).
	 */
	uint64_t zs_blocks_n_tenths_full[ZAP_HISTOGRAM_SIZE];

	/*
	 * zs_entries_using_n_chunks[n] is the number of entries which
	 * consume n 24-byte chunks.  (Note, large names/values only use
	 * one chunk, but contribute to zs_num_blocks_large.)
	 * NOTE(review): zs_num_blocks_large is not a member of this
	 * struct; the reference above looks stale.
	 */
	uint64_t zs_entries_using_n_chunks[ZAP_HISTOGRAM_SIZE];

	/*
	 * zs_buckets_with_n_entries[n] is the number of buckets (each
	 * leaf has 64 buckets) with n entries.
	 * zs_buckets_with_n_entries[1] should be very close to
	 * zs_num_entries.
	 */
	uint64_t zs_buckets_with_n_entries[ZAP_HISTOGRAM_SIZE];
} zap_stats_t;

511eda14cbcSMatt Macy /*
512eda14cbcSMatt Macy  * Get statistics about a ZAP object.  Note: you need to be aware of the
513eda14cbcSMatt Macy  * internal implementation of the ZAP to correctly interpret some of the
514eda14cbcSMatt Macy  * statistics.  This interface shouldn't be relied on unless you really
515eda14cbcSMatt Macy  * know what you're doing.
516eda14cbcSMatt Macy  */
517eda14cbcSMatt Macy int zap_get_stats(objset_t *ds, uint64_t zapobj, zap_stats_t *zs);
518eda14cbcSMatt Macy 
519eda14cbcSMatt Macy #ifdef	__cplusplus
520eda14cbcSMatt Macy }
521eda14cbcSMatt Macy #endif
522eda14cbcSMatt Macy 
523eda14cbcSMatt Macy #endif	/* _SYS_ZAP_H */
524