xref: /illumos-gate/usr/src/uts/common/fs/zfs/bplist.c (revision 4703203d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/bplist.h>
29 #include <sys/zfs_context.h>
30 
31 static int
32 bplist_hold(bplist_t *bpl)
33 {
34 	ASSERT(MUTEX_HELD(&bpl->bpl_lock));
35 	if (bpl->bpl_dbuf == NULL) {
36 		int err = dmu_bonus_hold(bpl->bpl_mos,
37 		    bpl->bpl_object, bpl, &bpl->bpl_dbuf);
38 		if (err)
39 			return (err);
40 		bpl->bpl_phys = bpl->bpl_dbuf->db_data;
41 	}
42 	return (0);
43 }
44 
45 uint64_t
46 bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
47 {
48 	int size;
49 
50 	size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ?
51 	    BPLIST_SIZE_V0 : sizeof (bplist_phys_t);
52 
53 	return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
54 	    DMU_OT_BPLIST_HDR, size, tx));
55 }
56 
57 void
58 bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
59 {
60 	VERIFY(dmu_object_free(mos, object, tx) == 0);
61 }
62 
63 int
64 bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
65 {
66 	dmu_object_info_t doi;
67 	int err;
68 
69 	err = dmu_object_info(mos, object, &doi);
70 	if (err)
71 		return (err);
72 
73 	mutex_enter(&bpl->bpl_lock);
74 
75 	ASSERT(bpl->bpl_dbuf == NULL);
76 	ASSERT(bpl->bpl_phys == NULL);
77 	ASSERT(bpl->bpl_cached_dbuf == NULL);
78 	ASSERT(bpl->bpl_queue == NULL);
79 	ASSERT(object != 0);
80 	ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
81 	ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);
82 
83 	bpl->bpl_mos = mos;
84 	bpl->bpl_object = object;
85 	bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
86 	bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
87 	bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));
88 
89 	mutex_exit(&bpl->bpl_lock);
90 	return (0);
91 }
92 
93 void
94 bplist_close(bplist_t *bpl)
95 {
96 	mutex_enter(&bpl->bpl_lock);
97 
98 	ASSERT(bpl->bpl_queue == NULL);
99 
100 	if (bpl->bpl_cached_dbuf) {
101 		dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
102 		bpl->bpl_cached_dbuf = NULL;
103 	}
104 	if (bpl->bpl_dbuf) {
105 		dmu_buf_rele(bpl->bpl_dbuf, bpl);
106 		bpl->bpl_dbuf = NULL;
107 		bpl->bpl_phys = NULL;
108 	}
109 
110 	mutex_exit(&bpl->bpl_lock);
111 }
112 
113 boolean_t
114 bplist_empty(bplist_t *bpl)
115 {
116 	boolean_t rv;
117 
118 	if (bpl->bpl_object == 0)
119 		return (B_TRUE);
120 
121 	mutex_enter(&bpl->bpl_lock);
122 	VERIFY(0 == bplist_hold(bpl)); /* XXX */
123 	rv = (bpl->bpl_phys->bpl_entries == 0);
124 	mutex_exit(&bpl->bpl_lock);
125 
126 	return (rv);
127 }
128 
129 static int
130 bplist_cache(bplist_t *bpl, uint64_t blkid)
131 {
132 	int err = 0;
133 
134 	if (bpl->bpl_cached_dbuf == NULL ||
135 	    bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
136 		if (bpl->bpl_cached_dbuf != NULL)
137 			dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
138 		err = dmu_buf_hold(bpl->bpl_mos,
139 		    bpl->bpl_object, blkid << bpl->bpl_blockshift,
140 		    bpl, &bpl->bpl_cached_dbuf);
141 		ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
142 		    1ULL << bpl->bpl_blockshift);
143 	}
144 	return (err);
145 }
146 
147 int
148 bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
149 {
150 	uint64_t blk, off;
151 	blkptr_t *bparray;
152 	int err;
153 
154 	mutex_enter(&bpl->bpl_lock);
155 
156 	err = bplist_hold(bpl);
157 	if (err) {
158 		mutex_exit(&bpl->bpl_lock);
159 		return (err);
160 	}
161 
162 	if (*itorp >= bpl->bpl_phys->bpl_entries) {
163 		mutex_exit(&bpl->bpl_lock);
164 		return (ENOENT);
165 	}
166 
167 	blk = *itorp >> bpl->bpl_bpshift;
168 	off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
169 
170 	err = bplist_cache(bpl, blk);
171 	if (err) {
172 		mutex_exit(&bpl->bpl_lock);
173 		return (err);
174 	}
175 
176 	bparray = bpl->bpl_cached_dbuf->db_data;
177 	*bp = bparray[off];
178 	(*itorp)++;
179 	mutex_exit(&bpl->bpl_lock);
180 	return (0);
181 }
182 
183 int
184 bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx)
185 {
186 	uint64_t blk, off;
187 	blkptr_t *bparray;
188 	int err;
189 
190 	ASSERT(!BP_IS_HOLE(bp));
191 	mutex_enter(&bpl->bpl_lock);
192 	err = bplist_hold(bpl);
193 	if (err)
194 		return (err);
195 
196 	blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
197 	off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);
198 
199 	err = bplist_cache(bpl, blk);
200 	if (err) {
201 		mutex_exit(&bpl->bpl_lock);
202 		return (err);
203 	}
204 
205 	dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
206 	bparray = bpl->bpl_cached_dbuf->db_data;
207 	bparray[off] = *bp;
208 
209 	/* We never need the fill count. */
210 	bparray[off].blk_fill = 0;
211 
212 	/* The bplist will compress better if we can leave off the checksum */
213 	bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));
214 
215 	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
216 	bpl->bpl_phys->bpl_entries++;
217 	bpl->bpl_phys->bpl_bytes +=
218 	    bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), bp);
219 	if (bpl->bpl_havecomp) {
220 		bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
221 		bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
222 	}
223 	mutex_exit(&bpl->bpl_lock);
224 
225 	return (0);
226 }
227 
228 /*
229  * Deferred entry; will be written later by bplist_sync().
230  */
231 void
232 bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp)
233 {
234 	bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);
235 
236 	ASSERT(!BP_IS_HOLE(bp));
237 	mutex_enter(&bpl->bpl_lock);
238 	bpq->bpq_blk = *bp;
239 	bpq->bpq_next = bpl->bpl_queue;
240 	bpl->bpl_queue = bpq;
241 	mutex_exit(&bpl->bpl_lock);
242 }
243 
244 void
245 bplist_sync(bplist_t *bpl, dmu_tx_t *tx)
246 {
247 	bplist_q_t *bpq;
248 
249 	mutex_enter(&bpl->bpl_lock);
250 	while ((bpq = bpl->bpl_queue) != NULL) {
251 		bpl->bpl_queue = bpq->bpq_next;
252 		mutex_exit(&bpl->bpl_lock);
253 		VERIFY(0 == bplist_enqueue(bpl, &bpq->bpq_blk, tx));
254 		kmem_free(bpq, sizeof (*bpq));
255 		mutex_enter(&bpl->bpl_lock);
256 	}
257 	mutex_exit(&bpl->bpl_lock);
258 }
259 
260 void
261 bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
262 {
263 	mutex_enter(&bpl->bpl_lock);
264 	ASSERT3P(bpl->bpl_queue, ==, NULL);
265 	VERIFY(0 == bplist_hold(bpl));
266 	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
267 	VERIFY(0 == dmu_free_range(bpl->bpl_mos,
268 	    bpl->bpl_object, 0, -1ULL, tx));
269 	bpl->bpl_phys->bpl_entries = 0;
270 	bpl->bpl_phys->bpl_bytes = 0;
271 	if (bpl->bpl_havecomp) {
272 		bpl->bpl_phys->bpl_comp = 0;
273 		bpl->bpl_phys->bpl_uncomp = 0;
274 	}
275 	mutex_exit(&bpl->bpl_lock);
276 }
277 
278 int
279 bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
280 {
281 	uint64_t itor = 0, comp = 0, uncomp = 0;
282 	int err;
283 	blkptr_t bp;
284 
285 	mutex_enter(&bpl->bpl_lock);
286 
287 	err = bplist_hold(bpl);
288 	if (err) {
289 		mutex_exit(&bpl->bpl_lock);
290 		return (err);
291 	}
292 
293 	*usedp = bpl->bpl_phys->bpl_bytes;
294 	if (bpl->bpl_havecomp) {
295 		*compp = bpl->bpl_phys->bpl_comp;
296 		*uncompp = bpl->bpl_phys->bpl_uncomp;
297 	}
298 	mutex_exit(&bpl->bpl_lock);
299 
300 	if (!bpl->bpl_havecomp) {
301 		while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
302 			comp += BP_GET_PSIZE(&bp);
303 			uncomp += BP_GET_UCSIZE(&bp);
304 		}
305 		if (err == ENOENT)
306 			err = 0;
307 		*compp = comp;
308 		*uncompp = uncomp;
309 	}
310 
311 	return (err);
312 }
313