/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
25
26 #include <sys/bplist.h>
27 #include <sys/zfs_context.h>
28
29 void
bplist_init(bplist_t * bpl)30 bplist_init(bplist_t *bpl)
31 {
32 bzero(bpl, sizeof (*bpl));
33 mutex_init(&bpl->bpl_lock, NULL, MUTEX_DEFAULT, NULL);
34 }
35
36 void
bplist_fini(bplist_t * bpl)37 bplist_fini(bplist_t *bpl)
38 {
39 ASSERT(bpl->bpl_queue == NULL);
40 mutex_destroy(&bpl->bpl_lock);
41 }
42
43 static int
bplist_hold(bplist_t * bpl)44 bplist_hold(bplist_t *bpl)
45 {
46 ASSERT(MUTEX_HELD(&bpl->bpl_lock));
47 if (bpl->bpl_dbuf == NULL) {
48 int err = dmu_bonus_hold(bpl->bpl_mos,
49 bpl->bpl_object, bpl, &bpl->bpl_dbuf);
50 if (err)
51 return (err);
52 bpl->bpl_phys = bpl->bpl_dbuf->db_data;
53 }
54 return (0);
55 }
56
57 uint64_t
bplist_create(objset_t * mos,int blocksize,dmu_tx_t * tx)58 bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
59 {
60 int size;
61
62 size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ?
63 BPLIST_SIZE_V0 : sizeof (bplist_phys_t);
64
65 return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
66 DMU_OT_BPLIST_HDR, size, tx));
67 }
68
69 void
bplist_destroy(objset_t * mos,uint64_t object,dmu_tx_t * tx)70 bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
71 {
72 VERIFY(dmu_object_free(mos, object, tx) == 0);
73 }
74
75 int
bplist_open(bplist_t * bpl,objset_t * mos,uint64_t object)76 bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
77 {
78 dmu_object_info_t doi;
79 int err;
80
81 err = dmu_object_info(mos, object, &doi);
82 if (err)
83 return (err);
84
85 mutex_enter(&bpl->bpl_lock);
86
87 ASSERT(bpl->bpl_dbuf == NULL);
88 ASSERT(bpl->bpl_phys == NULL);
89 ASSERT(bpl->bpl_cached_dbuf == NULL);
90 ASSERT(bpl->bpl_queue == NULL);
91 ASSERT(object != 0);
92 ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
93 ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);
94
95 bpl->bpl_mos = mos;
96 bpl->bpl_object = object;
97 bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
98 bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
99 bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));
100
101 mutex_exit(&bpl->bpl_lock);
102 return (0);
103 }
104
105 void
bplist_close(bplist_t * bpl)106 bplist_close(bplist_t *bpl)
107 {
108 mutex_enter(&bpl->bpl_lock);
109
110 ASSERT(bpl->bpl_queue == NULL);
111
112 if (bpl->bpl_cached_dbuf) {
113 dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
114 bpl->bpl_cached_dbuf = NULL;
115 }
116 if (bpl->bpl_dbuf) {
117 dmu_buf_rele(bpl->bpl_dbuf, bpl);
118 bpl->bpl_dbuf = NULL;
119 bpl->bpl_phys = NULL;
120 }
121
122 mutex_exit(&bpl->bpl_lock);
123 }
124
125 boolean_t
bplist_empty(bplist_t * bpl)126 bplist_empty(bplist_t *bpl)
127 {
128 boolean_t rv;
129
130 if (bpl->bpl_object == 0)
131 return (B_TRUE);
132
133 mutex_enter(&bpl->bpl_lock);
134 VERIFY(0 == bplist_hold(bpl)); /* XXX */
135 rv = (bpl->bpl_phys->bpl_entries == 0);
136 mutex_exit(&bpl->bpl_lock);
137
138 return (rv);
139 }
140
141 static int
bplist_cache(bplist_t * bpl,uint64_t blkid)142 bplist_cache(bplist_t *bpl, uint64_t blkid)
143 {
144 int err = 0;
145
146 if (bpl->bpl_cached_dbuf == NULL ||
147 bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
148 if (bpl->bpl_cached_dbuf != NULL)
149 dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
150 err = dmu_buf_hold(bpl->bpl_mos,
151 bpl->bpl_object, blkid << bpl->bpl_blockshift,
152 bpl, &bpl->bpl_cached_dbuf);
153 ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
154 1ULL << bpl->bpl_blockshift);
155 }
156 return (err);
157 }
158
159 int
bplist_iterate(bplist_t * bpl,uint64_t * itorp,blkptr_t * bp)160 bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
161 {
162 uint64_t blk, off;
163 blkptr_t *bparray;
164 int err;
165
166 mutex_enter(&bpl->bpl_lock);
167
168 err = bplist_hold(bpl);
169 if (err) {
170 mutex_exit(&bpl->bpl_lock);
171 return (err);
172 }
173
174 if (*itorp >= bpl->bpl_phys->bpl_entries) {
175 mutex_exit(&bpl->bpl_lock);
176 return (ENOENT);
177 }
178
179 blk = *itorp >> bpl->bpl_bpshift;
180 off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
181
182 err = bplist_cache(bpl, blk);
183 if (err) {
184 mutex_exit(&bpl->bpl_lock);
185 return (err);
186 }
187
188 bparray = bpl->bpl_cached_dbuf->db_data;
189 *bp = bparray[off];
190 (*itorp)++;
191 mutex_exit(&bpl->bpl_lock);
192 return (0);
193 }
194
195 int
bplist_enqueue(bplist_t * bpl,const blkptr_t * bp,dmu_tx_t * tx)196 bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx)
197 {
198 uint64_t blk, off;
199 blkptr_t *bparray;
200 int err;
201
202 ASSERT(!BP_IS_HOLE(bp));
203 mutex_enter(&bpl->bpl_lock);
204 err = bplist_hold(bpl);
205 if (err)
206 return (err);
207
208 blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
209 off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);
210
211 err = bplist_cache(bpl, blk);
212 if (err) {
213 mutex_exit(&bpl->bpl_lock);
214 return (err);
215 }
216
217 dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
218 bparray = bpl->bpl_cached_dbuf->db_data;
219 bparray[off] = *bp;
220
221 /* We never need the fill count. */
222 bparray[off].blk_fill = 0;
223
224 /* The bplist will compress better if we can leave off the checksum */
225 if (!BP_GET_DEDUP(&bparray[off]))
226 bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));
227
228 dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
229 bpl->bpl_phys->bpl_entries++;
230 bpl->bpl_phys->bpl_bytes +=
231 bp_get_dsize_sync(dmu_objset_spa(bpl->bpl_mos), bp);
232 if (bpl->bpl_havecomp) {
233 bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
234 bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
235 }
236 mutex_exit(&bpl->bpl_lock);
237
238 return (0);
239 }
240
241 void
bplist_enqueue_cb(void * bpl,const blkptr_t * bp,dmu_tx_t * tx)242 bplist_enqueue_cb(void *bpl, const blkptr_t *bp, dmu_tx_t *tx)
243 {
244 VERIFY(bplist_enqueue(bpl, bp, tx) == 0);
245 }
246
247 /*
248 * Deferred entry; will be processed later by bplist_sync().
249 */
250 void
bplist_enqueue_deferred(bplist_t * bpl,const blkptr_t * bp)251 bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp)
252 {
253 bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);
254
255 ASSERT(!BP_IS_HOLE(bp));
256 mutex_enter(&bpl->bpl_lock);
257 bpq->bpq_blk = *bp;
258 bpq->bpq_next = bpl->bpl_queue;
259 bpl->bpl_queue = bpq;
260 mutex_exit(&bpl->bpl_lock);
261 }
262
263 void
bplist_sync(bplist_t * bpl,bplist_sync_cb_t * func,void * arg,dmu_tx_t * tx)264 bplist_sync(bplist_t *bpl, bplist_sync_cb_t *func, void *arg, dmu_tx_t *tx)
265 {
266 bplist_q_t *bpq;
267
268 mutex_enter(&bpl->bpl_lock);
269 while ((bpq = bpl->bpl_queue) != NULL) {
270 bpl->bpl_queue = bpq->bpq_next;
271 mutex_exit(&bpl->bpl_lock);
272 func(arg, &bpq->bpq_blk, tx);
273 kmem_free(bpq, sizeof (*bpq));
274 mutex_enter(&bpl->bpl_lock);
275 }
276 mutex_exit(&bpl->bpl_lock);
277 }
278
279 void
bplist_vacate(bplist_t * bpl,dmu_tx_t * tx)280 bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
281 {
282 mutex_enter(&bpl->bpl_lock);
283 ASSERT3P(bpl->bpl_queue, ==, NULL);
284 VERIFY(0 == bplist_hold(bpl));
285 dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
286 VERIFY(0 == dmu_free_range(bpl->bpl_mos,
287 bpl->bpl_object, 0, -1ULL, tx));
288 bpl->bpl_phys->bpl_entries = 0;
289 bpl->bpl_phys->bpl_bytes = 0;
290 if (bpl->bpl_havecomp) {
291 bpl->bpl_phys->bpl_comp = 0;
292 bpl->bpl_phys->bpl_uncomp = 0;
293 }
294 mutex_exit(&bpl->bpl_lock);
295 }
296
297 int
bplist_space(bplist_t * bpl,uint64_t * usedp,uint64_t * compp,uint64_t * uncompp)298 bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
299 {
300 int err;
301
302 mutex_enter(&bpl->bpl_lock);
303
304 err = bplist_hold(bpl);
305 if (err) {
306 mutex_exit(&bpl->bpl_lock);
307 return (err);
308 }
309
310 *usedp = bpl->bpl_phys->bpl_bytes;
311 if (bpl->bpl_havecomp) {
312 *compp = bpl->bpl_phys->bpl_comp;
313 *uncompp = bpl->bpl_phys->bpl_uncomp;
314 }
315 mutex_exit(&bpl->bpl_lock);
316
317 if (!bpl->bpl_havecomp) {
318 uint64_t itor = 0, comp = 0, uncomp = 0;
319 blkptr_t bp;
320
321 while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
322 comp += BP_GET_PSIZE(&bp);
323 uncomp += BP_GET_UCSIZE(&bp);
324 }
325 if (err == ENOENT)
326 err = 0;
327 *compp = comp;
328 *uncompp = uncomp;
329 }
330
331 return (err);
332 }
333
334 /*
335 * Return (in *dsizep) the amount of space on the deadlist which is:
336 * mintxg < blk_birth <= maxtxg
337 */
338 int
bplist_space_birthrange(bplist_t * bpl,uint64_t mintxg,uint64_t maxtxg,uint64_t * dsizep)339 bplist_space_birthrange(bplist_t *bpl, uint64_t mintxg, uint64_t maxtxg,
340 uint64_t *dsizep)
341 {
342 uint64_t size = 0;
343 uint64_t itor = 0;
344 blkptr_t bp;
345 int err;
346
347 /*
348 * As an optimization, if they want the whole txg range, just
349 * get bpl_bytes rather than iterating over the bps.
350 */
351 if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX) {
352 mutex_enter(&bpl->bpl_lock);
353 err = bplist_hold(bpl);
354 if (err == 0)
355 *dsizep = bpl->bpl_phys->bpl_bytes;
356 mutex_exit(&bpl->bpl_lock);
357 return (err);
358 }
359
360 while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
361 if (bp.blk_birth > mintxg && bp.blk_birth <= maxtxg) {
362 size += bp_get_dsize(dmu_objset_spa(bpl->bpl_mos), &bp);
363 }
364 }
365 if (err == ENOENT)
366 err = 0;
367 *dsizep = size;
368 return (err);
369 }
370