xref: /illumos-gate/usr/src/uts/common/fs/zfs/bplist.c (revision 03831d35)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/bplist.h>
29 #include <sys/zfs_context.h>
30 
31 static int
32 bplist_hold(bplist_t *bpl)
33 {
34 	ASSERT(MUTEX_HELD(&bpl->bpl_lock));
35 	if (bpl->bpl_dbuf == NULL) {
36 		int err = dmu_bonus_hold(bpl->bpl_mos,
37 		    bpl->bpl_object, bpl, &bpl->bpl_dbuf);
38 		if (err)
39 			return (err);
40 		bpl->bpl_phys = bpl->bpl_dbuf->db_data;
41 	}
42 	return (0);
43 }
44 
45 uint64_t
46 bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
47 {
48 	uint64_t obj;
49 
50 	obj = dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
51 	    DMU_OT_BPLIST_HDR, sizeof (bplist_phys_t), tx);
52 
53 	return (obj);
54 }
55 
56 void
57 bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
58 {
59 	VERIFY(dmu_object_free(mos, object, tx) == 0);
60 }
61 
62 int
63 bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
64 {
65 	dmu_object_info_t doi;
66 	int err;
67 
68 	err = dmu_object_info(mos, object, &doi);
69 	if (err)
70 		return (err);
71 
72 	mutex_enter(&bpl->bpl_lock);
73 
74 	ASSERT(bpl->bpl_dbuf == NULL);
75 	ASSERT(bpl->bpl_phys == NULL);
76 	ASSERT(bpl->bpl_cached_dbuf == NULL);
77 	ASSERT(bpl->bpl_queue == NULL);
78 	ASSERT(object != 0);
79 
80 	bpl->bpl_mos = mos;
81 	bpl->bpl_object = object;
82 	bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
83 	bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
84 
85 	mutex_exit(&bpl->bpl_lock);
86 	return (0);
87 }
88 
89 void
90 bplist_close(bplist_t *bpl)
91 {
92 	mutex_enter(&bpl->bpl_lock);
93 
94 	ASSERT(bpl->bpl_queue == NULL);
95 
96 	if (bpl->bpl_cached_dbuf) {
97 		dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
98 		bpl->bpl_cached_dbuf = NULL;
99 	}
100 	if (bpl->bpl_dbuf) {
101 		dmu_buf_rele(bpl->bpl_dbuf, bpl);
102 		bpl->bpl_dbuf = NULL;
103 		bpl->bpl_phys = NULL;
104 	}
105 
106 	mutex_exit(&bpl->bpl_lock);
107 }
108 
109 boolean_t
110 bplist_empty(bplist_t *bpl)
111 {
112 	boolean_t rv;
113 
114 	if (bpl->bpl_object == 0)
115 		return (B_TRUE);
116 
117 	mutex_enter(&bpl->bpl_lock);
118 	VERIFY(0 == bplist_hold(bpl)); /* XXX */
119 	rv = (bpl->bpl_phys->bpl_entries == 0);
120 	mutex_exit(&bpl->bpl_lock);
121 
122 	return (rv);
123 }
124 
125 static int
126 bplist_cache(bplist_t *bpl, uint64_t blkid)
127 {
128 	int err = 0;
129 
130 	if (bpl->bpl_cached_dbuf == NULL ||
131 	    bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
132 		if (bpl->bpl_cached_dbuf != NULL)
133 			dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
134 		err = dmu_buf_hold(bpl->bpl_mos,
135 		    bpl->bpl_object, blkid << bpl->bpl_blockshift,
136 		    bpl, &bpl->bpl_cached_dbuf);
137 		ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
138 		    1ULL << bpl->bpl_blockshift);
139 	}
140 	return (err);
141 }
142 
143 int
144 bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
145 {
146 	uint64_t blk, off;
147 	blkptr_t *bparray;
148 	int err;
149 
150 	mutex_enter(&bpl->bpl_lock);
151 
152 	err = bplist_hold(bpl);
153 	if (err) {
154 		mutex_exit(&bpl->bpl_lock);
155 		return (err);
156 	}
157 
158 	if (*itorp >= bpl->bpl_phys->bpl_entries) {
159 		mutex_exit(&bpl->bpl_lock);
160 		return (ENOENT);
161 	}
162 
163 	blk = *itorp >> bpl->bpl_bpshift;
164 	off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
165 
166 	err = bplist_cache(bpl, blk);
167 	if (err) {
168 		mutex_exit(&bpl->bpl_lock);
169 		return (err);
170 	}
171 
172 	bparray = bpl->bpl_cached_dbuf->db_data;
173 	*bp = bparray[off];
174 	(*itorp)++;
175 	mutex_exit(&bpl->bpl_lock);
176 	return (0);
177 }
178 
179 int
180 bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx)
181 {
182 	uint64_t blk, off;
183 	blkptr_t *bparray;
184 	int err;
185 
186 	ASSERT(!BP_IS_HOLE(bp));
187 	mutex_enter(&bpl->bpl_lock);
188 	err = bplist_hold(bpl);
189 	if (err)
190 		return (err);
191 
192 	blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
193 	off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);
194 
195 	err = bplist_cache(bpl, blk);
196 	if (err) {
197 		mutex_exit(&bpl->bpl_lock);
198 		return (err);
199 	}
200 
201 	dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
202 	bparray = bpl->bpl_cached_dbuf->db_data;
203 	bparray[off] = *bp;
204 
205 	/* We never need the fill count. */
206 	bparray[off].blk_fill = 0;
207 
208 	/* The bplist will compress better if we can leave off the checksum */
209 	bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));
210 
211 	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
212 	bpl->bpl_phys->bpl_entries++;
213 	bpl->bpl_phys->bpl_bytes += BP_GET_ASIZE(bp);
214 	mutex_exit(&bpl->bpl_lock);
215 
216 	return (0);
217 }
218 
219 /*
220  * Deferred entry; will be written later by bplist_sync().
221  */
222 void
223 bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp)
224 {
225 	bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);
226 
227 	ASSERT(!BP_IS_HOLE(bp));
228 	mutex_enter(&bpl->bpl_lock);
229 	bpq->bpq_blk = *bp;
230 	bpq->bpq_next = bpl->bpl_queue;
231 	bpl->bpl_queue = bpq;
232 	mutex_exit(&bpl->bpl_lock);
233 }
234 
235 void
236 bplist_sync(bplist_t *bpl, dmu_tx_t *tx)
237 {
238 	bplist_q_t *bpq;
239 
240 	mutex_enter(&bpl->bpl_lock);
241 	while ((bpq = bpl->bpl_queue) != NULL) {
242 		bpl->bpl_queue = bpq->bpq_next;
243 		mutex_exit(&bpl->bpl_lock);
244 		VERIFY(0 == bplist_enqueue(bpl, &bpq->bpq_blk, tx));
245 		kmem_free(bpq, sizeof (*bpq));
246 		mutex_enter(&bpl->bpl_lock);
247 	}
248 	mutex_exit(&bpl->bpl_lock);
249 }
250 
251 void
252 bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
253 {
254 	mutex_enter(&bpl->bpl_lock);
255 	ASSERT3P(bpl->bpl_queue, ==, NULL);
256 	VERIFY(0 == bplist_hold(bpl));
257 	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
258 	VERIFY(0 == dmu_free_range(bpl->bpl_mos,
259 	    bpl->bpl_object, 0, -1ULL, tx));
260 	bpl->bpl_phys->bpl_entries = 0;
261 	bpl->bpl_phys->bpl_bytes = 0;
262 	mutex_exit(&bpl->bpl_lock);
263 }
264