xref: /illumos-gate/usr/src/uts/common/fs/zfs/bplist.c (revision 4bc0a2ef)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/bplist.h>
30 #include <sys/zfs_context.h>
31 
32 static void
33 bplist_hold(bplist_t *bpl)
34 {
35 	ASSERT(MUTEX_HELD(&bpl->bpl_lock));
36 	if (bpl->bpl_dbuf == NULL) {
37 		bpl->bpl_dbuf = dmu_bonus_hold_tag(bpl->bpl_mos,
38 		    bpl->bpl_object, bpl);
39 		dmu_buf_read(bpl->bpl_dbuf);
40 		bpl->bpl_phys = bpl->bpl_dbuf->db_data;
41 	}
42 }
43 
44 uint64_t
45 bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
46 {
47 	uint64_t obj;
48 
49 	obj = dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
50 	    DMU_OT_BPLIST_HDR, sizeof (bplist_phys_t), tx);
51 
52 	return (obj);
53 }
54 
55 void
56 bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
57 {
58 	VERIFY(dmu_object_free(mos, object, tx) == 0);
59 }
60 
61 void
62 bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
63 {
64 	dmu_object_info_t doi;
65 
66 	VERIFY(dmu_object_info(mos, object, &doi) == 0);
67 
68 	mutex_enter(&bpl->bpl_lock);
69 
70 	ASSERT(bpl->bpl_dbuf == NULL);
71 	ASSERT(bpl->bpl_phys == NULL);
72 	ASSERT(bpl->bpl_cached_dbuf == NULL);
73 	ASSERT(bpl->bpl_queue == NULL);
74 	ASSERT(object != 0);
75 
76 	bpl->bpl_mos = mos;
77 	bpl->bpl_object = object;
78 	bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
79 	bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
80 
81 	mutex_exit(&bpl->bpl_lock);
82 }
83 
84 void
85 bplist_close(bplist_t *bpl)
86 {
87 	mutex_enter(&bpl->bpl_lock);
88 
89 	ASSERT(bpl->bpl_queue == NULL);
90 
91 	if (bpl->bpl_cached_dbuf) {
92 		dmu_buf_rele(bpl->bpl_cached_dbuf);
93 		bpl->bpl_cached_dbuf = NULL;
94 	}
95 	if (bpl->bpl_dbuf) {
96 		dmu_buf_rele_tag(bpl->bpl_dbuf, bpl);
97 		bpl->bpl_dbuf = NULL;
98 		bpl->bpl_phys = NULL;
99 	}
100 
101 	mutex_exit(&bpl->bpl_lock);
102 }
103 
104 boolean_t
105 bplist_empty(bplist_t *bpl)
106 {
107 	boolean_t rv;
108 
109 	if (bpl->bpl_object == 0)
110 		return (B_TRUE);
111 
112 	mutex_enter(&bpl->bpl_lock);
113 	bplist_hold(bpl);
114 	rv = (bpl->bpl_phys->bpl_entries == 0);
115 	mutex_exit(&bpl->bpl_lock);
116 
117 	return (rv);
118 }
119 
120 int
121 bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
122 {
123 	uint64_t blk, off;
124 	blkptr_t *bparray;
125 	dmu_buf_t *db;
126 
127 	mutex_enter(&bpl->bpl_lock);
128 	bplist_hold(bpl);
129 
130 	if (*itorp >= bpl->bpl_phys->bpl_entries) {
131 		mutex_exit(&bpl->bpl_lock);
132 		return (ENOENT);
133 	}
134 
135 	blk = *itorp >> bpl->bpl_bpshift;
136 	off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
137 	db = bpl->bpl_cached_dbuf;
138 
139 	if (db == NULL || db->db_offset != (blk << bpl->bpl_blockshift)) {
140 		if (db != NULL)
141 			dmu_buf_rele(db);
142 		bpl->bpl_cached_dbuf = db = dmu_buf_hold(bpl->bpl_mos,
143 		    bpl->bpl_object, blk << bpl->bpl_blockshift);
144 	}
145 
146 	ASSERT3U(db->db_size, ==, 1ULL << bpl->bpl_blockshift);
147 
148 	dmu_buf_read(db);
149 	bparray = db->db_data;
150 	*bp = bparray[off];
151 	(*itorp)++;
152 	mutex_exit(&bpl->bpl_lock);
153 	return (0);
154 }
155 
156 void
157 bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx)
158 {
159 	uint64_t blk, off;
160 	blkptr_t *bparray;
161 	dmu_buf_t *db;
162 
163 	ASSERT(!BP_IS_HOLE(bp));
164 	mutex_enter(&bpl->bpl_lock);
165 	bplist_hold(bpl);
166 
167 	blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
168 	off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);
169 	db = bpl->bpl_cached_dbuf;
170 
171 	if (db == NULL || db->db_offset != (blk << bpl->bpl_blockshift)) {
172 		if (db != NULL)
173 			dmu_buf_rele(db);
174 		bpl->bpl_cached_dbuf = db = dmu_buf_hold(bpl->bpl_mos,
175 		    bpl->bpl_object, blk << bpl->bpl_blockshift);
176 	}
177 
178 	ASSERT3U(db->db_size, ==, 1ULL << bpl->bpl_blockshift);
179 
180 	dmu_buf_will_dirty(db, tx);
181 	bparray = db->db_data;
182 	bparray[off] = *bp;
183 
184 	/* We never need the fill count. */
185 	bparray[off].blk_fill = 0;
186 
187 	/* The bplist will compress better if we can leave off the checksum */
188 	bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));
189 
190 	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
191 	bpl->bpl_phys->bpl_entries++;
192 	bpl->bpl_phys->bpl_bytes += BP_GET_ASIZE(bp);
193 	mutex_exit(&bpl->bpl_lock);
194 }
195 
196 /*
197  * Deferred entry; will be written later by bplist_sync().
198  */
199 void
200 bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp)
201 {
202 	bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);
203 
204 	ASSERT(!BP_IS_HOLE(bp));
205 	mutex_enter(&bpl->bpl_lock);
206 	bpq->bpq_blk = *bp;
207 	bpq->bpq_next = bpl->bpl_queue;
208 	bpl->bpl_queue = bpq;
209 	mutex_exit(&bpl->bpl_lock);
210 }
211 
212 void
213 bplist_sync(bplist_t *bpl, dmu_tx_t *tx)
214 {
215 	bplist_q_t *bpq;
216 
217 	mutex_enter(&bpl->bpl_lock);
218 	while ((bpq = bpl->bpl_queue) != NULL) {
219 		bpl->bpl_queue = bpq->bpq_next;
220 		mutex_exit(&bpl->bpl_lock);
221 		bplist_enqueue(bpl, &bpq->bpq_blk, tx);
222 		kmem_free(bpq, sizeof (*bpq));
223 		mutex_enter(&bpl->bpl_lock);
224 	}
225 	mutex_exit(&bpl->bpl_lock);
226 }
227 
228 void
229 bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
230 {
231 	mutex_enter(&bpl->bpl_lock);
232 	ASSERT3P(bpl->bpl_queue, ==, NULL);
233 	bplist_hold(bpl);
234 	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
235 	dmu_free_range(bpl->bpl_mos, bpl->bpl_object, 0, -1ULL, tx);
236 	bpl->bpl_phys->bpl_entries = 0;
237 	bpl->bpl_phys->bpl_bytes = 0;
238 	mutex_exit(&bpl->bpl_lock);
239 }
240