1 /*
2  * CDDL HEADER START
3  *
4  * This file and its contents are supplied under the terms of the
5  * Common Development and Distribution License ("CDDL"), version 1.0.
6  * You may only use this file in accordance with the terms of version
7  * 1.0 of the CDDL.
8  *
9  * A full copy of the text of the CDDL should have accompanied this
10  * source.  A copy of the CDDL is also available via the Internet at
11  * http://www.illumos.org/license/CDDL.
12  *
13  * CDDL HEADER END
14  */
15 
16 /*
17  * Copyright (c) 2015 by Delphix. All rights reserved.
18  */
19 
20 #include <sys/dmu_tx.h>
21 #include <sys/spa.h>
22 #include <sys/dmu.h>
23 #include <sys/dsl_pool.h>
24 #include <sys/vdev_indirect_births.h>
25 
#ifdef ZFS_DEBUG
/*
 * Debug-only sanity check of an open births handle.  Asserts that the
 * handle itself, its object number, objset, phys pointer and dbuf are all
 * set, and that the in-core entries array is non-NULL exactly when the
 * phys entry count is non-zero.
 *
 * Always returns B_TRUE, so callers can wrap the call in ASSERT() and
 * have it vanish together with the assertion.
 */
static boolean_t
vdev_indirect_births_verify(vdev_indirect_births_t *vib)
{
	ASSERT(vib != NULL);

	ASSERT(vib->vib_object != 0);
	ASSERT(vib->vib_objset != NULL);
	ASSERT(vib->vib_phys != NULL);
	ASSERT(vib->vib_dbuf != NULL);

	/* The entries array exists if and only if there are entries. */
	EQUIV(vib->vib_phys->vib_count > 0, vib->vib_entries != NULL);

	return (B_TRUE);
}
#endif
42 
43 uint64_t
44 vdev_indirect_births_count(vdev_indirect_births_t *vib)
45 {
46 	ASSERT(vdev_indirect_births_verify(vib));
47 
48 	return (vib->vib_phys->vib_count);
49 }
50 
51 uint64_t
52 vdev_indirect_births_object(vdev_indirect_births_t *vib)
53 {
54 	ASSERT(vdev_indirect_births_verify(vib));
55 
56 	return (vib->vib_object);
57 }
58 
59 static uint64_t
60 vdev_indirect_births_size_impl(vdev_indirect_births_t *vib)
61 {
62 	return (vib->vib_phys->vib_count * sizeof (*vib->vib_entries));
63 }
64 
65 void
66 vdev_indirect_births_close(vdev_indirect_births_t *vib)
67 {
68 	ASSERT(vdev_indirect_births_verify(vib));
69 
70 	if (vib->vib_phys->vib_count > 0) {
71 		uint64_t births_size = vdev_indirect_births_size_impl(vib);
72 
73 		vmem_free(vib->vib_entries, births_size);
74 		vib->vib_entries = NULL;
75 	}
76 
77 	dmu_buf_rele(vib->vib_dbuf, vib);
78 
79 	vib->vib_objset = NULL;
80 	vib->vib_object = 0;
81 	vib->vib_dbuf = NULL;
82 	vib->vib_phys = NULL;
83 
84 	kmem_free(vib, sizeof (*vib));
85 }
86 
87 uint64_t
88 vdev_indirect_births_alloc(objset_t *os, dmu_tx_t *tx)
89 {
90 	ASSERT(dmu_tx_is_syncing(tx));
91 
92 	return (dmu_object_alloc(os,
93 	    DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
94 	    DMU_OTN_UINT64_METADATA, sizeof (vdev_indirect_birth_phys_t),
95 	    tx));
96 }
97 
98 vdev_indirect_births_t *
99 vdev_indirect_births_open(objset_t *os, uint64_t births_object)
100 {
101 	vdev_indirect_births_t *vib = kmem_zalloc(sizeof (*vib), KM_SLEEP);
102 
103 	vib->vib_objset = os;
104 	vib->vib_object = births_object;
105 
106 	VERIFY0(dmu_bonus_hold(os, vib->vib_object, vib, &vib->vib_dbuf));
107 	vib->vib_phys = vib->vib_dbuf->db_data;
108 
109 	if (vib->vib_phys->vib_count > 0) {
110 		uint64_t births_size = vdev_indirect_births_size_impl(vib);
111 		vib->vib_entries = vmem_alloc(births_size, KM_SLEEP);
112 		VERIFY0(dmu_read(vib->vib_objset, vib->vib_object, 0,
113 		    births_size, vib->vib_entries, DMU_READ_PREFETCH));
114 	}
115 
116 	ASSERT(vdev_indirect_births_verify(vib));
117 
118 	return (vib);
119 }
120 
/*
 * Free the births object `object` in objset `os` as part of transaction
 * `tx`.  A non-zero return from dmu_object_free() trips VERIFY0.
 */
void
vdev_indirect_births_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
{
	VERIFY0(dmu_object_free(os, object, tx));
}
126 
127 void
128 vdev_indirect_births_add_entry(vdev_indirect_births_t *vib,
129     uint64_t max_offset, uint64_t txg, dmu_tx_t *tx)
130 {
131 	vdev_indirect_birth_entry_phys_t vibe;
132 	uint64_t old_size;
133 	uint64_t new_size;
134 	vdev_indirect_birth_entry_phys_t *new_entries;
135 
136 	ASSERT(dmu_tx_is_syncing(tx));
137 	ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx)));
138 	ASSERT(vdev_indirect_births_verify(vib));
139 
140 	dmu_buf_will_dirty(vib->vib_dbuf, tx);
141 
142 	vibe.vibe_offset = max_offset;
143 	vibe.vibe_phys_birth_txg = txg;
144 
145 	old_size = vdev_indirect_births_size_impl(vib);
146 	dmu_write(vib->vib_objset, vib->vib_object, old_size, sizeof (vibe),
147 	    &vibe, tx);
148 	vib->vib_phys->vib_count++;
149 	new_size = vdev_indirect_births_size_impl(vib);
150 
151 	new_entries = vmem_alloc(new_size, KM_SLEEP);
152 	if (old_size > 0) {
153 		bcopy(vib->vib_entries, new_entries, old_size);
154 		vmem_free(vib->vib_entries, old_size);
155 	}
156 	new_entries[vib->vib_phys->vib_count - 1] = vibe;
157 	vib->vib_entries = new_entries;
158 }
159 
160 uint64_t
161 vdev_indirect_births_last_entry_txg(vdev_indirect_births_t *vib)
162 {
163 	ASSERT(vdev_indirect_births_verify(vib));
164 	ASSERT(vib->vib_phys->vib_count > 0);
165 
166 	vdev_indirect_birth_entry_phys_t *last =
167 	    &vib->vib_entries[vib->vib_phys->vib_count - 1];
168 	return (last->vibe_phys_birth_txg);
169 }
170 
171 /*
172  * Return the txg in which the given range was copied (i.e. its physical
173  * birth txg).  The specified offset+asize must be contiguously mapped
174  * (i.e. not a split block).
175  *
176  * The entries are sorted by increasing phys_birth, and also by increasing
177  * offset.  We find the specified offset by binary search.  Note that we
178  * can not use bsearch() because looking at each entry independently is
179  * insufficient to find the correct entry.  Each entry implicitly relies
180  * on the previous entry: an entry indicates that the offsets from the
181  * end of the previous entry to the end of this entry were written in the
182  * specified txg.
183  */
184 uint64_t
185 vdev_indirect_births_physbirth(vdev_indirect_births_t *vib, uint64_t offset,
186     uint64_t asize)
187 {
188 	vdev_indirect_birth_entry_phys_t *base;
189 	vdev_indirect_birth_entry_phys_t *last;
190 
191 	ASSERT(vdev_indirect_births_verify(vib));
192 	ASSERT(vib->vib_phys->vib_count > 0);
193 
194 	base = vib->vib_entries;
195 	last = base + vib->vib_phys->vib_count - 1;
196 
197 	ASSERT3U(offset, <, last->vibe_offset);
198 
199 	while (last >= base) {
200 		vdev_indirect_birth_entry_phys_t *p =
201 		    base + ((last - base) / 2);
202 		if (offset >= p->vibe_offset) {
203 			base = p + 1;
204 		} else if (p == vib->vib_entries ||
205 		    offset >= (p - 1)->vibe_offset) {
206 			ASSERT3U(offset + asize, <=, p->vibe_offset);
207 			return (p->vibe_phys_birth_txg);
208 		} else {
209 			last = p - 1;
210 		}
211 	}
212 	ASSERT(!"offset not found");
213 	return (-1);
214 }
215 
#if defined(_KERNEL)
/* Kernel builds only: export the public vdev_indirect_births_*() API. */
EXPORT_SYMBOL(vdev_indirect_births_add_entry);
EXPORT_SYMBOL(vdev_indirect_births_alloc);
EXPORT_SYMBOL(vdev_indirect_births_close);
EXPORT_SYMBOL(vdev_indirect_births_count);
EXPORT_SYMBOL(vdev_indirect_births_free);
EXPORT_SYMBOL(vdev_indirect_births_last_entry_txg);
EXPORT_SYMBOL(vdev_indirect_births_object);
EXPORT_SYMBOL(vdev_indirect_births_open);
EXPORT_SYMBOL(vdev_indirect_births_physbirth);
#endif
227