1 /*
2  * CDDL HEADER START
3  *
4  * This file and its contents are supplied under the terms of the
5  * Common Development and Distribution License ("CDDL"), version 1.0.
6  * You may only use this file in accordance with the terms of version
7  * 1.0 of the CDDL.
8  *
9  * A full copy of the text of the CDDL should have accompanied this
10  * source.  A copy of the CDDL is also available via the Internet at
11  * http://www.illumos.org/license/CDDL.
12  *
13  * CDDL HEADER END
14  */
15 
16 /*
17  * Copyright (c) 2015 by Delphix. All rights reserved.
18  */
19 
20 #include <sys/dmu_tx.h>
21 #include <sys/spa.h>
22 #include <sys/dmu.h>
23 #include <sys/dsl_pool.h>
24 #include <sys/vdev_indirect_births.h>
25 
#ifdef ZFS_DEBUG
/*
 * Debug-build sanity check for a births handle.
 *
 * All of the checking is done with ASSERT()/EQUIV(); the function itself
 * unconditionally returns B_TRUE so that callers can write
 * ASSERT(vdev_indirect_births_verify(vib)).
 */
static boolean_t
vdev_indirect_births_verify(vdev_indirect_births_t *vib)
{
	ASSERT(vib != NULL);

	/* The handle must be bound to an object within an objset ... */
	ASSERT(vib->vib_objset != NULL);
	ASSERT(vib->vib_object != 0);

	/* ... and must have its buffer and header pointers set. */
	ASSERT(vib->vib_dbuf != NULL);
	ASSERT(vib->vib_phys != NULL);

	/* An in-core entry array exists exactly when the count is nonzero. */
	EQUIV(vib->vib_phys->vib_count > 0, vib->vib_entries != NULL);

	return (B_TRUE);
}
#else
/*
 * Non-debug builds: no checking.  The argument is only named inside
 * sizeof, so it is referenced but never evaluated.
 */
#define	vdev_indirect_births_verify(vib) ((void) sizeof (vib), B_TRUE)
#endif
44 
45 uint64_t
46 vdev_indirect_births_count(vdev_indirect_births_t *vib)
47 {
48 	ASSERT(vdev_indirect_births_verify(vib));
49 
50 	return (vib->vib_phys->vib_count);
51 }
52 
53 uint64_t
54 vdev_indirect_births_object(vdev_indirect_births_t *vib)
55 {
56 	ASSERT(vdev_indirect_births_verify(vib));
57 
58 	return (vib->vib_object);
59 }
60 
61 static uint64_t
62 vdev_indirect_births_size_impl(vdev_indirect_births_t *vib)
63 {
64 	return (vib->vib_phys->vib_count * sizeof (*vib->vib_entries));
65 }
66 
67 void
68 vdev_indirect_births_close(vdev_indirect_births_t *vib)
69 {
70 	ASSERT(vdev_indirect_births_verify(vib));
71 
72 	if (vib->vib_phys->vib_count > 0) {
73 		uint64_t births_size = vdev_indirect_births_size_impl(vib);
74 
75 		vmem_free(vib->vib_entries, births_size);
76 		vib->vib_entries = NULL;
77 	}
78 
79 	dmu_buf_rele(vib->vib_dbuf, vib);
80 
81 	vib->vib_objset = NULL;
82 	vib->vib_object = 0;
83 	vib->vib_dbuf = NULL;
84 	vib->vib_phys = NULL;
85 
86 	kmem_free(vib, sizeof (*vib));
87 }
88 
89 uint64_t
90 vdev_indirect_births_alloc(objset_t *os, dmu_tx_t *tx)
91 {
92 	ASSERT(dmu_tx_is_syncing(tx));
93 
94 	return (dmu_object_alloc(os,
95 	    DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
96 	    DMU_OTN_UINT64_METADATA, sizeof (vdev_indirect_birth_phys_t),
97 	    tx));
98 }
99 
100 vdev_indirect_births_t *
101 vdev_indirect_births_open(objset_t *os, uint64_t births_object)
102 {
103 	vdev_indirect_births_t *vib = kmem_zalloc(sizeof (*vib), KM_SLEEP);
104 
105 	vib->vib_objset = os;
106 	vib->vib_object = births_object;
107 
108 	VERIFY0(dmu_bonus_hold(os, vib->vib_object, vib, &vib->vib_dbuf));
109 	vib->vib_phys = vib->vib_dbuf->db_data;
110 
111 	if (vib->vib_phys->vib_count > 0) {
112 		uint64_t births_size = vdev_indirect_births_size_impl(vib);
113 		vib->vib_entries = vmem_alloc(births_size, KM_SLEEP);
114 		VERIFY0(dmu_read(vib->vib_objset, vib->vib_object, 0,
115 		    births_size, vib->vib_entries, DMU_READ_PREFETCH));
116 	}
117 
118 	ASSERT(vdev_indirect_births_verify(vib));
119 
120 	return (vib);
121 }
122 
/*
 * Free the births object 'object' from objset 'os' as part of
 * transaction 'tx'.  A nonzero return from dmu_object_free() is treated
 * as fatal (VERIFY0).
 */
void
vdev_indirect_births_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
{
	VERIFY0(dmu_object_free(os, object, tx));
}
128 
129 void
130 vdev_indirect_births_add_entry(vdev_indirect_births_t *vib,
131     uint64_t max_offset, uint64_t txg, dmu_tx_t *tx)
132 {
133 	vdev_indirect_birth_entry_phys_t vibe;
134 	uint64_t old_size;
135 	uint64_t new_size;
136 	vdev_indirect_birth_entry_phys_t *new_entries;
137 
138 	ASSERT(dmu_tx_is_syncing(tx));
139 	ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx)));
140 	ASSERT(vdev_indirect_births_verify(vib));
141 
142 	dmu_buf_will_dirty(vib->vib_dbuf, tx);
143 
144 	vibe.vibe_offset = max_offset;
145 	vibe.vibe_phys_birth_txg = txg;
146 
147 	old_size = vdev_indirect_births_size_impl(vib);
148 	dmu_write(vib->vib_objset, vib->vib_object, old_size, sizeof (vibe),
149 	    &vibe, tx);
150 	vib->vib_phys->vib_count++;
151 	new_size = vdev_indirect_births_size_impl(vib);
152 
153 	new_entries = vmem_alloc(new_size, KM_SLEEP);
154 	if (old_size > 0) {
155 		memcpy(new_entries, vib->vib_entries, old_size);
156 		vmem_free(vib->vib_entries, old_size);
157 	}
158 	new_entries[vib->vib_phys->vib_count - 1] = vibe;
159 	vib->vib_entries = new_entries;
160 }
161 
162 uint64_t
163 vdev_indirect_births_last_entry_txg(vdev_indirect_births_t *vib)
164 {
165 	ASSERT(vdev_indirect_births_verify(vib));
166 	ASSERT(vib->vib_phys->vib_count > 0);
167 
168 	vdev_indirect_birth_entry_phys_t *last =
169 	    &vib->vib_entries[vib->vib_phys->vib_count - 1];
170 	return (last->vibe_phys_birth_txg);
171 }
172 
173 /*
174  * Return the txg in which the given range was copied (i.e. its physical
175  * birth txg).  The specified offset+asize must be contiguously mapped
176  * (i.e. not a split block).
177  *
178  * The entries are sorted by increasing phys_birth, and also by increasing
179  * offset.  We find the specified offset by binary search.  Note that we
180  * can not use bsearch() because looking at each entry independently is
181  * insufficient to find the correct entry.  Each entry implicitly relies
182  * on the previous entry: an entry indicates that the offsets from the
183  * end of the previous entry to the end of this entry were written in the
184  * specified txg.
185  */
186 uint64_t
187 vdev_indirect_births_physbirth(vdev_indirect_births_t *vib, uint64_t offset,
188     uint64_t asize)
189 {
190 	vdev_indirect_birth_entry_phys_t *base;
191 	vdev_indirect_birth_entry_phys_t *last;
192 
193 	ASSERT(vdev_indirect_births_verify(vib));
194 	ASSERT(vib->vib_phys->vib_count > 0);
195 
196 	base = vib->vib_entries;
197 	last = base + vib->vib_phys->vib_count - 1;
198 
199 	ASSERT3U(offset, <, last->vibe_offset);
200 
201 	while (last >= base) {
202 		vdev_indirect_birth_entry_phys_t *p =
203 		    base + ((last - base) / 2);
204 		if (offset >= p->vibe_offset) {
205 			base = p + 1;
206 		} else if (p == vib->vib_entries ||
207 		    offset >= (p - 1)->vibe_offset) {
208 			ASSERT3U(offset + asize, <=, p->vibe_offset);
209 			return (p->vibe_phys_birth_txg);
210 		} else {
211 			last = p - 1;
212 		}
213 	}
214 	ASSERT(!"offset not found");
215 	return (-1);
216 }
217 
#if defined(_KERNEL)
/* Export this file's non-static functions when building for the kernel. */
EXPORT_SYMBOL(vdev_indirect_births_add_entry);
EXPORT_SYMBOL(vdev_indirect_births_alloc);
EXPORT_SYMBOL(vdev_indirect_births_close);
EXPORT_SYMBOL(vdev_indirect_births_count);
EXPORT_SYMBOL(vdev_indirect_births_free);
EXPORT_SYMBOL(vdev_indirect_births_last_entry_txg);
EXPORT_SYMBOL(vdev_indirect_births_object);
EXPORT_SYMBOL(vdev_indirect_births_open);
EXPORT_SYMBOL(vdev_indirect_births_physbirth);
#endif
229