1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/dmu.h>
27 #include <sys/dmu_tx.h>
28 #include <sys/dsl_pool.h>
29 #include <sys/dsl_dir.h>
30 #include <sys/dsl_synctask.h>
31 #include <sys/metaslab.h>
32 #include <sys/cred.h>
33 
/*
 * Shift applied to a task's blocks_modified count when it is added to the
 * group's dstg_space estimate, i.e. each modified block is budgeted as
 * 1 << 14 = 16K bytes.
 */
#define	DST_AVG_BLKSHIFT 14
35 
36 /* ARGSUSED */
37 static int
38 dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx)
39 {
40 	return (0);
41 }
42 
43 dsl_sync_task_group_t *
44 dsl_sync_task_group_create(dsl_pool_t *dp)
45 {
46 	dsl_sync_task_group_t *dstg;
47 
48 	dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP);
49 	list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t),
50 	    offsetof(dsl_sync_task_t, dst_node));
51 	dstg->dstg_pool = dp;
52 	dstg->dstg_cr = CRED();
53 
54 	return (dstg);
55 }
56 
57 void
58 dsl_sync_task_create(dsl_sync_task_group_t *dstg,
59     dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
60     void *arg1, void *arg2, int blocks_modified)
61 {
62 	dsl_sync_task_t *dst;
63 
64 	if (checkfunc == NULL)
65 		checkfunc = dsl_null_checkfunc;
66 	dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP);
67 	dst->dst_checkfunc = checkfunc;
68 	dst->dst_syncfunc = syncfunc;
69 	dst->dst_arg1 = arg1;
70 	dst->dst_arg2 = arg2;
71 	list_insert_tail(&dstg->dstg_tasks, dst);
72 
73 	dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT;
74 }
75 
76 int
77 dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg)
78 {
79 	dmu_tx_t *tx;
80 	uint64_t txg;
81 	dsl_sync_task_t *dst;
82 
83 top:
84 	tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir);
85 	VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT));
86 
87 	txg = dmu_tx_get_txg(tx);
88 
89 	/* Do a preliminary error check. */
90 	dstg->dstg_err = 0;
91 	rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER);
92 	for (dst = list_head(&dstg->dstg_tasks); dst;
93 	    dst = list_next(&dstg->dstg_tasks, dst)) {
94 #ifdef ZFS_DEBUG
95 		/*
96 		 * Only check half the time, otherwise, the sync-context
97 		 * check will almost never fail.
98 		 */
99 		if (spa_get_random(2) == 0)
100 			continue;
101 #endif
102 		dst->dst_err =
103 		    dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
104 		if (dst->dst_err)
105 			dstg->dstg_err = dst->dst_err;
106 	}
107 	rw_exit(&dstg->dstg_pool->dp_config_rwlock);
108 
109 	if (dstg->dstg_err) {
110 		dmu_tx_commit(tx);
111 		return (dstg->dstg_err);
112 	}
113 
114 	/*
115 	 * We don't generally have many sync tasks, so pay the price of
116 	 * add_tail to get the tasks executed in the right order.
117 	 */
118 	VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
119 	    dstg, txg));
120 
121 	dmu_tx_commit(tx);
122 
123 	txg_wait_synced(dstg->dstg_pool, txg);
124 
125 	if (dstg->dstg_err == EAGAIN) {
126 		txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE);
127 		goto top;
128 	}
129 
130 	return (dstg->dstg_err);
131 }
132 
133 void
134 dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
135 {
136 	uint64_t txg;
137 
138 	dstg->dstg_nowaiter = B_TRUE;
139 	dstg->dstg_cr = NULL; /* it won't be valid by the time we sync */
140 	txg = dmu_tx_get_txg(tx);
141 	/*
142 	 * We don't generally have many sync tasks, so pay the price of
143 	 * add_tail to get the tasks executed in the right order.
144 	 */
145 	VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
146 	    dstg, txg));
147 }
148 
149 void
150 dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg)
151 {
152 	dsl_sync_task_t *dst;
153 
154 	while (dst = list_head(&dstg->dstg_tasks)) {
155 		list_remove(&dstg->dstg_tasks, dst);
156 		kmem_free(dst, sizeof (dsl_sync_task_t));
157 	}
158 	kmem_free(dstg, sizeof (dsl_sync_task_group_t));
159 }
160 
161 void
162 dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
163 {
164 	dsl_sync_task_t *dst;
165 	dsl_pool_t *dp = dstg->dstg_pool;
166 	uint64_t quota, used;
167 
168 	ASSERT3U(dstg->dstg_err, ==, 0);
169 
170 	/*
171 	 * Check for sufficient space.  We just check against what's
172 	 * on-disk; we don't want any in-flight accounting to get in our
173 	 * way, because open context may have already used up various
174 	 * in-core limits (arc_tempreserve, dsl_pool_tempreserve).
175 	 */
176 	quota = dsl_pool_adjustedsize(dp, B_FALSE) -
177 	    metaslab_class_get_deferred(spa_normal_class(dp->dp_spa));
178 	used = dp->dp_root_dir->dd_phys->dd_used_bytes;
179 	/* MOS space is triple-dittoed, so we multiply by 3. */
180 	if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) {
181 		dstg->dstg_err = ENOSPC;
182 		return;
183 	}
184 
185 	/*
186 	 * Check for errors by calling checkfuncs.
187 	 */
188 	rw_enter(&dp->dp_config_rwlock, RW_WRITER);
189 	for (dst = list_head(&dstg->dstg_tasks); dst;
190 	    dst = list_next(&dstg->dstg_tasks, dst)) {
191 		dst->dst_err =
192 		    dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
193 		if (dst->dst_err)
194 			dstg->dstg_err = dst->dst_err;
195 	}
196 
197 	if (dstg->dstg_err == 0) {
198 		/*
199 		 * Execute sync tasks.
200 		 */
201 		for (dst = list_head(&dstg->dstg_tasks); dst;
202 		    dst = list_next(&dstg->dstg_tasks, dst)) {
203 			dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2,
204 			    dstg->dstg_cr, tx);
205 		}
206 	}
207 	rw_exit(&dp->dp_config_rwlock);
208 
209 	if (dstg->dstg_nowaiter)
210 		dsl_sync_task_group_destroy(dstg);
211 }
212 
213 int
214 dsl_sync_task_do(dsl_pool_t *dp,
215     dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
216     void *arg1, void *arg2, int blocks_modified)
217 {
218 	dsl_sync_task_group_t *dstg;
219 	int err;
220 
221 	dstg = dsl_sync_task_group_create(dp);
222 	dsl_sync_task_create(dstg, checkfunc, syncfunc,
223 	    arg1, arg2, blocks_modified);
224 	err = dsl_sync_task_group_wait(dstg);
225 	dsl_sync_task_group_destroy(dstg);
226 	return (err);
227 }
228 
229 void
230 dsl_sync_task_do_nowait(dsl_pool_t *dp,
231     dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
232     void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx)
233 {
234 	dsl_sync_task_group_t *dstg;
235 
236 	dstg = dsl_sync_task_group_create(dp);
237 	dsl_sync_task_create(dstg, checkfunc, syncfunc,
238 	    arg1, arg2, blocks_modified);
239 	dsl_sync_task_group_nowait(dstg, tx);
240 }
241