/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * HAMMER PFS ioctls - Manage pseudo-fs configurations
 */

#include "hammer.h"

static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
				hammer_inode_t ip);
static int hammer_pfs_rollback(hammer_transaction_t trans,
				hammer_pseudofs_inmem_t pfsm,
				hammer_tid_t trunc_tid);
static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
				hammer_tid_t trunc_tid);

/*
 * Get mirroring/pseudo-fs information
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 */
int
hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_pseudofs_inmem_t pfsm;
	u_int32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = (u_int32_t)pfs->pfs_id << 16;
	pfs->bytes = sizeof(struct hammer_pseudofs_data);
	pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;

	pfsm = hammer_load_pseudofs(trans, localization, &error);
	if (error) {
		hammer_rel_pseudofs(trans->hmp, pfsm);
		return(error);
	}

	/*
	 * If the PFS is a master the sync tid is set by normal operation
	 * rather than the mirroring code, and will always track the
	 * real HAMMER filesystem.
	 *
	 * We use flush_tid1, which is the highest fully committed TID.
	 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
	 * caught up to it yet so a crash will roll us back to flush_tid1.
	 */
	if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0)
		pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;

	/*
	 * Copy out to userland.
	 */
	error = 0;
	if (pfs->ondisk && error == 0)
		error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
	hammer_rel_pseudofs(trans->hmp, pfsm);
	return(error);
}
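
/*
 * Illustrative sketch, not part of the kernel build: roughly how a
 * userland tool such as hammer(8) reaches hammer_ioc_get_pseudofs()
 * above.  The HAMMERIOC_GET_PSEUDOFS request and structure layout come
 * from hammer_ioctl.h; fd is assumed to be a descriptor open on any
 * file or directory inside the HAMMER mount, which is why the ip passed
 * to the handler need not belong to the PFS being queried.
 */
#if 0
	struct hammer_ioc_pseudofs_rw pfs;
	struct hammer_pseudofs_data pfsd;

	bzero(&pfs, sizeof(pfs));
	bzero(&pfsd, sizeof(pfsd));
	pfs.pfs_id = 1;			/* query PFS #1 */
	pfs.ondisk = &pfsd;
	pfs.bytes = sizeof(pfsd);
	if (ioctl(fd, HAMMERIOC_GET_PSEUDOFS, &pfs) < 0)
		err(1, "HAMMERIOC_GET_PSEUDOFS");
	/* pfsd.sync_end_tid now holds the PFS's synchronization point */
#endif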

/*
 * Set mirroring/pseudo-fs information
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 */
int
hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_pseudofs_inmem_t pfsm;
	u_int32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = (u_int32_t)pfs->pfs_id << 16;
	if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
		error = EINVAL;

	if (error == 0 && pfs->ondisk) {
		/*
		 * Load the PFS so we can modify our in-core copy.  Ignore
		 * ENOENT errors.
		 */
		pfsm = hammer_load_pseudofs(trans, localization, &error);
		error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));

		/*
		 * Save it back, create a root inode if we are in master
		 * mode and no root exists.
		 *
		 * We do not create root inodes for slaves, the root inode
		 * must be mirrored from the master.
		 */
		if (error == 0 &&
		    (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
			error = hammer_mkroot_pseudofs(trans, cred, pfsm);
		}
		if (error == 0)
			error = hammer_save_pseudofs(trans, pfsm);

		/*
		 * Wakeup anyone waiting for a TID update for this PFS
		 */
		wakeup(&pfsm->pfsd.sync_end_tid);
		hammer_rel_pseudofs(trans->hmp, pfsm);
	}
	return(error);
}
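
/*
 * Illustrative sketch, not part of the kernel build: userland updates a
 * PFS configuration with a read-modify-write cycle so unrelated fields
 * are preserved.  HAMMERIOC_GET_PSEUDOFS and HAMMERIOC_SET_PSEUDOFS come
 * from hammer_ioctl.h; fd is assumed to be open on a file inside the
 * HAMMER mount.
 */
#if 0
	pfs.pfs_id = 1;
	pfs.ondisk = &pfsd;
	pfs.bytes = sizeof(pfsd);
	if (ioctl(fd, HAMMERIOC_GET_PSEUDOFS, &pfs) < 0)
		err(1, "HAMMERIOC_GET_PSEUDOFS");
	/* ... adjust fields of pfsd here (uuids, label, etc) ... */
	pfs.version = HAMMER_IOC_PSEUDOFS_VERSION;
	if (ioctl(fd, HAMMERIOC_SET_PSEUDOFS, &pfs) < 0)
		err(1, "HAMMERIOC_SET_PSEUDOFS");
#endif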

/*
 * Upgrade a slave to a master
 *
 * This is fairly easy to do, but we must physically undo any partial syncs
 * for transaction ids > sync_end_tid.  Effectively, we must do a partial
 * rollback.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 */
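/*
 * Worked example (illustrative numbers): if the slave last fully
 * synchronized through TID 0x0000000100000000, the rollback below is
 * invoked with trunc_tid = 0x0000000100000001.  Records created at or
 * before the synced TID are left alone, while anything a partially
 * completed mirroring pass wrote with a higher TID is destroyed or
 * un-deleted, leaving the PFS at its last consistent point before the
 * SLAVE flag is cleared.
 */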
int
hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_pseudofs_inmem_t pfsm;
	u_int32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = (u_int32_t)pfs->pfs_id << 16;
	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
		return(error);

	/*
	 * A master id must be set when upgrading
	 */
	pfsm = hammer_load_pseudofs(trans, localization, &error);
	if (error == 0) {
		if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) {
			error = hammer_pfs_rollback(trans, pfsm,
					    pfsm->pfsd.sync_end_tid + 1);
			if (error == 0) {
				pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
				error = hammer_save_pseudofs(trans, pfsm);
			}
		}
	}
	hammer_rel_pseudofs(trans->hmp, pfsm);
	if (error == EINTR) {
		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
	return (error);
}

/*
 * Downgrade a master to a slave
 *
 * This is really easy to do: just set the SLAVE flag and update sync_end_tid.
 *
 * We previously did not update sync_end_tid, to accommodate a slave that had
 * been upgraded to a master and then downgraded again, but that completely
 * breaks the case where one starts with a master, downgrades to a slave, and
 * then upgrades again.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 */
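/*
 * Worked example (illustrative numbers): if the master's last fully
 * committed flush is flush_tid1 = 0x0000000100000200 and the PFS still
 * carries an older sync_end_tid from a previous slave life, the code
 * below bumps sync_end_tid up to flush_tid1.  A later upgrade then rolls
 * back only past that point instead of discarding data the master
 * legitimately committed.
 */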
int
hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_mount_t hmp = trans->hmp;
	hammer_pseudofs_inmem_t pfsm;
	u_int32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = (u_int32_t)pfs->pfs_id << 16;
	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
		return(error);

	pfsm = hammer_load_pseudofs(trans, localization, &error);
	if (error == 0) {
		if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
			pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
			if (pfsm->pfsd.sync_end_tid < hmp->flush_tid1)
				pfsm->pfsd.sync_end_tid = hmp->flush_tid1;
			error = hammer_save_pseudofs(trans, pfsm);
		}
	}
	hammer_rel_pseudofs(trans->hmp, pfsm);
	return (error);
}

/*
 * Destroy a PFS
 *
 * We can destroy a PFS by scanning and deleting all of its records in the
 * B-Tree.  The hammer utility will delete the softlink in the primary
 * filesystem.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 */
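/*
 * Worked example (illustrative): destruction reuses the rollback helper
 * with trunc_tid = 0.  Since every record's create_tid is >= 0, the
 * create_tid >= trunc_tid test in hammer_pfs_delete_at_cursor() matches
 * everything, so the entire B-Tree range belonging to the PFS is
 * physically destroyed; the in-core pfsd is then flagged
 * HAMMER_PFSD_DELETED and saved.
 */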
int
hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_pseudofs_inmem_t pfsm;
	u_int32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = (u_int32_t)pfs->pfs_id << 16;

	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
		return(error);

	pfsm = hammer_load_pseudofs(trans, localization, &error);
	if (error == 0) {
		error = hammer_pfs_rollback(trans, pfsm, 0);
		if (error == 0) {
			pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
			error = hammer_save_pseudofs(trans, pfsm);
		}
	}
	hammer_rel_pseudofs(trans->hmp, pfsm);
	if (error == EINTR) {
		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
	return(error);
}

/*
 * Wait for the PFS to sync past the specified TID
 */
int
hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			 struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_pseudofs_inmem_t pfsm;
	struct hammer_pseudofs_data pfsd;
	u_int32_t localization;
	hammer_tid_t tid;
	void *waitp;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = (u_int32_t)pfs->pfs_id << 16;

	if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
		return(error);

	pfsm = hammer_load_pseudofs(trans, localization, &error);
	if (error == 0) {
		if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) {
			tid = pfsm->pfsd.sync_end_tid;
			waitp = &pfsm->pfsd.sync_end_tid;
		} else {
			tid = trans->hmp->flush_tid1;
			waitp = &trans->hmp->flush_tid1;
		}
		if (tid <= pfsd.sync_end_tid)
			tsleep(waitp, PCATCH, "hmrmwt", 0);
	}
	hammer_rel_pseudofs(trans->hmp, pfsm);
	if (error == EINTR) {
		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
	return(error);
}
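
/*
 * Illustrative sketch, not part of the kernel build: the caller passes
 * in a pfsd whose sync_end_tid holds the TID it wants the PFS to get
 * past; the handler above sleeps once and returns, so userland normally
 * calls it in a loop.  HAMMERIOC_WAI_PSEUDOFS is assumed to be the
 * request hammer_ioctl() maps to this handler (see hammer_ioctl.h); fd
 * and last_tid are placeholders.
 */
#if 0
	pfs.pfs_id = 1;
	pfs.ondisk = &pfsd;
	pfs.bytes = sizeof(pfsd);
	pfsd.sync_end_tid = last_tid;	/* wait until the PFS passes this */
	if (ioctl(fd, HAMMERIOC_WAI_PSEUDOFS, &pfs) < 0)
		err(1, "HAMMERIOC_WAI_PSEUDOFS");
	/* typically re-queried with HAMMERIOC_GET_PSEUDOFS and looped */
#endif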


/*
 * Auto-detect the pseudofs and do basic bounds checking.
 */
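/*
 * Worked example (illustrative): a PFS id is carried in the upper 16
 * bits of an inode's obj_localization, so pfs_id 3 corresponds to
 * localization 0x00030000 and pfs_id 0 to the root PFS.  Passing
 * pfs_id == -1 means "whichever PFS the ioctl's descriptor lives in",
 * which is recovered below from ip->obj_localization >> 16.
 */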
static
int
hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
{
	int error = 0;

	if (pfs->pfs_id == -1)
		pfs->pfs_id = (int)(ip->obj_localization >> 16);
	if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
		error = EINVAL;
	if (pfs->bytes < sizeof(struct hammer_pseudofs_data))
		error = EINVAL;
	return(error);
}

/*
 * Rollback the specified PFS to (trunc_tid - 1), removing everything
 * greater than or equal to trunc_tid.  The PFS must not have been in
 * no-mirror mode or the MIRROR_FILTERED scan will not work properly.
 *
 * This is typically used to remove any partial syncs when upgrading a
 * slave to a master.  It can theoretically also be used to roll back
 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
 * PRUNED, and to points that are older only if they are on a retained
 * (pruning softlink) boundary.
 *
 * Rollbacks destroy information.  If you don't mind inode numbers changing,
 * a better way would be to cpdup a snapshot back onto the master.
 */
static
int
hammer_pfs_rollback(hammer_transaction_t trans,
		    hammer_pseudofs_inmem_t pfsm,
		    hammer_tid_t trunc_tid)
{
	struct hammer_cmirror cmirror;
	struct hammer_cursor cursor;
	struct hammer_base_elm key_cur;
	int error;
	int seq;

	bzero(&cmirror, sizeof(cmirror));
	bzero(&key_cur, sizeof(key_cur));
	key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization;
	key_cur.obj_id = HAMMER_MIN_OBJID;
	key_cur.key = HAMMER_MIN_KEY;
	key_cur.create_tid = 1;
	key_cur.rec_type = HAMMER_MIN_RECTYPE;

	seq = trans->hmp->flusher.done;

retry:
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		goto failed;
	}
	cursor.key_beg = key_cur;
	cursor.key_end.localization = HAMMER_MAX_LOCALIZATION +
				      pfsm->localization;
	cursor.key_end.obj_id = HAMMER_MAX_OBJID;
	cursor.key_end.key = HAMMER_MAX_KEY;
	cursor.key_end.create_tid = HAMMER_MAX_TID;
	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
	cursor.flags |= HAMMER_CURSOR_BACKEND;

	/*
	 * Do an optimized scan of only records created or modified
	 * >= trunc_tid, so we can fix up those records.  We must
	 * still check the TIDs but this greatly reduces the size of
	 * the scan.
	 */
	cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
	cursor.cmirror = &cmirror;
	cmirror.mirror_tid = trunc_tid;

	error = hammer_btree_first(&cursor);
	while (error == 0) {
		/*
		 * Abort the rollback if the caller has been interrupted
		 * by a signal.
		 */
		if (error == 0) {
			error = hammer_signal_check(trans->hmp);
			if (error)
				break;
		}

		/*
		 * We only care about leafs.  Internal nodes can be returned
		 * in mirror-filtered mode (they are used to generate SKIP
		 * mrecords), but we don't need them for this code.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		cursor.flags |= HAMMER_CURSOR_ATEDISK;
		if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
			key_cur = cursor.node->ondisk->elms[cursor.index].base;
			error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
		}

		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}

		if (error == 0)
			error = hammer_btree_iterate(&cursor);
	}
	if (error == ENOENT)
		error = 0;
	hammer_done_cursor(&cursor);
	if (error == EDEADLK)
		goto retry;
failed:
	return(error);
}

/*
 * Helper function - perform rollback on a B-Tree element given trunc_tid.
 *
 * If create_tid >= trunc_tid the record is physically destroyed.
 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
 */
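/*
 * Worked example (illustrative TIDs), with trunc_tid = 0x1000:
 *
 * - create_tid 0x1200: the record is destroyed outright.
 * - create_tid 0x0800, delete_tid 0x1100: delete_tid is reset to 0,
 *   resurrecting the record as it existed before the rollback point.
 * - create_tid 0x0800, delete_tid 0x0900: the record is left untouched.
 */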
static
int
hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
{
	hammer_btree_leaf_elm_t elm;
	int error;

	elm = &cursor->node->ondisk->elms[cursor->index].leaf;
	if (elm->base.create_tid < trunc_tid &&
	    elm->base.delete_tid < trunc_tid) {
		return(0);
	}

	if (elm->base.create_tid >= trunc_tid) {
		error = hammer_delete_at_cursor(
				cursor, HAMMER_DELETE_DESTROY,
				cursor->trans->tid, cursor->trans->time32,
				1, NULL);
	} else if (elm->base.delete_tid >= trunc_tid) {
		error = hammer_delete_at_cursor(
				cursor, HAMMER_DELETE_ADJUST,
				0, 0,
				1, NULL);
	} else {
		error = 0;
	}
	return(error);
}