/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.5 2008/07/31 04:42:04 dillon Exp $
 */
/*
 * HAMMER PFS ioctls - Manage pseudo-fs configurations
 */

#include "hammer.h"

static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
				hammer_inode_t ip);
static int hammer_pfs_rollback(hammer_transaction_t trans,
				hammer_pseudofs_inmem_t pfsm,
				hammer_tid_t trunc_tid);
static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
				hammer_tid_t trunc_tid);

/*
 * Get mirroring/pseudo-fs information
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 */
int
hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_pseudofs_inmem_t pfsm;
	u_int32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
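	/*
	 * The PFS id selects the upper 16 bits of the localization field:
	 * PFS#1 covers localization 0x00010000, PFS#2 covers 0x00020000,
	 * and so on (PFS#0 is the root PFS).
	 */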
	localization = (u_int32_t)pfs->pfs_id << 16;
	pfs->bytes = sizeof(struct hammer_pseudofs_data);
	pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;

	pfsm = hammer_load_pseudofs(trans, localization, &error);
	if (error) {
		hammer_rel_pseudofs(trans->hmp, pfsm);
		return(error);
	}

	/*
	 * If the PFS is a master the sync tid is set by normal operation
	 * rather than the mirroring code, and will always track the
	 * real HAMMER filesystem.
	 *
	 * We use flush_tid1, which is the highest fully committed TID.
	 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
	 * caught up to it yet so a crash will roll us back to flush_tid1.
	 */
	if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0)
		pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;

	/*
	 * Copy out to userland.
	 */
	error = 0;
	if (pfs->ondisk && error == 0)
		error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
	hammer_rel_pseudofs(trans->hmp, pfsm);
	return(error);
}
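
/*
 * Example (sketch, not part of the kernel code): userland typically
 * reaches this function via the HAMMERIOC_GET_PSEUDOFS ioctl (defined in
 * hammer_ioctl.h) issued on a file descriptor inside the HAMMER mount,
 * roughly:
 *
 *	struct hammer_ioc_pseudofs_rw pfs;
 *	struct hammer_pseudofs_data pfsd;
 *
 *	bzero(&pfs, sizeof(pfs));
 *	pfs.pfs_id = -1;	(auto-detect, see hammer_pfs_autodetect)
 *	pfs.ondisk = &pfsd;
 *	pfs.bytes = sizeof(pfsd);
 *	ioctl(fd, HAMMERIOC_GET_PSEUDOFS, &pfs);
 *
 * On success pfsd holds the on-disk PFS configuration.
 */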

/*
 * Set mirroring/pseudo-fs information
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 */
int
hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_pseudofs_inmem_t pfsm;
	u_int32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = (u_int32_t)pfs->pfs_id << 16;
	if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
		error = EINVAL;

	if (error == 0 && pfs->ondisk) {
		/*
		 * Load the PFS so we can modify our in-core copy.  Ignore
		 * ENOENT errors.
		 */
		pfsm = hammer_load_pseudofs(trans, localization, &error);
		error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));

		/*
		 * Save it back, create a root inode if we are in master
		 * mode and no root exists.
		 *
		 * We do not create root inodes for slaves, the root inode
		 * must be mirrored from the master.
		 */
		if (error == 0 &&
		    (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
			error = hammer_mkroot_pseudofs(trans, cred, pfsm);
		}
		if (error == 0)
			error = hammer_save_pseudofs(trans, pfsm);

		/*
		 * Wakeup anyone waiting for a TID update for this PFS
		 */
		wakeup(&pfsm->pfsd.sync_end_tid);
		hammer_rel_pseudofs(trans->hmp, pfsm);
	}
	return(error);
}
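
/*
 * Example (sketch): userland updates a PFS configuration with the
 * HAMMERIOC_SET_PSEUDOFS ioctl using the same hammer_ioc_pseudofs_rw
 * structure as above, but here the caller supplies the new
 * hammer_pseudofs_data through pfs.ondisk and must set pfs.version to
 * HAMMER_IOC_PSEUDOFS_VERSION or the request fails with EINVAL.
 */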

/*
 * Upgrade a slave to a master
 *
 * This is fairly easy to do, but we must physically undo any partial syncs
 * for transaction ids > sync_end_tid.  Effectively, we must do a partial
 * rollback.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 */
int
hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_pseudofs_inmem_t pfsm;
	u_int32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = (u_int32_t)pfs->pfs_id << 16;
	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
		return(error);

	/*
	 * A master id must be set when upgrading
	 */
	pfsm = hammer_load_pseudofs(trans, localization, &error);
	if (error == 0) {
		if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) {
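			/*
			 * For example, if the slave's last fully synced
			 * mirroring cycle ended at TID T (sync_end_tid == T),
			 * any partially synced records with TIDs >= T + 1
			 * must be physically undone before this PFS can act
			 * as a master, hence the rollback to T + 1.
			 */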
			error = hammer_pfs_rollback(trans, pfsm,
					    pfsm->pfsd.sync_end_tid + 1);
			if (error == 0) {
				pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
				error = hammer_save_pseudofs(trans, pfsm);
			}
		}
	}
	hammer_rel_pseudofs(trans->hmp, pfsm);
	if (error == EINTR) {
		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
	return (error);
}

/*
 * Downgrade a master to a slave
 *
 * This is really easy to do: just set the SLAVE flag and update sync_end_tid.
 *
 * We previously did not update sync_end_tid, in deference to a slave that
 * had been upgraded to a master and then downgraded again, but that
 * completely breaks the case where one starts with a master, downgrades it
 * to a slave, and then upgrades it again.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 */
int
hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_mount_t hmp = trans->hmp;
	hammer_pseudofs_inmem_t pfsm;
	u_int32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = (u_int32_t)pfs->pfs_id << 16;
	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
		return(error);

	pfsm = hammer_load_pseudofs(trans, localization, &error);
	if (error == 0) {
		if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
			pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
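			/*
			 * Ratchet sync_end_tid forward to the last fully
			 * committed TID so that a later re-upgrade only
			 * rolls back work done after this downgrade.
			 */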
			if (pfsm->pfsd.sync_end_tid < hmp->flush_tid1)
				pfsm->pfsd.sync_end_tid = hmp->flush_tid1;
			error = hammer_save_pseudofs(trans, pfsm);
		}
	}
	hammer_rel_pseudofs(trans->hmp, pfsm);
	return (error);
}

/*
 * Destroy a PFS
 *
 * We can destroy a PFS by scanning and deleting all of its records in the
 * B-Tree.  The hammer utility will delete the softlink in the primary
 * filesystem.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 */
int
hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_pseudofs_inmem_t pfsm;
	u_int32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = (u_int32_t)pfs->pfs_id << 16;

	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
		return(error);

	pfsm = hammer_load_pseudofs(trans, localization, &error);
	if (error == 0) {
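		/*
		 * A trunc_tid of 0 makes the rollback destroy every record
		 * in the PFS (every record's create_tid is >= 0), after
		 * which the PFS is flagged as deleted.
		 */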
		error = hammer_pfs_rollback(trans, pfsm, 0);
		if (error == 0) {
			pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
			error = hammer_save_pseudofs(trans, pfsm);
		}
	}
	hammer_rel_pseudofs(trans->hmp, pfsm);
	if (error == EINTR) {
		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
	return(error);
}

/*
 * Wait for the PFS to sync past the specified TID
 */
int
hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			 struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_pseudofs_inmem_t pfsm;
	struct hammer_pseudofs_data pfsd;
	u_int32_t localization;
	hammer_tid_t tid;
	void *waitp;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = (u_int32_t)pfs->pfs_id << 16;

	if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
		return(error);

	pfsm = hammer_load_pseudofs(trans, localization, &error);
	if (error == 0) {
		if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) {
			tid = pfsm->pfsd.sync_end_tid;
			waitp = &pfsm->pfsd.sync_end_tid;
		} else {
			tid = trans->hmp->flush_tid1;
			waitp = &trans->hmp->flush_tid1;
		}
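		/*
		 * The caller passed the target TID in pfsd.sync_end_tid.
		 * If the PFS (or, for a master, the filesystem flush TID)
		 * has not yet reached it, sleep until a TID update or a
		 * signal wakes us.
		 */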
		if (tid <= pfsd.sync_end_tid)
			error = tsleep(waitp, PCATCH, "hmrmwt", 0);
	}
	hammer_rel_pseudofs(trans->hmp, pfsm);
	if (error == EINTR) {
		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
	return(error);
}


/*
 * Auto-detect the pseudofs and do basic bounds checking.
 */
static
int
hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
{
	int error = 0;

	if (pfs->pfs_id == -1)
		pfs->pfs_id = (int)(ip->obj_localization >> 16);
	if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
		error = EINVAL;
	if (pfs->bytes < sizeof(struct hammer_pseudofs_data))
		error = EINVAL;
	return(error);
}
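
/*
 * For example, an ioctl issued on a file residing in PFS#5 has 5 in the
 * upper 16 bits of its obj_localization, so pfs_id -1 auto-detects to 5.
 * Explicitly supplied ids must fall within [0, HAMMER_MAX_PFS).
 */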

/*
 * Roll back the specified PFS to (trunc_tid - 1), removing everything
 * greater than or equal to trunc_tid.  The PFS must not have been in
 * no-mirror mode or the MIRROR_FILTERED scan will not work properly.
 *
 * This is typically used to remove any partial syncs when upgrading a
 * slave to a master.  It can theoretically also be used to roll back
 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
 * PRUNED, and to points that are older only if they are on a retained
 * (pruning softlink) boundary.
 *
 * Rollbacks destroy information.  If you don't mind inode numbers changing,
 * a better way is to cpdup a snapshot back onto the master.
 */
static
int
hammer_pfs_rollback(hammer_transaction_t trans,
		    hammer_pseudofs_inmem_t pfsm,
		    hammer_tid_t trunc_tid)
{
	struct hammer_cmirror cmirror;
	struct hammer_cursor cursor;
	struct hammer_base_elm key_cur;
	int error;
	int seq;

	bzero(&cmirror, sizeof(cmirror));
	bzero(&key_cur, sizeof(key_cur));
	key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization;
	key_cur.obj_id = HAMMER_MIN_OBJID;
	key_cur.key = HAMMER_MIN_KEY;
	key_cur.create_tid = 1;
	key_cur.rec_type = HAMMER_MIN_RECTYPE;

	seq = trans->hmp->flusher.act;

retry:
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		goto failed;
	}
	cursor.key_beg = key_cur;
	cursor.key_end.localization = HAMMER_MAX_LOCALIZATION +
				      pfsm->localization;
	cursor.key_end.obj_id = HAMMER_MAX_OBJID;
	cursor.key_end.key = HAMMER_MAX_KEY;
	cursor.key_end.create_tid = HAMMER_MAX_TID;
	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
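	/*
	 * key_beg/key_end now bracket the entire B-Tree key space owned
	 * by this PFS: the localization range selects the PFS id, and the
	 * object id, key, create_tid and record type fields run from
	 * their minimum to their maximum values.
	 */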

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
	cursor.flags |= HAMMER_CURSOR_BACKEND;

	/*
	 * Do an optimized scan of only records created or modified
	 * >= trunc_tid, so we can fix up those records.  We must
	 * still check the TIDs but this greatly reduces the size of
	 * the scan.
	 */
	cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
	cursor.cmirror = &cmirror;
	cmirror.mirror_tid = trunc_tid;
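	/*
	 * (Each B-Tree node records the highest mirror_tid beneath it, so
	 * the filtered scan can skip entire subtrees that were last
	 * modified before trunc_tid.)
	 */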

	error = hammer_btree_first(&cursor);
	while (error == 0) {
		/*
		 * Abort the rollback if the caller has been signalled
		 * (e.g. ^C).
		 */
		if (error == 0) {
			error = hammer_signal_check(trans->hmp);
			if (error)
				break;
		}

		/*
		 * We only care about leafs.  Internal nodes can be returned
		 * in mirror-filtered mode (they are used to generate SKIP
		 * mrecords), but we don't need them for this code.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		cursor.flags |= HAMMER_CURSOR_ATEDISK;
		if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
			key_cur = cursor.node->ondisk->elms[cursor.index].base;
			error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
		}

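		/*
		 * Back off if the flusher is falling behind: unlock the
		 * cursor, let a flush cycle complete to reclaim meta-data
		 * and UNDO space, then resume the scan.
		 */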
		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}

		if (error == 0)
			error = hammer_btree_iterate(&cursor);
	}
	if (error == ENOENT)
		error = 0;
	hammer_done_cursor(&cursor);
	if (error == EDEADLK)
		goto retry;
failed:
	return(error);
}

/*
 * Helper function - perform rollback on a B-Tree element given trunc_tid.
 *
 * If create_tid >= trunc_tid the record is physically destroyed.
 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
 */
static
int
hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
{
	hammer_btree_leaf_elm_t elm;
	hammer_transaction_t trans;
	int error;

	elm = &cursor->node->ondisk->elms[cursor->index].leaf;
	if (elm->base.create_tid < trunc_tid &&
	    elm->base.delete_tid < trunc_tid) {
		return(0);
	}
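	/*
	 * For example, with trunc_tid = 1000: a record created at TID 1100
	 * is destroyed outright, while a record created at TID 900 and
	 * deleted at TID 1200 has its delete_tid adjusted back to 0,
	 * un-deleting it.  A record created and deleted entirely before
	 * TID 1000 was already skipped above.
	 */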
	trans = cursor->trans;

	if (elm->base.create_tid >= trunc_tid) {
		error = hammer_delete_at_cursor(
				cursor, HAMMER_DELETE_DESTROY,
				trans->tid, trans->time32,
				1, NULL);
	} else if (elm->base.delete_tid >= trunc_tid) {
		error = hammer_delete_at_cursor(
				cursor, HAMMER_DELETE_ADJUST,
				0, 0,
				1, NULL);
	} else {
		error = 0;
	}
	return(error);
}