xref: /dragonfly/sys/vfs/hammer/hammer_pfs.c (revision dca3c15d)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.5 2008/07/31 04:42:04 dillon Exp $
35  */
36 /*
37  * HAMMER PFS ioctls - Manage pseudo-fs configurations
38  */
39 
40 #include "hammer.h"
41 
42 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
43 				hammer_inode_t ip);
44 static int hammer_pfs_rollback(hammer_transaction_t trans,
45 				hammer_pseudofs_inmem_t pfsm,
46 				hammer_tid_t trunc_tid);
47 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
48 				hammer_tid_t trunc_tid);
49 
50 /*
51  * Get mirroring/pseudo-fs information
52  *
53  * NOTE: The ip used for ioctl is not necessarily related to the PFS
54  */
55 int
56 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
57 			struct hammer_ioc_pseudofs_rw *pfs)
58 {
59 	hammer_pseudofs_inmem_t pfsm;
60 	u_int32_t localization;
61 	int error;
62 
63 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
64 		return(error);
65 	localization = (u_int32_t)pfs->pfs_id << 16;
66 	pfs->bytes = sizeof(struct hammer_pseudofs_data);
67 	pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;
68 
69 	pfsm = hammer_load_pseudofs(trans, localization, &error);
70 	if (error) {
71 		hammer_rel_pseudofs(trans->hmp, pfsm);
72 		return(error);
73 	}
74 
75 	/*
76 	 * If the PFS is a master the sync tid is set by normal operation
77 	 * rather then the mirroring code, and will always track the
78 	 * real HAMMER filesystem.
79 	 *
80 	 * We use flush_tid1, which is the highest fully committed TID.
81 	 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
82 	 * caught up to it yet so a crash will roll us back to flush_tid1.
83 	 */
84 	if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0)
85 		pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;
86 
87 	/*
88 	 * Copy out to userland.
89 	 */
90 	error = 0;
91 	if (pfs->ondisk && error == 0)
92 		error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
93 	hammer_rel_pseudofs(trans->hmp, pfsm);
94 	return(error);
95 }
96 
97 /*
98  * Set mirroring/pseudo-fs information
99  *
100  * NOTE: The ip used for ioctl is not necessarily related to the PFS
101  */
102 int
103 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
104 			struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
105 {
106 	hammer_pseudofs_inmem_t pfsm;
107 	u_int32_t localization;
108 	int error;
109 
110 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
111 		return(error);
112 	localization = (u_int32_t)pfs->pfs_id << 16;
113 	if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
114 		error = EINVAL;
115 	localization = (u_int32_t)pfs->pfs_id << 16;
116 
117 	if (error == 0 && pfs->ondisk) {
118 		/*
119 		 * Load the PFS so we can modify our in-core copy.  Ignore
120 		 * ENOENT errors.
121 		 */
122 		pfsm = hammer_load_pseudofs(trans, localization, &error);
123 		error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));
124 
125 		/*
126 		 * Save it back, create a root inode if we are in master
127 		 * mode and no root exists.
128 		 *
129 		 * We do not create root inodes for slaves, the root inode
130 		 * must be mirrored from the master.
131 		 */
132 		if (error == 0 &&
133 		    (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
134 			error = hammer_mkroot_pseudofs(trans, cred, pfsm);
135 		}
136 		if (error == 0)
137 			error = hammer_save_pseudofs(trans, pfsm);
138 
139 		/*
140 		 * Wakeup anyone waiting for a TID update for this PFS
141 		 */
142 		wakeup(&pfsm->pfsd.sync_end_tid);
143 		hammer_rel_pseudofs(trans->hmp, pfsm);
144 	}
145 	return(error);
146 }
147 
148 /*
149  * Upgrade a slave to a master
150  *
151  * This is fairly easy to do, but we must physically undo any partial syncs
152  * for transaction ids > sync_end_tid.  Effective, we must do a partial
153  * rollback.
154  *
155  * NOTE: The ip used for ioctl is not necessarily related to the PFS
156  */
157 int
158 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
159 			struct hammer_ioc_pseudofs_rw *pfs)
160 {
161 	hammer_pseudofs_inmem_t pfsm;
162 	u_int32_t localization;
163 	int error;
164 
165 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
166 		return(error);
167 	localization = (u_int32_t)pfs->pfs_id << 16;
168 	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
169 		return(error);
170 
171 	/*
172 	 * A master id must be set when upgrading
173 	 */
174 	pfsm = hammer_load_pseudofs(trans, localization, &error);
175 	if (error == 0) {
176 		if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) {
177 			error = hammer_pfs_rollback(trans, pfsm,
178 					    pfsm->pfsd.sync_end_tid + 1);
179 			if (error == 0) {
180 				pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
181 				error = hammer_save_pseudofs(trans, pfsm);
182 			}
183 		}
184 	}
185 	hammer_rel_pseudofs(trans->hmp, pfsm);
186 	if (error == EINTR) {
187 		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
188 		error = 0;
189 	}
190 	return (error);
191 }
192 
193 /*
194  * Downgrade a master to a slave
195  *
196  * This is really easy to do, just set the SLAVE flag.
197  *
198  * We also leave sync_end_tid intact... the field is not used in master
199  * mode (vol0_next_tid overrides it), but if someone switches to master
200  * mode accidently and then back to slave mode we don't want it to change.
201  * Eventually it will be used as the cross-synchronization TID in
202  * multi-master mode, and we don't want to mess with it for that feature
203  * either.
204  *
205  * NOTE: The ip used for ioctl is not necessarily related to the PFS
206  */
207 int
208 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
209 			struct hammer_ioc_pseudofs_rw *pfs)
210 {
211 	hammer_pseudofs_inmem_t pfsm;
212 	u_int32_t localization;
213 	int error;
214 
215 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
216 		return(error);
217 	localization = (u_int32_t)pfs->pfs_id << 16;
218 	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
219 		return(error);
220 
221 	pfsm = hammer_load_pseudofs(trans, localization, &error);
222 	if (error == 0) {
223 		if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
224 			pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
225 			error = hammer_save_pseudofs(trans, pfsm);
226 		}
227 	}
228 	hammer_rel_pseudofs(trans->hmp, pfsm);
229 	return (error);
230 }
231 
232 /*
233  * Destroy a PFS
234  *
235  * We can destroy a PFS by scanning and deleting all of its records in the
236  * B-Tree.  The hammer utility will delete the softlink in the primary
237  * filesystem.
238  *
239  * NOTE: The ip used for ioctl is not necessarily related to the PFS
240  */
241 int
242 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
243 			struct hammer_ioc_pseudofs_rw *pfs)
244 {
245 	hammer_pseudofs_inmem_t pfsm;
246 	u_int32_t localization;
247 	int error;
248 
249 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
250 		return(error);
251 	localization = (u_int32_t)pfs->pfs_id << 16;
252 
253 	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
254 		return(error);
255 
256 	pfsm = hammer_load_pseudofs(trans, localization, &error);
257 	if (error == 0) {
258 		error = hammer_pfs_rollback(trans, pfsm, 0);
259 		if (error == 0) {
260 			pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
261 			error = hammer_save_pseudofs(trans, pfsm);
262 		}
263 	}
264 	hammer_rel_pseudofs(trans->hmp, pfsm);
265 	if (error == EINTR) {
266 		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
267 		error = 0;
268 	}
269 	return(error);
270 }
271 
272 /*
273  * Wait for the PFS to sync past the specified TID
274  */
275 int
276 hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
277 			 struct hammer_ioc_pseudofs_rw *pfs)
278 {
279 	hammer_pseudofs_inmem_t pfsm;
280 	struct hammer_pseudofs_data pfsd;
281 	u_int32_t localization;
282 	hammer_tid_t tid;
283 	void *waitp;
284 	int error;
285 
286 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
287 		return(error);
288 	localization = (u_int32_t)pfs->pfs_id << 16;
289 
290 	if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
291 		return(error);
292 
293 	pfsm = hammer_load_pseudofs(trans, localization, &error);
294 	if (error == 0) {
295 		if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) {
296 			tid = pfsm->pfsd.sync_end_tid;
297 			waitp = &pfsm->pfsd.sync_end_tid;
298 		} else {
299 			tid = trans->hmp->flush_tid1;
300 			waitp = &trans->hmp->flush_tid1;
301 		}
302 		if (tid <= pfsd.sync_end_tid)
303 			tsleep(waitp, PCATCH, "hmrmwt", 0);
304 	}
305 	hammer_rel_pseudofs(trans->hmp, pfsm);
306 	if (error == EINTR) {
307 		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
308 		error = 0;
309 	}
310 	return(error);
311 }
312 
313 
314 /*
315  * Auto-detect the pseudofs and do basic bounds checking.
316  */
317 static
318 int
319 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
320 {
321 	int error = 0;
322 
323 	if (pfs->pfs_id == -1)
324 		pfs->pfs_id = (int)(ip->obj_localization >> 16);
325 	if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
326 		error = EINVAL;
327 	if (pfs->bytes < sizeof(struct hammer_pseudofs_data))
328 		error = EINVAL;
329 	return(error);
330 }
331 
332 /*
333  * Rollback the specified PFS to (trunc_tid - 1), removing everything
334  * greater or equal to trunc_tid.  The PFS must not have been in no-mirror
335  * mode or the MIRROR_FILTERED scan will not work properly.
336  *
337  * This is typically used to remove any partial syncs when upgrading a
338  * slave to a master.  It can theoretically also be used to rollback
339  * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
340  * PRUNED, and to points that are older only if they are on a retained
341  * (pruning softlink) boundary.
342  *
343  * Rollbacks destroy information.  If you don't mind inode numbers changing
344  * a better way would be to cpdup a snapshot back onto the master.
345  */
346 static
347 int
348 hammer_pfs_rollback(hammer_transaction_t trans,
349 		    hammer_pseudofs_inmem_t pfsm,
350 		    hammer_tid_t trunc_tid)
351 {
352 	struct hammer_cmirror cmirror;
353 	struct hammer_cursor cursor;
354 	struct hammer_base_elm key_cur;
355 	int error;
356 	int seq;
357 
358 	bzero(&cmirror, sizeof(cmirror));
359 	bzero(&key_cur, sizeof(key_cur));
360 	key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization;
361 	key_cur.obj_id = HAMMER_MIN_OBJID;
362 	key_cur.key = HAMMER_MIN_KEY;
363 	key_cur.create_tid = 1;
364 	key_cur.rec_type = HAMMER_MIN_RECTYPE;
365 
366 	seq = trans->hmp->flusher.act;
367 
368 retry:
369 	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
370 	if (error) {
371 		hammer_done_cursor(&cursor);
372 		goto failed;
373 	}
374 	cursor.key_beg = key_cur;
375 	cursor.key_end.localization = HAMMER_MAX_LOCALIZATION +
376 				      pfsm->localization;
377 	cursor.key_end.obj_id = HAMMER_MAX_OBJID;
378 	cursor.key_end.key = HAMMER_MAX_KEY;
379 	cursor.key_end.create_tid = HAMMER_MAX_TID;
380 	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
381 
382 	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
383 	cursor.flags |= HAMMER_CURSOR_BACKEND;
384 
385 	/*
386 	 * Do an optimized scan of only records created or modified
387 	 * >= trunc_tid, so we can fix up those records.  We must
388 	 * still check the TIDs but this greatly reduces the size of
389 	 * the scan.
390 	 */
391 	cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
392 	cursor.cmirror = &cmirror;
393 	cmirror.mirror_tid = trunc_tid;
394 
395 	error = hammer_btree_first(&cursor);
396 	while (error == 0) {
397 		/*
398 		 * Abort the rollback.
399 		 */
400 		if (error == 0) {
401 			error = hammer_signal_check(trans->hmp);
402 			if (error)
403 				break;
404 		}
405 
406 		/*
407 		 * We only care about leafs.  Internal nodes can be returned
408 		 * in mirror-filtered mode (they are used to generate SKIP
409 		 * mrecords), but we don't need them for this code.
410 		 *
411 		 * WARNING: See warnings in hammer_unlock_cursor() function.
412 		 */
413 		cursor.flags |= HAMMER_CURSOR_ATEDISK;
414 		if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
415 			key_cur = cursor.node->ondisk->elms[cursor.index].base;
416 			error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
417 		}
418 
419 		while (hammer_flusher_meta_halflimit(trans->hmp) ||
420 		       hammer_flusher_undo_exhausted(trans, 2)) {
421 			hammer_unlock_cursor(&cursor);
422 			hammer_flusher_wait(trans->hmp, seq);
423 			hammer_lock_cursor(&cursor);
424 			seq = hammer_flusher_async_one(trans->hmp);
425 		}
426 
427 		if (error == 0)
428 			error = hammer_btree_iterate(&cursor);
429 	}
430 	if (error == ENOENT)
431 		error = 0;
432 	hammer_done_cursor(&cursor);
433 	if (error == EDEADLK)
434 		goto retry;
435 failed:
436 	return(error);
437 }
438 
439 /*
440  * Helper function - perform rollback on a B-Tree element given trunc_tid.
441  *
442  * If create_tid >= trunc_tid the record is physically destroyed.
443  * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
444  */
445 static
446 int
447 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
448 {
449 	hammer_btree_leaf_elm_t elm;
450 	hammer_transaction_t trans;
451         int error;
452 
453 	elm = &cursor->node->ondisk->elms[cursor->index].leaf;
454 	if (elm->base.create_tid < trunc_tid &&
455 	    elm->base.delete_tid < trunc_tid) {
456 		return(0);
457 	}
458         trans = cursor->trans;
459 
460 	if (elm->base.create_tid >= trunc_tid) {
461 		error = hammer_delete_at_cursor(
462 				cursor, HAMMER_DELETE_DESTROY,
463 				cursor->trans->tid, cursor->trans->time32,
464 				1, NULL);
465 	} else if (elm->base.delete_tid >= trunc_tid) {
466 		error = hammer_delete_at_cursor(
467 				cursor, HAMMER_DELETE_ADJUST,
468 				0, 0,
469 				1, NULL);
470 	} else {
471 		error = 0;
472 	}
473 	return(error);
474 }
475 
476