xref: /dragonfly/sys/vfs/hammer/hammer_pfs.c (revision 9ddb8543)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.5 2008/07/31 04:42:04 dillon Exp $
35  */
36 /*
37  * HAMMER PFS ioctls - Manage pseudo-fs configurations
38  */
39 
40 #include "hammer.h"
41 
42 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
43 				hammer_inode_t ip);
44 static int hammer_pfs_rollback(hammer_transaction_t trans,
45 				hammer_pseudofs_inmem_t pfsm,
46 				hammer_tid_t trunc_tid);
47 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
48 				hammer_tid_t trunc_tid);
49 
50 /*
51  * Get mirroring/pseudo-fs information
52  *
53  * NOTE: The ip used for ioctl is not necessarily related to the PFS
54  */
55 int
56 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
57 			struct hammer_ioc_pseudofs_rw *pfs)
58 {
59 	hammer_pseudofs_inmem_t pfsm;
60 	u_int32_t localization;
61 	int error;
62 
63 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
64 		return(error);
65 	localization = (u_int32_t)pfs->pfs_id << 16;
66 	pfs->bytes = sizeof(struct hammer_pseudofs_data);
67 	pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;
68 
69 	pfsm = hammer_load_pseudofs(trans, localization, &error);
70 	if (error) {
71 		hammer_rel_pseudofs(trans->hmp, pfsm);
72 		return(error);
73 	}
74 
75 	/*
76 	 * If the PFS is a master the sync tid is set by normal operation
77 	 * rather then the mirroring code, and will always track the
78 	 * real HAMMER filesystem.
79 	 *
80 	 * We use flush_tid1, which is the highest fully committed TID.
81 	 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
82 	 * caught up to it yet so a crash will roll us back to flush_tid1.
83 	 */
84 	if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0)
85 		pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;
86 
87 	/*
88 	 * Copy out to userland.
89 	 */
90 	error = 0;
91 	if (pfs->ondisk && error == 0)
92 		error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
93 	hammer_rel_pseudofs(trans->hmp, pfsm);
94 	return(error);
95 }
96 
97 /*
98  * Set mirroring/pseudo-fs information
99  *
100  * NOTE: The ip used for ioctl is not necessarily related to the PFS
101  */
102 int
103 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
104 			struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
105 {
106 	hammer_pseudofs_inmem_t pfsm;
107 	u_int32_t localization;
108 	int error;
109 
110 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
111 		return(error);
112 	localization = (u_int32_t)pfs->pfs_id << 16;
113 	if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
114 		error = EINVAL;
115 	localization = (u_int32_t)pfs->pfs_id << 16;
116 
117 	if (error == 0 && pfs->ondisk) {
118 		/*
119 		 * Load the PFS so we can modify our in-core copy.  Ignore
120 		 * ENOENT errors.
121 		 */
122 		pfsm = hammer_load_pseudofs(trans, localization, &error);
123 		error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));
124 
125 		/*
126 		 * Save it back, create a root inode if we are in master
127 		 * mode and no root exists.
128 		 */
129 		if (error == 0)
130 			error = hammer_mkroot_pseudofs(trans, cred, pfsm);
131 		if (error == 0)
132 			error = hammer_save_pseudofs(trans, pfsm);
133 
134 		/*
135 		 * Wakeup anyone waiting for a TID update for this PFS
136 		 */
137 		wakeup(&pfsm->pfsd.sync_end_tid);
138 		hammer_rel_pseudofs(trans->hmp, pfsm);
139 	}
140 	return(error);
141 }
142 
143 /*
144  * Upgrade a slave to a master
145  *
146  * This is fairly easy to do, but we must physically undo any partial syncs
147  * for transaction ids > sync_end_tid.  Effective, we must do a partial
148  * rollback.
149  *
150  * NOTE: The ip used for ioctl is not necessarily related to the PFS
151  */
152 int
153 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
154 			struct hammer_ioc_pseudofs_rw *pfs)
155 {
156 	hammer_pseudofs_inmem_t pfsm;
157 	u_int32_t localization;
158 	int error;
159 
160 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
161 		return(error);
162 	localization = (u_int32_t)pfs->pfs_id << 16;
163 	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
164 		return(error);
165 
166 	/*
167 	 * A master id must be set when upgrading
168 	 */
169 	pfsm = hammer_load_pseudofs(trans, localization, &error);
170 	if (error == 0) {
171 		if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) {
172 			error = hammer_pfs_rollback(trans, pfsm,
173 					    pfsm->pfsd.sync_end_tid + 1);
174 			if (error == 0) {
175 				pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
176 				error = hammer_save_pseudofs(trans, pfsm);
177 			}
178 		}
179 	}
180 	hammer_rel_pseudofs(trans->hmp, pfsm);
181 	if (error == EINTR) {
182 		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
183 		error = 0;
184 	}
185 	return (error);
186 }
187 
188 /*
189  * Downgrade a master to a slave
190  *
191  * This is really easy to do, just set the SLAVE flag.
192  *
193  * We also leave sync_end_tid intact... the field is not used in master
194  * mode (vol0_next_tid overrides it), but if someone switches to master
195  * mode accidently and then back to slave mode we don't want it to change.
196  * Eventually it will be used as the cross-synchronization TID in
197  * multi-master mode, and we don't want to mess with it for that feature
198  * either.
199  *
200  * NOTE: The ip used for ioctl is not necessarily related to the PFS
201  */
202 int
203 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
204 			struct hammer_ioc_pseudofs_rw *pfs)
205 {
206 	hammer_pseudofs_inmem_t pfsm;
207 	u_int32_t localization;
208 	int error;
209 
210 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
211 		return(error);
212 	localization = (u_int32_t)pfs->pfs_id << 16;
213 	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
214 		return(error);
215 
216 	pfsm = hammer_load_pseudofs(trans, localization, &error);
217 	if (error == 0) {
218 		if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
219 			pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
220 			error = hammer_save_pseudofs(trans, pfsm);
221 		}
222 	}
223 	hammer_rel_pseudofs(trans->hmp, pfsm);
224 	return (error);
225 }
226 
227 /*
228  * Destroy a PFS
229  *
230  * We can destroy a PFS by scanning and deleting all of its records in the
231  * B-Tree.  The hammer utility will delete the softlink in the primary
232  * filesystem.
233  *
234  * NOTE: The ip used for ioctl is not necessarily related to the PFS
235  */
236 int
237 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
238 			struct hammer_ioc_pseudofs_rw *pfs)
239 {
240 	hammer_pseudofs_inmem_t pfsm;
241 	u_int32_t localization;
242 	int error;
243 
244 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
245 		return(error);
246 	localization = (u_int32_t)pfs->pfs_id << 16;
247 
248 	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
249 		return(error);
250 
251 	pfsm = hammer_load_pseudofs(trans, localization, &error);
252 	if (error == 0) {
253 		error = hammer_pfs_rollback(trans, pfsm, 0);
254 		if (error == 0) {
255 			pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
256 			error = hammer_save_pseudofs(trans, pfsm);
257 		}
258 	}
259 	hammer_rel_pseudofs(trans->hmp, pfsm);
260 	if (error == EINTR) {
261 		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
262 		error = 0;
263 	}
264 	return(error);
265 }
266 
267 /*
268  * Wait for the PFS to sync past the specified TID
269  */
270 int
271 hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
272 			 struct hammer_ioc_pseudofs_rw *pfs)
273 {
274 	hammer_pseudofs_inmem_t pfsm;
275 	struct hammer_pseudofs_data pfsd;
276 	u_int32_t localization;
277 	hammer_tid_t tid;
278 	void *waitp;
279 	int error;
280 
281 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
282 		return(error);
283 	localization = (u_int32_t)pfs->pfs_id << 16;
284 
285 	if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
286 		return(error);
287 
288 	pfsm = hammer_load_pseudofs(trans, localization, &error);
289 	if (error == 0) {
290 		if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) {
291 			tid = pfsm->pfsd.sync_end_tid;
292 			waitp = &pfsm->pfsd.sync_end_tid;
293 		} else {
294 			tid = trans->hmp->flush_tid1;
295 			waitp = &trans->hmp->flush_tid1;
296 		}
297 		if (tid <= pfsd.sync_end_tid)
298 			tsleep(waitp, PCATCH, "hmrmwt", 0);
299 	}
300 	hammer_rel_pseudofs(trans->hmp, pfsm);
301 	if (error == EINTR) {
302 		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
303 		error = 0;
304 	}
305 	return(error);
306 }
307 
308 
309 /*
310  * Auto-detect the pseudofs and do basic bounds checking.
311  */
312 static
313 int
314 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
315 {
316 	int error = 0;
317 
318 	if (pfs->pfs_id == -1)
319 		pfs->pfs_id = (int)(ip->obj_localization >> 16);
320 	if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
321 		error = EINVAL;
322 	if (pfs->bytes < sizeof(struct hammer_pseudofs_data))
323 		error = EINVAL;
324 	return(error);
325 }
326 
327 /*
328  * Rollback the specified PFS to (trunc_tid - 1), removing everything
329  * greater or equal to trunc_tid.  The PFS must not have been in no-mirror
330  * mode or the MIRROR_FILTERED scan will not work properly.
331  *
332  * This is typically used to remove any partial syncs when upgrading a
333  * slave to a master.  It can theoretically also be used to rollback
334  * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
335  * PRUNED, and to points that are older only if they are on a retained
336  * (pruning softlink) boundary.
337  *
338  * Rollbacks destroy information.  If you don't mind inode numbers changing
339  * a better way would be to cpdup a snapshot back onto the master.
340  */
341 static
342 int
343 hammer_pfs_rollback(hammer_transaction_t trans,
344 		    hammer_pseudofs_inmem_t pfsm,
345 		    hammer_tid_t trunc_tid)
346 {
347 	struct hammer_cmirror cmirror;
348 	struct hammer_cursor cursor;
349 	struct hammer_base_elm key_cur;
350 	int error;
351 	int seq;
352 
353 	bzero(&cmirror, sizeof(cmirror));
354 	bzero(&key_cur, sizeof(key_cur));
355 	key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization;
356 	key_cur.obj_id = HAMMER_MIN_OBJID;
357 	key_cur.key = HAMMER_MIN_KEY;
358 	key_cur.create_tid = 1;
359 	key_cur.rec_type = HAMMER_MIN_RECTYPE;
360 
361 	seq = trans->hmp->flusher.act;
362 
363 retry:
364 	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
365 	if (error) {
366 		hammer_done_cursor(&cursor);
367 		goto failed;
368 	}
369 	cursor.key_beg = key_cur;
370 	cursor.key_end.localization = HAMMER_MAX_LOCALIZATION +
371 				      pfsm->localization;
372 	cursor.key_end.obj_id = HAMMER_MAX_OBJID;
373 	cursor.key_end.key = HAMMER_MAX_KEY;
374 	cursor.key_end.create_tid = HAMMER_MAX_TID;
375 	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
376 
377 	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
378 	cursor.flags |= HAMMER_CURSOR_BACKEND;
379 
380 	/*
381 	 * Do an optimized scan of only records created or modified
382 	 * >= trunc_tid, so we can fix up those records.  We must
383 	 * still check the TIDs but this greatly reduces the size of
384 	 * the scan.
385 	 */
386 	cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
387 	cursor.cmirror = &cmirror;
388 	cmirror.mirror_tid = trunc_tid;
389 
390 	error = hammer_btree_first(&cursor);
391 	while (error == 0) {
392 		/*
393 		 * Abort the rollback.
394 		 */
395 		if (error == 0) {
396 			error = hammer_signal_check(trans->hmp);
397 			if (error)
398 				break;
399 		}
400 
401 		/*
402 		 * We only care about leafs.  Internal nodes can be returned
403 		 * in mirror-filtered mode (they are used to generate SKIP
404 		 * mrecords), but we don't need them for this code.
405 		 *
406 		 * WARNING: See warnings in hammer_unlock_cursor() function.
407 		 */
408 		cursor.flags |= HAMMER_CURSOR_ATEDISK;
409 		if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
410 			key_cur = cursor.node->ondisk->elms[cursor.index].base;
411 			error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
412 		}
413 
414 		while (hammer_flusher_meta_halflimit(trans->hmp) ||
415 		       hammer_flusher_undo_exhausted(trans, 2)) {
416 			hammer_unlock_cursor(&cursor);
417 			hammer_flusher_wait(trans->hmp, seq);
418 			hammer_lock_cursor(&cursor);
419 			seq = hammer_flusher_async_one(trans->hmp);
420 		}
421 
422 		if (error == 0)
423 			error = hammer_btree_iterate(&cursor);
424 	}
425 	if (error == ENOENT)
426 		error = 0;
427 	hammer_done_cursor(&cursor);
428 	if (error == EDEADLK)
429 		goto retry;
430 failed:
431 	return(error);
432 }
433 
434 /*
435  * Helper function - perform rollback on a B-Tree element given trunc_tid.
436  *
437  * If create_tid >= trunc_tid the record is physically destroyed.
438  * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
439  */
440 static
441 int
442 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
443 {
444 	hammer_btree_leaf_elm_t elm;
445 	hammer_transaction_t trans;
446         int error;
447 
448 	elm = &cursor->node->ondisk->elms[cursor->index].leaf;
449 	if (elm->base.create_tid < trunc_tid &&
450 	    elm->base.delete_tid < trunc_tid) {
451 		return(0);
452 	}
453         trans = cursor->trans;
454 
455 	if (elm->base.create_tid >= trunc_tid) {
456 		error = hammer_delete_at_cursor(
457 				cursor, HAMMER_DELETE_DESTROY,
458 				cursor->trans->tid, cursor->trans->time32,
459 				1, NULL);
460 	} else if (elm->base.delete_tid >= trunc_tid) {
461 		error = hammer_delete_at_cursor(
462 				cursor, HAMMER_DELETE_ADJUST,
463 				0, 0,
464 				1, NULL);
465 	} else {
466 		error = 0;
467 	}
468 	return(error);
469 }
470 
471