xref: /dragonfly/sys/vfs/hammer/hammer_pfs.c (revision ae24b5e0)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * HAMMER PFS ioctls - Manage pseudo-fs configurations
36  */
37 
38 #include "hammer.h"
39 
40 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
41 				hammer_inode_t ip);
42 static int hammer_pfs_rollback(hammer_transaction_t trans,
43 				hammer_pseudofs_inmem_t pfsm,
44 				hammer_tid_t trunc_tid);
45 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
46 				hammer_tid_t trunc_tid);
47 
48 /*
49  * Get mirroring/pseudo-fs information
50  *
51  * NOTE: The ip used for ioctl is not necessarily related to the PFS
52  * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
53  */
54 int
55 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
56 			struct hammer_ioc_pseudofs_rw *pfs)
57 {
58 	hammer_pseudofs_inmem_t pfsm;
59 	uint32_t localization;
60 	int error;
61 
62 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
63 		return(error);
64 	localization = pfs_to_lo(pfs->pfs_id);
65 	pfs->bytes = sizeof(struct hammer_pseudofs_data);
66 	pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;
67 
68 	pfsm = hammer_load_pseudofs(trans, localization, &error);
69 	if (error) {
70 		hammer_rel_pseudofs(trans->hmp, pfsm);
71 		return(error);
72 	}
73 
74 	/*
75 	 * If the PFS is a master the sync tid is set by normal operation
76 	 * rather than the mirroring code, and will always track the
77 	 * real HAMMER filesystem.
78 	 *
79 	 * We use flush_tid1, which is the highest fully committed TID.
80 	 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
81 	 * caught up to it yet so a crash will roll us back to flush_tid1.
82 	 */
83 	if (hammer_is_pfs_master(&pfsm->pfsd))
84 		pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;
85 
86 	/*
87 	 * Copy out to userland.
88 	 */
89 	if (pfs->ondisk)
90 		error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
91 	hammer_rel_pseudofs(trans->hmp, pfsm);
92 	return(error);
93 }
94 
95 /*
96  * Set mirroring/pseudo-fs information
97  */
98 int
99 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
100 			struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
101 {
102 	hammer_pseudofs_inmem_t pfsm;
103 	uint32_t localization;
104 	int error;
105 
106 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
107 		return(error);
108 	localization = pfs_to_lo(pfs->pfs_id);
109 	if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
110 		error = EINVAL;
111 
112 	if (error == 0 && pfs->ondisk) {
113 		/*
114 		 * Load the PFS so we can modify our in-core copy.  Ignore
115 		 * ENOENT errors.
116 		 */
117 		pfsm = hammer_load_pseudofs(trans, localization, &error);
118 		error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));
119 
120 		/*
121 		 * Save it back, create a root inode if we are in master
122 		 * mode and no root exists.
123 		 *
124 		 * We do not create root inodes for slaves, the root inode
125 		 * must be mirrored from the master.
126 		 */
127 		if (error == 0 && hammer_is_pfs_master(&pfsm->pfsd)) {
128 			error = hammer_mkroot_pseudofs(trans, cred, pfsm, ip);
129 		}
130 		if (error == 0)
131 			error = hammer_save_pseudofs(trans, pfsm);
132 
133 		/*
134 		 * Wakeup anyone waiting for a TID update for this PFS
135 		 */
136 		wakeup(&pfsm->pfsd.sync_end_tid);
137 		hammer_rel_pseudofs(trans->hmp, pfsm);
138 	}
139 	return(error);
140 }
141 
142 /*
143  * Upgrade a slave to a master
144  *
145  * This is fairly easy to do, but we must physically undo any partial syncs
146  * for transaction ids > sync_end_tid.  Effective, we must do a partial
147  * rollback.
148  *
149  * NOTE: The ip used for ioctl is not necessarily related to the PFS
150  * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
151  */
152 int
153 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
154 			struct hammer_ioc_pseudofs_rw *pfs)
155 {
156 	hammer_pseudofs_inmem_t pfsm;
157 	uint32_t localization;
158 	int error;
159 
160 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
161 		return(error);
162 	localization = pfs_to_lo(pfs->pfs_id);
163 	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
164 		return(error);
165 
166 	/*
167 	 * A master id must be set when upgrading
168 	 */
169 	pfsm = hammer_load_pseudofs(trans, localization, &error);
170 	if (error == 0) {
171 		if (hammer_is_pfs_slave(&pfsm->pfsd)) {
172 			error = hammer_pfs_rollback(trans, pfsm,
173 					    pfsm->pfsd.sync_end_tid + 1);
174 			if (error == 0) {
175 				pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
176 				error = hammer_save_pseudofs(trans, pfsm);
177 			}
178 		}
179 	}
180 	hammer_rel_pseudofs(trans->hmp, pfsm);
181 	if (error == EINTR) {
182 		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
183 		error = 0;
184 	}
185 	return (error);
186 }
187 
188 /*
189  * Downgrade a master to a slave
190  *
191  * This is really easy to do, just set the SLAVE flag and update sync_end_tid.
192  *
193  * We previously did not update sync_end_tid in consideration for a slave
194  * upgraded to a master and then downgraded again, but this completely breaks
195  * the case where one starts with a master and then downgrades to a slave,
196  * then upgrades again.
197  *
198  * NOTE: The ip used for ioctl is not necessarily related to the PFS
199  * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
200  */
201 int
202 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
203 			struct hammer_ioc_pseudofs_rw *pfs)
204 {
205 	hammer_mount_t hmp = trans->hmp;
206 	hammer_pseudofs_inmem_t pfsm;
207 	uint32_t localization;
208 	int error;
209 
210 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
211 		return(error);
212 	localization = pfs_to_lo(pfs->pfs_id);
213 	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
214 		return(error);
215 
216 	pfsm = hammer_load_pseudofs(trans, localization, &error);
217 	if (error == 0) {
218 		if (hammer_is_pfs_master(&pfsm->pfsd)) {
219 			pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
220 			if (pfsm->pfsd.sync_end_tid < hmp->flush_tid1)
221 				pfsm->pfsd.sync_end_tid = hmp->flush_tid1;
222 			error = hammer_save_pseudofs(trans, pfsm);
223 		}
224 	}
225 	hammer_rel_pseudofs(trans->hmp, pfsm);
226 	return (error);
227 }
228 
229 /*
230  * Destroy a PFS
231  *
232  * We can destroy a PFS by scanning and deleting all of its records in the
233  * B-Tree.  The hammer utility will delete the softlink in the primary
234  * filesystem.
235  *
236  * NOTE: The ip used for ioctl is not necessarily related to the PFS
237  * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
238  */
239 int
240 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
241 			struct hammer_ioc_pseudofs_rw *pfs)
242 {
243 	hammer_pseudofs_inmem_t pfsm;
244 	uint32_t localization;
245 	int error;
246 
247 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
248 		return(error);
249 	localization = pfs_to_lo(pfs->pfs_id);
250 
251 	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
252 		return(error);
253 
254 	pfsm = hammer_load_pseudofs(trans, localization, &error);
255 	if (error == 0) {
256 		error = hammer_pfs_rollback(trans, pfsm, 0);
257 		if (error == 0) {
258 			pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
259 			error = hammer_save_pseudofs(trans, pfsm);
260 		}
261 	}
262 	hammer_rel_pseudofs(trans->hmp, pfsm);
263 	if (error == EINTR) {
264 		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
265 		error = 0;
266 	}
267 	return(error);
268 }
269 
270 /*
271  * Wait for the PFS to sync past the specified TID
272  */
273 int
274 hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
275 			 struct hammer_ioc_pseudofs_rw *pfs)
276 {
277 	hammer_pseudofs_inmem_t pfsm;
278 	struct hammer_pseudofs_data pfsd;
279 	uint32_t localization;
280 	hammer_tid_t tid;
281 	void *waitp;
282 	int error;
283 
284 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
285 		return(error);
286 	localization = pfs_to_lo(pfs->pfs_id);
287 
288 	if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
289 		return(error);
290 
291 	pfsm = hammer_load_pseudofs(trans, localization, &error);
292 	if (error == 0) {
293 		if (hammer_is_pfs_slave(&pfsm->pfsd)) {
294 			tid = pfsm->pfsd.sync_end_tid;
295 			waitp = &pfsm->pfsd.sync_end_tid;
296 		} else {
297 			tid = trans->hmp->flush_tid1;
298 			waitp = &trans->hmp->flush_tid1;
299 		}
300 		if (tid <= pfsd.sync_end_tid)
301 			tsleep(waitp, PCATCH, "hmrmwt", 0);
302 	}
303 	hammer_rel_pseudofs(trans->hmp, pfsm);
304 	if (error == EINTR) {
305 		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
306 		error = 0;
307 	}
308 	return(error);
309 }
310 
311 /*
312  * Iterate PFS ondisk data.
313  * This function essentially does the same as hammer_load_pseudofs()
314  * except that this function only retrieves PFS data without touching
315  * hammer_pfs_rb_tree at all.
316  *
317  * NOTE: The ip used for ioctl is not necessarily related to the PFS
318  * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
319  *
320  * NOTE: The API was changed in DragonFly 4.7, due to design issues
321  * this ioctl and libhammer (which is the only caller of this ioctl
322  * within DragonFly source, but no longer maintained by anyone) had.
323  */
324 int
325 hammer_ioc_scan_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
326 			struct hammer_ioc_pseudofs_rw *pfs)
327 {
328 	struct hammer_cursor cursor;
329 	hammer_inode_t dip;
330 	uint32_t localization;
331 	int error;
332 
333 	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
334 		return(error);
335 	localization = pfs_to_lo(pfs->pfs_id);
336 	pfs->bytes = sizeof(struct hammer_pseudofs_data);
337 	pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;
338 
339 	dip = hammer_get_inode(trans, NULL, HAMMER_OBJID_ROOT, HAMMER_MAX_TID,
340 		HAMMER_DEF_LOCALIZATION, 0, &error);
341 
342 	error = hammer_init_cursor(trans, &cursor,
343 		(dip ? &dip->cache[1] : NULL), dip);
344 	if (error)
345 		goto fail;
346 
347 	cursor.key_beg.localization = HAMMER_DEF_LOCALIZATION |
348 				      HAMMER_LOCALIZE_MISC;
349 	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
350 	cursor.key_beg.create_tid = 0;
351 	cursor.key_beg.delete_tid = 0;
352 	cursor.key_beg.rec_type = HAMMER_RECTYPE_PFS;
353 	cursor.key_beg.obj_type = 0;
354 	cursor.key_beg.key = localization;
355 	cursor.asof = HAMMER_MAX_TID;
356 	cursor.flags |= HAMMER_CURSOR_ASOF;
357 
358 	error = hammer_ip_lookup(&cursor);
359 	if (error == 0) {
360 		error = hammer_ip_resolve_data(&cursor);
361 		if (error == 0) {
362 			if (pfs->ondisk)
363 				copyout(cursor.data, pfs->ondisk, cursor.leaf->data_len);
364 			localization = cursor.leaf->base.key;
365 			pfs->pfs_id = lo_to_pfs(localization);
366 		}
367 	}
368 	hammer_done_cursor(&cursor);
369 fail:
370 	if (dip)
371 		hammer_rel_inode(dip, 0);
372 	return(error);
373 }
374 
375 /*
376  * Auto-detect the pseudofs and do basic bounds checking.
377  */
378 static
379 int
380 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
381 {
382 	int error = 0;
383 
384 	if (pfs->pfs_id == -1)
385 		pfs->pfs_id = lo_to_pfs(ip->obj_localization);
386 	if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
387 		error = EINVAL;
388 	if (pfs->bytes < sizeof(struct hammer_pseudofs_data))
389 		error = EINVAL;
390 	return(error);
391 }
392 
393 /*
394  * Rollback the specified PFS to (trunc_tid - 1), removing everything
395  * greater or equal to trunc_tid.  The PFS must not have been in no-mirror
396  * mode or the MIRROR_FILTERED scan will not work properly.
397  *
398  * This is typically used to remove any partial syncs when upgrading a
399  * slave to a master.  It can theoretically also be used to rollback
400  * any PFS, including root PFS, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
401  * PRUNED, and to points that are older only if they are on a retained
402  * (pruning softlink) boundary.
403  *
404  * Rollbacks destroy information.  If you don't mind inode numbers changing
405  * a better way would be to cpdup a snapshot back onto the master.
406  */
407 static
408 int
409 hammer_pfs_rollback(hammer_transaction_t trans,
410 		    hammer_pseudofs_inmem_t pfsm,
411 		    hammer_tid_t trunc_tid)
412 {
413 	struct hammer_cmirror cmirror;
414 	struct hammer_cursor cursor;
415 	struct hammer_base_elm key_cur;
416 	int error;
417 	int seq;
418 
419 	bzero(&cmirror, sizeof(cmirror));
420 	bzero(&key_cur, sizeof(key_cur));
421 	key_cur.localization = HAMMER_MIN_LOCALIZATION | pfsm->localization;
422 	key_cur.obj_id = HAMMER_MIN_OBJID;
423 	key_cur.key = HAMMER_MIN_KEY;
424 	key_cur.create_tid = 1;
425 	key_cur.rec_type = HAMMER_MIN_RECTYPE;
426 
427 	seq = trans->hmp->flusher.done;
428 
429 retry:
430 	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
431 	if (error) {
432 		hammer_done_cursor(&cursor);
433 		goto failed;
434 	}
435 	cursor.key_beg = key_cur;
436 	cursor.key_end.localization = HAMMER_MAX_LOCALIZATION |
437 				      pfsm->localization;
438 	cursor.key_end.obj_id = HAMMER_MAX_OBJID;
439 	cursor.key_end.key = HAMMER_MAX_KEY;
440 	cursor.key_end.create_tid = HAMMER_MAX_TID;
441 	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
442 
443 	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
444 	cursor.flags |= HAMMER_CURSOR_BACKEND;
445 
446 	/*
447 	 * Do an optimized scan of only records created or modified
448 	 * >= trunc_tid, so we can fix up those records.  We must
449 	 * still check the TIDs but this greatly reduces the size of
450 	 * the scan.
451 	 */
452 	cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
453 	cursor.cmirror = &cmirror;
454 	cmirror.mirror_tid = trunc_tid;
455 
456 	error = hammer_btree_first(&cursor);
457 	while (error == 0) {
458 		/*
459 		 * Abort the rollback.
460 		 */
461 		if (error == 0) {
462 			error = hammer_signal_check(trans->hmp);
463 			if (error)
464 				break;
465 		}
466 
467 		/*
468 		 * We only care about leafs.  Internal nodes can be returned
469 		 * in mirror-filtered mode (they are used to generate SKIP
470 		 * mrecords), but we don't need them for this code.
471 		 *
472 		 * WARNING: See warnings in hammer_unlock_cursor() function.
473 		 */
474 		cursor.flags |= HAMMER_CURSOR_ATEDISK;
475 		if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
476 			key_cur = cursor.node->ondisk->elms[cursor.index].base;
477 			error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
478 		}
479 
480 		while (hammer_flusher_meta_halflimit(trans->hmp) ||
481 		       hammer_flusher_undo_exhausted(trans, 2)) {
482 			hammer_unlock_cursor(&cursor);
483 			hammer_flusher_wait(trans->hmp, seq);
484 			hammer_lock_cursor(&cursor);
485 			seq = hammer_flusher_async_one(trans->hmp);
486 		}
487 
488 		if (error == 0)
489 			error = hammer_btree_iterate(&cursor);
490 	}
491 	if (error == ENOENT)
492 		error = 0;
493 	hammer_done_cursor(&cursor);
494 	if (error == EDEADLK)
495 		goto retry;
496 failed:
497 	return(error);
498 }
499 
500 /*
501  * Helper function - perform rollback on a B-Tree element given trunc_tid.
502  *
503  * If create_tid >= trunc_tid the record is physically destroyed.
504  * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
505  */
506 static
507 int
508 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
509 {
510 	hammer_btree_leaf_elm_t elm;
511 	int error;
512 
513 	elm = &cursor->node->ondisk->elms[cursor->index].leaf;
514 	if (elm->base.create_tid < trunc_tid &&
515 	    elm->base.delete_tid < trunc_tid) {
516 		return(0);
517 	}
518 
519 	if (elm->base.create_tid >= trunc_tid) {
520 		error = hammer_delete_at_cursor(
521 				cursor, HAMMER_DELETE_DESTROY,
522 				cursor->trans->tid, cursor->trans->time32,
523 				1, NULL);
524 	} else if (elm->base.delete_tid >= trunc_tid) {
525 		error = hammer_delete_at_cursor(
526 				cursor, HAMMER_DELETE_ADJUST,
527 				0, 0,
528 				1, NULL);
529 	} else {
530 		error = 0;
531 	}
532 	return(error);
533 }
534 
535