xref: /dragonfly/sys/vfs/hammer/hammer_ioctl.c (revision ef3ac1d1)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.32 2008/11/13 02:23:29 dillon Exp $
35  */
36 
37 #include "hammer.h"
38 
39 static int hammer_ioc_gethistory(hammer_transaction_t trans, hammer_inode_t ip,
40 				struct hammer_ioc_history *hist);
41 static int hammer_ioc_synctid(hammer_transaction_t trans, hammer_inode_t ip,
42 				struct hammer_ioc_synctid *std);
43 static int hammer_ioc_get_version(hammer_transaction_t trans,
44 				hammer_inode_t ip,
45 				struct hammer_ioc_version *ver);
46 static int hammer_ioc_set_version(hammer_transaction_t trans,
47 				hammer_inode_t ip,
48 				struct hammer_ioc_version *ver);
49 static int hammer_ioc_get_info(hammer_transaction_t trans,
50 				struct hammer_ioc_info *info);
51 static int hammer_ioc_pfs_iterate(hammer_transaction_t trans,
52 				struct hammer_ioc_pfs_iterate *pi);
53 static int hammer_ioc_add_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
54 				struct hammer_ioc_snapshot *snap);
55 static int hammer_ioc_del_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
56 				struct hammer_ioc_snapshot *snap);
57 static int hammer_ioc_get_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
58 				struct hammer_ioc_snapshot *snap);
59 static int hammer_ioc_get_config(hammer_transaction_t trans, hammer_inode_t ip,
60 				struct hammer_ioc_config *snap);
61 static int hammer_ioc_set_config(hammer_transaction_t trans, hammer_inode_t ip,
62 				struct hammer_ioc_config *snap);
63 static int hammer_ioc_get_data(hammer_transaction_t trans, hammer_inode_t ip,
64 				struct hammer_ioc_data *data);
65 
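/*
 * Main HAMMER ioctl dispatcher.  The PRIV_HAMMER_IOCTL check is made once
 * up front; operations which modify the filesystem only run when the check
 * succeeded, while most query-style operations ignore its result.  Volume
 * add/del additionally require PRIV_HAMMER_VOLUME.  Every operation runs
 * inside a single hammer transaction.
 */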
66 int
67 hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
68 	     struct ucred *cred)
69 {
70 	struct hammer_transaction trans;
71 	int error;
72 
73 	error = priv_check_cred(cred, PRIV_HAMMER_IOCTL, 0);
74 
75 	hammer_start_transaction(&trans, ip->hmp);
76 
77 	switch(com) {
78 	case HAMMERIOC_PRUNE:
79 		if (error == 0) {
80 			error = hammer_ioc_prune(&trans, ip,
81 					(struct hammer_ioc_prune *)data);
82 		}
83 		break;
84 	case HAMMERIOC_GETHISTORY:
85 		error = hammer_ioc_gethistory(&trans, ip,
86 					(struct hammer_ioc_history *)data);
87 		break;
88 	case HAMMERIOC_REBLOCK:
89 		if (error == 0) {
90 			error = hammer_ioc_reblock(&trans, ip,
91 					(struct hammer_ioc_reblock *)data);
92 		}
93 		break;
94 	case HAMMERIOC_REBALANCE:
95 		/*
96 		 * Rebalancing needs to lock a lot of B-Tree nodes: a node's
97 		 * children and those children's children.  Systems with very
98 		 * little memory will not be able to do it.
99 		 */
100 		if (error == 0 && nbuf < HAMMER_REBALANCE_MIN_BUFS) {
101 			kprintf("hammer: System has insufficient buffers "
102 				"to rebalance the tree.  nbuf < %d\n",
103 				HAMMER_REBALANCE_MIN_BUFS);
104 			error = ENOSPC;
105 		}
106 		if (error == 0) {
107 			error = hammer_ioc_rebalance(&trans, ip,
108 					(struct hammer_ioc_rebalance *)data);
109 		}
110 		break;
111 	case HAMMERIOC_SYNCTID:
112 		error = hammer_ioc_synctid(&trans, ip,
113 					(struct hammer_ioc_synctid *)data);
114 		break;
115 	case HAMMERIOC_GET_PSEUDOFS:
116 		error = hammer_ioc_get_pseudofs(&trans, ip,
117 				    (struct hammer_ioc_pseudofs_rw *)data);
118 		break;
119 	case HAMMERIOC_SET_PSEUDOFS:
120 		if (error == 0) {
121 			error = hammer_ioc_set_pseudofs(&trans, ip, cred,
122 				    (struct hammer_ioc_pseudofs_rw *)data);
123 		}
124 		break;
125 	case HAMMERIOC_UPG_PSEUDOFS:
126 		if (error == 0) {
127 			error = hammer_ioc_upgrade_pseudofs(&trans, ip,
128 				    (struct hammer_ioc_pseudofs_rw *)data);
129 		}
130 		break;
131 	case HAMMERIOC_DGD_PSEUDOFS:
132 		if (error == 0) {
133 			error = hammer_ioc_downgrade_pseudofs(&trans, ip,
134 				    (struct hammer_ioc_pseudofs_rw *)data);
135 		}
136 		break;
137 	case HAMMERIOC_RMR_PSEUDOFS:
138 		if (error == 0) {
139 			error = hammer_ioc_destroy_pseudofs(&trans, ip,
140 				    (struct hammer_ioc_pseudofs_rw *)data);
141 		}
142 		break;
143 	case HAMMERIOC_WAI_PSEUDOFS:
144 		if (error == 0) {
145 			error = hammer_ioc_wait_pseudofs(&trans, ip,
146 				    (struct hammer_ioc_pseudofs_rw *)data);
147 		}
148 		break;
149 	case HAMMERIOC_MIRROR_READ:
150 		if (error == 0) {
151 			error = hammer_ioc_mirror_read(&trans, ip,
152 				    (struct hammer_ioc_mirror_rw *)data);
153 		}
154 		break;
155 	case HAMMERIOC_MIRROR_WRITE:
156 		if (error == 0) {
157 			error = hammer_ioc_mirror_write(&trans, ip,
158 				    (struct hammer_ioc_mirror_rw *)data);
159 		}
160 		break;
161 	case HAMMERIOC_GET_VERSION:
162 		error = hammer_ioc_get_version(&trans, ip,
163 				    (struct hammer_ioc_version *)data);
164 		break;
165 	case HAMMERIOC_GET_INFO:
166 		error = hammer_ioc_get_info(&trans,
167 				    (struct hammer_ioc_info *)data);
168 		break;
169 	case HAMMERIOC_SET_VERSION:
170 		if (error == 0) {
171 			error = hammer_ioc_set_version(&trans, ip,
172 					    (struct hammer_ioc_version *)data);
173 		}
174 		break;
175 	case HAMMERIOC_ADD_VOLUME:
176 		if (error == 0) {
177 			error = priv_check_cred(cred, PRIV_HAMMER_VOLUME, 0);
178 			if (error == 0)
179 				error = hammer_ioc_volume_add(&trans, ip,
180 					    (struct hammer_ioc_volume *)data);
181 		}
182 		break;
183 	case HAMMERIOC_DEL_VOLUME:
184 		if (error == 0) {
185 			error = priv_check_cred(cred, PRIV_HAMMER_VOLUME, 0);
186 			if (error == 0)
187 				error = hammer_ioc_volume_del(&trans, ip,
188 					    (struct hammer_ioc_volume *)data);
189 		}
190 		break;
191 	case HAMMERIOC_LIST_VOLUMES:
192 		error = hammer_ioc_volume_list(&trans, ip,
193 		    (struct hammer_ioc_volume_list *)data);
194 		break;
195 	case HAMMERIOC_ADD_SNAPSHOT:
196 		if (error == 0) {
197 			error = hammer_ioc_add_snapshot(
198 					&trans, ip, (struct hammer_ioc_snapshot *)data);
199 		}
200 		break;
201 	case HAMMERIOC_DEL_SNAPSHOT:
202 		if (error == 0) {
203 			error = hammer_ioc_del_snapshot(
204 					&trans, ip, (struct hammer_ioc_snapshot *)data);
205 		}
206 		break;
207 	case HAMMERIOC_GET_SNAPSHOT:
208 		error = hammer_ioc_get_snapshot(
209 					&trans, ip, (struct hammer_ioc_snapshot *)data);
210 		break;
211 	case HAMMERIOC_GET_CONFIG:
212 		error = hammer_ioc_get_config(
213 					&trans, ip, (struct hammer_ioc_config *)data);
214 		break;
215 	case HAMMERIOC_SET_CONFIG:
216 		if (error == 0) {
217 			error = hammer_ioc_set_config(
218 					&trans, ip, (struct hammer_ioc_config *)data);
219 		}
220 		break;
221 	case HAMMERIOC_DEDUP:
222 		if (error == 0) {
223 			error = hammer_ioc_dedup(
224 					&trans, ip, (struct hammer_ioc_dedup *)data);
225 		}
226 		break;
227 	case HAMMERIOC_GET_DATA:
228 		if (error == 0) {
229 			error = hammer_ioc_get_data(
230 					&trans, ip, (struct hammer_ioc_data *)data);
231 		}
232 		break;
233 	case HAMMERIOC_PFS_ITERATE:
234 		error = hammer_ioc_pfs_iterate(
235 			&trans, (struct hammer_ioc_pfs_iterate *)data);
236 		break;
237 	default:
238 		error = EOPNOTSUPP;
239 		break;
240 	}
241 	hammer_done_transaction(&trans);
242 	return (error);
243 }
244 
245 /*
246  * Iterate through an object's inode or an object's records and record
247  * modification TIDs.
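 *
 * Illustrative userland walk of one file offset's history (a sketch, not
 * compiled here; 'fd' and 'offset' are hypothetical: an open descriptor on
 * the file and the byte offset being queried).  The ioctl is re-issued,
 * advancing beg_tid, until EOF or NEXT_KEY is returned:
 *
 *	struct hammer_ioc_history hist;
 *	int i;
 *
 *	bzero(&hist, sizeof(hist));
 *	hist.beg_tid = HAMMER_MIN_TID;
 *	hist.end_tid = HAMMER_MAX_TID;
 *	hist.head.flags |= HAMMER_IOC_HISTORY_ATKEY;
 *	hist.key = offset;
 *	hist.nxt_key = HAMMER_MAX_KEY;
 *
 *	for (;;) {
 *		if (ioctl(fd, HAMMERIOC_GETHISTORY, &hist) < 0)
 *			break;
 *		for (i = 0; i < hist.count; ++i)
 *			printf("%016jx\n", (uintmax_t)hist.hist_ary[i].tid);
 *		if (hist.head.flags & (HAMMER_IOC_HISTORY_EOF |
 *				       HAMMER_IOC_HISTORY_NEXT_KEY))
 *			break;
 *		if ((hist.head.flags & HAMMER_IOC_HISTORY_NEXT_TID) == 0)
 *			break;
 *		hist.beg_tid = hist.nxt_tid;
 *	}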
248  */
249 static void add_history(hammer_inode_t ip, struct hammer_ioc_history *hist,
250 			hammer_btree_elm_t elm);
251 
252 static
253 int
254 hammer_ioc_gethistory(hammer_transaction_t trans, hammer_inode_t ip,
255 		      struct hammer_ioc_history *hist)
256 {
257 	struct hammer_cursor cursor;
258 	hammer_btree_elm_t elm;
259 	int error;
260 
261 	/*
262 	 * Validate the structure and initialize for return.
263 	 */
264 	if (hist->beg_tid > hist->end_tid)
265 		return(EINVAL);
266 	if (hist->head.flags & HAMMER_IOC_HISTORY_ATKEY) {
267 		if (hist->key > hist->nxt_key)
268 			return(EINVAL);
269 	}
270 
271 	hist->obj_id = ip->obj_id;
272 	hist->count = 0;
273 	hist->nxt_tid = hist->end_tid;
274 	hist->head.flags &= ~HAMMER_IOC_HISTORY_NEXT_TID;
275 	hist->head.flags &= ~HAMMER_IOC_HISTORY_NEXT_KEY;
276 	hist->head.flags &= ~HAMMER_IOC_HISTORY_EOF;
277 	hist->head.flags &= ~HAMMER_IOC_HISTORY_UNSYNCED;
278 	if ((ip->flags & HAMMER_INODE_MODMASK) &
279 	    ~(HAMMER_INODE_ATIME | HAMMER_INODE_MTIME)) {
280 		hist->head.flags |= HAMMER_IOC_HISTORY_UNSYNCED;
281 	}
282 
283 	/*
284 	 * Setup the cursor.  We can't handle undeletable records
285 	 * (create_tid of 0) at the moment.  A create_tid of 0 has
286 	 * a special meaning and cannot be specified in the cursor.
287 	 */
288 	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
289 	if (error) {
290 		hammer_done_cursor(&cursor);
291 		return(error);
292 	}
293 
294 	cursor.key_beg.obj_id = hist->obj_id;
295 	cursor.key_beg.create_tid = hist->beg_tid;
296 	cursor.key_beg.delete_tid = 0;
297 	cursor.key_beg.obj_type = 0;
298 	if (cursor.key_beg.create_tid == HAMMER_MIN_TID)
299 		cursor.key_beg.create_tid = 1;
300 
301 	cursor.key_end.obj_id = hist->obj_id;
302 	cursor.key_end.create_tid = hist->end_tid;
303 	cursor.key_end.delete_tid = 0;
304 	cursor.key_end.obj_type = 0;
305 
306 	cursor.flags |= HAMMER_CURSOR_END_EXCLUSIVE;
307 
308 	if (hist->head.flags & HAMMER_IOC_HISTORY_ATKEY) {
309 		/*
310 		 * key-range within the file.  For a regular file the
311 		 * on-disk key represents BASE+LEN, not BASE, so the
312 		 * first possible record containing the offset 'key'
313 		 * has an on-disk key of (key + 1).
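		 *
		 * For example (illustrative): a 16K data record covering
		 * file offsets 0-16383 is keyed at 16384, so a history
		 * request for offset 0 must begin the scan at key 1.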
314 		 */
315 		cursor.key_beg.key = hist->key;
316 		cursor.key_end.key = HAMMER_MAX_KEY;
317 		cursor.key_beg.localization = ip->obj_localization +
318 					      HAMMER_LOCALIZE_MISC;
319 		cursor.key_end.localization = ip->obj_localization +
320 					      HAMMER_LOCALIZE_MISC;
321 
322 		switch(ip->ino_data.obj_type) {
323 		case HAMMER_OBJTYPE_REGFILE:
324 			++cursor.key_beg.key;
325 			cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
326 			break;
327 		case HAMMER_OBJTYPE_DIRECTORY:
328 			cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
329 			cursor.key_beg.localization = ip->obj_localization +
330 						hammer_dir_localization(ip);
331 			cursor.key_end.localization = ip->obj_localization +
332 						hammer_dir_localization(ip);
333 			break;
334 		case HAMMER_OBJTYPE_DBFILE:
335 			cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
336 			break;
337 		default:
338 			error = EINVAL;
339 			break;
340 		}
341 		cursor.key_end.rec_type = cursor.key_beg.rec_type;
342 	} else {
343 		/*
344 		 * The inode itself.
345 		 */
346 		cursor.key_beg.key = 0;
347 		cursor.key_end.key = 0;
348 		cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
349 		cursor.key_end.rec_type = HAMMER_RECTYPE_INODE;
350 		cursor.key_beg.localization = ip->obj_localization +
351 					      HAMMER_LOCALIZE_INODE;
352 		cursor.key_end.localization = ip->obj_localization +
353 					      HAMMER_LOCALIZE_INODE;
354 	}
355 
356 	error = hammer_btree_first(&cursor);
357 	while (error == 0) {
358 		elm = &cursor.node->ondisk->elms[cursor.index];
359 
360 		add_history(ip, hist, elm);
361 		if (hist->head.flags & (HAMMER_IOC_HISTORY_NEXT_TID |
362 				        HAMMER_IOC_HISTORY_NEXT_KEY |
363 				        HAMMER_IOC_HISTORY_EOF)) {
364 			break;
365 		}
366 		error = hammer_btree_iterate(&cursor);
367 	}
368 	if (error == ENOENT) {
369 		hist->head.flags |= HAMMER_IOC_HISTORY_EOF;
370 		error = 0;
371 	}
372 	hammer_done_cursor(&cursor);
373 	return(error);
374 }
375 
376 /*
377  * Add the scanned element to the ioctl return structure.  Some special
378  * casing is required for regular files to accommodate how data ranges are
379  * stored on-disk.
380  */
381 static void
382 add_history(hammer_inode_t ip, struct hammer_ioc_history *hist,
383 	    hammer_btree_elm_t elm)
384 {
385 	int i;
386 
387 	if (elm->base.btype != HAMMER_BTREE_TYPE_RECORD)
388 		return;
389 	if ((hist->head.flags & HAMMER_IOC_HISTORY_ATKEY) &&
390 	    ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE) {
391 		/*
392 		 * Adjust nxt_key
393 		 */
394 		if (hist->nxt_key > elm->leaf.base.key - elm->leaf.data_len &&
395 		    hist->key < elm->leaf.base.key - elm->leaf.data_len) {
396 			hist->nxt_key = elm->leaf.base.key - elm->leaf.data_len;
397 		}
398 		if (hist->nxt_key > elm->leaf.base.key)
399 			hist->nxt_key = elm->leaf.base.key;
400 
401 		/*
402 		 * Record is beyond MAXPHYS; there won't be any more records
403 		 * in the iteration covering the requested offset (key).
404 		 */
405 		if (elm->leaf.base.key >= MAXPHYS &&
406 		    elm->leaf.base.key - MAXPHYS > hist->key) {
407 			hist->head.flags |= HAMMER_IOC_HISTORY_NEXT_KEY;
408 		}
409 
410 		/*
411 		 * Data-range of record does not cover the key.
412 		 */
413 		if (elm->leaf.base.key - elm->leaf.data_len > hist->key)
414 			return;
415 
416 	} else if (hist->head.flags & HAMMER_IOC_HISTORY_ATKEY) {
417 		/*
418 		 * Adjust nxt_key
419 		 */
420 		if (hist->nxt_key > elm->leaf.base.key &&
421 		    hist->key < elm->leaf.base.key) {
422 			hist->nxt_key = elm->leaf.base.key;
423 		}
424 
425 		/*
426 		 * Record is beyond the requested key.
427 		 */
428 		if (elm->leaf.base.key > hist->key)
429 			hist->head.flags |= HAMMER_IOC_HISTORY_NEXT_KEY;
430 	}
431 
432 	/*
433 	 * Add create_tid if it is in-bounds.
434 	 */
435 	i = hist->count;
436 	if ((i == 0 ||
437 	     elm->leaf.base.create_tid != hist->hist_ary[i - 1].tid) &&
438 	    elm->leaf.base.create_tid >= hist->beg_tid &&
439 	    elm->leaf.base.create_tid < hist->end_tid) {
440 		if (hist->count == HAMMER_MAX_HISTORY_ELMS) {
441 			hist->nxt_tid = elm->leaf.base.create_tid;
442 			hist->head.flags |= HAMMER_IOC_HISTORY_NEXT_TID;
443 			return;
444 		}
445 		hist->hist_ary[i].tid = elm->leaf.base.create_tid;
446 		hist->hist_ary[i].time32 = elm->leaf.create_ts;
447 		++hist->count;
448 	}
449 
450 	/*
451 	 * Add delete_tid if it is in-bounds.  Note that different portions
452 	 * of the history may have overlapping data ranges with different
453 	 * delete_tid's.  If this case occurs the delete_tid may match the
454 	 * create_tid of a following record.  XXX
455 	 *
456 	 *	[        ]
457 	 *            [     ]
458 	 */
459 	i = hist->count;
460 	if (elm->leaf.base.delete_tid &&
461 	    elm->leaf.base.delete_tid >= hist->beg_tid &&
462 	    elm->leaf.base.delete_tid < hist->end_tid) {
463 		if (i == HAMMER_MAX_HISTORY_ELMS) {
464 			hist->nxt_tid = elm->leaf.base.delete_tid;
465 			hist->head.flags |= HAMMER_IOC_HISTORY_NEXT_TID;
466 			return;
467 		}
468 		hist->hist_ary[i].tid = elm->leaf.base.delete_tid;
469 		hist->hist_ary[i].time32 = elm->leaf.delete_ts;
470 		++hist->count;
471 	}
472 }
473 
474 /*
475  * Acquire synchronization TID
476  */
477 static
478 int
479 hammer_ioc_synctid(hammer_transaction_t trans, hammer_inode_t ip,
480 		   struct hammer_ioc_synctid *std)
481 {
482 	hammer_mount_t hmp = ip->hmp;
483 	int error = 0;
484 
485 	switch(std->op) {
486 	case HAMMER_SYNCTID_NONE:
487 		std->tid = hmp->flusher.tid;	/* inaccurate */
488 		break;
489 	case HAMMER_SYNCTID_ASYNC:
490 		hammer_queue_inodes_flusher(hmp, MNT_NOWAIT);
491 		hammer_flusher_async(hmp, NULL);
492 		std->tid = hmp->flusher.tid;	/* inaccurate */
493 		break;
494 	case HAMMER_SYNCTID_SYNC1:
495 		hammer_queue_inodes_flusher(hmp, MNT_WAIT);
496 		hammer_flusher_sync(hmp);
497 		std->tid = hmp->flusher.tid;
498 		break;
499 	case HAMMER_SYNCTID_SYNC2:
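		/*
		 * Same as SYNC1 plus one more flush cycle after the TID
		 * has been sampled, so the sampled TID is itself covered
		 * by a completed flush.
		 */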
500 		hammer_queue_inodes_flusher(hmp, MNT_WAIT);
501 		hammer_flusher_sync(hmp);
502 		std->tid = hmp->flusher.tid;
503 		hammer_flusher_sync(hmp);
504 		break;
505 	default:
506 		error = EOPNOTSUPP;
507 		break;
508 	}
509 	return(error);
510 }
511 
512 /*
513  * Retrieve version info.
514  *
515  * Load min_version, wip_version, and max_version.  If cur_version is passed
516  * as 0 then load the current version into cur_version.  Load the description
517  * for cur_version into the description array.
518  *
519  * Returns 0 on success, EINVAL if cur_version is non-zero and set to an
520  * unsupported value.
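 *
 * Illustrative userland query (a sketch, not compiled here; 'fd' is a
 * hypothetical descriptor on the filesystem; leaving cur_version at 0
 * asks for the currently active version):
 *
 *	struct hammer_ioc_version ver;
 *
 *	bzero(&ver, sizeof(ver));
 *	if (ioctl(fd, HAMMERIOC_GET_VERSION, &ver) == 0)
 *		printf("%u (%s)\n", ver.cur_version, ver.description);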
521  */
522 static
523 int
524 hammer_ioc_get_version(hammer_transaction_t trans, hammer_inode_t ip,
525 		   struct hammer_ioc_version *ver)
526 {
527 	int error = 0;
528 
529 	ver->min_version = HAMMER_VOL_VERSION_MIN;
530 	ver->wip_version = HAMMER_VOL_VERSION_WIP;
531 	ver->max_version = HAMMER_VOL_VERSION_MAX;
532 	if (ver->cur_version == 0)
533 		ver->cur_version = trans->hmp->version;
534 	switch(ver->cur_version) {
535 	case 1:
536 		ksnprintf(ver->description, sizeof(ver->description),
537 			 "First HAMMER release (DragonFly 2.0+)");
538 		break;
539 	case 2:
540 		ksnprintf(ver->description, sizeof(ver->description),
541 			 "New directory entry layout (DragonFly 2.3+)");
542 		break;
543 	case 3:
544 		ksnprintf(ver->description, sizeof(ver->description),
545 			 "New snapshot management (DragonFly 2.5+)");
546 		break;
547 	case 4:
548 		ksnprintf(ver->description, sizeof(ver->description),
549 			 "New undo/flush, faster flush/sync (DragonFly 2.5+)");
550 		break;
551 	case 5:
552 		ksnprintf(ver->description, sizeof(ver->description),
553 			 "Adjustments for dedup support (DragonFly 2.9+)");
554 		break;
555 	case 6:
556 		ksnprintf(ver->description, sizeof(ver->description),
557 			  "Directory Hash ALG1 (tmp/rename resistance)");
558 		break;
559 	default:
560 		ksnprintf(ver->description, sizeof(ver->description),
561 			 "Unknown");
562 		error = EINVAL;
563 		break;
564 	}
565 	return(error);
566 }
567 
568 /*
569  * Set version info
570  */
571 static
572 int
573 hammer_ioc_set_version(hammer_transaction_t trans, hammer_inode_t ip,
574 		   struct hammer_ioc_version *ver)
575 {
576 	hammer_mount_t hmp = trans->hmp;
577 	struct hammer_cursor cursor;
578 	hammer_volume_t volume;
579 	int error;
580 	int over = hmp->version;
581 
582 	/*
583 	 * Generally do not allow downgrades.  However, version 4 can
584 	 * be downgraded to version 3.
585 	 */
586 	if (ver->cur_version < hmp->version) {
587 		if (!(ver->cur_version == 3 && hmp->version == 4))
588 			return(EINVAL);
589 	}
590 	if (ver->cur_version == hmp->version)
591 		return(0);
592 	if (ver->cur_version > HAMMER_VOL_VERSION_MAX)
593 		return(EINVAL);
594 	if (hmp->ronly)
595 		return(EROFS);
596 
597 	/*
598 	 * Update the root volume header and the version cached in
599 	 * the hammer_mount structure.
600 	 */
601 	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
602 	if (error)
603 		goto failed;
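	/*
	 * Hold the flusher finalize lock and the sync lock exclusively
	 * across the version switch and the root volume header update.
	 */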
604 	hammer_lock_ex(&hmp->flusher.finalize_lock);
605 	hammer_sync_lock_ex(trans);
606 	hmp->version = ver->cur_version;
607 
608 	/*
609 	 * If upgrading from version < 4 to version >= 4 the UNDO FIFO
610 	 * must be reinitialized.
611 	 */
612 	if (over < HAMMER_VOL_VERSION_FOUR &&
613 	    ver->cur_version >= HAMMER_VOL_VERSION_FOUR) {
614 		kprintf("upgrade undo to version 4\n");
615 		error = hammer_upgrade_undo_4(trans);
616 		if (error)
617 			goto failed;
618 	}
619 
620 	/*
621 	 * Adjust the version in the volume header
622 	 */
623 	volume = hammer_get_root_volume(hmp, &error);
624 	KKASSERT(error == 0);
625 	hammer_modify_volume_field(cursor.trans, volume, vol_version);
626 	volume->ondisk->vol_version = ver->cur_version;
627 	hammer_modify_volume_done(volume);
628 	hammer_rel_volume(volume, 0);
629 
630 	hammer_sync_unlock(trans);
631 	hammer_unlock(&hmp->flusher.finalize_lock);
632 failed:
633 	ver->head.error = error;
634 	hammer_done_cursor(&cursor);
635 	return(0);
636 }
637 
638 /*
639  * Get information
640  */
641 static
642 int
643 hammer_ioc_get_info(hammer_transaction_t trans, struct hammer_ioc_info *info)
644 {
645 	struct hammer_volume_ondisk	*od = trans->hmp->rootvol->ondisk;
646 	struct hammer_mount 		*hm = trans->hmp;
647 
648 	/* Fill the structure with the necessary information */
649 	_hammer_checkspace(hm, HAMMER_CHKSPC_WRITE, &info->rsvbigblocks);
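	/* _hammer_checkspace() reports reserved bytes; convert to big-block units */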
650 	info->rsvbigblocks = info->rsvbigblocks >> HAMMER_LARGEBLOCK_BITS;
651 	strlcpy(info->vol_name, od->vol_name, sizeof(info->vol_name));
652 
653 	info->vol_fsid = hm->fsid;
654 	info->vol_fstype = od->vol_fstype;
655 	info->version = hm->version;
656 
657 	info->inodes = od->vol0_stat_inodes;
658 	info->bigblocks = od->vol0_stat_bigblocks;
659 	info->freebigblocks = od->vol0_stat_freebigblocks;
660 	info->nvolumes = hm->nvolumes;
661 
662 	return 0;
663 }
664 
665 /*
666  * Add snapshot transaction id(s) to the list of snapshots.
667  *
668  * NOTE: Records are created with an allocated TID.  If a flush cycle
669  *	 is in progress the record may be synced in the current flush
670  *	 cycle and the volume header will reflect the allocation of the
671  *	 TID, but the synchronization point may not catch up to the
672  *	 TID until the next flush cycle.
673  */
674 static
675 int
676 hammer_ioc_add_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
677 			struct hammer_ioc_snapshot *snap)
678 {
679 	hammer_mount_t hmp = ip->hmp;
680 	struct hammer_btree_leaf_elm leaf;
681 	struct hammer_cursor cursor;
682 	int error;
683 
684 	/*
685 	 * Validate structure
686 	 */
687 	if (snap->count > HAMMER_SNAPS_PER_IOCTL)
688 		return (EINVAL);
689 	if (snap->index > snap->count)
690 		return (EINVAL);
691 
692 	hammer_lock_ex(&hmp->snapshot_lock);
693 again:
694 	/*
695 	 * Set up a cursor for the snapshot meta-records stored under
696 	 * the PFS root object.
697 	 */
698 	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
699 	if (error) {
700 		hammer_done_cursor(&cursor);
701 		return(error);
702 	}
703 
704 	cursor.asof = HAMMER_MAX_TID;
705 	cursor.flags |= HAMMER_CURSOR_BACKEND | HAMMER_CURSOR_ASOF;
706 
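	/*
	 * Template for the snapshot meta-records: one record is created
	 * under the PFS root object for each TID passed in, keyed by the
	 * snapshot TID itself.
	 */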
707 	bzero(&leaf, sizeof(leaf));
708 	leaf.base.obj_id = HAMMER_OBJID_ROOT;
709 	leaf.base.rec_type = HAMMER_RECTYPE_SNAPSHOT;
710 	leaf.base.create_tid = hammer_alloc_tid(hmp, 1);
711 	leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
712 	leaf.base.localization = ip->obj_localization + HAMMER_LOCALIZE_INODE;
713 	leaf.data_len = sizeof(struct hammer_snapshot_data);
714 
715 	while (snap->index < snap->count) {
716 		leaf.base.key = (int64_t)snap->snaps[snap->index].tid;
717 		cursor.key_beg = leaf.base;
718 		error = hammer_btree_lookup(&cursor);
719 		if (error == 0) {
720 			error = EEXIST;
721 			break;
722 		}
723 
724 		/*
725 		 * NOTE: Must reload key_beg after an ASOF search because
726 		 *	 the create_tid may have been modified during the
727 		 *	 search.
728 		 */
729 		cursor.flags &= ~HAMMER_CURSOR_ASOF;
730 		cursor.key_beg = leaf.base;
731 		error = hammer_create_at_cursor(&cursor, &leaf,
732 						&snap->snaps[snap->index],
733 						HAMMER_CREATE_MODE_SYS);
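		/*
		 * A B-Tree deadlock requires tearing the cursor down and
		 * retrying from scratch; snap->index preserves the progress
		 * already made.
		 */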
734 		if (error == EDEADLK) {
735 			hammer_done_cursor(&cursor);
736 			goto again;
737 		}
738 		cursor.flags |= HAMMER_CURSOR_ASOF;
739 		if (error)
740 			break;
741 		++snap->index;
742 	}
743 	snap->head.error = error;
744 	hammer_done_cursor(&cursor);
745 	hammer_unlock(&hmp->snapshot_lock);
746 	return(0);
747 }
748 
749 /*
750  * Delete snapshot transaction id(s) from the list of snapshots.
751  */
752 static
753 int
754 hammer_ioc_del_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
755 			struct hammer_ioc_snapshot *snap)
756 {
757 	hammer_mount_t hmp = ip->hmp;
758 	struct hammer_cursor cursor;
759 	int error;
760 
761 	/*
762 	 * Validate structure
763 	 */
764 	if (snap->count > HAMMER_SNAPS_PER_IOCTL)
765 		return (EINVAL);
766 	if (snap->index > snap->count)
767 		return (EINVAL);
768 
769 	hammer_lock_ex(&hmp->snapshot_lock);
770 again:
771 	/*
772 	 * Set up a cursor for the snapshot meta-records stored under
773 	 * the PFS root object.
774 	 */
775 	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
776 	if (error) {
777 		hammer_done_cursor(&cursor);
778 		return(error);
779 	}
780 
781 	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
782 	cursor.key_beg.create_tid = 0;
783 	cursor.key_beg.delete_tid = 0;
784 	cursor.key_beg.obj_type = 0;
785 	cursor.key_beg.rec_type = HAMMER_RECTYPE_SNAPSHOT;
786 	cursor.key_beg.localization = ip->obj_localization + HAMMER_LOCALIZE_INODE;
787 	cursor.asof = HAMMER_MAX_TID;
788 	cursor.flags |= HAMMER_CURSOR_ASOF;
789 
790 	while (snap->index < snap->count) {
791 		cursor.key_beg.key = (int64_t)snap->snaps[snap->index].tid;
792 		error = hammer_btree_lookup(&cursor);
793 		if (error)
794 			break;
795 		error = hammer_btree_extract(&cursor, HAMMER_CURSOR_GET_LEAF);
796 		if (error)
797 			break;
798 		error = hammer_delete_at_cursor(&cursor, HAMMER_DELETE_DESTROY,
799 						0, 0, 0, NULL);
800 		if (error == EDEADLK) {
801 			hammer_done_cursor(&cursor);
802 			goto again;
803 		}
804 		if (error)
805 			break;
806 		++snap->index;
807 	}
808 	snap->head.error = error;
809 	hammer_done_cursor(&cursor);
810 	hammer_unlock(&hmp->snapshot_lock);
811 	return(0);
812 }
813 
814 /*
815  * Retrieve as many snapshot ids as possible or until the array is
816  * full, starting after the last transaction id passed in.  If count
817  * is 0 we retrieve starting at the beginning.
818  *
819  * NOTE: Because the b-tree key field is signed but transaction ids
820  *       are unsigned, the returned list will be signed-sorted instead
821  *	 of unsigned-sorted.  The caller must still sort the aggregate
822  *	 results.
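 *
 * Illustrative userland iteration (a sketch, not compiled here; 'fd' is a
 * hypothetical descriptor on a file or directory within the PFS):
 *
 *	struct hammer_ioc_snapshot snap;
 *	int i;
 *
 *	bzero(&snap, sizeof(snap));
 *	do {
 *		if (ioctl(fd, HAMMERIOC_GET_SNAPSHOT, &snap) < 0 ||
 *		    snap.head.error)
 *			break;
 *		for (i = 0; i < snap.count; ++i)
 *			printf("%016jx\n", (uintmax_t)snap.snaps[i].tid);
 *	} while ((snap.head.flags & HAMMER_IOC_SNAPSHOT_EOF) == 0);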
823  */
824 static
825 int
826 hammer_ioc_get_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
827 			struct hammer_ioc_snapshot *snap)
828 {
829 	struct hammer_cursor cursor;
830 	int error;
831 
832 	/*
833 	 * Validate structure
834 	 */
835 	if (snap->index != 0)
836 		return (EINVAL);
837 	if (snap->count > HAMMER_SNAPS_PER_IOCTL)
838 		return (EINVAL);
839 
840 	/*
841 	 * Look for keys starting after the previous iteration, or at
842 	 * the beginning if snap->count is 0.
843 	 */
844 	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
845 	if (error) {
846 		hammer_done_cursor(&cursor);
847 		return(error);
848 	}
849 
850 	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
851 	cursor.key_beg.create_tid = 0;
852 	cursor.key_beg.delete_tid = 0;
853 	cursor.key_beg.obj_type = 0;
854 	cursor.key_beg.rec_type = HAMMER_RECTYPE_SNAPSHOT;
855 	cursor.key_beg.localization = ip->obj_localization + HAMMER_LOCALIZE_INODE;
856 	if (snap->count == 0)
857 		cursor.key_beg.key = HAMMER_MIN_KEY;
858 	else
859 		cursor.key_beg.key = (int64_t)snap->snaps[snap->count - 1].tid + 1;
860 
861 	cursor.key_end = cursor.key_beg;
862 	cursor.key_end.key = HAMMER_MAX_KEY;
863 	cursor.asof = HAMMER_MAX_TID;
864 	cursor.flags |= HAMMER_CURSOR_END_EXCLUSIVE | HAMMER_CURSOR_ASOF;
865 
866 	snap->count = 0;
867 
868 	error = hammer_btree_first(&cursor);
869 	while (error == 0 && snap->count < HAMMER_SNAPS_PER_IOCTL) {
870 		error = hammer_btree_extract(&cursor, HAMMER_CURSOR_GET_LEAF);
871 		if (error)
872 			break;
873 		if (cursor.leaf->base.rec_type == HAMMER_RECTYPE_SNAPSHOT) {
874 			error = hammer_btree_extract(
875 					     &cursor, HAMMER_CURSOR_GET_LEAF |
876 						      HAMMER_CURSOR_GET_DATA);
877 			snap->snaps[snap->count] = cursor.data->snap;
878 
879 			/*
880 			 * The snap data tid should match the key but might
881 			 * not due to a bug in the HAMMER v3 conversion code.
882 			 *
883 			 * This error will work itself out over time but we
884 			 * have to force a match or the snapshot will not
885 			 * be deletable.
886 			 */
887 			if (cursor.data->snap.tid !=
888 			    (hammer_tid_t)cursor.leaf->base.key) {
889 				kprintf("HAMMER: lo=%08x snapshot key "
890 					"0x%016jx data mismatch 0x%016jx\n",
891 					cursor.key_beg.localization,
892 					(uintmax_t)cursor.data->snap.tid,
893 					cursor.leaf->base.key);
894 				kprintf("HAMMER: Probably left over from the "
895 					"original v3 conversion, hammer "
896 					"cleanup should get it eventually\n");
897 				snap->snaps[snap->count].tid =
898 					cursor.leaf->base.key;
899 			}
900 			++snap->count;
901 		}
902 		error = hammer_btree_iterate(&cursor);
903 	}
904 
905 	if (error == ENOENT) {
906 		snap->head.flags |= HAMMER_IOC_SNAPSHOT_EOF;
907 		error = 0;
908 	}
909 	snap->head.error = error;
910 	hammer_done_cursor(&cursor);
911 	return(0);
912 }
913 
914 /*
915  * Retrieve the PFS hammer cleanup utility config record.  This is
916  * different from (and newer than) the PFS config.
917  */
918 static
919 int
920 hammer_ioc_get_config(hammer_transaction_t trans, hammer_inode_t ip,
921 			struct hammer_ioc_config *config)
922 {
923 	struct hammer_cursor cursor;
924 	int error;
925 
926 	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
927 	if (error) {
928 		hammer_done_cursor(&cursor);
929 		return(error);
930 	}
931 
932 	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
933 	cursor.key_beg.create_tid = 0;
934 	cursor.key_beg.delete_tid = 0;
935 	cursor.key_beg.obj_type = 0;
936 	cursor.key_beg.rec_type = HAMMER_RECTYPE_CONFIG;
937 	cursor.key_beg.localization = ip->obj_localization + HAMMER_LOCALIZE_INODE;
938 	cursor.key_beg.key = 0;		/* config space page 0 */
939 
940 	cursor.asof = HAMMER_MAX_TID;
941 	cursor.flags |= HAMMER_CURSOR_ASOF;
942 
943 	error = hammer_btree_lookup(&cursor);
944 	if (error == 0) {
945 		error = hammer_btree_extract(&cursor, HAMMER_CURSOR_GET_LEAF |
946 						      HAMMER_CURSOR_GET_DATA);
947 		if (error == 0)
948 			config->config = cursor.data->config;
949 	}
950 	/* error can be ENOENT */
951 	config->head.error = error;
952 	hammer_done_cursor(&cursor);
953 	return(0);
954 }
955 
956 /*
957  * Set (replace) the PFS hammer cleanup utility config record.  This is
958  * different from (and newer than) the PFS config.
959  *
960  * This is kind of a hack.
961  */
962 static
963 int
964 hammer_ioc_set_config(hammer_transaction_t trans, hammer_inode_t ip,
965 			struct hammer_ioc_config *config)
966 {
967 	struct hammer_btree_leaf_elm leaf;
968 	struct hammer_cursor cursor;
969 	hammer_mount_t hmp = ip->hmp;
970 	int error;
971 
972 again:
973 	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
974 	if (error) {
975 		hammer_done_cursor(&cursor);
976 		return(error);
977 	}
978 
979 	bzero(&leaf, sizeof(leaf));
980 	leaf.base.obj_id = HAMMER_OBJID_ROOT;
981 	leaf.base.rec_type = HAMMER_RECTYPE_CONFIG;
982 	leaf.base.create_tid = hammer_alloc_tid(hmp, 1);
983 	leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
984 	leaf.base.localization = ip->obj_localization + HAMMER_LOCALIZE_INODE;
985 	leaf.base.key = 0;	/* page 0 */
986 	leaf.data_len = sizeof(struct hammer_config_data);
987 
988 	cursor.key_beg = leaf.base;
989 
990 	cursor.asof = HAMMER_MAX_TID;
991 	cursor.flags |= HAMMER_CURSOR_BACKEND | HAMMER_CURSOR_ASOF;
992 
993 	error = hammer_btree_lookup(&cursor);
994 	if (error == 0) {
995 		error = hammer_btree_extract(&cursor, HAMMER_CURSOR_GET_LEAF |
996 						      HAMMER_CURSOR_GET_DATA);
997 		error = hammer_delete_at_cursor(&cursor, HAMMER_DELETE_DESTROY,
998 						0, 0, 0, NULL);
999 		if (error == EDEADLK) {
1000 			hammer_done_cursor(&cursor);
1001 			goto again;
1002 		}
1003 	}
1004 	if (error == ENOENT)
1005 		error = 0;
1006 	if (error == 0) {
1007 		/*
1008 		 * NOTE: Must reload key_beg after an ASOF search because
1009 		 *	 the create_tid may have been modified during the
1010 		 *	 search.
1011 		 */
1012 		cursor.flags &= ~HAMMER_CURSOR_ASOF;
1013 		cursor.key_beg = leaf.base;
1014 		error = hammer_create_at_cursor(&cursor, &leaf,
1015 						&config->config,
1016 						HAMMER_CREATE_MODE_SYS);
1017 		if (error == EDEADLK) {
1018 			hammer_done_cursor(&cursor);
1019 			goto again;
1020 		}
1021 	}
1022 	config->head.error = error;
1023 	hammer_done_cursor(&cursor);
1024 	return(0);
1025 }
1026 
1027 static
1028 int
1029 hammer_ioc_pfs_iterate(hammer_transaction_t trans,
1030     struct hammer_ioc_pfs_iterate *pi)
1031 {
1032 	struct hammer_cursor cursor;
1033 	hammer_inode_t ip;
1034 	int error;
1035 
1036 	ip = hammer_get_inode(trans, NULL, HAMMER_OBJID_ROOT, HAMMER_MAX_TID,
1037 	    HAMMER_DEF_LOCALIZATION, 0, &error);
1038 
1039 	error = hammer_init_cursor(trans, &cursor,
1040 	    (ip ? &ip->cache[1] : NULL), ip);
1041 	if (error)
1042 		goto out;
1043 
1044 	pi->head.flags &= ~HAMMER_PFSD_DELETED;
1045 
1046 	cursor.key_beg.localization = HAMMER_DEF_LOCALIZATION +
1047 	    HAMMER_LOCALIZE_MISC;
1048 	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
1049 	cursor.key_beg.create_tid = 0;
1050 	cursor.key_beg.delete_tid = 0;
1051 	cursor.key_beg.rec_type = HAMMER_RECTYPE_PFS;
1052 	cursor.key_beg.obj_type = 0;
1053 	cursor.key_end = cursor.key_beg;
1054 	cursor.key_end.key = HAMMER_MAX_KEY;
1055 	cursor.asof = HAMMER_MAX_TID;
1056 	cursor.flags |= HAMMER_CURSOR_ASOF;
1057 
1058 	if (pi->pos < 0)	/* Sanity check */
1059 		pi->pos = 0;
1060 
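	/*
	 * The B-Tree key of a PFS record carries the PFS id in its upper
	 * bits (key = id << 16), so translate pi->pos into a lookup key
	 * here and translate the result back after the lookup.
	 */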
1061 	pi->pos <<= 16;
1062 	cursor.key_beg.key = pi->pos;
1063 	error = hammer_ip_lookup(&cursor);
1064 
1065 	if (error == 0) {
1066 		error = hammer_ip_resolve_data(&cursor);
1067 		if (error)
1068 			goto out;
1069 		if (cursor.data->pfsd.mirror_flags & HAMMER_PFSD_DELETED)
1070 			pi->head.flags |= HAMMER_PFSD_DELETED;
1071 		else
1072 			copyout(cursor.data, pi->ondisk, cursor.leaf->data_len);
1073 		pi->pos = (u_int32_t)(cursor.leaf->base.key >> 16);
1074 	}
1075 
1076 out:
1077 	hammer_done_cursor(&cursor);
1078 	if (ip)
1079 		hammer_rel_inode(ip, 0);
1080 
1081 	return (error);
1082 }
1083 
1084 static
1085 int
1086 hammer_ioc_get_data(hammer_transaction_t trans, hammer_inode_t ip,
1087 			struct hammer_ioc_data *data)
1088 {
1089 	struct hammer_cursor cursor;
1090 	int bytes;
1091 	int error;
1092 
1093 	/* XXX cached inode ? */
1094 	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
1095 	if (error)
1096 		goto failed;
1097 
1098 	cursor.key_beg = data->elm;
1099 	cursor.flags |= HAMMER_CURSOR_BACKEND;
1100 
1101 	error = hammer_btree_lookup(&cursor);
1102 	if (error == 0) {
1103 		error = hammer_btree_extract(&cursor, HAMMER_CURSOR_GET_LEAF |
1104 						      HAMMER_CURSOR_GET_DATA);
1105 		if (error == 0) {
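			/*
			 * Return the B-Tree leaf and copy out at most
			 * data->size bytes of the record's data to the
			 * caller's buffer.
			 */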
1106 			data->leaf = *cursor.leaf;
1107 			bytes = cursor.leaf->data_len;
1108 			if (bytes > data->size)
1109 				bytes = data->size;
1110 			error = copyout(cursor.data, data->ubuf, bytes);
1111 		}
1112 	}
1113 
1114 failed:
1115 	hammer_done_cursor(&cursor);
1116 	return (error);
1117 }
1118