xref: /dragonfly/sys/vfs/hammer/hammer_ioctl.c (revision 650094e1)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.32 2008/11/13 02:23:29 dillon Exp $
35  */
36 
37 #include "hammer.h"
38 
39 static int hammer_ioc_gethistory(hammer_transaction_t trans, hammer_inode_t ip,
40 				struct hammer_ioc_history *hist);
41 static int hammer_ioc_synctid(hammer_transaction_t trans, hammer_inode_t ip,
42 				struct hammer_ioc_synctid *std);
43 static int hammer_ioc_get_version(hammer_transaction_t trans,
44 				hammer_inode_t ip,
45 				struct hammer_ioc_version *ver);
46 static int hammer_ioc_set_version(hammer_transaction_t trans,
47 				hammer_inode_t ip,
48 				struct hammer_ioc_version *ver);
49 static int hammer_ioc_get_info(hammer_transaction_t trans,
50 				struct hammer_ioc_info *info);
51 static int hammer_ioc_add_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
52 				struct hammer_ioc_snapshot *snap);
53 static int hammer_ioc_del_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
54 				struct hammer_ioc_snapshot *snap);
55 static int hammer_ioc_get_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
56 				struct hammer_ioc_snapshot *snap);
57 static int hammer_ioc_get_config(hammer_transaction_t trans, hammer_inode_t ip,
58 				struct hammer_ioc_config *snap);
59 static int hammer_ioc_set_config(hammer_transaction_t trans, hammer_inode_t ip,
60 				struct hammer_ioc_config *snap);
61 static int hammer_ioc_get_data(hammer_transaction_t trans, hammer_inode_t ip,
62 				struct hammer_ioc_data *data);
63 
/*
 * Dispatch a HAMMER-specific ioctl.
 *
 * The PRIV_HAMMER_IOCTL privilege check is performed once up front and
 * its result is cached in 'error'.  Read-only query operations
 * (GETHISTORY, SYNCTID, GET_PSEUDOFS, GET_VERSION, GET_INFO,
 * LIST_VOLUMES, GET_SNAPSHOT, GET_CONFIG) deliberately ignore that
 * result so unprivileged users may query the filesystem; all modifying
 * operations only run when the check succeeded.  Volume add/delete
 * additionally require PRIV_HAMMER_VOLUME.
 *
 * Every operation runs inside a transaction which is always terminated
 * before returning, even on error.
 */
int
hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
	     struct ucred *cred)
{
	struct hammer_transaction trans;
	int error;

	/*
	 * Cache the privilege-check result; each case below decides
	 * whether it matters.
	 */
	error = priv_check_cred(cred, PRIV_HAMMER_IOCTL, 0);

	hammer_start_transaction(&trans, ip->hmp);

	switch(com) {
	case HAMMERIOC_PRUNE:
		if (error == 0) {
			error = hammer_ioc_prune(&trans, ip,
					(struct hammer_ioc_prune *)data);
		}
		break;
	case HAMMERIOC_GETHISTORY:
		/* read-only: no privilege required */
		error = hammer_ioc_gethistory(&trans, ip,
					(struct hammer_ioc_history *)data);
		break;
	case HAMMERIOC_REBLOCK:
		if (error == 0) {
			error = hammer_ioc_reblock(&trans, ip,
					(struct hammer_ioc_reblock *)data);
		}
		break;
	case HAMMERIOC_REBALANCE:
		/*
		 * Rebalancing needs to lock a lot of B-Tree nodes.  The
		 * children and children's children.  Systems with very
		 * little memory will not be able to do it.
		 */
		if (error == 0 && nbuf < HAMMER_REBALANCE_MIN_BUFS) {
			kprintf("hammer: System has insufficient buffers "
				"to rebalance the tree.  nbuf < %d\n",
				HAMMER_REBALANCE_MIN_BUFS);
			error = ENOSPC;
		}
		if (error == 0) {
			error = hammer_ioc_rebalance(&trans, ip,
					(struct hammer_ioc_rebalance *)data);
		}
		break;
	case HAMMERIOC_SYNCTID:
		/* read-only: no privilege required */
		error = hammer_ioc_synctid(&trans, ip,
					(struct hammer_ioc_synctid *)data);
		break;
	case HAMMERIOC_GET_PSEUDOFS:
		/* read-only: no privilege required */
		error = hammer_ioc_get_pseudofs(&trans, ip,
				    (struct hammer_ioc_pseudofs_rw *)data);
		break;
	case HAMMERIOC_SET_PSEUDOFS:
		if (error == 0) {
			error = hammer_ioc_set_pseudofs(&trans, ip, cred,
				    (struct hammer_ioc_pseudofs_rw *)data);
		}
		break;
	case HAMMERIOC_UPG_PSEUDOFS:
		if (error == 0) {
			error = hammer_ioc_upgrade_pseudofs(&trans, ip,
				    (struct hammer_ioc_pseudofs_rw *)data);
		}
		break;
	case HAMMERIOC_DGD_PSEUDOFS:
		if (error == 0) {
			error = hammer_ioc_downgrade_pseudofs(&trans, ip,
				    (struct hammer_ioc_pseudofs_rw *)data);
		}
		break;
	case HAMMERIOC_RMR_PSEUDOFS:
		if (error == 0) {
			error = hammer_ioc_destroy_pseudofs(&trans, ip,
				    (struct hammer_ioc_pseudofs_rw *)data);
		}
		break;
	case HAMMERIOC_WAI_PSEUDOFS:
		if (error == 0) {
			error = hammer_ioc_wait_pseudofs(&trans, ip,
				    (struct hammer_ioc_pseudofs_rw *)data);
		}
		break;
	case HAMMERIOC_MIRROR_READ:
		if (error == 0) {
			error = hammer_ioc_mirror_read(&trans, ip,
				    (struct hammer_ioc_mirror_rw *)data);
		}
		break;
	case HAMMERIOC_MIRROR_WRITE:
		if (error == 0) {
			error = hammer_ioc_mirror_write(&trans, ip,
				    (struct hammer_ioc_mirror_rw *)data);
		}
		break;
	case HAMMERIOC_GET_VERSION:
		/* read-only: no privilege required */
		error = hammer_ioc_get_version(&trans, ip,
				    (struct hammer_ioc_version *)data);
		break;
	case HAMMERIOC_GET_INFO:
		/* read-only: no privilege required */
		error = hammer_ioc_get_info(&trans,
				    (struct hammer_ioc_info *)data);
		break;
	case HAMMERIOC_SET_VERSION:
		if (error == 0) {
			error = hammer_ioc_set_version(&trans, ip,
					    (struct hammer_ioc_version *)data);
		}
		break;
	case HAMMERIOC_ADD_VOLUME:
		/* requires the additional PRIV_HAMMER_VOLUME privilege */
		if (error == 0) {
			error = priv_check_cred(cred, PRIV_HAMMER_VOLUME, 0);
			if (error == 0)
				error = hammer_ioc_volume_add(&trans, ip,
					    (struct hammer_ioc_volume *)data);
		}
		break;
	case HAMMERIOC_DEL_VOLUME:
		/* requires the additional PRIV_HAMMER_VOLUME privilege */
		if (error == 0) {
			error = priv_check_cred(cred, PRIV_HAMMER_VOLUME, 0);
			if (error == 0)
				error = hammer_ioc_volume_del(&trans, ip,
					    (struct hammer_ioc_volume *)data);
		}
		break;
	case HAMMERIOC_LIST_VOLUMES:
		/* read-only: no privilege required */
		error = hammer_ioc_volume_list(&trans, ip,
		    (struct hammer_ioc_volume_list *)data);
		break;
	case HAMMERIOC_ADD_SNAPSHOT:
		if (error == 0) {
			error = hammer_ioc_add_snapshot(
					&trans, ip, (struct hammer_ioc_snapshot *)data);
		}
		break;
	case HAMMERIOC_DEL_SNAPSHOT:
		if (error == 0) {
			error = hammer_ioc_del_snapshot(
					&trans, ip, (struct hammer_ioc_snapshot *)data);
		}
		break;
	case HAMMERIOC_GET_SNAPSHOT:
		/* read-only: no privilege required */
		error = hammer_ioc_get_snapshot(
					&trans, ip, (struct hammer_ioc_snapshot *)data);
		break;
	case HAMMERIOC_GET_CONFIG:
		/* read-only: no privilege required */
		error = hammer_ioc_get_config(
					&trans, ip, (struct hammer_ioc_config *)data);
		break;
	case HAMMERIOC_SET_CONFIG:
		if (error == 0) {
			error = hammer_ioc_set_config(
					&trans, ip, (struct hammer_ioc_config *)data);
		}
		break;
	case HAMMERIOC_DEDUP:
		if (error == 0) {
			error = hammer_ioc_dedup(
					&trans, ip, (struct hammer_ioc_dedup *)data);
		}
		break;
	case HAMMERIOC_GET_DATA:
		if (error == 0) {
			error = hammer_ioc_get_data(
					&trans, ip, (struct hammer_ioc_data *)data);
		}
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	hammer_done_transaction(&trans);
	return (error);
}
238 
239 /*
240  * Iterate through an object's inode or an object's records and record
241  * modification TIDs.
242  */
243 static void add_history(hammer_inode_t ip, struct hammer_ioc_history *hist,
244 			hammer_btree_elm_t elm);
245 
246 static
247 int
248 hammer_ioc_gethistory(hammer_transaction_t trans, hammer_inode_t ip,
249 		      struct hammer_ioc_history *hist)
250 {
251 	struct hammer_cursor cursor;
252 	hammer_btree_elm_t elm;
253 	int error;
254 
255 	/*
256 	 * Validate the structure and initialize for return.
257 	 */
258 	if (hist->beg_tid > hist->end_tid)
259 		return(EINVAL);
260 	if (hist->head.flags & HAMMER_IOC_HISTORY_ATKEY) {
261 		if (hist->key > hist->nxt_key)
262 			return(EINVAL);
263 	}
264 
265 	hist->obj_id = ip->obj_id;
266 	hist->count = 0;
267 	hist->nxt_tid = hist->end_tid;
268 	hist->head.flags &= ~HAMMER_IOC_HISTORY_NEXT_TID;
269 	hist->head.flags &= ~HAMMER_IOC_HISTORY_NEXT_KEY;
270 	hist->head.flags &= ~HAMMER_IOC_HISTORY_EOF;
271 	hist->head.flags &= ~HAMMER_IOC_HISTORY_UNSYNCED;
272 	if ((ip->flags & HAMMER_INODE_MODMASK) &
273 	    ~(HAMMER_INODE_ATIME | HAMMER_INODE_MTIME)) {
274 		hist->head.flags |= HAMMER_IOC_HISTORY_UNSYNCED;
275 	}
276 
277 	/*
278 	 * Setup the cursor.  We can't handle undeletable records
279 	 * (create_tid of 0) at the moment.  A create_tid of 0 has
280 	 * a special meaning and cannot be specified in the cursor.
281 	 */
282 	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
283 	if (error) {
284 		hammer_done_cursor(&cursor);
285 		return(error);
286 	}
287 
288 	cursor.key_beg.obj_id = hist->obj_id;
289 	cursor.key_beg.create_tid = hist->beg_tid;
290 	cursor.key_beg.delete_tid = 0;
291 	cursor.key_beg.obj_type = 0;
292 	if (cursor.key_beg.create_tid == HAMMER_MIN_TID)
293 		cursor.key_beg.create_tid = 1;
294 
295 	cursor.key_end.obj_id = hist->obj_id;
296 	cursor.key_end.create_tid = hist->end_tid;
297 	cursor.key_end.delete_tid = 0;
298 	cursor.key_end.obj_type = 0;
299 
300 	cursor.flags |= HAMMER_CURSOR_END_EXCLUSIVE;
301 
302 	if (hist->head.flags & HAMMER_IOC_HISTORY_ATKEY) {
303 		/*
304 		 * key-range within the file.  For a regular file the
305 		 * on-disk key represents BASE+LEN, not BASE, so the
306 		 * first possible record containing the offset 'key'
307 		 * has an on-disk key of (key + 1).
308 		 */
309 		cursor.key_beg.key = hist->key;
310 		cursor.key_end.key = HAMMER_MAX_KEY;
311 		cursor.key_beg.localization = ip->obj_localization +
312 					      HAMMER_LOCALIZE_MISC;
313 		cursor.key_end.localization = ip->obj_localization +
314 					      HAMMER_LOCALIZE_MISC;
315 
316 		switch(ip->ino_data.obj_type) {
317 		case HAMMER_OBJTYPE_REGFILE:
318 			++cursor.key_beg.key;
319 			cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
320 			break;
321 		case HAMMER_OBJTYPE_DIRECTORY:
322 			cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
323 			cursor.key_beg.localization = ip->obj_localization +
324 						hammer_dir_localization(ip);
325 			cursor.key_end.localization = ip->obj_localization +
326 						hammer_dir_localization(ip);
327 			break;
328 		case HAMMER_OBJTYPE_DBFILE:
329 			cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
330 			break;
331 		default:
332 			error = EINVAL;
333 			break;
334 		}
335 		cursor.key_end.rec_type = cursor.key_beg.rec_type;
336 	} else {
337 		/*
338 		 * The inode itself.
339 		 */
340 		cursor.key_beg.key = 0;
341 		cursor.key_end.key = 0;
342 		cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
343 		cursor.key_end.rec_type = HAMMER_RECTYPE_INODE;
344 		cursor.key_beg.localization = ip->obj_localization +
345 					      HAMMER_LOCALIZE_INODE;
346 		cursor.key_end.localization = ip->obj_localization +
347 					      HAMMER_LOCALIZE_INODE;
348 	}
349 
350 	error = hammer_btree_first(&cursor);
351 	while (error == 0) {
352 		elm = &cursor.node->ondisk->elms[cursor.index];
353 
354 		add_history(ip, hist, elm);
355 		if (hist->head.flags & (HAMMER_IOC_HISTORY_NEXT_TID |
356 				        HAMMER_IOC_HISTORY_NEXT_KEY |
357 				        HAMMER_IOC_HISTORY_EOF)) {
358 			break;
359 		}
360 		error = hammer_btree_iterate(&cursor);
361 	}
362 	if (error == ENOENT) {
363 		hist->head.flags |= HAMMER_IOC_HISTORY_EOF;
364 		error = 0;
365 	}
366 	hammer_done_cursor(&cursor);
367 	return(error);
368 }
369 
370 /*
371  * Add the scanned element to the ioctl return structure.  Some special
372  * casing is required for regular files to accommodate how data ranges are
373  * stored on-disk.
374  */
/*
 * Fold one scanned B-Tree element into the ioctl return structure.
 * Regular files need special casing because their on-disk key is the
 * END of the data range (BASE+LEN), so the range covered by a record
 * is (key - data_len, key].
 */
static void
add_history(hammer_inode_t ip, struct hammer_ioc_history *hist,
	    hammer_btree_elm_t elm)
{
	int i;

	/* only leaf RECORD elements carry history; skip internal elms */
	if (elm->base.btype != HAMMER_BTREE_TYPE_RECORD)
		return;
	if ((hist->head.flags & HAMMER_IOC_HISTORY_ATKEY) &&
	    ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE) {
		/*
		 * Adjust nxt_key: pull it back to the nearest range
		 * boundary beyond the requested key.
		 */
		if (hist->nxt_key > elm->leaf.base.key - elm->leaf.data_len &&
		    hist->key < elm->leaf.base.key - elm->leaf.data_len) {
			hist->nxt_key = elm->leaf.base.key - elm->leaf.data_len;
		}
		if (hist->nxt_key > elm->leaf.base.key)
			hist->nxt_key = elm->leaf.base.key;

		/*
		 * Record is beyond MAXPHYS, there won't be any more records
		 * in the iteration covering the requested offset (key).
		 */
		if (elm->leaf.base.key >= MAXPHYS &&
		    elm->leaf.base.key - MAXPHYS > hist->key) {
			hist->head.flags |= HAMMER_IOC_HISTORY_NEXT_KEY;
		}

		/*
		 * Data-range of record does not cover the key.
		 */
		if (elm->leaf.base.key - elm->leaf.data_len > hist->key)
			return;

	} else if (hist->head.flags & HAMMER_IOC_HISTORY_ATKEY) {
		/*
		 * Non-regular object (directory entry, DB record):
		 * keys are compared exactly.
		 *
		 * Adjust nxt_key
		 */
		if (hist->nxt_key > elm->leaf.base.key &&
		    hist->key < elm->leaf.base.key) {
			hist->nxt_key = elm->leaf.base.key;
		}

		/*
		 * Record is beyond the requested key.
		 */
		if (elm->leaf.base.key > hist->key)
			hist->head.flags |= HAMMER_IOC_HISTORY_NEXT_KEY;
	}

	/*
	 * Add create_tid if it is in-bounds (and not a duplicate of the
	 * previously recorded TID).
	 */
	i = hist->count;
	if ((i == 0 ||
	     elm->leaf.base.create_tid != hist->hist_ary[i - 1].tid) &&
	    elm->leaf.base.create_tid >= hist->beg_tid &&
	    elm->leaf.base.create_tid < hist->end_tid) {
		if (hist->count == HAMMER_MAX_HISTORY_ELMS) {
			/* array full: tell the caller where to resume */
			hist->nxt_tid = elm->leaf.base.create_tid;
			hist->head.flags |= HAMMER_IOC_HISTORY_NEXT_TID;
			return;
		}
		hist->hist_ary[i].tid = elm->leaf.base.create_tid;
		hist->hist_ary[i].time32 = elm->leaf.create_ts;
		++hist->count;
	}

	/*
	 * Add delete_tid if it is in-bounds.  Note that different portions
	 * of the history may have overlapping data ranges with different
	 * delete_tid's.  If this case occurs the delete_tid may match the
	 * create_tid of a following record.  XXX
	 *
	 *	[        ]
	 *            [     ]
	 */
	i = hist->count;
	if (elm->leaf.base.delete_tid &&
	    elm->leaf.base.delete_tid >= hist->beg_tid &&
	    elm->leaf.base.delete_tid < hist->end_tid) {
		if (i == HAMMER_MAX_HISTORY_ELMS) {
			/* array full: tell the caller where to resume */
			hist->nxt_tid = elm->leaf.base.delete_tid;
			hist->head.flags |= HAMMER_IOC_HISTORY_NEXT_TID;
			return;
		}
		hist->hist_ary[i].tid = elm->leaf.base.delete_tid;
		hist->hist_ary[i].time32 = elm->leaf.delete_ts;
		++hist->count;
	}
}
467 
468 /*
469  * Acquire synchronization TID
470  */
471 static
472 int
473 hammer_ioc_synctid(hammer_transaction_t trans, hammer_inode_t ip,
474 		   struct hammer_ioc_synctid *std)
475 {
476 	hammer_mount_t hmp = ip->hmp;
477 	int error = 0;
478 
479 	switch(std->op) {
480 	case HAMMER_SYNCTID_NONE:
481 		std->tid = hmp->flusher.tid;	/* inaccurate */
482 		break;
483 	case HAMMER_SYNCTID_ASYNC:
484 		hammer_queue_inodes_flusher(hmp, MNT_NOWAIT);
485 		hammer_flusher_async(hmp, NULL);
486 		std->tid = hmp->flusher.tid;	/* inaccurate */
487 		break;
488 	case HAMMER_SYNCTID_SYNC1:
489 		hammer_queue_inodes_flusher(hmp, MNT_WAIT);
490 		hammer_flusher_sync(hmp);
491 		std->tid = hmp->flusher.tid;
492 		break;
493 	case HAMMER_SYNCTID_SYNC2:
494 		hammer_queue_inodes_flusher(hmp, MNT_WAIT);
495 		hammer_flusher_sync(hmp);
496 		std->tid = hmp->flusher.tid;
497 		hammer_flusher_sync(hmp);
498 		break;
499 	default:
500 		error = EOPNOTSUPP;
501 		break;
502 	}
503 	return(error);
504 }
505 
506 /*
507  * Retrieve version info.
508  *
509  * Load min_version, wip_version, and max_version.  If cur_version is passed
510  * as 0 then load the current version into cur_version.  Load the description
511  * for cur_version into the description array.
512  *
513  * Returns 0 on success, EINVAL if cur_version is non-zero and set to an
514  * unsupported value.
515  */
516 static
517 int
518 hammer_ioc_get_version(hammer_transaction_t trans, hammer_inode_t ip,
519 		   struct hammer_ioc_version *ver)
520 {
521 	int error = 0;
522 
523 	ver->min_version = HAMMER_VOL_VERSION_MIN;
524 	ver->wip_version = HAMMER_VOL_VERSION_WIP;
525 	ver->max_version = HAMMER_VOL_VERSION_MAX;
526 	if (ver->cur_version == 0)
527 		ver->cur_version = trans->hmp->version;
528 	switch(ver->cur_version) {
529 	case 1:
530 		ksnprintf(ver->description, sizeof(ver->description),
531 			 "First HAMMER release (DragonFly 2.0+)");
532 		break;
533 	case 2:
534 		ksnprintf(ver->description, sizeof(ver->description),
535 			 "New directory entry layout (DragonFly 2.3+)");
536 		break;
537 	case 3:
538 		ksnprintf(ver->description, sizeof(ver->description),
539 			 "New snapshot management (DragonFly 2.5+)");
540 		break;
541 	case 4:
542 		ksnprintf(ver->description, sizeof(ver->description),
543 			 "New undo/flush, faster flush/sync (DragonFly 2.5+)");
544 		break;
545 	case 5:
546 		ksnprintf(ver->description, sizeof(ver->description),
547 			 "Adjustments for dedup support (DragonFly 2.9+)");
548 		break;
549 	case 6:
550 		ksnprintf(ver->description, sizeof(ver->description),
551 			  "Directory Hash ALG1 (tmp/rename resistance)");
552 		break;
553 	default:
554 		ksnprintf(ver->description, sizeof(ver->description),
555 			 "Unknown");
556 		error = EINVAL;
557 		break;
558 	}
559 	return(error);
560 };
561 
562 /*
563  * Set version info
564  */
565 static
566 int
567 hammer_ioc_set_version(hammer_transaction_t trans, hammer_inode_t ip,
568 		   struct hammer_ioc_version *ver)
569 {
570 	hammer_mount_t hmp = trans->hmp;
571 	struct hammer_cursor cursor;
572 	hammer_volume_t volume;
573 	int error;
574 	int over = hmp->version;
575 
576 	/*
577 	 * Generally do not allow downgrades.  However, version 4 can
578 	 * be downgraded to version 3.
579 	 */
580 	if (ver->cur_version < hmp->version) {
581 		if (!(ver->cur_version == 3 && hmp->version == 4))
582 			return(EINVAL);
583 	}
584 	if (ver->cur_version == hmp->version)
585 		return(0);
586 	if (ver->cur_version > HAMMER_VOL_VERSION_MAX)
587 		return(EINVAL);
588 	if (hmp->ronly)
589 		return(EROFS);
590 
591 	/*
592 	 * Update the root volume header and the version cached in
593 	 * the hammer_mount structure.
594 	 */
595 	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
596 	if (error)
597 		goto failed;
598 	hammer_lock_ex(&hmp->flusher.finalize_lock);
599 	hammer_sync_lock_ex(trans);
600 	hmp->version = ver->cur_version;
601 
602 	/*
603 	 * If upgrading from version < 4 to version >= 4 the UNDO FIFO
604 	 * must be reinitialized.
605 	 */
606 	if (over < HAMMER_VOL_VERSION_FOUR &&
607 	    ver->cur_version >= HAMMER_VOL_VERSION_FOUR) {
608 		kprintf("upgrade undo to version 4\n");
609 		error = hammer_upgrade_undo_4(trans);
610 		if (error)
611 			goto failed;
612 	}
613 
614 	/*
615 	 * Adjust the version in the volume header
616 	 */
617 	volume = hammer_get_root_volume(hmp, &error);
618 	KKASSERT(error == 0);
619 	hammer_modify_volume_field(cursor.trans, volume, vol_version);
620 	volume->ondisk->vol_version = ver->cur_version;
621 	hammer_modify_volume_done(volume);
622 	hammer_rel_volume(volume, 0);
623 
624 	hammer_sync_unlock(trans);
625 	hammer_unlock(&hmp->flusher.finalize_lock);
626 failed:
627 	ver->head.error = error;
628 	hammer_done_cursor(&cursor);
629 	return(0);
630 }
631 
632 /*
633  * Get information
634  */
635 static
636 int
637 hammer_ioc_get_info(hammer_transaction_t trans, struct hammer_ioc_info *info) {
638 
639 	struct hammer_volume_ondisk	*od = trans->hmp->rootvol->ondisk;
640 	struct hammer_mount 		*hm = trans->hmp;
641 
642 	/* Fill the structure with the necessary information */
643 	_hammer_checkspace(hm, HAMMER_CHKSPC_WRITE, &info->rsvbigblocks);
644 	info->rsvbigblocks = info->rsvbigblocks >> HAMMER_LARGEBLOCK_BITS;
645 	strlcpy(info->vol_name, od->vol_name, sizeof(od->vol_name));
646 
647 	info->vol_fsid = hm->fsid;
648 	info->vol_fstype = od->vol_fstype;
649 	info->version = hm->version;
650 
651 	info->inodes = od->vol0_stat_inodes;
652 	info->bigblocks = od->vol0_stat_bigblocks;
653 	info->freebigblocks = od->vol0_stat_freebigblocks;
654 	info->nvolumes = hm->nvolumes;
655 
656 	return 0;
657 }
658 
659 /*
660  * Add snapshot transaction id(s) to the list of snapshots.
661  *
662  * NOTE: Records are created with an allocated TID.  If a flush cycle
663  *	 is in progress the record may be synced in the current flush
664  *	 cycle and the volume header will reflect the allocation of the
665  *	 TID, but the synchronization point may not catch up to the
666  *	 TID until the next flush cycle.
667  */
668 static
669 int
670 hammer_ioc_add_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
671 			struct hammer_ioc_snapshot *snap)
672 {
673 	hammer_mount_t hmp = ip->hmp;
674 	struct hammer_btree_leaf_elm leaf;
675 	struct hammer_cursor cursor;
676 	int error;
677 
678 	/*
679 	 * Validate structure
680 	 */
681 	if (snap->count > HAMMER_SNAPS_PER_IOCTL)
682 		return (EINVAL);
683 	if (snap->index > snap->count)
684 		return (EINVAL);
685 
686 	hammer_lock_ex(&hmp->snapshot_lock);
687 again:
688 	/*
689 	 * Look for keys starting after the previous iteration, or at
690 	 * the beginning if snap->count is 0.
691 	 */
692 	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
693 	if (error) {
694 		hammer_done_cursor(&cursor);
695 		return(error);
696 	}
697 
698 	cursor.asof = HAMMER_MAX_TID;
699 	cursor.flags |= HAMMER_CURSOR_BACKEND | HAMMER_CURSOR_ASOF;
700 
701 	bzero(&leaf, sizeof(leaf));
702 	leaf.base.obj_id = HAMMER_OBJID_ROOT;
703 	leaf.base.rec_type = HAMMER_RECTYPE_SNAPSHOT;
704 	leaf.base.create_tid = hammer_alloc_tid(hmp, 1);
705 	leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
706 	leaf.base.localization = ip->obj_localization + HAMMER_LOCALIZE_INODE;
707 	leaf.data_len = sizeof(struct hammer_snapshot_data);
708 
709 	while (snap->index < snap->count) {
710 		leaf.base.key = (int64_t)snap->snaps[snap->index].tid;
711 		cursor.key_beg = leaf.base;
712 		error = hammer_btree_lookup(&cursor);
713 		if (error == 0) {
714 			error = EEXIST;
715 			break;
716 		}
717 
718 		/*
719 		 * NOTE: Must reload key_beg after an ASOF search because
720 		 *	 the create_tid may have been modified during the
721 		 *	 search.
722 		 */
723 		cursor.flags &= ~HAMMER_CURSOR_ASOF;
724 		cursor.key_beg = leaf.base;
725 		error = hammer_create_at_cursor(&cursor, &leaf,
726 						&snap->snaps[snap->index],
727 						HAMMER_CREATE_MODE_SYS);
728 		if (error == EDEADLK) {
729 			hammer_done_cursor(&cursor);
730 			goto again;
731 		}
732 		cursor.flags |= HAMMER_CURSOR_ASOF;
733 		if (error)
734 			break;
735 		++snap->index;
736 	}
737 	snap->head.error = error;
738 	hammer_done_cursor(&cursor);
739 	hammer_unlock(&hmp->snapshot_lock);
740 	return(0);
741 }
742 
743 /*
744  * Delete snapshot transaction id(s) from the list of snapshots.
745  */
746 static
747 int
748 hammer_ioc_del_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
749 			struct hammer_ioc_snapshot *snap)
750 {
751 	hammer_mount_t hmp = ip->hmp;
752 	struct hammer_cursor cursor;
753 	int error;
754 
755 	/*
756 	 * Validate structure
757 	 */
758 	if (snap->count > HAMMER_SNAPS_PER_IOCTL)
759 		return (EINVAL);
760 	if (snap->index > snap->count)
761 		return (EINVAL);
762 
763 	hammer_lock_ex(&hmp->snapshot_lock);
764 again:
765 	/*
766 	 * Look for keys starting after the previous iteration, or at
767 	 * the beginning if snap->count is 0.
768 	 */
769 	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
770 	if (error) {
771 		hammer_done_cursor(&cursor);
772 		return(error);
773 	}
774 
775 	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
776 	cursor.key_beg.create_tid = 0;
777 	cursor.key_beg.delete_tid = 0;
778 	cursor.key_beg.obj_type = 0;
779 	cursor.key_beg.rec_type = HAMMER_RECTYPE_SNAPSHOT;
780 	cursor.key_beg.localization = ip->obj_localization + HAMMER_LOCALIZE_INODE;
781 	cursor.asof = HAMMER_MAX_TID;
782 	cursor.flags |= HAMMER_CURSOR_ASOF;
783 
784 	while (snap->index < snap->count) {
785 		cursor.key_beg.key = (int64_t)snap->snaps[snap->index].tid;
786 		error = hammer_btree_lookup(&cursor);
787 		if (error)
788 			break;
789 		error = hammer_btree_extract(&cursor, HAMMER_CURSOR_GET_LEAF);
790 		if (error)
791 			break;
792 		error = hammer_delete_at_cursor(&cursor, HAMMER_DELETE_DESTROY,
793 						0, 0, 0, NULL);
794 		if (error == EDEADLK) {
795 			hammer_done_cursor(&cursor);
796 			goto again;
797 		}
798 		if (error)
799 			break;
800 		++snap->index;
801 	}
802 	snap->head.error = error;
803 	hammer_done_cursor(&cursor);
804 	hammer_unlock(&hmp->snapshot_lock);
805 	return(0);
806 }
807 
808 /*
809  * Retrieve as many snapshot ids as possible or until the array is
810  * full, starting after the last transaction id passed in.  If count
811  * is 0 we retrieve starting at the beginning.
812  *
813  * NOTE: Because the b-tree key field is signed but transaction ids
814  *       are unsigned the returned list will be signed-sorted instead
815  *	 of unsigned sorted.  The Caller must still sort the aggregate
816  *	 results.
817  */
818 static
819 int
820 hammer_ioc_get_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
821 			struct hammer_ioc_snapshot *snap)
822 {
823 	struct hammer_cursor cursor;
824 	int error;
825 
826 	/*
827 	 * Validate structure
828 	 */
829 	if (snap->index != 0)
830 		return (EINVAL);
831 	if (snap->count > HAMMER_SNAPS_PER_IOCTL)
832 		return (EINVAL);
833 
834 	/*
835 	 * Look for keys starting after the previous iteration, or at
836 	 * the beginning if snap->count is 0.
837 	 */
838 	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
839 	if (error) {
840 		hammer_done_cursor(&cursor);
841 		return(error);
842 	}
843 
844 	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
845 	cursor.key_beg.create_tid = 0;
846 	cursor.key_beg.delete_tid = 0;
847 	cursor.key_beg.obj_type = 0;
848 	cursor.key_beg.rec_type = HAMMER_RECTYPE_SNAPSHOT;
849 	cursor.key_beg.localization = ip->obj_localization + HAMMER_LOCALIZE_INODE;
850 	if (snap->count == 0)
851 		cursor.key_beg.key = HAMMER_MIN_KEY;
852 	else
853 		cursor.key_beg.key = (int64_t)snap->snaps[snap->count - 1].tid + 1;
854 
855 	cursor.key_end = cursor.key_beg;
856 	cursor.key_end.key = HAMMER_MAX_KEY;
857 	cursor.asof = HAMMER_MAX_TID;
858 	cursor.flags |= HAMMER_CURSOR_END_EXCLUSIVE | HAMMER_CURSOR_ASOF;
859 
860 	snap->count = 0;
861 
862 	error = hammer_btree_first(&cursor);
863 	while (error == 0 && snap->count < HAMMER_SNAPS_PER_IOCTL) {
864 		error = hammer_btree_extract(&cursor, HAMMER_CURSOR_GET_LEAF);
865 		if (error)
866 			break;
867 		if (cursor.leaf->base.rec_type == HAMMER_RECTYPE_SNAPSHOT) {
868 			error = hammer_btree_extract(
869 					     &cursor, HAMMER_CURSOR_GET_LEAF |
870 						      HAMMER_CURSOR_GET_DATA);
871 			snap->snaps[snap->count] = cursor.data->snap;
872 
873 			/*
874 			 * The snap data tid should match the key but might
875 			 * not due to a bug in the HAMMER v3 conversion code.
876 			 *
877 			 * This error will work itself out over time but we
878 			 * have to force a match or the snapshot will not
879 			 * be deletable.
880 			 */
881 			if (cursor.data->snap.tid !=
882 			    (hammer_tid_t)cursor.leaf->base.key) {
883 				kprintf("HAMMER: lo=%08x snapshot key "
884 					"0x%016jx data mismatch 0x%016jx\n",
885 					cursor.key_beg.localization,
886 					(uintmax_t)cursor.data->snap.tid,
887 					cursor.leaf->base.key);
888 				kprintf("HAMMER: Probably left over from the "
889 					"original v3 conversion, hammer "
890 					"cleanup should get it eventually\n");
891 				snap->snaps[snap->count].tid =
892 					cursor.leaf->base.key;
893 			}
894 			++snap->count;
895 		}
896 		error = hammer_btree_iterate(&cursor);
897 	}
898 
899 	if (error == ENOENT) {
900 		snap->head.flags |= HAMMER_IOC_SNAPSHOT_EOF;
901 		error = 0;
902 	}
903 	snap->head.error = error;
904 	hammer_done_cursor(&cursor);
905 	return(0);
906 }
907 
908 /*
909  * Retrieve the PFS hammer cleanup utility config record.  This is
910  * different (newer than) the PFS config.
911  */
912 static
913 int
914 hammer_ioc_get_config(hammer_transaction_t trans, hammer_inode_t ip,
915 			struct hammer_ioc_config *config)
916 {
917 	struct hammer_cursor cursor;
918 	int error;
919 
920 	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
921 	if (error) {
922 		hammer_done_cursor(&cursor);
923 		return(error);
924 	}
925 
926 	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
927 	cursor.key_beg.create_tid = 0;
928 	cursor.key_beg.delete_tid = 0;
929 	cursor.key_beg.obj_type = 0;
930 	cursor.key_beg.rec_type = HAMMER_RECTYPE_CONFIG;
931 	cursor.key_beg.localization = ip->obj_localization + HAMMER_LOCALIZE_INODE;
932 	cursor.key_beg.key = 0;		/* config space page 0 */
933 
934 	cursor.asof = HAMMER_MAX_TID;
935 	cursor.flags |= HAMMER_CURSOR_ASOF;
936 
937 	error = hammer_btree_lookup(&cursor);
938 	if (error == 0) {
939 		error = hammer_btree_extract(&cursor, HAMMER_CURSOR_GET_LEAF |
940 						      HAMMER_CURSOR_GET_DATA);
941 		if (error == 0)
942 			config->config = cursor.data->config;
943 	}
944 	/* error can be ENOENT */
945 	config->head.error = error;
946 	hammer_done_cursor(&cursor);
947 	return(0);
948 }
949 
950 /*
951  * Retrieve the PFS hammer cleanup utility config record.  This is
952  * different (newer than) the PFS config.
953  *
954  * This is kinda a hack.
955  */
956 static
957 int
958 hammer_ioc_set_config(hammer_transaction_t trans, hammer_inode_t ip,
959 			struct hammer_ioc_config *config)
960 {
961 	struct hammer_btree_leaf_elm leaf;
962 	struct hammer_cursor cursor;
963 	hammer_mount_t hmp = ip->hmp;
964 	int error;
965 
966 again:
967 	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
968 	if (error) {
969 		hammer_done_cursor(&cursor);
970 		return(error);
971 	}
972 
973 	bzero(&leaf, sizeof(leaf));
974 	leaf.base.obj_id = HAMMER_OBJID_ROOT;
975 	leaf.base.rec_type = HAMMER_RECTYPE_CONFIG;
976 	leaf.base.create_tid = hammer_alloc_tid(hmp, 1);
977 	leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
978 	leaf.base.localization = ip->obj_localization + HAMMER_LOCALIZE_INODE;
979 	leaf.base.key = 0;	/* page 0 */
980 	leaf.data_len = sizeof(struct hammer_config_data);
981 
982 	cursor.key_beg = leaf.base;
983 
984 	cursor.asof = HAMMER_MAX_TID;
985 	cursor.flags |= HAMMER_CURSOR_BACKEND | HAMMER_CURSOR_ASOF;
986 
987 	error = hammer_btree_lookup(&cursor);
988 	if (error == 0) {
989 		error = hammer_btree_extract(&cursor, HAMMER_CURSOR_GET_LEAF |
990 						      HAMMER_CURSOR_GET_DATA);
991 		error = hammer_delete_at_cursor(&cursor, HAMMER_DELETE_DESTROY,
992 						0, 0, 0, NULL);
993 		if (error == EDEADLK) {
994 			hammer_done_cursor(&cursor);
995 			goto again;
996 		}
997 	}
998 	if (error == ENOENT)
999 		error = 0;
1000 	if (error == 0) {
1001 		/*
1002 		 * NOTE: Must reload key_beg after an ASOF search because
1003 		 *	 the create_tid may have been modified during the
1004 		 *	 search.
1005 		 */
1006 		cursor.flags &= ~HAMMER_CURSOR_ASOF;
1007 		cursor.key_beg = leaf.base;
1008 		error = hammer_create_at_cursor(&cursor, &leaf,
1009 						&config->config,
1010 						HAMMER_CREATE_MODE_SYS);
1011 		if (error == EDEADLK) {
1012 			hammer_done_cursor(&cursor);
1013 			goto again;
1014 		}
1015 	}
1016 	config->head.error = error;
1017 	hammer_done_cursor(&cursor);
1018 	return(0);
1019 }
1020 
1021 static
1022 int
1023 hammer_ioc_get_data(hammer_transaction_t trans, hammer_inode_t ip,
1024 			struct hammer_ioc_data *data)
1025 {
1026 	struct hammer_cursor cursor;
1027 	int bytes;
1028 	int error;
1029 
1030 	/* XXX cached inode ? */
1031 	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
1032 	if (error)
1033 		goto failed;
1034 
1035 	cursor.key_beg = data->elm;
1036 	cursor.flags |= HAMMER_CURSOR_BACKEND;
1037 
1038 	error = hammer_btree_lookup(&cursor);
1039 	if (error == 0) {
1040 		error = hammer_btree_extract(&cursor, HAMMER_CURSOR_GET_LEAF |
1041 						      HAMMER_CURSOR_GET_DATA);
1042 		if (error == 0) {
1043 			data->leaf = *cursor.leaf;
1044 			bytes = cursor.leaf->data_len;
1045 			if (bytes > data->size)
1046 				bytes = data->size;
1047 			error = copyout(cursor.data, data->ubuf, bytes);
1048 		}
1049 	}
1050 
1051 failed:
1052 	hammer_done_cursor(&cursor);
1053 	return (error);
1054 }
1055