1 /* 2 * Copyright (c) 2008 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.4 2008/02/08 08:30:59 dillon Exp $
 */

#include "hammer.h"

static int hammer_ioc_prune(hammer_inode_t ip,
				struct hammer_ioc_prune *prune);
static int hammer_ioc_gethistory(hammer_inode_t ip,
				struct hammer_ioc_history *hist);

/*
 * Dispatch HAMMER-specific ioctls issued against an inode.
 *
 * The superuser check is performed once up front.  Its result only
 * gates privileged operations: HAMMERIOC_PRUNE requires it, while
 * HAMMERIOC_GETHISTORY deliberately overwrites the result and is thus
 * available to unprivileged callers.
 *
 * Returns 0 on success or a UNIX errno (EOPNOTSUPP for unknown
 * commands).
 */
int
hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
	     struct ucred *cred)
{
	int error;

	error = suser_cred(cred, PRISON_ROOT);

	switch(com) {
	case HAMMERIOC_PRUNE:
		/* privileged: only run if the credential check passed */
		if (error == 0) {
			error = hammer_ioc_prune(ip,
					(struct hammer_ioc_prune *)data);
		}
		break;
	case HAMMERIOC_GETHISTORY:
		/* unprivileged: any prior suser_cred() error is discarded */
		error = hammer_ioc_gethistory(ip,
					(struct hammer_ioc_history *)data);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return (error);
}

/*
 * Iterate through the specified range of object ids and remove any
 * deleted records that fall entirely within a prune modulo.
 *
 * A reverse iteration is used to prevent overlapping records from being
 * created during the iteration due to alignments.  This also allows us
 * to adjust alignments without blowing up the B-Tree.
 */
static int check_prune(struct hammer_ioc_prune *prune, hammer_btree_elm_t elm,
			int *realign_cre, int *realign_del);
static int realign_prune(struct hammer_ioc_prune *prune, hammer_cursor_t cursor,
			int realign_cre, int realign_del);

/*
 * Prune deleted records in the object-id/key range described by *prune.
 *
 * Scans the B-Tree in reverse from (cur_obj_id, cur_key) back towards
 * beg_obj_id, deleting prunable records and realigning the TIDs of
 * retained ones.  Progress is recorded in prune->cur_obj_id/cur_key so
 * an EDEADLK-forced retry (or a restarted ioctl) resumes where it left
 * off.  NOTE(review): the caller is presumably expected to initialize
 * cur_obj_id/cur_key to the end of the range — not validated here.
 *
 * Returns 0 on success or a UNIX errno.
 */
static int
hammer_ioc_prune(hammer_inode_t ip, struct hammer_ioc_prune *prune)
{
	struct hammer_cursor cursor;
	hammer_btree_elm_t elm;
	int error;
	int isdir;
	int realign_cre;
	int realign_del;

	/*
	 * Validate the user-supplied request.  PRUNE_ALL and an explicit
	 * element list are mutually exclusive.
	 */
	if (prune->nelms < 0 || prune->nelms > HAMMER_MAX_PRUNE_ELMS)
		return(EINVAL);
	if (prune->beg_obj_id >= prune->end_obj_id)
		return(EINVAL);
	if ((prune->flags & HAMMER_IOC_PRUNE_ALL) && prune->nelms)
		return(EINVAL);

retry:
	error = hammer_init_cursor_hmp(&cursor, NULL, ip->hmp);
	if (error) {
		hammer_done_cursor(&cursor);
		return(error);
	}

	/*
	 * key_beg is the fixed low bound; key_end tracks the resume point
	 * (we iterate in reverse, so the scan starts at key_end).
	 */
	cursor.key_beg.obj_id = prune->beg_obj_id;
	cursor.key_beg.key = HAMMER_MIN_KEY;
	cursor.key_beg.create_tid = 1;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
	cursor.key_beg.obj_type = 0;

	cursor.key_end.obj_id = prune->cur_obj_id;
	cursor.key_end.key = prune->cur_key;
	cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
	cursor.key_end.delete_tid = 0;
	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
	cursor.key_end.obj_type = 0;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;

	error = hammer_btree_last(&cursor);
	while (error == 0) {
		elm = &cursor.node->ondisk->elms[cursor.index];

		/* record progress so an EDEADLK retry can resume here */
		prune->cur_obj_id = elm->base.obj_id;
		prune->cur_key = elm->base.key;

		if (check_prune(prune, elm, &realign_cre, &realign_del) == 0) {
			/*
			 * Record is prunable - delete it and account for
			 * it in the returned statistics.
			 */
			if (hammer_debug_general & 0x0200) {
				kprintf("check %016llx %016llx: DELETE\n",
					elm->base.obj_id, elm->base.key);
			}

			/*
			 * NOTE: This can return EDEADLK
			 */
			isdir = (elm->base.rec_type == HAMMER_RECTYPE_DIRENTRY);

			error = hammer_delete_at_cursor(&cursor,
							&prune->stat_bytes);
			if (error)
				break;

			if (isdir)
				++prune->stat_dirrecords;
			else
				++prune->stat_rawrecords;
		} else if (realign_cre >= 0 || realign_del >= 0) {
			/*
			 * Record is retained but its TIDs must be aligned
			 * to a prune boundary.
			 */
			error = realign_prune(prune, &cursor,
					      realign_cre, realign_del);
			if (error == 0) {
				cursor.flags |= HAMMER_CURSOR_ATEDISK;
				if (hammer_debug_general & 0x0200) {
					kprintf("check %016llx %016llx: "
						"REALIGN\n",
						elm->base.obj_id,
						elm->base.key);
				}
			}
		} else {
			/* record untouched, skip to the previous one */
			cursor.flags |= HAMMER_CURSOR_ATEDISK;
			if (hammer_debug_general & 0x0100) {
				kprintf("check %016llx %016llx: SKIP\n",
					elm->base.obj_id, elm->base.key);
			}
		}
		if (error == 0)
			error = hammer_btree_iterate_reverse(&cursor);
	}
	/* running off the front of the range is normal termination */
	if (error == ENOENT)
		error = 0;
	hammer_done_cursor(&cursor);
	if (error == EDEADLK)
		goto retry;
	return(error);
}

/*
 * Check pruning list.  The list must be sorted in descending order.
 *
 * Returns 0 if the record may be deleted outright, otherwise -1.  On a
 * -1 return, *realign_cre / *realign_del are set to the index of the
 * prune element whose modulo should be used to realign the record's
 * create_tid / delete_tid respectively, or -1 if no realignment is
 * needed on that side.
 */
static int
check_prune(struct hammer_ioc_prune *prune, hammer_btree_elm_t elm,
	    int *realign_cre, int *realign_del)
{
	struct hammer_ioc_prune_elm *scan;
	int i;

	*realign_cre = -1;
	*realign_del = -1;

	/*
	 * If pruning everything remove all records with a non-zero
	 * delete_tid.
	 */
	if (prune->flags & HAMMER_IOC_PRUNE_ALL) {
		if (elm->base.delete_tid != 0)
			return(0);
		return(-1);
	}

	for (i = 0; i < prune->nelms; ++i) {
		scan = &prune->elms[i];

		/*
		 * Locate the scan index covering the create and delete TIDs.
		 */
		if (*realign_cre < 0 &&
		    elm->base.create_tid >= scan->beg_tid &&
		    elm->base.create_tid < scan->end_tid) {
			*realign_cre = i;
		}
		if (*realign_del < 0 && elm->base.delete_tid &&
		    elm->base.delete_tid > scan->beg_tid &&
		    elm->base.delete_tid <= scan->end_tid) {
			*realign_del = i;
		}

		/*
		 * Now check for loop termination.  Since elements are
		 * sorted in descending order, once both TIDs fall below
		 * this element's range no later element can match either.
		 */
		if (elm->base.create_tid >= scan->end_tid ||
		    elm->base.delete_tid > scan->end_tid) {
			break;
		}

		/*
		 * Now determine if we can delete the record: it must be
		 * deleted, fully contained in the element's TID range,
		 * and both TIDs must fall in the same mod_tid slot.
		 */
		if (elm->base.delete_tid &&
		    elm->base.create_tid >= scan->beg_tid &&
		    elm->base.delete_tid <= scan->end_tid &&
		    elm->base.create_tid / scan->mod_tid ==
		    elm->base.delete_tid / scan->mod_tid) {
			return(0);
		}
	}
	return(-1);
}

/*
 * Align the record to cover any gaps created through the deletion of
 * records within the pruning space.  If we were to just delete the records
 * there would be gaps which in turn would cause a snapshot that is NOT on
 * a pruning boundary to appear corrupt to the user.  Forcing alignment
 * of the create_tid and delete_tid for retained records 'reconnects'
 * the previously contiguous space, making it contiguous again after the
 * deletions.
 *
 * The use of a reverse iteration allows us to safely align the records and
 * related elements without creating temporary overlaps.  XXX we should
 * add ordering dependencies for record buffers to guarantee consistency
 * during recovery.
 *
 * Returns 0 on success or a UNIX errno (can return EDEADLK from the
 * right-hand-boundary correction or the cursor upgrade).
 */
static int
realign_prune(struct hammer_ioc_prune *prune,
	      hammer_cursor_t cursor, int realign_cre, int realign_del)
{
	hammer_btree_elm_t elm;
	hammer_tid_t delta;
	hammer_tid_t mod;
	hammer_tid_t tid;
	int error;

	hammer_cursor_downgrade(cursor);

	elm = &cursor->node->ondisk->elms[cursor->index];
	++prune->stat_realignments;

	/*
	 * Align the create_tid.  By doing a reverse iteration we guarantee
	 * that all records after our current record have already been
	 * aligned, allowing us to safely correct the right-hand-boundary
	 * (because no record to our right if otherwise exactly matching
	 * will have a create_tid to the left of our aligned create_tid).
	 *
	 * Ordering is important here XXX but disk write ordering for
	 * inter-cluster corrections is not currently guaranteed.
	 */
	error = 0;
	if (realign_cre >= 0) {
		mod = prune->elms[realign_cre].mod_tid;
		delta = elm->leaf.base.create_tid % mod;
		if (delta) {
			/* round create_tid up to the next mod boundary */
			tid = elm->leaf.base.create_tid - delta + mod;

			/* can EDEADLK */
			error = hammer_btree_correct_rhb(cursor, tid + 1);
			if (error == 0) {
				error = hammer_btree_extract(cursor,
						     HAMMER_CURSOR_GET_RECORD);
			}
			if (error == 0) {
				/* can EDEADLK */
				error = hammer_cursor_upgrade(cursor);
			}
			if (error == 0) {
				/* update both the record and the B-Tree elm */
				hammer_modify_buffer(cursor->record_buffer,
						     NULL, 0);
				cursor->record->base.base.create_tid = tid;
				hammer_modify_node(cursor->node);
				elm->leaf.base.create_tid = tid;
			}
		}
	}

	/*
	 * Align the delete_tid.  This only occurs if the record is historical
	 * was deleted at some point.  Realigning the delete_tid does not
	 * move the record within the B-Tree but may cause it to temporarily
	 * overlap a record that has not yet been pruned.
	 */
	if (error == 0 && realign_del >= 0) {
		mod = prune->elms[realign_del].mod_tid;
		delta = elm->leaf.base.delete_tid % mod;
		hammer_modify_node(cursor->node);
		if (delta) {
			error = hammer_btree_extract(cursor,
						     HAMMER_CURSOR_GET_RECORD);
			if (error == 0) {
				/* round delete_tid up to the next boundary */
				elm->leaf.base.delete_tid =
						elm->leaf.base.delete_tid -
						delta + mod;
				hammer_modify_buffer(cursor->record_buffer,
				    &cursor->record->base.base.delete_tid,
				    sizeof(hammer_tid_t));
				cursor->record->base.base.delete_tid =
						elm->leaf.base.delete_tid;
			}
		}
	}
	return (error);
}

/*
 * Iterate through an object's inode or an object's records and record
 * modification TIDs.
335 */ 336 static void add_history(hammer_inode_t ip, struct hammer_ioc_history *hist, 337 hammer_btree_elm_t elm); 338 339 static 340 int 341 hammer_ioc_gethistory(hammer_inode_t ip, struct hammer_ioc_history *hist) 342 { 343 struct hammer_cursor cursor; 344 hammer_btree_elm_t elm; 345 int error; 346 347 /* 348 * Validate the structure and initialize for return. 349 */ 350 if (hist->beg_tid > hist->end_tid) 351 return(EINVAL); 352 if (hist->flags & HAMMER_IOC_HISTORY_ATKEY) { 353 if (hist->key > hist->nxt_key) 354 return(EINVAL); 355 } 356 357 hist->obj_id = ip->obj_id; 358 hist->count = 0; 359 hist->nxt_tid = hist->end_tid; 360 hist->flags &= ~HAMMER_IOC_HISTORY_NEXT_TID; 361 hist->flags &= ~HAMMER_IOC_HISTORY_NEXT_KEY; 362 hist->flags &= ~HAMMER_IOC_HISTORY_EOF; 363 hist->flags &= ~HAMMER_IOC_HISTORY_UNSYNCED; 364 if ((ip->flags & HAMMER_INODE_MODMASK) & ~HAMMER_INODE_ITIMES) 365 hist->flags |= HAMMER_IOC_HISTORY_UNSYNCED; 366 367 /* 368 * Setup the cursor. We can't handle undeletable records 369 * (create_tid of 0) at the moment. A create_tid of 0 has 370 * a special meaning and cannot be specified in the cursor. 371 */ 372 error = hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp); 373 if (error) { 374 hammer_done_cursor(&cursor); 375 return(error); 376 } 377 378 cursor.key_beg.obj_id = hist->obj_id; 379 cursor.key_beg.create_tid = hist->beg_tid; 380 cursor.key_beg.delete_tid = 0; 381 cursor.key_beg.obj_type = 0; 382 if (cursor.key_beg.create_tid == HAMMER_MIN_TID) 383 cursor.key_beg.create_tid = 1; 384 385 cursor.key_end.obj_id = hist->obj_id; 386 cursor.key_end.create_tid = hist->end_tid; 387 cursor.key_end.delete_tid = 0; 388 cursor.key_end.obj_type = 0; 389 390 cursor.flags |= HAMMER_CURSOR_END_EXCLUSIVE; 391 392 if (hist->flags & HAMMER_IOC_HISTORY_ATKEY) { 393 /* 394 * key-range within the file. 
For a regular file the 395 * on-disk key represents BASE+LEN, not BASE, so the 396 * first possible record containing the offset 'key' 397 * has an on-disk key of (key + 1). 398 */ 399 cursor.key_beg.key = hist->key; 400 cursor.key_end.key = HAMMER_MAX_KEY; 401 402 switch(ip->ino_rec.base.base.obj_type) { 403 case HAMMER_OBJTYPE_REGFILE: 404 ++cursor.key_beg.key; 405 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; 406 break; 407 case HAMMER_OBJTYPE_DIRECTORY: 408 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY; 409 break; 410 case HAMMER_OBJTYPE_DBFILE: 411 cursor.key_beg.rec_type = HAMMER_RECTYPE_DB; 412 break; 413 default: 414 error = EINVAL; 415 break; 416 } 417 cursor.key_end.rec_type = cursor.key_beg.rec_type; 418 } else { 419 /* 420 * The inode itself. 421 */ 422 cursor.key_beg.key = 0; 423 cursor.key_end.key = 0; 424 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE; 425 cursor.key_end.rec_type = HAMMER_RECTYPE_INODE; 426 } 427 428 error = hammer_btree_first(&cursor); 429 while (error == 0) { 430 elm = &cursor.node->ondisk->elms[cursor.index]; 431 432 add_history(ip, hist, elm); 433 if (hist->flags & (HAMMER_IOC_HISTORY_NEXT_TID | 434 HAMMER_IOC_HISTORY_NEXT_KEY | 435 HAMMER_IOC_HISTORY_EOF)) { 436 break; 437 } 438 error = hammer_btree_iterate(&cursor); 439 } 440 if (error == ENOENT) { 441 hist->flags |= HAMMER_IOC_HISTORY_EOF; 442 error = 0; 443 } 444 hammer_done_cursor(&cursor); 445 return(error); 446 } 447 448 /* 449 * Add the scanned element to the ioctl return structure. Some special 450 * casing is required for regular files to accomodate how data ranges are 451 * stored on-disk. 
 */
static void
add_history(hammer_inode_t ip, struct hammer_ioc_history *hist,
	    hammer_btree_elm_t elm)
{
	/* only leaf record elements carry history */
	if (elm->base.btype != HAMMER_BTREE_TYPE_RECORD)
		return;
	if ((hist->flags & HAMMER_IOC_HISTORY_ATKEY) &&
	    ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_REGFILE) {
		/*
		 * Adjust nxt_key.  For REGFILE records the on-disk key is
		 * BASE+LEN, so the record's data range is
		 * [key - data_len, key).
		 */
		if (hist->nxt_key > elm->leaf.base.key - elm->leaf.data_len &&
		    hist->key < elm->leaf.base.key - elm->leaf.data_len) {
			hist->nxt_key = elm->leaf.base.key - elm->leaf.data_len;
		}
		if (hist->nxt_key > elm->leaf.base.key)
			hist->nxt_key = elm->leaf.base.key;

		/*
		 * Record is beyond MAXPHYS, there won't be any more records
		 * in the iteration covering the requested offset (key).
		 */
		if (elm->leaf.base.key >= MAXPHYS &&
		    elm->leaf.base.key - MAXPHYS > hist->key) {
			hist->flags |= HAMMER_IOC_HISTORY_NEXT_KEY;
		}

		/*
		 * Data-range of record does not cover the key.
		 */
		if (elm->leaf.base.key - elm->leaf.data_len > hist->key)
			return;

	} else if (hist->flags & HAMMER_IOC_HISTORY_ATKEY) {
		/*
		 * Adjust nxt_key (non-REGFILE objects use the key directly).
		 */
		if (hist->nxt_key > elm->leaf.base.key &&
		    hist->key < elm->leaf.base.key) {
			hist->nxt_key = elm->leaf.base.key;
		}

		/*
		 * Record is beyond the requested key.
		 */
		if (elm->leaf.base.key > hist->key)
			hist->flags |= HAMMER_IOC_HISTORY_NEXT_KEY;
	}

	/*
	 * Add create_tid if it is in-bounds.  The duplicate check against
	 * the previous array entry suppresses runs of identical TIDs.
	 * When the array is full, record the TID in nxt_tid and set
	 * NEXT_TID so the caller can resume from there.
	 */
	if ((hist->count == 0 ||
	     elm->leaf.base.create_tid != hist->tid_ary[hist->count - 1]) &&
	    elm->leaf.base.create_tid >= hist->beg_tid &&
	    elm->leaf.base.create_tid < hist->end_tid) {
		if (hist->count == HAMMER_MAX_HISTORY_ELMS) {
			hist->nxt_tid = elm->leaf.base.create_tid;
			hist->flags |= HAMMER_IOC_HISTORY_NEXT_TID;
			return;
		}
		hist->tid_ary[hist->count++] = elm->leaf.base.create_tid;
	}

	/*
	 * Add delete_tid if it is in-bounds.  Note that different portions
	 * of the history may have overlapping data ranges with different
	 * delete_tid's.  If this case occurs the delete_tid may match the
	 * create_tid of a following record.  XXX
	 *
	 * [ ]
	 *     [     ]
	 */
	if (elm->leaf.base.delete_tid &&
	    elm->leaf.base.delete_tid >= hist->beg_tid &&
	    elm->leaf.base.delete_tid < hist->end_tid) {
		if (hist->count == HAMMER_MAX_HISTORY_ELMS) {
			hist->nxt_tid = elm->leaf.base.delete_tid;
			hist->flags |= HAMMER_IOC_HISTORY_NEXT_TID;
			return;
		}
		hist->tid_ary[hist->count++] = elm->leaf.base.delete_tid;
	}
}