1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.30 2008/02/10 09:51:01 dillon Exp $ 35 */ 36 37 #include "hammer.h" 38 39 static int hammer_mem_add(hammer_transaction_t trans, hammer_record_t record); 40 static int hammer_mem_lookup(hammer_cursor_t cursor, hammer_inode_t ip); 41 static int hammer_mem_first(hammer_cursor_t cursor, hammer_inode_t ip); 42 43 /* 44 * Red-black tree support. 45 */ 46 static int 47 hammer_rec_rb_compare(hammer_record_t rec1, hammer_record_t rec2) 48 { 49 if (rec1->rec.base.base.rec_type < rec2->rec.base.base.rec_type) 50 return(-1); 51 if (rec1->rec.base.base.rec_type > rec2->rec.base.base.rec_type) 52 return(1); 53 54 if (rec1->rec.base.base.key < rec2->rec.base.base.key) 55 return(-1); 56 if (rec1->rec.base.base.key > rec2->rec.base.base.key) 57 return(1); 58 59 if (rec1->rec.base.base.create_tid == 0) { 60 if (rec2->rec.base.base.create_tid == 0) 61 return(0); 62 return(1); 63 } 64 if (rec2->rec.base.base.create_tid == 0) 65 return(-1); 66 67 if (rec1->rec.base.base.create_tid < rec2->rec.base.base.create_tid) 68 return(-1); 69 if (rec1->rec.base.base.create_tid > rec2->rec.base.base.create_tid) 70 return(1); 71 return(0); 72 } 73 74 static int 75 hammer_rec_compare(hammer_base_elm_t info, hammer_record_t rec) 76 { 77 if (info->rec_type < rec->rec.base.base.rec_type) 78 return(-3); 79 if (info->rec_type > rec->rec.base.base.rec_type) 80 return(3); 81 82 if (info->key < rec->rec.base.base.key) 83 return(-2); 84 if (info->key > rec->rec.base.base.key) 85 return(2); 86 87 if (info->create_tid == 0) { 88 if (rec->rec.base.base.create_tid == 0) 89 return(0); 90 return(1); 91 } 92 if (rec->rec.base.base.create_tid == 0) 93 return(-1); 94 if (info->create_tid < rec->rec.base.base.create_tid) 95 return(-1); 96 if (info->create_tid > rec->rec.base.base.create_tid) 97 return(1); 98 return(0); 99 } 100 101 /* 102 * RB_SCAN comparison code for hammer_mem_first(). The argument order 103 * is reversed so the comparison result has to be negated. key_beg and 104 * key_end are both range-inclusive. 105 * 106 * The creation timestamp can cause hammer_rec_compare() to return -1 or +1. 107 * These do not stop the scan. 108 * 109 * Localized deletions are not cached in-memory. 110 */ 111 static 112 int 113 hammer_rec_scan_cmp(hammer_record_t rec, void *data) 114 { 115 hammer_cursor_t cursor = data; 116 int r; 117 118 r = hammer_rec_compare(&cursor->key_beg, rec); 119 if (r > 1) 120 return(-1); 121 r = hammer_rec_compare(&cursor->key_end, rec); 122 if (r < -1) 123 return(1); 124 return(0); 125 } 126 127 RB_GENERATE(hammer_rec_rb_tree, hammer_record, rb_node, hammer_rec_rb_compare); 128 RB_GENERATE_XLOOKUP(hammer_rec_rb_tree, INFO, hammer_record, rb_node, 129 hammer_rec_compare, hammer_base_elm_t); 130 131 /* 132 * Allocate a record for the caller to finish filling in. The record is 133 * returned referenced. 134 */ 135 hammer_record_t 136 hammer_alloc_mem_record(hammer_inode_t ip) 137 { 138 hammer_record_t record; 139 140 ++hammer_count_records; 141 record = kmalloc(sizeof(*record), M_HAMMER, M_WAITOK|M_ZERO); 142 record->ip = ip; 143 record->rec.base.base.btype = HAMMER_BTREE_TYPE_RECORD; 144 hammer_ref(&record->lock); 145 return (record); 146 } 147 148 /* 149 * Release a memory record. Records marked for deletion are immediately 150 * removed from the RB-Tree but otherwise left intact until the last ref 151 * goes away. 152 */ 153 void 154 hammer_rel_mem_record(struct hammer_record *record) 155 { 156 hammer_unref(&record->lock); 157 158 if (record->flags & HAMMER_RECF_DELETED) { 159 if (record->flags & HAMMER_RECF_ONRBTREE) { 160 RB_REMOVE(hammer_rec_rb_tree, &record->ip->rec_tree, 161 record); 162 record->flags &= ~HAMMER_RECF_ONRBTREE; 163 } 164 if (record->lock.refs == 0) { 165 if (record->flags & HAMMER_RECF_ALLOCDATA) { 166 --hammer_count_record_datas; 167 kfree(record->data, M_HAMMER); 168 record->flags &= ~HAMMER_RECF_ALLOCDATA; 169 } 170 record->data = NULL; 171 --hammer_count_records; 172 kfree(record, M_HAMMER); 173 return; 174 } 175 } 176 177 /* 178 * If someone wanted the record wake them up. 179 */ 180 if (record->flags & HAMMER_RECF_WANTED) { 181 record->flags &= ~HAMMER_RECF_WANTED; 182 wakeup(record); 183 } 184 } 185 186 /* 187 * Lookup an in-memory record given the key specified in the cursor. Works 188 * just like hammer_btree_lookup() but operates on an inode's in-memory 189 * record list. 190 * 191 * The lookup must fail if the record is marked for deferred deletion. 192 */ 193 static 194 int 195 hammer_mem_lookup(hammer_cursor_t cursor, hammer_inode_t ip) 196 { 197 int error; 198 199 if (cursor->iprec) { 200 hammer_rel_mem_record(cursor->iprec); 201 cursor->iprec = NULL; 202 } 203 if (cursor->ip) { 204 hammer_rec_rb_tree_scan_info_done(&cursor->scan, 205 &cursor->ip->rec_tree); 206 } 207 cursor->ip = ip; 208 hammer_rec_rb_tree_scan_info_link(&cursor->scan, &ip->rec_tree); 209 cursor->scan.node = NULL; 210 cursor->iprec = hammer_rec_rb_tree_RB_LOOKUP_INFO( 211 &ip->rec_tree, &cursor->key_beg); 212 if (cursor->iprec == NULL) { 213 error = ENOENT; 214 } else { 215 hammer_ref(&cursor->iprec->lock); 216 error = 0; 217 } 218 return(error); 219 } 220 221 /* 222 * hammer_mem_first() - locate the first in-memory record matching the 223 * cursor. 224 * 225 * The RB_SCAN function we use is designed as a callback. We terminate it 226 * (return -1) as soon as we get a match. 227 */ 228 static 229 int 230 hammer_rec_scan_callback(hammer_record_t rec, void *data) 231 { 232 hammer_cursor_t cursor = data; 233 234 /* 235 * We terminate on success, so this should be NULL on entry. 236 */ 237 KKASSERT(cursor->iprec == NULL); 238 239 /* 240 * Skip if the record was marked deleted 241 */ 242 if (rec->flags & HAMMER_RECF_DELETED) 243 return(0); 244 245 /* 246 * Skip if not visible due to our as-of TID 247 */ 248 if (cursor->flags & HAMMER_CURSOR_ASOF) { 249 if (cursor->asof < rec->rec.base.base.create_tid) 250 return(0); 251 if (rec->rec.base.base.delete_tid && 252 cursor->asof >= rec->rec.base.base.delete_tid) { 253 return(0); 254 } 255 } 256 257 /* 258 * Block if currently being synchronized to disk, otherwise we 259 * may get a duplicate. Wakeup the syncer if it's stuck on 260 * the record. 261 */ 262 hammer_ref(&rec->lock); 263 ++rec->blocked; 264 while (rec->flags & HAMMER_RECF_SYNCING) { 265 rec->flags |= HAMMER_RECF_WANTED; 266 tsleep(rec, 0, "hmrrc2", 0); 267 } 268 --rec->blocked; 269 270 /* 271 * The record may have been deleted while we were blocked. 272 */ 273 if (rec->flags & HAMMER_RECF_DELETED) { 274 hammer_rel_mem_record(cursor->iprec); 275 return(0); 276 } 277 278 /* 279 * Set the matching record and stop the scan. 280 */ 281 cursor->iprec = rec; 282 return(-1); 283 } 284 285 static 286 int 287 hammer_mem_first(hammer_cursor_t cursor, hammer_inode_t ip) 288 { 289 if (cursor->iprec) { 290 hammer_rel_mem_record(cursor->iprec); 291 cursor->iprec = NULL; 292 } 293 if (cursor->ip) { 294 hammer_rec_rb_tree_scan_info_done(&cursor->scan, 295 &cursor->ip->rec_tree); 296 } 297 cursor->ip = ip; 298 hammer_rec_rb_tree_scan_info_link(&cursor->scan, &ip->rec_tree); 299 300 cursor->scan.node = NULL; 301 hammer_rec_rb_tree_RB_SCAN(&ip->rec_tree, hammer_rec_scan_cmp, 302 hammer_rec_scan_callback, cursor); 303 304 /* 305 * Adjust scan.node and keep it linked into the RB-tree so we can 306 * hold the cursor through third party modifications of the RB-tree. 307 */ 308 if (cursor->iprec) { 309 cursor->scan.node = hammer_rec_rb_tree_RB_NEXT(cursor->iprec); 310 return(0); 311 } 312 return(ENOENT); 313 } 314 315 void 316 hammer_mem_done(hammer_cursor_t cursor) 317 { 318 if (cursor->ip) { 319 hammer_rec_rb_tree_scan_info_done(&cursor->scan, 320 &cursor->ip->rec_tree); 321 cursor->ip = NULL; 322 } 323 if (cursor->iprec) { 324 hammer_rel_mem_record(cursor->iprec); 325 cursor->iprec = NULL; 326 } 327 } 328 329 /************************************************************************ 330 * HAMMER IN-MEMORY RECORD FUNCTIONS * 331 ************************************************************************ 332 * 333 * These functions manipulate in-memory records. Such records typically 334 * exist prior to being committed to disk or indexed via the on-disk B-Tree. 335 */ 336 337 /* 338 * Add a directory entry (dip,ncp) which references inode (ip). 339 * 340 * Note that the low 32 bits of the namekey are set temporarily to create 341 * a unique in-memory record, and may be modified a second time when the 342 * record is synchronized to disk. In particular, the low 32 bits cannot be 343 * all 0's when synching to disk, which is not handled here. 344 */ 345 int 346 hammer_ip_add_directory(struct hammer_transaction *trans, 347 struct hammer_inode *dip, struct namecache *ncp, 348 struct hammer_inode *ip) 349 { 350 hammer_record_t record; 351 int error; 352 int bytes; 353 354 record = hammer_alloc_mem_record(dip); 355 356 bytes = ncp->nc_nlen; /* NOTE: terminating \0 is NOT included */ 357 if (++trans->hmp->namekey_iterator == 0) 358 ++trans->hmp->namekey_iterator; 359 360 record->rec.entry.base.base.obj_id = dip->obj_id; 361 record->rec.entry.base.base.key = 362 hammer_directory_namekey(ncp->nc_name, bytes); 363 record->rec.entry.base.base.key += trans->hmp->namekey_iterator; 364 record->rec.entry.base.base.create_tid = trans->tid; 365 record->rec.entry.base.base.rec_type = HAMMER_RECTYPE_DIRENTRY; 366 record->rec.entry.base.base.obj_type = ip->ino_rec.base.base.obj_type; 367 record->rec.entry.obj_id = ip->obj_id; 368 record->data = (void *)ncp->nc_name; 369 record->rec.entry.base.data_len = bytes; 370 ++ip->ino_rec.ino_nlinks; 371 hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY); 372 /* NOTE: copies record->data */ 373 error = hammer_mem_add(trans, record); 374 return(error); 375 } 376 377 /* 378 * Delete the directory entry and update the inode link count. The 379 * cursor must be seeked to the directory entry record being deleted. 380 * 381 * NOTE: HAMMER_CURSOR_DELETE may not have been set. XXX remove flag. 382 * 383 * This function can return EDEADLK requiring the caller to terminate 384 * the cursor and retry. 385 */ 386 int 387 hammer_ip_del_directory(struct hammer_transaction *trans, 388 hammer_cursor_t cursor, struct hammer_inode *dip, 389 struct hammer_inode *ip) 390 { 391 int error; 392 393 error = hammer_ip_delete_record(cursor, trans->tid); 394 395 /* 396 * One less link. The file may still be open in the OS even after 397 * all links have gone away so we only try to sync if the OS has 398 * no references and nlinks falls to 0. 399 * 400 * We have to terminate the cursor before syncing the inode to 401 * avoid deadlocking against ourselves. 402 */ 403 if (error == 0) { 404 --ip->ino_rec.ino_nlinks; 405 hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY); 406 if (ip->ino_rec.ino_nlinks == 0 && 407 (ip->vp == NULL || (ip->vp->v_flag & VINACTIVE))) { 408 hammer_done_cursor(cursor); 409 hammer_sync_inode(ip, MNT_NOWAIT, 1); 410 } 411 412 } 413 return(error); 414 } 415 416 /* 417 * Add a record to an inode. 418 * 419 * The caller must allocate the record with hammer_alloc_mem_record(ip) and 420 * initialize the following additional fields: 421 * 422 * record->rec.entry.base.base.key 423 * record->rec.entry.base.base.rec_type 424 * record->rec.entry.base.base.data_len 425 * record->data (a copy will be kmalloc'd if it cannot be embedded) 426 */ 427 int 428 hammer_ip_add_record(struct hammer_transaction *trans, hammer_record_t record) 429 { 430 hammer_inode_t ip = record->ip; 431 int error; 432 433 record->rec.base.base.obj_id = ip->obj_id; 434 record->rec.base.base.create_tid = trans->tid; 435 record->rec.base.base.obj_type = ip->ino_rec.base.base.obj_type; 436 437 hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY); 438 /* NOTE: copies record->data */ 439 error = hammer_mem_add(trans, record); 440 return(error); 441 } 442 443 /* 444 * Sync data from a buffer cache buffer (typically) to the filesystem. This 445 * is called via the strategy called from a cached data source. This code 446 * is responsible for actually writing a data record out to the disk. 447 * 448 * This can only occur non-historically (i.e. 'current' data only). 449 */ 450 int 451 hammer_ip_sync_data(hammer_transaction_t trans, hammer_inode_t ip, 452 int64_t offset, void *data, int bytes) 453 { 454 struct hammer_cursor cursor; 455 hammer_record_ondisk_t rec; 456 union hammer_btree_elm elm; 457 hammer_off_t rec_offset; 458 void *bdata; 459 int error; 460 461 KKASSERT((offset & HAMMER_BUFMASK) == 0); 462 KKASSERT((bytes & HAMMER_BUFMASK) == 0); 463 retry: 464 error = hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp); 465 if (error) 466 return(error); 467 cursor.key_beg.obj_id = ip->obj_id; 468 cursor.key_beg.key = offset + bytes; 469 cursor.key_beg.create_tid = trans->tid; 470 cursor.key_beg.delete_tid = 0; 471 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; 472 cursor.asof = trans->tid; 473 cursor.flags |= HAMMER_CURSOR_INSERT; 474 475 /* 476 * Issue a lookup to position the cursor. 477 */ 478 error = hammer_btree_lookup(&cursor); 479 if (error == 0) { 480 kprintf("hammer_ip_sync_data: duplicate data at (%lld,%d)\n", 481 offset, bytes); 482 hammer_print_btree_elm(&cursor.node->ondisk->elms[cursor.index], 483 HAMMER_BTREE_TYPE_LEAF, cursor.index); 484 error = EIO; 485 } 486 if (error != ENOENT) 487 goto done; 488 489 /* 490 * Allocate record and data space. HAMMER_RECTYPE_DATA records 491 * can cross buffer boundaries so we may have to split our bcopy. 492 */ 493 rec = hammer_alloc_record(ip->hmp, &rec_offset, HAMMER_RECTYPE_DATA, 494 &cursor.record_buffer, 495 bytes, &bdata, 496 &cursor.data_buffer, &error); 497 if (rec == NULL) 498 goto done; 499 if (hammer_debug_general & 0x1000) 500 kprintf("OOB RECOR2 DATA REC %016llx DATA %016llx LEN=%d\n", rec_offset, rec->base.data_off, rec->base.data_len); 501 502 /* 503 * Fill everything in and insert our B-Tree node. 504 * 505 * NOTE: hammer_alloc_record() has already marked the related 506 * buffers as modified. If we do it again we will generate 507 * unnecessary undo elements. 508 */ 509 rec->base.base.btype = HAMMER_BTREE_TYPE_RECORD; 510 rec->base.base.obj_id = ip->obj_id; 511 rec->base.base.key = offset + bytes; 512 rec->base.base.create_tid = trans->tid; 513 rec->base.base.delete_tid = 0; 514 rec->base.base.rec_type = HAMMER_RECTYPE_DATA; 515 rec->base.data_crc = crc32(data, bytes); 516 KKASSERT(rec->base.data_len == bytes); 517 518 bcopy(data, bdata, bytes); 519 520 elm.leaf.base = rec->base.base; 521 elm.leaf.rec_offset = rec_offset; 522 elm.leaf.data_offset = rec->base.data_off; 523 elm.leaf.data_len = bytes; 524 elm.leaf.data_crc = rec->base.data_crc; 525 526 /* 527 * Data records can wind up on-disk before the inode itself is 528 * on-disk. One must assume data records may be on-disk if either 529 * HAMMER_INODE_DONDISK or HAMMER_INODE_ONDISK is set 530 */ 531 ip->flags |= HAMMER_INODE_DONDISK; 532 533 error = hammer_btree_insert(&cursor, &elm); 534 if (error == 0) 535 goto done; 536 537 hammer_blockmap_free(ip->hmp, rec_offset, HAMMER_RECORD_SIZE); 538 done: 539 hammer_done_cursor(&cursor); 540 if (error == EDEADLK) 541 goto retry; 542 return(error); 543 } 544 545 /* 546 * Sync an in-memory record to the disk. This is typically called via fsync 547 * from a cached record source. This code is responsible for actually 548 * writing a record out to the disk. 549 */ 550 int 551 hammer_ip_sync_record(hammer_record_t record) 552 { 553 struct hammer_cursor cursor; 554 hammer_record_ondisk_t rec; 555 hammer_mount_t hmp; 556 union hammer_btree_elm elm; 557 hammer_off_t rec_offset; 558 void *bdata; 559 int error; 560 561 hmp = record->ip->hmp; 562 retry: 563 /* 564 * If the record has been deleted or is being synchronized, stop. 565 * Interlock with the syncing flag. 566 */ 567 if (record->flags & (HAMMER_RECF_DELETED | HAMMER_RECF_SYNCING)) 568 return(0); 569 record->flags |= HAMMER_RECF_SYNCING; 570 571 /* 572 * If someone other then us is referencing the record and not 573 * blocking waiting for us, we have to wait until they finish. 574 * 575 * It is possible the record got destroyed while we were blocked. 576 */ 577 if (record->lock.refs > record->blocked + 1) { 578 while (record->lock.refs > record->blocked + 1) { 579 record->flags |= HAMMER_RECF_WANTED; 580 tsleep(record, 0, "hmrrc1", 0); 581 } 582 if (record->flags & HAMMER_RECF_DELETED) 583 return(0); 584 } 585 586 /* 587 * Get a cursor 588 */ 589 error = hammer_init_cursor_hmp(&cursor, &record->ip->cache[0], hmp); 590 if (error) 591 return(error); 592 cursor.key_beg = record->rec.base.base; 593 cursor.flags |= HAMMER_CURSOR_INSERT; 594 595 /* 596 * Issue a lookup to position the cursor and locate the cluster. The 597 * target key should not exist. If we are creating a directory entry 598 * we may have to iterate the low 32 bits of the key to find an unused 599 * key. 600 */ 601 for (;;) { 602 error = hammer_btree_lookup(&cursor); 603 if (error) 604 break; 605 if (record->rec.base.base.rec_type != HAMMER_RECTYPE_DIRENTRY) { 606 kprintf("hammer_ip_sync_record: duplicate rec " 607 "at (%016llx)\n", record->rec.base.base.key); 608 Debugger("duplicate record1"); 609 error = EIO; 610 break; 611 } 612 if (++hmp->namekey_iterator == 0) 613 ++hmp->namekey_iterator; 614 record->rec.base.base.key &= ~(0xFFFFFFFFLL); 615 record->rec.base.base.key |= hmp->namekey_iterator; 616 cursor.key_beg.key = record->rec.base.base.key; 617 } 618 if (error != ENOENT) 619 goto done; 620 621 /* 622 * Mark the record as undergoing synchronization. Our cursor is 623 * holding a locked B-Tree node for the insertion which interlocks 624 * anyone trying to access this record. 625 * 626 * XXX There is still a race present related to iterations. An 627 * iteration may process the record, a sync may occur, and then 628 * later process the B-Tree element for the same record. 629 * 630 * We do not try to synchronize a deleted record. 631 */ 632 if (record->flags & HAMMER_RECF_DELETED) { 633 error = 0; 634 goto done; 635 } 636 637 /* 638 * Allocate the record and data. The result buffers will be 639 * marked as being modified and further calls to 640 * hammer_modify_buffer() will result in unneeded UNDO records. 641 * 642 * Support zero-fill records (data == NULL and data_len != 0) 643 */ 644 if (record->data == NULL) { 645 rec = hammer_alloc_record(hmp, &rec_offset, 646 record->rec.base.base.rec_type, 647 &cursor.record_buffer, 648 0, &bdata, 649 NULL, &error); 650 if (hammer_debug_general & 0x1000) 651 kprintf("NULL RECORD DATA\n"); 652 } else if (record->flags & HAMMER_RECF_INBAND) { 653 rec = hammer_alloc_record(hmp, &rec_offset, 654 record->rec.base.base.rec_type, 655 &cursor.record_buffer, 656 record->rec.base.data_len, &bdata, 657 NULL, &error); 658 if (hammer_debug_general & 0x1000) 659 kprintf("INBAND RECORD DATA %016llx DATA %016llx LEN=%d\n", rec_offset, rec->base.data_off, record->rec.base.data_len); 660 } else { 661 rec = hammer_alloc_record(hmp, &rec_offset, 662 record->rec.base.base.rec_type, 663 &cursor.record_buffer, 664 record->rec.base.data_len, &bdata, 665 &cursor.data_buffer, &error); 666 if (hammer_debug_general & 0x1000) 667 kprintf("OOB RECORD DATA REC %016llx DATA %016llx LEN=%d\n", rec_offset, rec->base.data_off, record->rec.base.data_len); 668 } 669 670 if (rec == NULL) 671 goto done; 672 673 /* 674 * Fill in the remaining fields and insert our B-Tree node. 675 */ 676 rec->base.base = record->rec.base.base; 677 bcopy(&record->rec.base + 1, &rec->base + 1, 678 HAMMER_RECORD_SIZE - sizeof(record->rec.base)); 679 680 /* 681 * Copy the data and deal with zero-fill support. 682 */ 683 if (record->data) { 684 rec->base.data_crc = crc32(record->data, rec->base.data_len); 685 bcopy(record->data, bdata, rec->base.data_len); 686 } else { 687 rec->base.data_len = record->rec.base.data_len; 688 } 689 690 elm.leaf.base = record->rec.base.base; 691 elm.leaf.rec_offset = rec_offset; 692 elm.leaf.data_offset = rec->base.data_off; 693 elm.leaf.data_len = rec->base.data_len; 694 elm.leaf.data_crc = rec->base.data_crc; 695 696 error = hammer_btree_insert(&cursor, &elm); 697 698 /* 699 * Clean up on success, or fall through on error. 700 */ 701 if (error == 0) { 702 record->flags |= HAMMER_RECF_DELETED; 703 goto done; 704 } 705 706 /* 707 * Try to unwind the fifo allocation 708 */ 709 hammer_blockmap_free(hmp, rec_offset, HAMMER_RECORD_SIZE); 710 done: 711 record->flags &= ~HAMMER_RECF_SYNCING; 712 hammer_done_cursor(&cursor); 713 if (error == EDEADLK) 714 goto retry; 715 return(error); 716 } 717 718 /* 719 * Add the record to the inode's rec_tree. The low 32 bits of a directory 720 * entry's key is used to deal with hash collisions in the upper 32 bits. 721 * A unique 64 bit key is generated in-memory and may be regenerated a 722 * second time when the directory record is flushed to the on-disk B-Tree. 723 * 724 * A referenced record is passed to this function. This function 725 * eats the reference. If an error occurs the record will be deleted. 726 * 727 * A copy of the temporary record->data pointer provided by the caller 728 * will be made. 729 */ 730 static 731 int 732 hammer_mem_add(struct hammer_transaction *trans, hammer_record_t record) 733 { 734 void *data; 735 int bytes; 736 int reclen; 737 738 /* 739 * Make a private copy of record->data 740 */ 741 if (record->data) { 742 /* 743 * Try to embed the data in extra space in the record 744 * union, otherwise allocate a copy. 745 */ 746 bytes = record->rec.base.data_len; 747 switch(record->rec.base.base.rec_type) { 748 case HAMMER_RECTYPE_DIRENTRY: 749 reclen = offsetof(struct hammer_entry_record, name[0]); 750 break; 751 case HAMMER_RECTYPE_DATA: 752 reclen = offsetof(struct hammer_data_record, data[0]); 753 break; 754 default: 755 reclen = sizeof(record->rec); 756 break; 757 } 758 if (reclen + bytes <= HAMMER_RECORD_SIZE) { 759 bcopy(record->data, (char *)&record->rec + reclen, 760 bytes); 761 record->data = (void *)((char *)&record->rec + reclen); 762 record->flags |= HAMMER_RECF_INBAND; 763 } else { 764 ++hammer_count_record_datas; 765 data = kmalloc(bytes, M_HAMMER, M_WAITOK); 766 record->flags |= HAMMER_RECF_ALLOCDATA; 767 bcopy(record->data, data, bytes); 768 record->data = data; 769 } 770 } 771 772 /* 773 * Insert into the RB tree, find an unused iterator if this is 774 * a directory entry. 775 */ 776 while (RB_INSERT(hammer_rec_rb_tree, &record->ip->rec_tree, record)) { 777 if (record->rec.base.base.rec_type != HAMMER_RECTYPE_DIRENTRY){ 778 record->flags |= HAMMER_RECF_DELETED; 779 hammer_rel_mem_record(record); 780 return (EEXIST); 781 } 782 if (++trans->hmp->namekey_iterator == 0) 783 ++trans->hmp->namekey_iterator; 784 record->rec.base.base.key &= ~(0xFFFFFFFFLL); 785 record->rec.base.base.key |= trans->hmp->namekey_iterator; 786 } 787 record->flags |= HAMMER_RECF_ONRBTREE; 788 hammer_modify_inode(trans, record->ip, HAMMER_INODE_XDIRTY); 789 hammer_rel_mem_record(record); 790 return(0); 791 } 792 793 /************************************************************************ 794 * HAMMER INODE MERGED-RECORD FUNCTIONS * 795 ************************************************************************ 796 * 797 * These functions augment the B-Tree scanning functions in hammer_btree.c 798 * by merging in-memory records with on-disk records. 799 */ 800 801 /* 802 * Locate a particular record either in-memory or on-disk. 803 * 804 * NOTE: This is basically a standalone routine, hammer_ip_next() may 805 * NOT be called to iterate results. 806 */ 807 int 808 hammer_ip_lookup(hammer_cursor_t cursor, struct hammer_inode *ip) 809 { 810 int error; 811 812 /* 813 * If the element is in-memory return it without searching the 814 * on-disk B-Tree 815 */ 816 error = hammer_mem_lookup(cursor, ip); 817 if (error == 0) { 818 cursor->record = &cursor->iprec->rec; 819 return(error); 820 } 821 if (error != ENOENT) 822 return(error); 823 824 /* 825 * If the inode has on-disk components search the on-disk B-Tree. 826 */ 827 if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DONDISK)) == 0) 828 return(error); 829 error = hammer_btree_lookup(cursor); 830 if (error == 0) 831 error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD); 832 return(error); 833 } 834 835 /* 836 * Locate the first record within the cursor's key_beg/key_end range, 837 * restricted to a particular inode. 0 is returned on success, ENOENT 838 * if no records matched the requested range, or some other error. 839 * 840 * When 0 is returned hammer_ip_next() may be used to iterate additional 841 * records within the requested range. 842 * 843 * This function can return EDEADLK, requiring the caller to terminate 844 * the cursor and try again. 845 */ 846 int 847 hammer_ip_first(hammer_cursor_t cursor, struct hammer_inode *ip) 848 { 849 int error; 850 851 /* 852 * Clean up fields and setup for merged scan 853 */ 854 cursor->flags &= ~HAMMER_CURSOR_DELBTREE; 855 cursor->flags |= HAMMER_CURSOR_ATEDISK | HAMMER_CURSOR_ATEMEM; 856 cursor->flags |= HAMMER_CURSOR_DISKEOF | HAMMER_CURSOR_MEMEOF; 857 if (cursor->iprec) { 858 hammer_rel_mem_record(cursor->iprec); 859 cursor->iprec = NULL; 860 } 861 862 /* 863 * Search the on-disk B-Tree. hammer_btree_lookup() only does an 864 * exact lookup so if we get ENOENT we have to call the iterate 865 * function to validate the first record after the begin key. 866 * 867 * The ATEDISK flag is used by hammer_btree_iterate to determine 868 * whether it must index forwards or not. It is also used here 869 * to select the next record from in-memory or on-disk. 870 * 871 * EDEADLK can only occur if the lookup hit an empty internal 872 * element and couldn't delete it. Since this could only occur 873 * in-range, we can just iterate from the failure point. 874 */ 875 if (ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DONDISK)) { 876 error = hammer_btree_lookup(cursor); 877 if (error == ENOENT || error == EDEADLK) { 878 cursor->flags &= ~HAMMER_CURSOR_ATEDISK; 879 error = hammer_btree_iterate(cursor); 880 } 881 if (error && error != ENOENT) 882 return(error); 883 if (error == 0) { 884 cursor->flags &= ~HAMMER_CURSOR_DISKEOF; 885 cursor->flags &= ~HAMMER_CURSOR_ATEDISK; 886 } else { 887 cursor->flags |= HAMMER_CURSOR_ATEDISK; 888 } 889 } 890 891 /* 892 * Search the in-memory record list (Red-Black tree). Unlike the 893 * B-Tree search, mem_first checks for records in the range. 894 */ 895 error = hammer_mem_first(cursor, ip); 896 if (error && error != ENOENT) 897 return(error); 898 if (error == 0) { 899 cursor->flags &= ~HAMMER_CURSOR_MEMEOF; 900 cursor->flags &= ~HAMMER_CURSOR_ATEMEM; 901 } 902 903 /* 904 * This will return the first matching record. 905 */ 906 return(hammer_ip_next(cursor)); 907 } 908 909 /* 910 * Retrieve the next record in a merged iteration within the bounds of the 911 * cursor. This call may be made multiple times after the cursor has been 912 * initially searched with hammer_ip_first(). 913 * 914 * 0 is returned on success, ENOENT if no further records match the 915 * requested range, or some other error code is returned. 916 */ 917 int 918 hammer_ip_next(hammer_cursor_t cursor) 919 { 920 hammer_btree_elm_t elm; 921 hammer_record_t rec; 922 int error; 923 int r; 924 925 /* 926 * Load the current on-disk and in-memory record. If we ate any 927 * records we have to get the next one. 928 * 929 * If we deleted the last on-disk record we had scanned ATEDISK will 930 * be clear and DELBTREE will be set, forcing a call to iterate. The 931 * fact that ATEDISK is clear causes iterate to re-test the 'current' 932 * element. If ATEDISK is set, iterate will skip the 'current' 933 * element. 934 * 935 * Get the next on-disk record 936 */ 937 if (cursor->flags & (HAMMER_CURSOR_ATEDISK|HAMMER_CURSOR_DELBTREE)) { 938 if ((cursor->flags & HAMMER_CURSOR_DISKEOF) == 0) { 939 error = hammer_btree_iterate(cursor); 940 cursor->flags &= ~HAMMER_CURSOR_DELBTREE; 941 if (error == 0) 942 cursor->flags &= ~HAMMER_CURSOR_ATEDISK; 943 else 944 cursor->flags |= HAMMER_CURSOR_DISKEOF | 945 HAMMER_CURSOR_ATEDISK; 946 } 947 } 948 949 /* 950 * Get the next in-memory record. The record can be ripped out 951 * of the RB tree so we maintain a scan_info structure to track 952 * the next node. 953 * 954 * hammer_rec_scan_cmp: Is the record still in our general range, 955 * (non-inclusive of snapshot exclusions)? 956 * hammer_rec_scan_callback: Is the record in our snapshot? 957 */ 958 if (cursor->flags & HAMMER_CURSOR_ATEMEM) { 959 if ((cursor->flags & HAMMER_CURSOR_MEMEOF) == 0) { 960 if (cursor->iprec) { 961 hammer_rel_mem_record(cursor->iprec); 962 cursor->iprec = NULL; 963 } 964 rec = cursor->scan.node; /* next node */ 965 while (rec) { 966 if (hammer_rec_scan_cmp(rec, cursor) != 0) 967 break; 968 if (hammer_rec_scan_callback(rec, cursor) != 0) 969 break; 970 rec = hammer_rec_rb_tree_RB_NEXT(rec); 971 } 972 if (cursor->iprec) { 973 KKASSERT(cursor->iprec == rec); 974 cursor->flags &= ~HAMMER_CURSOR_ATEMEM; 975 cursor->scan.node = 976 hammer_rec_rb_tree_RB_NEXT(rec); 977 } else { 978 cursor->flags |= HAMMER_CURSOR_MEMEOF; 979 } 980 } 981 } 982 983 /* 984 * Extract either the disk or memory record depending on their 985 * relative position. 986 */ 987 error = 0; 988 switch(cursor->flags & (HAMMER_CURSOR_ATEDISK | HAMMER_CURSOR_ATEMEM)) { 989 case 0: 990 /* 991 * Both entries valid 992 */ 993 elm = &cursor->node->ondisk->elms[cursor->index]; 994 r = hammer_btree_cmp(&elm->base, &cursor->iprec->rec.base.base); 995 if (r < 0) { 996 error = hammer_btree_extract(cursor, 997 HAMMER_CURSOR_GET_RECORD); 998 cursor->flags |= HAMMER_CURSOR_ATEDISK; 999 break; 1000 } 1001 /* fall through to the memory entry */ 1002 case HAMMER_CURSOR_ATEDISK: 1003 /* 1004 * Only the memory entry is valid 1005 */ 1006 cursor->record = &cursor->iprec->rec; 1007 cursor->flags |= HAMMER_CURSOR_ATEMEM; 1008 break; 1009 case HAMMER_CURSOR_ATEMEM: 1010 /* 1011 * Only the disk entry is valid 1012 */ 1013 error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD); 1014 cursor->flags |= HAMMER_CURSOR_ATEDISK; 1015 break; 1016 default: 1017 /* 1018 * Neither entry is valid 1019 * 1020 * XXX error not set properly 1021 */ 1022 cursor->record = NULL; 1023 error = ENOENT; 1024 break; 1025 } 1026 return(error); 1027 } 1028 1029 /* 1030 * Resolve the cursor->data pointer for the current cursor position in 1031 * a merged iteration. 1032 */ 1033 int 1034 hammer_ip_resolve_data(hammer_cursor_t cursor) 1035 { 1036 int error; 1037 1038 if (cursor->iprec && cursor->record == &cursor->iprec->rec) { 1039 cursor->data = cursor->iprec->data; 1040 error = 0; 1041 } else { 1042 error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA); 1043 } 1044 return(error); 1045 } 1046 1047 int 1048 hammer_ip_resolve_record_and_data(hammer_cursor_t cursor) 1049 { 1050 int error; 1051 1052 if (cursor->iprec && cursor->record == &cursor->iprec->rec) { 1053 cursor->data = cursor->iprec->data; 1054 error = 0; 1055 } else { 1056 error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA | 1057 HAMMER_CURSOR_GET_RECORD); 1058 } 1059 return(error); 1060 } 1061 1062 /* 1063 * Delete all records within the specified range for inode ip. 1064 * 1065 * NOTE: An unaligned range will cause new records to be added to cover 1066 * the edge cases. (XXX not implemented yet). 1067 * 1068 * NOTE: ran_end is inclusive (e.g. 0,1023 instead of 0,1024). 1069 * 1070 * NOTE: Record keys for regular file data have to be special-cased since 1071 * they indicate the end of the range (key = base + bytes). 1072 */ 1073 int 1074 hammer_ip_delete_range(hammer_transaction_t trans, hammer_inode_t ip, 1075 int64_t ran_beg, int64_t ran_end) 1076 { 1077 struct hammer_cursor cursor; 1078 hammer_record_ondisk_t rec; 1079 hammer_base_elm_t base; 1080 int error; 1081 int64_t off; 1082 1083 retry: 1084 hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp); 1085 1086 cursor.key_beg.obj_id = ip->obj_id; 1087 cursor.key_beg.create_tid = 0; 1088 cursor.key_beg.delete_tid = 0; 1089 cursor.key_beg.obj_type = 0; 1090 cursor.asof = ip->obj_asof; 1091 cursor.flags |= HAMMER_CURSOR_ASOF; 1092 1093 cursor.key_end = cursor.key_beg; 1094 if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) { 1095 cursor.key_beg.key = ran_beg; 1096 cursor.key_beg.rec_type = HAMMER_RECTYPE_DB; 1097 cursor.key_end.rec_type = HAMMER_RECTYPE_DB; 1098 cursor.key_end.key = ran_end; 1099 } else { 1100 /* 1101 * The key in the B-Tree is (base+bytes), so the first possible 1102 * matching key is ran_beg + 1. 1103 */ 1104 int64_t tmp64; 1105 1106 cursor.key_beg.key = ran_beg + 1; 1107 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; 1108 cursor.key_end.rec_type = HAMMER_RECTYPE_DATA; 1109 1110 tmp64 = ran_end + MAXPHYS + 1; /* work around GCC-4 bug */ 1111 if (tmp64 < ran_end) 1112 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; 1113 else 1114 cursor.key_end.key = ran_end + MAXPHYS + 1; 1115 } 1116 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 1117 1118 error = hammer_ip_first(&cursor, ip); 1119 1120 /* 1121 * Iterate through matching records and mark them as deleted. 1122 */ 1123 while (error == 0) { 1124 rec = cursor.record; 1125 base = &rec->base.base; 1126 1127 KKASSERT(base->delete_tid == 0); 1128 1129 /* 1130 * There may be overlap cases for regular file data. Also 1131 * remember the key for a regular file record is the offset 1132 * of the last byte of the record (base + len - 1), NOT the 1133 * base offset. 1134 */ 1135 #if 0 1136 kprintf("delete_range rec_type %02x\n", base->rec_type); 1137 #endif 1138 if (base->rec_type == HAMMER_RECTYPE_DATA) { 1139 #if 0 1140 kprintf("delete_range loop key %016llx\n", 1141 base->key - rec->base.data_len); 1142 #endif 1143 off = base->key - rec->base.data_len; 1144 /* 1145 * Check the left edge case. We currently do not 1146 * split existing records. 1147 */ 1148 if (off < ran_beg) { 1149 panic("hammer left edge case %016llx %d\n", 1150 base->key, rec->base.data_len); 1151 } 1152 1153 /* 1154 * Check the right edge case. Note that the 1155 * record can be completely out of bounds, which 1156 * terminates the search. 1157 * 1158 * base->key is exclusive of the right edge while 1159 * ran_end is inclusive of the right edge. The 1160 * (key - data_len) left boundary is inclusive. 1161 * 1162 * XXX theory-check this test at some point, are 1163 * we missing a + 1 somewhere? Note that ran_end 1164 * could overflow. 1165 */ 1166 if (base->key - 1 > ran_end) { 1167 if (base->key - rec->base.data_len > ran_end) 1168 break; 1169 panic("hammer right edge case\n"); 1170 } 1171 } 1172 1173 /* 1174 * Mark the record and B-Tree entry as deleted. This will 1175 * also physically delete the B-Tree entry, record, and 1176 * data if the retention policy dictates. The function 1177 * will set HAMMER_CURSOR_DELBTREE which hammer_ip_next() 1178 * uses to perform a fixup. 1179 */ 1180 error = hammer_ip_delete_record(&cursor, trans->tid); 1181 if (error) 1182 break; 1183 error = hammer_ip_next(&cursor); 1184 } 1185 hammer_done_cursor(&cursor); 1186 if (error == EDEADLK) 1187 goto retry; 1188 if (error == ENOENT) 1189 error = 0; 1190 return(error); 1191 } 1192 1193 /* 1194 * Delete all records associated with an inode except the inode record 1195 * itself. 1196 */ 1197 int 1198 hammer_ip_delete_range_all(hammer_transaction_t trans, hammer_inode_t ip) 1199 { 1200 struct hammer_cursor cursor; 1201 hammer_record_ondisk_t rec; 1202 hammer_base_elm_t base; 1203 int error; 1204 1205 retry: 1206 hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp); 1207 1208 cursor.key_beg.obj_id = ip->obj_id; 1209 cursor.key_beg.create_tid = 0; 1210 cursor.key_beg.delete_tid = 0; 1211 cursor.key_beg.obj_type = 0; 1212 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE + 1; 1213 cursor.key_beg.key = HAMMER_MIN_KEY; 1214 1215 cursor.key_end = cursor.key_beg; 1216 cursor.key_end.rec_type = 0xFFFF; 1217 cursor.key_end.key = HAMMER_MAX_KEY; 1218 1219 cursor.asof = ip->obj_asof; 1220 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; 1221 1222 error = hammer_ip_first(&cursor, ip); 1223 1224 /* 1225 * Iterate through matching records and mark them as deleted. 1226 */ 1227 while (error == 0) { 1228 rec = cursor.record; 1229 base = &rec->base.base; 1230 1231 KKASSERT(base->delete_tid == 0); 1232 1233 /* 1234 * Mark the record and B-Tree entry as deleted. This will 1235 * also physically delete the B-Tree entry, record, and 1236 * data if the retention policy dictates. The function 1237 * will set HAMMER_CURSOR_DELBTREE which hammer_ip_next() 1238 * uses to perform a fixup. 1239 */ 1240 error = hammer_ip_delete_record(&cursor, trans->tid); 1241 if (error) 1242 break; 1243 error = hammer_ip_next(&cursor); 1244 } 1245 hammer_done_cursor(&cursor); 1246 if (error == EDEADLK) 1247 goto retry; 1248 if (error == ENOENT) 1249 error = 0; 1250 return(error); 1251 } 1252 1253 /* 1254 * Delete the record at the current cursor. On success the cursor will 1255 * be positioned appropriately for an iteration but may no longer be at 1256 * a leaf node. 1257 * 1258 * NOTE: This can return EDEADLK, requiring the caller to terminate the 1259 * cursor and retry. 1260 */ 1261 int 1262 hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid) 1263 { 1264 hammer_btree_elm_t elm; 1265 hammer_mount_t hmp; 1266 int error; 1267 int dodelete; 1268 1269 /* 1270 * In-memory (unsynchronized) records can simply be freed. 1271 */ 1272 if (cursor->record == &cursor->iprec->rec) { 1273 cursor->iprec->flags |= HAMMER_RECF_DELETED; 1274 return(0); 1275 } 1276 1277 /* 1278 * On-disk records are marked as deleted by updating their delete_tid. 1279 * This does not effect their position in the B-Tree (which is based 1280 * on their create_tid). 1281 */ 1282 error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD); 1283 elm = NULL; 1284 hmp = cursor->node->hmp; 1285 1286 dodelete = 0; 1287 if (error == 0) { 1288 error = hammer_cursor_upgrade(cursor); 1289 if (error == 0) { 1290 hammer_modify_node(cursor->node); 1291 elm = &cursor->node->ondisk->elms[cursor->index]; 1292 elm->leaf.base.delete_tid = tid; 1293 hammer_modify_buffer(cursor->record_buffer, &cursor->record->base.base.delete_tid, sizeof(hammer_tid_t)); 1294 cursor->record->base.base.delete_tid = tid; 1295 } 1296 } 1297 1298 /* 1299 * If we were mounted with the nohistory option, we physically 1300 * delete the record. 1301 */ 1302 if (hmp->hflags & HMNT_NOHISTORY) 1303 dodelete = 1; 1304 1305 if (error == 0 && dodelete) { 1306 error = hammer_delete_at_cursor(cursor, NULL); 1307 if (error) { 1308 panic("hammer_ip_delete_record: unable to physically delete the record!\n"); 1309 error = 0; 1310 } 1311 } 1312 return(error); 1313 } 1314 1315 int 1316 hammer_delete_at_cursor(hammer_cursor_t cursor, int64_t *stat_bytes) 1317 { 1318 hammer_btree_elm_t elm; 1319 hammer_off_t rec_offset; 1320 hammer_off_t data_offset; 1321 int32_t data_len; 1322 u_int8_t rec_type; 1323 int error; 1324 1325 elm = &cursor->node->ondisk->elms[cursor->index]; 1326 KKASSERT(elm->base.btype == HAMMER_BTREE_TYPE_RECORD); 1327 1328 rec_offset = elm->leaf.rec_offset; 1329 data_offset = elm->leaf.data_offset; 1330 data_len = elm->leaf.data_len; 1331 rec_type = elm->leaf.base.rec_type; 1332 1333 error = hammer_btree_delete(cursor); 1334 if (error == 0) { 1335 /* 1336 * This forces a fixup for the iteration because 1337 * the cursor is now either sitting at the 'next' 1338 * element or sitting at the end of a leaf. 1339 */ 1340 if ((cursor->flags & HAMMER_CURSOR_DISKEOF) == 0) { 1341 cursor->flags |= HAMMER_CURSOR_DELBTREE; 1342 cursor->flags &= ~HAMMER_CURSOR_ATEDISK; 1343 } 1344 } 1345 if (error == 0) { 1346 hammer_blockmap_free(cursor->node->hmp, rec_offset, 1347 sizeof(union hammer_record_ondisk)); 1348 } 1349 if (error == 0 && 1350 (data_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_LARGE_DATA) { 1351 hammer_blockmap_free(cursor->node->hmp, data_offset, data_len); 1352 } 1353 #if 0 1354 kprintf("hammer_delete_at_cursor: %d:%d:%08x %08x/%d " 1355 "(%d remain in cluster)\n", 1356 cluster->volume->vol_no, cluster->clu_no, 1357 rec_offset, data_offset, data_len, 1358 cluster->ondisk->stat_records); 1359 #endif 1360 return (error); 1361 } 1362 1363 /* 1364 * Determine whether a directory is empty or not. Returns 0 if the directory 1365 * is empty, ENOTEMPTY if it isn't, plus other possible errors. 1366 */ 1367 int 1368 hammer_ip_check_directory_empty(hammer_transaction_t trans, hammer_inode_t ip) 1369 { 1370 struct hammer_cursor cursor; 1371 int error; 1372 1373 hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp); 1374 1375 cursor.key_beg.obj_id = ip->obj_id; 1376 cursor.key_beg.create_tid = 0; 1377 cursor.key_beg.delete_tid = 0; 1378 cursor.key_beg.obj_type = 0; 1379 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE + 1; 1380 cursor.key_beg.key = HAMMER_MIN_KEY; 1381 1382 cursor.key_end = cursor.key_beg; 1383 cursor.key_end.rec_type = 0xFFFF; 1384 cursor.key_end.key = HAMMER_MAX_KEY; 1385 1386 cursor.asof = ip->obj_asof; 1387 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; 1388 1389 error = hammer_ip_first(&cursor, ip); 1390 if (error == ENOENT) 1391 error = 0; 1392 else if (error == 0) 1393 error = ENOTEMPTY; 1394 hammer_done_cursor(&cursor); 1395 return(error); 1396 } 1397 1398