/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.11 2007/12/14 08:05:39 dillon Exp $
 */

#include "hammer.h"
#include <sys/buf.h>
#include <sys/buf2.h>

int
hammer_vop_inactive(struct vop_inactive_args *ap)
{
	struct hammer_inode *ip = VTOI(ap->a_vp);

	/*
	 * Degenerate case
	 */
	if (ip == NULL) {
		vrecycle(ap->a_vp);
		return(0);
	}

	/*
	 * If the inode no longer has any links, sync it so its
	 * in-memory resources can be recovered immediately.
	 */
	if (ip->ino_rec.ino_nlinks == 0 &&
	    (ip->hmp->mp->mnt_flag & MNT_RDONLY) == 0) {
		hammer_sync_inode(ip, MNT_NOWAIT, 1);
	}
	return(0);
}

int
hammer_vop_reclaim(struct vop_reclaim_args *ap)
{
	struct hammer_inode *ip;
	struct vnode *vp;

	vp = ap->a_vp;

	/*
	 * Release the vnode association and ask that the inode be flushed.
	 */
	if ((ip = vp->v_data) != NULL) {
		vp->v_data = NULL;
		ip->vp = NULL;
		hammer_rel_inode(ip, 1);
	}
	return(0);
}

/*
 * Obtain a vnode for the specified inode number.  An exclusively locked
 * vnode is returned.
 */
int
hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
{
	struct hammer_mount *hmp = (void *)mp->mnt_data;
	struct hammer_inode *ip;
	int error;

	/*
	 * Get/allocate the hammer_inode structure.  The structure must be
	 * unlocked while we manipulate the related vnode to avoid a
	 * deadlock.
	 */
	ip = hammer_get_inode(hmp, ino, hmp->asof, &error);
	if (ip == NULL) {
		*vpp = NULL;
		return(error);
	}
	error = hammer_get_vnode(ip, LK_EXCLUSIVE, vpp);
	hammer_rel_inode(ip, 0);
	return (error);
}
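/*
 * A minimal sketch of the lookup protocol above (illustration only, not
 * part of the original code).  hammer_get_inode() returns a referenced
 * but unlocked inode, hammer_get_vnode() attaches and exclusively locks
 * the vnode, and hammer_rel_inode() then drops the temporary reference;
 * the ip->vp / v_data association keeps the inode alive from that point:
 *
 *	ip = hammer_get_inode(hmp, ino, hmp->asof, &error);
 *	if (ip == NULL)
 *		return (error);
 *	error = hammer_get_vnode(ip, LK_EXCLUSIVE, vpp);
 *	hammer_rel_inode(ip, 0);
 *
 * Keeping the inode unlocked across hammer_get_vnode() is what avoids
 * the inode-lock/vnode-lock deadlock mentioned above.
 */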
/*
 * Return a locked vnode for the specified inode.  The inode must be
 * referenced but NOT LOCKED on entry and will remain referenced on
 * return.
 */
int
hammer_get_vnode(struct hammer_inode *ip, int lktype, struct vnode **vpp)
{
	struct vnode *vp;
	int error = 0;

	for (;;) {
		if ((vp = ip->vp) == NULL) {
			error = getnewvnode(VT_HAMMER, ip->hmp->mp, vpp, 0, 0);
			if (error)
				break;
			hammer_lock_ex(&ip->lock);
			if (ip->vp != NULL) {
				hammer_unlock(&ip->lock);
				vp = *vpp;	/* lost the race, dispose of the new vnode */
				vp->v_type = VBAD;
				vx_put(vp);
				continue;
			}
			hammer_ref(&ip->lock);
			vp = *vpp;
			ip->vp = vp;
			vp->v_type = hammer_get_vnode_type(
					ip->ino_rec.base.base.obj_type);
			vp->v_data = (void *)ip;
			/* vnode locked by getnewvnode() */
			/* make related vnode dirty if inode dirty? */
			hammer_unlock(&ip->lock);
			if (vp->v_type == VREG)
				vinitvmio(vp, ip->ino_rec.ino_size);
			break;
		}

		/*
		 * Loop if the vget fails (aka races), or if the vp
		 * no longer matches ip->vp.
		 */
		if (vget(vp, LK_EXCLUSIVE) == 0) {
			if (vp == ip->vp)
				break;
			vput(vp);
		}
	}
	*vpp = vp;
	return(error);
}

/*
 * Acquire a HAMMER inode.  The returned inode is not locked.  These
 * functions do not attach or detach the related vnode (use
 * hammer_get_vnode() for that).
 */
struct hammer_inode *
hammer_get_inode(struct hammer_mount *hmp, u_int64_t obj_id, hammer_tid_t asof,
		 int *errorp)
{
	struct hammer_inode_info iinfo;
	struct hammer_cursor cursor;
	struct hammer_inode *ip;

	/*
	 * Determine if we already have an inode cached.  If we do then
	 * we are golden.
	 */
	iinfo.obj_id = obj_id;
	iinfo.obj_asof = asof;
loop:
	ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
	if (ip) {
		hammer_ref(&ip->lock);
		*errorp = 0;
		return(ip);
	}

	ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
	ip->obj_id = obj_id;
	ip->obj_asof = iinfo.obj_asof;
	ip->hmp = hmp;
	RB_INIT(&ip->rec_tree);

	/*
	 * Locate the on-disk inode.  We do not have it cached, so search
	 * the HAMMER on-disk B-Tree for it.
	 */
	hammer_init_cursor_hmp(&cursor, hmp);
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.key = 0;
	cursor.key_beg.create_tid = iinfo.obj_asof;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
	cursor.key_beg.obj_type = 0;
	cursor.flags = HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_GET_DATA;

	*errorp = hammer_btree_lookup(&cursor);

	/*
	 * On success the B-Tree lookup will hold the appropriate
	 * buffer cache buffers and provide a pointer to the requested
	 * information.  Copy the information to the in-memory inode.
	 */
	if (*errorp == 0) {
		ip->ino_rec = cursor.record->inode;
		ip->ino_data = cursor.data->inode;
	}
	hammer_cache_node(cursor.node, &ip->cache);
	hammer_done_cursor(&cursor);

	/*
	 * On success insert the newly loaded inode into the in-memory
	 * RB tree.  It is possible to race another lookup's insertion
	 * of the same inode, so deal with that condition too.
	 */
	if (*errorp == 0) {
		hammer_ref(&ip->lock);
		if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
			hammer_uncache_node(&ip->cache);
			hammer_unref(&ip->lock);
			kfree(ip, M_HAMMER);
			goto loop;
		}
		ip->flags |= HAMMER_INODE_ONDISK;
	} else {
		kfree(ip, M_HAMMER);
		ip = NULL;
	}
	return (ip);
}
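/*
 * Sketch of the B-Tree key used by the inode lookup above (illustration
 * only).  Inodes live in the B-Tree under their object id with key 0
 * and rec_type HAMMER_RECTYPE_INODE; create_tid carries the as-of
 * transaction id, which is how a historical (as-of) view selects the
 * version of the inode record that existed at that TID:
 *
 *	cursor.key_beg.obj_id     = obj_id;	object being looked up
 *	cursor.key_beg.key        = 0;		inodes always use key 0
 *	cursor.key_beg.create_tid = asof;	snapshot TID to search as-of
 *	cursor.key_beg.rec_type   = HAMMER_RECTYPE_INODE;
 */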
/*
 * Create a new filesystem object, returning the inode in *ipp.  The
 * returned inode will be referenced but not locked.
 *
 * The inode is created in-memory and will be delay-synchronized to
 * disk.
 */
int
hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
		    struct ucred *cred, hammer_inode_t dip,
		    struct hammer_inode **ipp)
{
	hammer_mount_t hmp;
	hammer_inode_t ip;
	uid_t xuid;

	hmp = trans->hmp;
	ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
	ip->obj_id = hammer_alloc_tid(trans);
	KKASSERT(ip->obj_id != 0);
	ip->obj_asof = hmp->asof;
	ip->hmp = hmp;
	ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_RDIRTY |
		    HAMMER_INODE_ITIMES;
	ip->last_tid = trans->tid;

	RB_INIT(&ip->rec_tree);

	ip->ino_rec.ino_atime = trans->tid;
	ip->ino_rec.ino_mtime = trans->tid;
	ip->ino_rec.ino_size = 0;
	ip->ino_rec.ino_nlinks = 0;
	/* XXX */
	kprintf("rootvol %p ondisk %p\n", hmp->rootvol, hmp->rootvol->ondisk);
	ip->ino_rec.base.rec_id = hammer_alloc_recid(trans);
	KKASSERT(ip->ino_rec.base.rec_id != 0);
	ip->ino_rec.base.base.obj_id = ip->obj_id;
	ip->ino_rec.base.base.key = 0;
	ip->ino_rec.base.base.create_tid = trans->tid;
	ip->ino_rec.base.base.delete_tid = 0;
	ip->ino_rec.base.base.rec_type = HAMMER_RECTYPE_INODE;
	ip->ino_rec.base.base.obj_type = hammer_get_obj_type(vap->va_type);

	ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
	ip->ino_data.mode = vap->va_mode;
	ip->ino_data.ctime = trans->tid;
	ip->ino_data.parent_obj_id = (dip) ? dip->ino_rec.base.base.obj_id : 0;

	/*
	 * Calculate default uid/gid and overwrite with information from
	 * the vap.  Note that dip is dereferenced unconditionally here,
	 * so unlike the parent_obj_id assignment above it must not be
	 * NULL.
	 */
	xuid = hammer_to_unix_xid(&dip->ino_data.uid);
	ip->ino_data.gid = dip->ino_data.gid;
	xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred,
				     &vap->va_mode);
	ip->ino_data.mode = vap->va_mode;

	if (vap->va_vaflags & VA_UID_UUID_VALID)
		ip->ino_data.uid = vap->va_uid_uuid;
	else if (vap->va_uid != (uid_t)VNOVAL)
		hammer_guid_to_uuid(&ip->ino_data.uid, xuid);
	if (vap->va_vaflags & VA_GID_UUID_VALID)
		ip->ino_data.gid = vap->va_gid_uuid;
	else if (vap->va_gid != (gid_t)VNOVAL)
		hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid);

	hammer_ref(&ip->lock);
	if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
		hammer_unref(&ip->lock);
		panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id);
	}
	*ipp = ip;
	return(0);
}
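/*
 * A sketch of HAMMER's historical update model as implemented by
 * hammer_update_inode() below (illustration only).  An inode update
 * never modifies the old B-Tree record in place; instead the old
 * record is marked deleted as of the inode's last_tid and a fresh
 * record is written with a matching create_tid, preserving the old
 * version for as-of lookups:
 *
 *	old record:  create_tid = T1, delete_tid = 0   (live)
 *	after sync:  create_tid = T1, delete_tid = T2  (historical)
 *	new record:  create_tid = T2, delete_tid = 0   (live)
 *
 * Whether the old record is physically retained depends on the
 * retention policy.
 */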
int
hammer_update_inode(hammer_inode_t ip)
{
	struct hammer_cursor cursor;
	hammer_record_t record;
	int error;

	/*
	 * Locate the record on-disk and mark it as deleted.  Both the
	 * B-Tree node and the record must be marked deleted.  The record
	 * may or may not be physically deleted, depending on the
	 * retention policy.
	 *
	 * If the inode has already been deleted on-disk we have nothing
	 * to do.
	 *
	 * XXX Update the inode record and data in-place if the retention
	 * policy allows it.
	 */
	error = 0;

	if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
	    HAMMER_INODE_ONDISK) {
		hammer_init_cursor_ip(&cursor, ip);
		cursor.key_beg.obj_id = ip->obj_id;
		cursor.key_beg.key = 0;
		cursor.key_beg.create_tid = ip->obj_asof;
		cursor.key_beg.delete_tid = 0;
		cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
		cursor.key_beg.obj_type = 0;
		cursor.flags = HAMMER_CURSOR_GET_RECORD;

		error = hammer_btree_lookup(&cursor);

		if (error == 0) {
			error = hammer_ip_delete_record(&cursor, ip->last_tid);
			if (error == 0)
				ip->flags |= HAMMER_INODE_DELONDISK;
		}
		hammer_cache_node(cursor.node, &ip->cache);
		hammer_done_cursor(&cursor);
	}

	/*
	 * Write out a new record if the in-memory inode is not marked
	 * as having been deleted.  Update our inode statistics if this
	 * is the first application of the inode on-disk.
	 *
	 * If the inode has been deleted permanently, HAMMER_INODE_DELONDISK
	 * will remain set and prevent further updates.
	 */
	if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) {
		record = hammer_alloc_mem_record(ip);
		record->rec.inode = ip->ino_rec;
		record->rec.inode.base.base.create_tid = ip->last_tid;
		record->rec.inode.base.data_len = sizeof(ip->ino_data);
		record->data = (void *)&ip->ino_data;
		error = hammer_ip_sync_record(record);
		hammer_free_mem_record(record);
		ip->flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
			       HAMMER_INODE_DELONDISK);
		if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
			++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
			hammer_modify_volume(ip->hmp->rootvol);
			ip->flags |= HAMMER_INODE_ONDISK;
		}
	}
	ip->flags &= ~HAMMER_INODE_TID;
	return(error);
}

/*
 * Release a reference on an inode and unload it if told to flush.
 */
void
hammer_rel_inode(struct hammer_inode *ip, int flush)
{
	hammer_unref(&ip->lock);
	if (flush || ip->ino_rec.ino_nlinks == 0)
		ip->flags |= HAMMER_INODE_FLUSH;
	if (ip->lock.refs == 0 && (ip->flags & HAMMER_INODE_FLUSH))
		hammer_unload_inode(ip, NULL);
}

/*
 * Unload and destroy the specified inode.
 *
 * (called via RB_SCAN)
 */
int
hammer_unload_inode(struct hammer_inode *ip, void *data __unused)
{
	int error;

	KASSERT(ip->lock.refs == 0,
		("hammer_unload_inode: %d refs\n", ip->lock.refs));
	KKASSERT(ip->vp == NULL);
	hammer_ref(&ip->lock);

	error = hammer_sync_inode(ip, MNT_WAIT, 1);
	if (error)
		kprintf("hammer_sync_inode failed error %d\n", error);

	RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip);

	hammer_uncache_node(&ip->cache);
	kfree(ip, M_HAMMER);
	return(0);
}

/*
 * A transaction has modified an inode, requiring a new record and
 * possibly also data to be written out.
 *
 * last_tid is the TID to use for the disk sync.
 */
void
hammer_modify_inode(struct hammer_transaction *trans,
		    struct hammer_inode *ip, int flags)
{
	if ((flags & HAMMER_INODE_TID) && (ip->flags & HAMMER_INODE_TID) == 0) {
		ip->last_tid = trans->tid;
	}
	ip->flags |= flags;
}
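/*
 * Sketch of the last_tid latch implemented above (illustration only).
 * HAMMER_INODE_TID acts as a one-shot latch: the first modification
 * after the flag is cleared captures that transaction's TID in
 * last_tid, and subsequent modifications leave it alone until
 * hammer_update_inode() clears the flag again after the sync:
 *
 *	modify (TID=T1):  latch set,  last_tid = T1
 *	modify (TID=T2):  latch held, last_tid stays T1
 *	update_inode():   record synced at T1, latch cleared
 *	modify (TID=T3):  latch set,  last_tid = T3
 */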
/*
 * Sync any dirty buffers and records associated with an inode.  The
 * inode's last_tid field is used as the transaction id for the sync,
 * overriding any intermediate TIDs that were used for records.  Note
 * that the dirty buffer cache buffers do not have any knowledge of
 * the transaction id they were modified under.
 */
static int
hammer_sync_inode_callback(hammer_record_t rec, void *data __unused)
{
	int error;

	error = 0;
	if ((rec->flags & HAMMER_RECF_DELETED) == 0)
		error = hammer_ip_sync_record(rec);

	if (error) {
		kprintf("hammer_sync_inode_callback: sync failed rec %p\n",
			rec);
		return(-1);
	}
	hammer_free_mem_record(rec);
	return(0);
}

/*
 * XXX error handling
 */
int
hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete)
{
	struct hammer_transaction trans;
	int error;
	int r;

	hammer_lock_ex(&ip->lock);
	hammer_start_transaction(&trans, ip->hmp);

	/*
	 * If the inode has been deleted (nlinks == 0), and the OS no longer
	 * has any references to it (handle_delete != 0), clean up the
	 * in-memory data.
	 *
	 * NOTE: We do not set the RDIRTY flag when updating the delete_tid;
	 * setting HAMMER_INODE_DELETED takes care of it.
	 *
	 * NOTE: Because we may sync records within this new transaction,
	 * force the inode update later on to use our transaction id or
	 * the delete_tid of the inode may be less than the create_tid of
	 * the inode update.  XXX shouldn't happen but don't take the chance.
	 */
	if (ip->ino_rec.ino_nlinks == 0 && handle_delete) {
		if (ip->vp)
			vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
		error = hammer_ip_delete_range(&trans, ip,
					       HAMMER_MIN_KEY, HAMMER_MAX_KEY);
		KKASSERT(RB_EMPTY(&ip->rec_tree));
		ip->flags &= ~HAMMER_INODE_TID;
		ip->ino_rec.base.base.delete_tid = trans.tid;
		hammer_modify_inode(&trans, ip,
				    HAMMER_INODE_DELETED | HAMMER_INODE_TID);
		--ip->hmp->rootvol->ondisk->vol0_stat_inodes;
		hammer_modify_volume(ip->hmp->rootvol);
	}

	/*
	 * Sync the buffer cache.
	 */
	if (ip->vp != NULL)
		error = vfsync(ip->vp, waitfor, 1, NULL, NULL);
	else
		error = 0;

	/*
	 * Now sync related records.
	 */
	if (error == 0) {
		r = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
			    hammer_sync_inode_callback, NULL);
		if (r < 0)
			error = EIO;
	}

	/*
	 * Now update the inode's on-disk inode-data and/or on-disk record.
	 */
	switch(ip->flags & (HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK)) {
	case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
		/*
		 * If deleted and on-disk, don't set any additional flags.
		 * The delete flag takes care of things.
		 */
		break;
	case HAMMER_INODE_DELETED:
		/*
		 * Take care of the case where a deleted inode was never
		 * flushed to the disk in the first place.
		 */
		ip->flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY);
		while (RB_ROOT(&ip->rec_tree))
			hammer_free_mem_record(RB_ROOT(&ip->rec_tree));
		break;
	case HAMMER_INODE_ONDISK:
		/*
		 * If already on-disk, do not set any additional flags.
		 */
		break;
	default:
		/*
		 * If not on-disk and not deleted, set both dirty flags
		 * to force an initial record to be written.
		 */
		ip->flags |= HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY;
		break;
	}

	/*
	 * If RDIRTY or DDIRTY is set, write out a new record.  If the
	 * inode is already on-disk, the old record is marked as deleted.
	 */
	if (ip->flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
			 HAMMER_INODE_DELETED)) {
		error = hammer_update_inode(ip);
	}
	hammer_commit_transaction(&trans);
	hammer_unlock(&ip->lock);
	return(error);
}
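/*
 * Sketch of the sync pipeline implemented by hammer_sync_inode() above
 * (illustration only).  The ordering is what the code does; presumably
 * the inode record goes out last so it never references unsynced data:
 *
 *	1. handle_delete: scrap buffers/records if nlinks == 0
 *	2. vfsync()                - flush dirty buffer cache buffers
 *	3. RB_SCAN()               - sync in-memory records to the B-Tree
 *	4. hammer_update_inode()   - replace the on-disk inode record
 *	5. hammer_commit_transaction()
 */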
/*
 * Access the filesystem buffer containing the cluster-relative byte
 * offset, validate the buffer type, load *bufferp and return a
 * pointer to the requested data.  The buffer is referenced on return
 * (the buffer lock calls below are currently commented out).
 *
 * If buf_type is 0 the buffer is assumed to be a pure-data buffer and
 * no type or CRC check is performed.
 *
 * If *bufferp is not NULL on entry it is assumed to contain a locked
 * and referenced buffer which will then be replaced.
 *
 * If the caller is holding another unrelated buffer locked it must be
 * passed in reorderbuf so we can properly order buffer locks.
 *
 * XXX add a flag for the buffer type and check the CRC here XXX
 */
void *
hammer_bread(hammer_cluster_t cluster, int32_t cloff,
	     u_int64_t buf_type, int *errorp,
	     struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	int32_t buf_no;
	int32_t buf_off;

	/*
	 * Load the correct filesystem buffer, replacing *bufferp.
	 */
	buf_no = cloff / HAMMER_BUFSIZE;
	buffer = *bufferp;
	if (buffer == NULL || buffer->cluster != cluster ||
	    buffer->buf_no != buf_no) {
		if (buffer) {
			/*hammer_unlock(&buffer->io.lock);*/
			hammer_rel_buffer(buffer, 0);
		}
		buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
		*bufferp = buffer;
		if (buffer == NULL)
			return(NULL);
		/*hammer_lock_ex(&buffer->io.lock);*/
	}

	/*
	 * Validate the buffer type.
	 */
	buf_off = cloff & HAMMER_BUFMASK;
	if (buf_type) {
		if (buf_type != buffer->ondisk->head.buf_type) {
			kprintf("BUFFER HEAD TYPE MISMATCH %llx %llx\n",
				buf_type, buffer->ondisk->head.buf_type);
			*errorp = EIO;
			return(NULL);
		}
		if (buf_off < sizeof(buffer->ondisk->head)) {
			kprintf("BUFFER OFFSET TOO LOW %d\n", buf_off);
			*errorp = EIO;
			return(NULL);
		}
	}

	/*
	 * Return a pointer to the buffer data.
	 */
	*errorp = 0;
	return((char *)buffer->ondisk + buf_off);
}
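/*
 * Assumed usage sketch for hammer_bread() above (illustration only,
 * not part of this file).  Callers typically cache the buffer across
 * sequential reads and release the final buffer themselves:
 *
 *	struct hammer_buffer *buffer = NULL;
 *	void *data;
 *	int error;
 *
 *	data = hammer_bread(cluster, cloff, 0, &error, &buffer);
 *	...
 *	if (buffer)
 *		hammer_rel_buffer(buffer, 0);
 */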