1 /* 2 * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 #include <sys/cdefs.h> 36 #include <sys/param.h> 37 #include <sys/systm.h> 38 #include <sys/types.h> 39 #include <sys/lock.h> 40 #include <sys/uuid.h> 41 42 #include "hammer2.h" 43 44 /* 45 * Adding a ref to an inode is only legal if the inode already has at least 46 * one ref. 47 */ 48 void 49 hammer2_inode_ref(hammer2_inode_t *ip) 50 { 51 atomic_add_int(&ip->refs, 1); 52 } 53 54 /* 55 * Drop an inode reference, freeing the inode when the last reference goes 56 * away. 57 */ 58 void 59 hammer2_inode_drop(hammer2_inode_t *ip) 60 { 61 hammer2_mount_t *hmp; 62 hammer2_inode_t *pip; 63 hammer2_chain_t *chain; 64 u_int refs; 65 66 for (;;) { 67 refs = ip->refs; 68 cpu_ccfence(); 69 if (refs == 1) { 70 if (atomic_cmpset_int(&ip->refs, 1, 0)) { 71 KKASSERT(ip->topo_cst.count == 0); 72 73 hmp = ip->hmp; 74 ip->hmp = NULL; 75 pip = ip->pip; 76 ip->pip = NULL; 77 chain = ip->chain; 78 ip->chain = NULL; 79 if (chain) 80 hammer2_chain_drop(hmp, chain); 81 82 /* 83 * We have to drop pip (if non-NULL) to 84 * dispose of our implied reference from 85 * ip->pip. We can simply loop on it. 86 */ 87 kfree(ip, hmp->minode); 88 if (pip == NULL) 89 break; 90 ip = pip; 91 /* continue */ 92 } 93 } else { 94 if (atomic_cmpset_int(&ip->refs, refs, refs - 1)) 95 break; 96 } 97 } 98 } 99 100 /* 101 * Get the vnode associated with the given inode, allocating the vnode if 102 * necessary. The vnode will be returned exclusively locked. 103 * 104 * The caller must lock the inode (shared or exclusive). 105 * 106 * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim 107 * races. 108 */ 109 struct vnode * 110 hammer2_igetv(hammer2_inode_t *ip, int *errorp) 111 { 112 hammer2_inode_data_t *ipdata; 113 hammer2_pfsmount_t *pmp; 114 struct vnode *vp; 115 ccms_state_t ostate; 116 117 pmp = ip->pmp; 118 KKASSERT(pmp != NULL); 119 *errorp = 0; 120 ipdata = &ip->chain->data->ipdata; 121 122 for (;;) { 123 /* 124 * Attempt to reuse an existing vnode assignment. It is 125 * possible to race a reclaim so the vget() may fail. The 126 * inode must be unlocked during the vget() to avoid a 127 * deadlock against a reclaim. 128 */ 129 vp = ip->vp; 130 if (vp) { 131 /* 132 * Inode must be unlocked during the vget() to avoid 133 * possible deadlocks, but leave the ip ref intact. 134 * 135 * vnode is held to prevent destruction during the 136 * vget(). The vget() can still fail if we lost 137 * a reclaim race on the vnode. 138 */ 139 vhold_interlocked(vp); 140 ostate = hammer2_inode_lock_temp_release(ip); 141 if (vget(vp, LK_EXCLUSIVE)) { 142 vdrop(vp); 143 hammer2_inode_lock_restore(ip, ostate); 144 continue; 145 } 146 hammer2_inode_lock_restore(ip, ostate); 147 vdrop(vp); 148 /* vp still locked and ref from vget */ 149 if (ip->vp != vp) { 150 kprintf("hammer2: igetv race %p/%p\n", 151 ip->vp, vp); 152 vput(vp); 153 continue; 154 } 155 *errorp = 0; 156 break; 157 } 158 159 /* 160 * No vnode exists, allocate a new vnode. Beware of 161 * allocation races. This function will return an 162 * exclusively locked and referenced vnode. 163 */ 164 *errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0); 165 if (*errorp) { 166 kprintf("hammer2: igetv getnewvnode failed %d\n", 167 *errorp); 168 vp = NULL; 169 break; 170 } 171 172 /* 173 * Lock the inode and check for an allocation race. 174 */ 175 ostate = hammer2_inode_lock_upgrade(ip); 176 if (ip->vp != NULL) { 177 vp->v_type = VBAD; 178 vx_put(vp); 179 hammer2_inode_lock_restore(ip, ostate); 180 continue; 181 } 182 183 switch (ipdata->type) { 184 case HAMMER2_OBJTYPE_DIRECTORY: 185 vp->v_type = VDIR; 186 break; 187 case HAMMER2_OBJTYPE_REGFILE: 188 vp->v_type = VREG; 189 vinitvmio(vp, ipdata->size, 190 HAMMER2_LBUFSIZE, 191 (int)ipdata->size & HAMMER2_LBUFMASK); 192 break; 193 case HAMMER2_OBJTYPE_SOFTLINK: 194 /* 195 * XXX for now we are using the generic file_read 196 * and file_write code so we need a buffer cache 197 * association. 198 */ 199 vp->v_type = VLNK; 200 vinitvmio(vp, ipdata->size, 201 HAMMER2_LBUFSIZE, 202 (int)ipdata->size & HAMMER2_LBUFMASK); 203 break; 204 /* XXX FIFO */ 205 default: 206 panic("hammer2: unhandled objtype %d", ipdata->type); 207 break; 208 } 209 210 if (ip == pmp->iroot) 211 vsetflags(vp, VROOT); 212 213 vp->v_data = ip; 214 ip->vp = vp; 215 hammer2_inode_ref(ip); /* vp association */ 216 hammer2_inode_lock_restore(ip, ostate); 217 break; 218 } 219 220 /* 221 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0. 222 */ 223 if (hammer2_debug & 0x0002) { 224 kprintf("igetv vp %p refs %d aux %d\n", 225 vp, vp->v_sysref.refcnt, vp->v_auxrefs); 226 } 227 return (vp); 228 } 229 230 /* 231 * The passed-in chain must be locked and the returned inode will also be 232 * locked. A ref is added to both the chain and the inode. 233 * 234 * The hammer2_inode structure regulates the interface between the high level 235 * kernel VNOPS API and the filesystem backend (the chains). 236 * 237 * NOTE! This routine allocates the hammer2_inode structure 238 * unconditionally, and thus there might be several which 239 * are associated with the same chain. Particularly for hardlinks 240 * but this can also happen temporarily for normal files and 241 * directories. 242 * 243 * WARNING! This routine sucks up the chain's lock (makes it part of the 244 * inode lock from the point of view of the inode lock API), 245 * so callers need to be careful. 246 * 247 * WARNING! The mount code is allowed to pass dip == NULL for iroot and 248 * is allowed to pass pmp == NULL and dip == NULL for sroot. 249 */ 250 hammer2_inode_t * 251 hammer2_inode_get(hammer2_mount_t *hmp, hammer2_pfsmount_t *pmp, 252 hammer2_inode_t *dip, hammer2_chain_t *chain) 253 { 254 hammer2_inode_t *nip; 255 256 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE); 257 258 nip = kmalloc(sizeof(*nip), hmp->minode, M_WAITOK | M_ZERO); 259 260 nip->chain = chain; 261 hammer2_chain_ref(hmp, chain); /* nip->chain */ 262 nip->pip = dip; /* can be NULL */ 263 if (dip) 264 hammer2_inode_ref(dip); /* ref dip for nip->pip */ 265 266 nip->pmp = pmp; 267 nip->hmp = hmp; 268 269 /* 270 * ref and lock on nip gives it state compatible to after a 271 * hammer2_inode_lock_ex() call. 272 */ 273 nip->refs = 1; 274 ccms_cst_init(&nip->topo_cst, &nip->chain); 275 ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE); 276 /* combination of thread lock and chain lock == inode lock */ 277 278 return (nip); 279 } 280 281 /* 282 * Put away an inode, disconnecting it from its chain. The inode must be 283 * exclusively locked. 284 * 285 * The inode will be unlocked by this function. Note however that any related 286 * chain returned by the hammer2_inode_lock_*() call will NOT be unlocked 287 * by this function. The related chain is dropped to undo the ref that 288 * hammer2_inode_get() put on it. 289 * 290 * passed_chain is unlocked normally and does not have to be directly 291 * associated with (ip). This is simply so the API works the same as 292 * the hammer2_inode_unlock_ex() API. NULL is ok. 293 */ 294 void 295 hammer2_inode_put(hammer2_inode_t *ip, hammer2_chain_t *passed_chain) 296 { 297 hammer2_mount_t *hmp = ip->hmp; 298 hammer2_inode_t *pip; 299 hammer2_chain_t *chain; 300 301 /* 302 * Disconnect chain 303 */ 304 if ((chain = ip->chain) != NULL) { 305 ip->chain = NULL; 306 hammer2_chain_drop(hmp, chain); /* from *_get() */ 307 } 308 KKASSERT(ip->topo_cst.count == -1); /* one excl lock allowed */ 309 310 /* 311 * Disconnect pip 312 */ 313 if ((pip = ip->pip) != NULL) { 314 ip->pip = NULL; 315 hammer2_inode_drop(pip); 316 } 317 318 /* 319 * clean up the ip, we use an inode_unlock_ex-compatible API. 320 */ 321 hammer2_inode_unlock_ex(ip, passed_chain); 322 } 323 324 /* 325 * Create a new inode in the specified directory using the vattr to 326 * figure out the type of inode. 327 * 328 * If no error occurs the new inode with its chain locked is returned in 329 * *nipp, otherwise an error is returned and *nipp is set to NULL. 330 * 331 * If vap and/or cred are NULL the related fields are not set and the 332 * inode type defaults to a directory. This is used when creating PFSs 333 * under the super-root, so the inode number is set to 1 in this case. 334 * 335 * dip is not locked on entry. 336 */ 337 int 338 hammer2_inode_create(hammer2_inode_t *dip, 339 struct vattr *vap, struct ucred *cred, 340 const uint8_t *name, size_t name_len, 341 hammer2_inode_t **nipp, hammer2_chain_t **nchainp) 342 { 343 hammer2_inode_data_t *nipdata; 344 hammer2_mount_t *hmp; 345 hammer2_chain_t *chain; 346 hammer2_chain_t *parent; 347 hammer2_inode_t *nip; 348 hammer2_key_t lhc; 349 int error; 350 uid_t xuid; 351 uuid_t dip_uid; 352 uuid_t dip_gid; 353 uint32_t dip_mode; 354 355 hmp = dip->hmp; 356 lhc = hammer2_dirhash(name, name_len); 357 358 /* 359 * Locate the inode or indirect block to create the new 360 * entry in. At the same time check for key collisions 361 * and iterate until we don't get one. 362 */ 363 retry: 364 parent = hammer2_inode_lock_ex(dip); 365 366 dip_uid = parent->data->ipdata.uid; 367 dip_gid = parent->data->ipdata.gid; 368 dip_mode = parent->data->ipdata.mode; 369 370 error = 0; 371 while (error == 0) { 372 chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0); 373 if (chain == NULL) 374 break; 375 if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0) 376 error = ENOSPC; 377 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK) 378 error = ENOSPC; 379 hammer2_chain_unlock(hmp, chain); 380 chain = NULL; 381 ++lhc; 382 } 383 if (error == 0) { 384 chain = hammer2_chain_create(hmp, parent, NULL, lhc, 0, 385 HAMMER2_BREF_TYPE_INODE, 386 HAMMER2_INODE_BYTES, 387 &error); 388 } 389 390 hammer2_inode_unlock_ex(dip, parent); 391 392 /* 393 * Handle the error case 394 */ 395 if (error) { 396 KKASSERT(chain == NULL); 397 if (error == EAGAIN) { 398 hammer2_chain_wait(hmp, parent); 399 goto retry; 400 } 401 *nipp = NULL; 402 *nchainp = NULL; 403 return (error); 404 } 405 406 /* 407 * Set up the new inode. 408 * 409 * NOTE: *_get() integrates chain's lock into the inode lock. 410 */ 411 nip = hammer2_inode_get(dip->hmp, dip->pmp, dip, chain); 412 *nipp = nip; 413 *nchainp = chain; 414 nipdata = &chain->data->ipdata; 415 416 hammer2_voldata_lock(hmp); 417 if (vap) { 418 nipdata->type = hammer2_get_obj_type(vap->va_type); 419 nipdata->inum = hmp->voldata.alloc_tid++; 420 /* XXX modify/lock */ 421 } else { 422 nipdata->type = HAMMER2_OBJTYPE_DIRECTORY; 423 nipdata->inum = 1; 424 } 425 hammer2_voldata_unlock(hmp); 426 nipdata->version = HAMMER2_INODE_VERSION_ONE; 427 hammer2_update_time(&nipdata->ctime); 428 nipdata->mtime = nipdata->ctime; 429 if (vap) 430 nipdata->mode = vap->va_mode; 431 nipdata->nlinks = 1; 432 if (vap) { 433 if (dip) { 434 xuid = hammer2_to_unix_xid(&dip_uid); 435 xuid = vop_helper_create_uid(dip->pmp->mp, 436 dip_mode, 437 xuid, 438 cred, 439 &vap->va_mode); 440 } else { 441 xuid = 0; 442 } 443 if (vap->va_vaflags & VA_UID_UUID_VALID) 444 nipdata->uid = vap->va_uid_uuid; 445 else if (vap->va_uid != (uid_t)VNOVAL) 446 hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid); 447 else 448 hammer2_guid_to_uuid(&nipdata->uid, xuid); 449 450 if (vap->va_vaflags & VA_GID_UUID_VALID) 451 nipdata->gid = vap->va_gid_uuid; 452 else if (vap->va_gid != (gid_t)VNOVAL) 453 hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid); 454 else if (dip) 455 nipdata->gid = dip_gid; 456 } 457 458 /* 459 * Regular files and softlinks allow a small amount of data to be 460 * directly embedded in the inode. This flag will be cleared if 461 * the size is extended past the embedded limit. 462 */ 463 if (nipdata->type == HAMMER2_OBJTYPE_REGFILE || 464 nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) { 465 nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA; 466 } 467 468 KKASSERT(name_len < HAMMER2_INODE_MAXNAME); 469 bcopy(name, nipdata->filename, name_len); 470 nipdata->name_key = lhc; 471 nipdata->name_len = name_len; 472 473 return (0); 474 } 475 476 /* 477 * Create a duplicate of the inode (chain) in the specified target directory 478 * (dip), return the duplicated chain in *nchainp (locked). chain is locked 479 * on call and remains locked on return. 480 * 481 * If name is NULL the inode is duplicated as a hidden directory entry. 482 * 483 * XXX name needs to be NULL for now. 484 */ 485 int 486 hammer2_inode_duplicate(hammer2_inode_t *dip, 487 hammer2_chain_t *ochain, hammer2_chain_t **nchainp) 488 { 489 hammer2_inode_data_t *nipdata; 490 hammer2_mount_t *hmp; 491 hammer2_chain_t *parent; 492 hammer2_chain_t *chain; 493 hammer2_key_t lhc; 494 int error = 0; 495 496 hmp = dip->hmp; 497 lhc = ochain->data->ipdata.inum; 498 *nchainp = NULL; 499 KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0); 500 501 /* 502 * Locate the inode or indirect block to create the new 503 * entry in. 504 * 505 * There should be no key collisions with invisible inode keys. 506 */ 507 retry: 508 parent = dip->chain; 509 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS); 510 chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0); 511 if (chain) { 512 hammer2_chain_unlock(hmp, chain); 513 chain = NULL; 514 error = ENOSPC; 515 } 516 517 /* 518 * Create entry in common parent directory. 519 */ 520 if (error == 0) { 521 chain = hammer2_chain_create(hmp, parent, NULL, lhc, 0, 522 HAMMER2_BREF_TYPE_INODE, /* n/a */ 523 HAMMER2_INODE_BYTES, /* n/a */ 524 &error); 525 } 526 527 /* 528 * Clean up, but we need to retain a ref on parent so we can wait 529 * on it for certain errors. 530 */ 531 if (error == EAGAIN) 532 hammer2_chain_ref(hmp, parent); 533 hammer2_chain_unlock(hmp, parent); 534 535 /* 536 * Handle the error case 537 */ 538 if (error) { 539 KKASSERT(chain == NULL); 540 if (error == EAGAIN) { 541 hammer2_chain_wait(hmp, parent); 542 hammer2_chain_drop(hmp, parent); 543 goto retry; 544 } 545 return (error); 546 } 547 548 /* 549 * XXX This is currently a horrible hack. Well, if we wanted to 550 * duplicate a file, i.e. as in a snapshot, we definitely 551 * would have to flush it first. 552 * 553 * For hardlink target generation we can theoretically move any 554 * active chain structures without flushing, but that gets really 555 * iffy for code which follows chain->parent and ip->pip links. 556 * 557 * XXX only works with files. Duplicating a directory hierarchy 558 * requires a flush but doesn't deal with races post-flush. 559 * Well, it would work I guess, but you might catch some files 560 * mid-operation. 561 * 562 * We cannot leave ochain with any in-memory chains because (for a 563 * hardlink), ochain will become a OBJTYPE_HARDLINK which is just a 564 * pointer to the real hardlink's inum and can't have any sub-chains. 565 * XXX might be 0-ref chains left. 566 */ 567 hammer2_chain_flush(hmp, ochain, 0); 568 /*KKASSERT(RB_EMPTY(&ochain.rbhead));*/ 569 570 hammer2_chain_modify(hmp, chain, 0); 571 nipdata = &chain->data->ipdata; 572 *nipdata = ochain->data->ipdata; 573 574 /* 575 * Directory entries are inodes but this is a hidden hardlink 576 * target. The name isn't used but to ease debugging give it 577 * a name after its inode number. 578 */ 579 ksnprintf(nipdata->filename, sizeof(nipdata->filename), 580 "0x%016jx", (intmax_t)nipdata->inum); 581 nipdata->name_len = strlen(nipdata->filename); 582 nipdata->name_key = lhc; 583 584 *nchainp = chain; 585 586 return (0); 587 } 588 589 /* 590 * Connect *chainp to the media topology represented by (dip, name, len). 591 * A directory entry is created which points to *chainp. *chainp is then 592 * unlocked and set to NULL. 593 * 594 * If *chainp is not currently connected we simply connect it up. 595 * 596 * If *chainp is already connected we create a OBJTYPE_HARDLINK entry which 597 * points to chain's inode number. *chainp is expected to be the terminus of 598 * the hardlink sitting as a hidden file in a common parent directory 599 * in this situation. 600 * 601 * The caller always wants to reference the hardlink terminus, not the 602 * hardlink pointer that we might be creating, so we do NOT replace 603 * *chainp here, we simply unlock and NULL it out. 604 */ 605 int 606 hammer2_inode_connect(hammer2_inode_t *dip, hammer2_chain_t **chainp, 607 const uint8_t *name, size_t name_len) 608 { 609 hammer2_inode_data_t *ipdata; 610 hammer2_mount_t *hmp; 611 hammer2_chain_t *nchain; 612 hammer2_chain_t *parent; 613 hammer2_chain_t *ochain; 614 hammer2_key_t lhc; 615 int error; 616 int hlink; 617 618 hmp = dip->hmp; 619 620 ochain = *chainp; 621 *chainp = NULL; 622 623 /* 624 * Since ochain is either disconnected from the topology or represents 625 * a hardlink terminus which is always a parent of or equal to dip, 626 * we should be able to safely lock dip->chain for our setup. 627 */ 628 retry: 629 parent = dip->chain; 630 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS); 631 632 lhc = hammer2_dirhash(name, name_len); 633 hlink = (ochain->parent != NULL); 634 635 /* 636 * In fake mode flush oip so we can just snapshot it downbelow. 637 */ 638 if (hlink && hammer2_hardlink_enable < 0) 639 hammer2_chain_flush(hmp, ochain, 0); 640 641 /* 642 * Locate the inode or indirect block to create the new 643 * entry in. At the same time check for key collisions 644 * and iterate until we don't get one. 645 */ 646 error = 0; 647 while (error == 0) { 648 nchain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0); 649 if (nchain == NULL) 650 break; 651 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK) 652 error = ENOSPC; 653 hammer2_chain_unlock(hmp, nchain); 654 nchain = NULL; 655 ++lhc; 656 } 657 658 /* 659 * Passing a non-NULL chain to hammer2_chain_create() reconnects the 660 * existing chain instead of creating a new one. The chain's bref 661 * will be properly updated. 662 */ 663 if (error == 0) { 664 if (hlink) { 665 nchain = hammer2_chain_create(hmp, parent, 666 NULL, lhc, 0, 667 HAMMER2_BREF_TYPE_INODE, 668 HAMMER2_INODE_BYTES, 669 &error); 670 } else { 671 /* 672 * NOTE: reconnects oip->chain to the media 673 * topology and returns its argument 674 * (oip->chain). 675 * 676 * No additional locks or refs are obtained on 677 * the returned chain so don't double-unlock! 678 */ 679 nchain = hammer2_chain_create(hmp, parent, 680 ochain, lhc, 0, 681 HAMMER2_BREF_TYPE_INODE, 682 HAMMER2_INODE_BYTES, 683 &error); 684 } 685 } 686 687 /* 688 * Unlock stuff. This is a bit messy, if we have an EAGAIN error 689 * we need to wait for operations on parent to finish. 690 */ 691 if (error == EAGAIN) 692 hammer2_chain_ref(hmp, parent); 693 hammer2_chain_unlock(hmp, parent); 694 695 /* 696 * ochain still active. 697 * 698 * Handle the error case 699 */ 700 if (error) { 701 KKASSERT(nchain == NULL); 702 if (error == EAGAIN) { 703 hammer2_chain_wait(hmp, parent); 704 hammer2_chain_drop(hmp, parent); 705 goto retry; 706 } 707 hammer2_chain_unlock(hmp, ochain); 708 return (error); 709 } 710 711 /* 712 * Directory entries are inodes so if the name has changed we have 713 * to update the inode. 714 * 715 * When creating an OBJTYPE_HARDLINK entry remember to unlock the 716 * chain, the caller will access the hardlink via the actual hardlink 717 * target file and not the hardlink pointer entry. 718 */ 719 if (hlink && hammer2_hardlink_enable >= 0) { 720 /* 721 * Create the HARDLINK pointer. oip represents the hardlink 722 * target in this situation. 723 * 724 * NOTE: *_get() integrates chain's lock into the inode lock. 725 */ 726 hammer2_chain_modify(hmp, nchain, 0); 727 KKASSERT(name_len < HAMMER2_INODE_MAXNAME); 728 ipdata = &nchain->data->ipdata; 729 bcopy(name, ipdata->filename, name_len); 730 ipdata->name_key = lhc; 731 ipdata->name_len = name_len; 732 ipdata->target_type = ochain->data->ipdata.type; 733 ipdata->type = HAMMER2_OBJTYPE_HARDLINK; 734 ipdata->inum = ochain->data->ipdata.inum; 735 ipdata->nlinks = 1; 736 kprintf("created hardlink %*.*s\n", 737 (int)name_len, (int)name_len, name); 738 hammer2_chain_unlock(hmp, nchain); 739 } else if (hlink && hammer2_hardlink_enable < 0) { 740 /* 741 * Create a snapshot (hardlink fake mode for debugging). 742 * 743 * NOTE: *_get() integrates nchain's lock into the inode lock. 744 */ 745 hammer2_chain_modify(hmp, nchain, 0); 746 KKASSERT(name_len < HAMMER2_INODE_MAXNAME); 747 ipdata = &nchain->data->ipdata; 748 *ipdata = ochain->data->ipdata; 749 bcopy(name, ipdata->filename, name_len); 750 ipdata->name_key = lhc; 751 ipdata->name_len = name_len; 752 kprintf("created fake hardlink %*.*s\n", 753 (int)name_len, (int)name_len, name); 754 hammer2_chain_unlock(hmp, nchain); 755 } else { 756 /* 757 * Normally disconnected inode (e.g. during a rename) that 758 * was reconnected. We must fixup the name stored in 759 * oip. 760 * 761 * We are using oip as chain, already locked by caller, 762 * do not unlock it. 763 */ 764 hammer2_chain_modify(hmp, ochain, 0); 765 ipdata = &ochain->data->ipdata; 766 767 if (ipdata->name_len != name_len || 768 bcmp(ipdata->filename, name, name_len) != 0) { 769 KKASSERT(name_len < HAMMER2_INODE_MAXNAME); 770 bcopy(name, ipdata->filename, name_len); 771 ipdata->name_key = lhc; 772 ipdata->name_len = name_len; 773 } 774 ipdata->nlinks = 1; 775 } 776 hammer2_chain_unlock(hmp, ochain); 777 return (0); 778 } 779 780 /* 781 * Unlink the file from the specified directory inode. The directory inode 782 * does not need to be locked. The caller should pass a non-NULL (ip) 783 * representing the object being removed only if the related vnode is 784 * potentially inactive (not referenced in the caller's active path), 785 * so we can vref/vrele it to trigger the VOP_INACTIVE path and properly 786 * recycle it. 787 * 788 * isdir determines whether a directory/non-directory check should be made. 789 * No check is made if isdir is set to -1. 790 * 791 * If retain_chain is non-NULL this function can fail with an EAGAIN if it 792 * catches the object in the middle of a flush. 793 */ 794 int 795 hammer2_unlink_file(hammer2_inode_t *dip, 796 const uint8_t *name, size_t name_len, 797 int isdir, hammer2_chain_t *retain_chain) 798 { 799 hammer2_inode_data_t *ipdata; 800 hammer2_mount_t *hmp; 801 hammer2_chain_t *parent; 802 hammer2_chain_t *ochain; 803 hammer2_chain_t *chain; 804 hammer2_chain_t *dparent; 805 hammer2_chain_t *dchain; 806 hammer2_key_t lhc; 807 int error; 808 int parent_ref; 809 uint8_t type; 810 811 parent_ref = 0; 812 error = 0; 813 ochain = NULL; 814 hmp = dip->hmp; 815 lhc = hammer2_dirhash(name, name_len); 816 817 /* 818 * Search for the filename in the directory 819 */ 820 parent = hammer2_inode_lock_ex(dip); 821 chain = hammer2_chain_lookup(hmp, &parent, 822 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 823 0); 824 while (chain) { 825 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && 826 name_len == chain->data->ipdata.name_len && 827 bcmp(name, chain->data->ipdata.filename, name_len) == 0) { 828 break; 829 } 830 chain = hammer2_chain_next(hmp, &parent, chain, 831 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 832 0); 833 } 834 hammer2_inode_unlock_ex(dip, NULL); /* retain parent */ 835 836 /* 837 * Not found or wrong type (isdir < 0 disables the type check). 838 * If a hardlink pointer, type checks use the hardlink target. 839 */ 840 if (chain == NULL) { 841 error = ENOENT; 842 goto done; 843 } 844 if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK) 845 type = chain->data->ipdata.target_type; 846 847 if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) { 848 error = ENOTDIR; 849 goto done; 850 } 851 if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) { 852 error = EISDIR; 853 goto done; 854 } 855 856 /* 857 * Hardlink must be resolved. We can't hold parent locked while we 858 * do this or we could deadlock. 859 * 860 * On success chain will be adjusted to point at the hardlink target 861 * and ochain will point to the hardlink pointer in the original 862 * directory. Otherwise chain remains pointing to the original. 863 */ 864 if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) { 865 KKASSERT(parent_ref == 0); 866 hammer2_chain_unlock(hmp, parent); 867 parent = NULL; 868 error = hammer2_hardlink_find(dip, &chain, &ochain); 869 } 870 871 /* 872 * If this is a directory the directory must be empty. However, if 873 * isdir < 0 we are doing a rename and the directory does not have 874 * to be empty. 875 * 876 * NOTE: We check the full key range here which covers both visible 877 * and invisible entries. Theoretically there should be no 878 * invisible (hardlink target) entries if there are no visible 879 * entries. 880 */ 881 if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir >= 0) { 882 dparent = chain; 883 hammer2_chain_lock(hmp, dparent, HAMMER2_RESOLVE_ALWAYS); 884 dchain = hammer2_chain_lookup(hmp, &dparent, 885 0, (hammer2_key_t)-1, 886 HAMMER2_LOOKUP_NODATA); 887 if (dchain) { 888 hammer2_chain_unlock(hmp, dchain); 889 hammer2_chain_unlock(hmp, dparent); 890 error = ENOTEMPTY; 891 goto done; 892 } 893 hammer2_chain_unlock(hmp, dparent); 894 dparent = NULL; 895 /* dchain NULL */ 896 } 897 898 /* 899 * Ok, we can now unlink the chain. We always decrement nlinks even 900 * if the entry can be deleted in case someone has the file open and 901 * does an fstat(). 902 * 903 * The chain itself will no longer be in the on-media topology but 904 * can still be flushed to the media (e.g. if an open descriptor 905 * remains). When the last vnode/ip ref goes away the chain will 906 * be marked unmodified, avoiding any further (now unnecesary) I/O. 907 * 908 * A non-NULL ochain indicates a hardlink. 909 */ 910 if (ochain) { 911 /* 912 * Delete the original hardlink pointer. 913 * 914 * NOTE: parent from above is NULL when ochain != NULL 915 * so we can reuse it. 916 */ 917 hammer2_chain_lock(hmp, ochain, HAMMER2_RESOLVE_ALWAYS); 918 parent_ref = 1; 919 for (;;) { 920 parent = ochain->parent; 921 hammer2_chain_ref(hmp, parent); 922 hammer2_chain_unlock(hmp, ochain); 923 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS); 924 hammer2_chain_lock(hmp, ochain, HAMMER2_RESOLVE_ALWAYS); 925 if (ochain->parent == parent) 926 break; 927 hammer2_chain_unlock(hmp, parent); 928 hammer2_chain_drop(hmp, parent); 929 } 930 931 if (ochain == retain_chain && ochain->flushing) { 932 hammer2_chain_unlock(hmp, ochain); 933 error = EAGAIN; 934 goto done; 935 } 936 hammer2_chain_delete(hmp, parent, ochain, 937 (ochain == retain_chain)); 938 hammer2_chain_unlock(hmp, ochain); 939 hammer2_chain_unlock(hmp, parent); 940 hammer2_chain_drop(hmp, parent); 941 parent = NULL; 942 943 /* 944 * Then decrement nlinks on hardlink target, deleting 945 * the target when nlinks drops to 0. 946 */ 947 if (chain->data->ipdata.nlinks == 1) { 948 dparent = chain->parent; 949 hammer2_chain_ref(hmp, chain); 950 hammer2_chain_unlock(hmp, chain); 951 hammer2_chain_lock(hmp, dparent, 952 HAMMER2_RESOLVE_ALWAYS); 953 hammer2_chain_lock(hmp, chain, 954 HAMMER2_RESOLVE_ALWAYS); 955 hammer2_chain_drop(hmp, chain); 956 hammer2_chain_modify(hmp, chain, 0); 957 --chain->data->ipdata.nlinks; 958 hammer2_chain_delete(hmp, dparent, chain, 0); 959 hammer2_chain_unlock(hmp, dparent); 960 } else { 961 hammer2_chain_modify(hmp, chain, 0); 962 --chain->data->ipdata.nlinks; 963 } 964 } else { 965 /* 966 * Otherwise this was not a hardlink and we can just 967 * remove the entry and decrement nlinks. 968 * 969 * NOTE: *_get() integrates chain's lock into the inode lock. 970 */ 971 ipdata = &chain->data->ipdata; 972 if (chain == retain_chain && chain->flushing) { 973 error = EAGAIN; 974 goto done; 975 } 976 hammer2_chain_modify(hmp, chain, 0); 977 --ipdata->nlinks; 978 hammer2_chain_delete(hmp, parent, chain, 979 (retain_chain == chain)); 980 } 981 982 error = 0; 983 done: 984 if (chain) 985 hammer2_chain_unlock(hmp, chain); 986 if (parent) { 987 hammer2_chain_unlock(hmp, parent); 988 if (parent_ref) 989 hammer2_chain_drop(hmp, parent); 990 } 991 if (ochain) 992 hammer2_chain_drop(hmp, ochain); 993 994 return error; 995 } 996 997 /* 998 * Calculate the allocation size for the file fragment straddling EOF 999 */ 1000 int 1001 hammer2_inode_calc_alloc(hammer2_key_t filesize) 1002 { 1003 int frag = (int)filesize & HAMMER2_PBUFMASK; 1004 int radix; 1005 1006 if (frag == 0) 1007 return(0); 1008 for (radix = HAMMER2_MINALLOCRADIX; frag > (1 << radix); ++radix) 1009 ; 1010 return (radix); 1011 } 1012 1013 /* 1014 * Given an unlocked ip consolidate for hardlink creation, adding (nlinks) 1015 * to the file's link count and potentially relocating the file to a 1016 * directory common to ip->pip and tdip. 1017 * 1018 * If the file has to be relocated ip->chain will also be adjusted. 1019 */ 1020 int 1021 hammer2_hardlink_consolidate(hammer2_inode_t *ip, hammer2_chain_t **chainp, 1022 hammer2_inode_t *tdip, int nlinks) 1023 { 1024 hammer2_inode_data_t *ipdata; 1025 hammer2_mount_t *hmp; 1026 hammer2_inode_t *fdip; 1027 hammer2_inode_t *cdip; 1028 hammer2_chain_t *chain; 1029 hammer2_chain_t *nchain; 1030 hammer2_chain_t *parent; 1031 int error; 1032 1033 hmp = tdip->hmp; 1034 *chainp = NULL; 1035 chain = hammer2_inode_lock_ex(ip); 1036 1037 if (nlinks == 0 && /* no hardlink needed */ 1038 (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE)) { 1039 hammer2_inode_unlock_ex(ip, NULL); 1040 *chainp = chain; 1041 return (0); 1042 } 1043 if (hammer2_hardlink_enable < 0) { /* fake hardlinks */ 1044 hammer2_inode_unlock_ex(ip, NULL); 1045 *chainp = chain; 1046 return (0); 1047 } 1048 if (hammer2_hardlink_enable == 0) { /* disallow hardlinks */ 1049 hammer2_inode_unlock_ex(ip, chain); 1050 return (ENOTSUP); 1051 } 1052 1053 /* 1054 * cdip will be returned with a ref, but not locked. 1055 */ 1056 fdip = ip->pip; 1057 cdip = hammer2_inode_common_parent(hmp, fdip, tdip); 1058 1059 /* 1060 * If no change in the hardlink's target directory is required and 1061 * this is already a hardlink target, all we need to do is adjust 1062 * the link count. 1063 */ 1064 if (cdip == fdip && 1065 (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) { 1066 if (nlinks) { 1067 hammer2_chain_modify(hmp, chain, 0); 1068 chain->data->ipdata.nlinks += nlinks; 1069 } 1070 *chainp = chain; 1071 error = 0; 1072 goto done; 1073 } 1074 1075 /* 1076 * We either have to move an existing hardlink target or we have 1077 * to create a fresh hardlink target. 1078 * 1079 * Hardlink targets are hidden inodes in a parent directory common 1080 * to all directory entries referencing the hardlink. 1081 */ 1082 error = hammer2_inode_duplicate(cdip, chain, &nchain); 1083 if (error == 0) { 1084 /* 1085 * Bump nlinks on duplicated hidden inode. 1086 */ 1087 hammer2_chain_modify(hmp, nchain, 0); 1088 nchain->data->ipdata.nlinks += nlinks; 1089 1090 /* 1091 * If the old chain is not a hardlink target then replace 1092 * it with a OBJTYPE_HARDLINK pointer. 1093 * 1094 * If the old chain IS a hardlink target then delete it. 1095 */ 1096 if (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) { 1097 hammer2_chain_modify(hmp, chain, 0); 1098 ipdata = &chain->data->ipdata; 1099 ipdata->target_type = ipdata->type; 1100 ipdata->type = HAMMER2_OBJTYPE_HARDLINK; 1101 ipdata->uflags = 0; 1102 ipdata->rmajor = 0; 1103 ipdata->rminor = 0; 1104 ipdata->ctime = 0; 1105 ipdata->mtime = 0; 1106 ipdata->atime = 0; 1107 ipdata->btime = 0; 1108 bzero(&ipdata->uid, sizeof(ipdata->uid)); 1109 bzero(&ipdata->gid, sizeof(ipdata->gid)); 1110 ipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA; 1111 ipdata->cap_flags = 0; 1112 ipdata->mode = 0; 1113 ipdata->size = 0; 1114 ipdata->nlinks = 1; 1115 ipdata->iparent = 0; /* XXX */ 1116 ipdata->pfs_type = 0; 1117 ipdata->pfs_inum = 0; 1118 bzero(&ipdata->pfs_clid, sizeof(ipdata->pfs_clid)); 1119 bzero(&ipdata->pfs_fsid, sizeof(ipdata->pfs_fsid)); 1120 ipdata->data_quota = 0; 1121 ipdata->data_count = 0; 1122 ipdata->inode_quota = 0; 1123 ipdata->inode_count = 0; 1124 ipdata->attr_tid = 0; 1125 ipdata->dirent_tid = 0; 1126 bzero(&ipdata->u, sizeof(ipdata->u)); 1127 /* XXX transaction ids */ 1128 } else { 1129 kprintf("DELETE INVISIBLE\n"); 1130 for (;;) { 1131 parent = chain->parent; 1132 hammer2_chain_ref(hmp, parent); 1133 hammer2_chain_ref(hmp, chain); 1134 hammer2_chain_unlock(hmp, chain); 1135 hammer2_chain_lock(hmp, parent, 1136 HAMMER2_RESOLVE_ALWAYS); 1137 hammer2_chain_lock(hmp, chain, 1138 HAMMER2_RESOLVE_ALWAYS); 1139 hammer2_chain_drop(hmp, chain); 1140 if (chain->parent == parent) 1141 break; 1142 hammer2_chain_unlock(hmp, parent); 1143 hammer2_chain_drop(hmp, parent); 1144 } 1145 hammer2_chain_delete(hmp, parent, chain, 0); 1146 hammer2_chain_unlock(hmp, parent); 1147 hammer2_chain_drop(hmp, parent); 1148 } 1149 1150 /* 1151 * Replace ip->chain with nchain (ip is still locked). 1152 */ 1153 hammer2_chain_ref(hmp, nchain); /* ip->chain */ 1154 if (ip->chain) 1155 hammer2_chain_drop(hmp, ip->chain); /* ip->chain */ 1156 ip->chain = nchain; 1157 1158 hammer2_chain_unlock(hmp, chain); 1159 *chainp = nchain; 1160 } else { 1161 hammer2_chain_unlock(hmp, chain); 1162 } 1163 1164 /* 1165 * Cleanup, chain/nchain already dealt with. 1166 */ 1167 done: 1168 hammer2_inode_unlock_ex(ip, NULL); 1169 hammer2_inode_drop(cdip); 1170 1171 return (error); 1172 } 1173 1174 /* 1175 * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK 1176 * inode while (*chainp) points to the resolved (hidden hardlink 1177 * target) inode. In this situation when nlinks is 1 we wish to 1178 * deconsolidate the hardlink, moving it back to the directory that now 1179 * represents the only remaining link. 1180 */ 1181 int 1182 hammer2_hardlink_deconsolidate(hammer2_inode_t *dip, 1183 hammer2_chain_t **chainp, 1184 hammer2_chain_t **ochainp) 1185 { 1186 if (*ochainp == NULL) 1187 return (0); 1188 /* XXX */ 1189 return (0); 1190 } 1191 1192 /* 1193 * The caller presents a locked *chainp pointing to a HAMMER2_BREF_TYPE_INODE 1194 * with an obj_type of HAMMER2_OBJTYPE_HARDLINK. This routine will gobble 1195 * the *chainp and return a new locked *chainp representing the file target 1196 * (the original *chainp will be unlocked). 1197 * 1198 * When a match is found the chain representing the original HARDLINK 1199 * will be returned in *ochainp with a ref, but not locked. 1200 * 1201 * When no match is found *chainp is set to NULL and EIO is returned. 1202 * (*ochainp) will still be set to the original chain with a ref but not 1203 * locked. 1204 */ 1205 int 1206 hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp, 1207 hammer2_chain_t **ochainp) 1208 { 1209 hammer2_mount_t *hmp = dip->hmp; 1210 hammer2_chain_t *chain = *chainp; 1211 hammer2_chain_t *parent; 1212 hammer2_inode_t *ip; 1213 hammer2_inode_t *pip; 1214 hammer2_key_t lhc; 1215 1216 pip = dip; 1217 hammer2_inode_ref(pip); /* for loop */ 1218 hammer2_chain_ref(hmp, chain); /* for (*ochainp) */ 1219 1220 *ochainp = chain; 1221 1222 /* 1223 * Locate the hardlink. pip is referenced and not locked, 1224 * ipp. 1225 * 1226 * chain is reused. 1227 */ 1228 lhc = chain->data->ipdata.inum; 1229 hammer2_chain_unlock(hmp, chain); 1230 chain = NULL; 1231 1232 while ((ip = pip) != NULL) { 1233 parent = hammer2_inode_lock_ex(ip); 1234 hammer2_inode_drop(ip); /* loop */ 1235 KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE); 1236 chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0); 1237 hammer2_chain_unlock(hmp, parent); 1238 if (chain) 1239 break; 1240 pip = ip->pip; /* safe, ip held locked */ 1241 if (pip) 1242 hammer2_inode_ref(pip); /* loop */ 1243 hammer2_inode_unlock_ex(ip, NULL); 1244 } 1245 1246 /* 1247 * chain is locked, ip is locked. Unlock ip, return the locked 1248 * chain. *ipp is already set w/a ref count and not locked. 1249 * 1250 * (parent is already unlocked). 1251 */ 1252 hammer2_inode_unlock_ex(ip, NULL); 1253 *chainp = chain; 1254 if (chain) { 1255 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE); 1256 /* already locked */ 1257 return (0); 1258 } else { 1259 return (EIO); 1260 } 1261 } 1262 1263 /* 1264 * Find the directory common to both fdip and tdip, hold and return 1265 * its inode. 1266 */ 1267 hammer2_inode_t * 1268 hammer2_inode_common_parent(hammer2_mount_t *hmp, 1269 hammer2_inode_t *fdip, hammer2_inode_t *tdip) 1270 { 1271 hammer2_inode_t *scan1; 1272 hammer2_inode_t *scan2; 1273 1274 /* 1275 * We used to have a depth field but it complicated matters too 1276 * much for directory renames. So now its ugly. Check for 1277 * simple cases before giving up and doing it the expensive way. 1278 * 1279 * XXX need a bottom-up topology stability lock 1280 */ 1281 if (fdip == tdip || fdip == tdip->pip) { 1282 hammer2_inode_ref(fdip); 1283 return(fdip); 1284 } 1285 if (fdip->pip == tdip) { 1286 hammer2_inode_ref(tdip); 1287 return(tdip); 1288 } 1289 1290 /* 1291 * XXX not MPSAFE 1292 */ 1293 for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) { 1294 scan2 = tdip; 1295 while (scan2->pmp == tdip->pmp) { 1296 if (scan1 == scan2) { 1297 hammer2_inode_ref(scan1); 1298 return(scan1); 1299 } 1300 scan2 = scan2->pip; 1301 } 1302 } 1303 panic("hammer2_inode_common_parent: no common parent %p %p\n", 1304 fdip, tdip); 1305 /* NOT REACHED */ 1306 return(NULL); 1307 } 1308