1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.31 2008/02/10 18:58:23 dillon Exp $ 35 */ 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/kernel.h> 40 #include <sys/fcntl.h> 41 #include <sys/namecache.h> 42 #include <sys/vnode.h> 43 #include <sys/lockf.h> 44 #include <sys/event.h> 45 #include <sys/stat.h> 46 #include <sys/dirent.h> 47 #include <vm/vm_extern.h> 48 #include <vfs/fifofs/fifo.h> 49 #include "hammer.h" 50 51 /* 52 * USERFS VNOPS 53 */ 54 /*static int hammer_vop_vnoperate(struct vop_generic_args *);*/ 55 static int hammer_vop_fsync(struct vop_fsync_args *); 56 static int hammer_vop_read(struct vop_read_args *); 57 static int hammer_vop_write(struct vop_write_args *); 58 static int hammer_vop_access(struct vop_access_args *); 59 static int hammer_vop_advlock(struct vop_advlock_args *); 60 static int hammer_vop_close(struct vop_close_args *); 61 static int hammer_vop_ncreate(struct vop_ncreate_args *); 62 static int hammer_vop_getattr(struct vop_getattr_args *); 63 static int hammer_vop_nresolve(struct vop_nresolve_args *); 64 static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *); 65 static int hammer_vop_nlink(struct vop_nlink_args *); 66 static int hammer_vop_nmkdir(struct vop_nmkdir_args *); 67 static int hammer_vop_nmknod(struct vop_nmknod_args *); 68 static int hammer_vop_open(struct vop_open_args *); 69 static int hammer_vop_pathconf(struct vop_pathconf_args *); 70 static int hammer_vop_print(struct vop_print_args *); 71 static int hammer_vop_readdir(struct vop_readdir_args *); 72 static int hammer_vop_readlink(struct vop_readlink_args *); 73 static int hammer_vop_nremove(struct vop_nremove_args *); 74 static int hammer_vop_nrename(struct vop_nrename_args *); 75 static int hammer_vop_nrmdir(struct vop_nrmdir_args *); 76 static int hammer_vop_setattr(struct vop_setattr_args *); 77 static int hammer_vop_strategy(struct vop_strategy_args *); 78 static int hammer_vop_nsymlink(struct vop_nsymlink_args *); 79 static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *); 80 static int hammer_vop_ioctl(struct vop_ioctl_args *); 81 static int hammer_vop_mountctl(struct vop_mountctl_args *); 82 83 static int hammer_vop_fifoclose (struct vop_close_args *); 84 static int hammer_vop_fiforead (struct vop_read_args *); 85 static int hammer_vop_fifowrite (struct vop_write_args *); 86 87 static int hammer_vop_specclose (struct vop_close_args *); 88 static int hammer_vop_specread (struct vop_read_args *); 89 static int hammer_vop_specwrite (struct vop_write_args *); 90 91 struct vop_ops hammer_vnode_vops = { 92 .vop_default = vop_defaultop, 93 .vop_fsync = hammer_vop_fsync, 94 .vop_getpages = vop_stdgetpages, 95 .vop_putpages = vop_stdputpages, 96 .vop_read = hammer_vop_read, 97 .vop_write = hammer_vop_write, 98 .vop_access = hammer_vop_access, 99 .vop_advlock = hammer_vop_advlock, 100 .vop_close = hammer_vop_close, 101 .vop_ncreate = hammer_vop_ncreate, 102 .vop_getattr = hammer_vop_getattr, 103 .vop_inactive = hammer_vop_inactive, 104 .vop_reclaim = hammer_vop_reclaim, 105 .vop_nresolve = hammer_vop_nresolve, 106 .vop_nlookupdotdot = hammer_vop_nlookupdotdot, 107 .vop_nlink = hammer_vop_nlink, 108 .vop_nmkdir = hammer_vop_nmkdir, 109 .vop_nmknod = hammer_vop_nmknod, 110 .vop_open = hammer_vop_open, 111 .vop_pathconf = hammer_vop_pathconf, 112 .vop_print = hammer_vop_print, 113 .vop_readdir = hammer_vop_readdir, 114 .vop_readlink = hammer_vop_readlink, 115 .vop_nremove = hammer_vop_nremove, 116 .vop_nrename = hammer_vop_nrename, 117 .vop_nrmdir = hammer_vop_nrmdir, 118 .vop_setattr = hammer_vop_setattr, 119 .vop_strategy = hammer_vop_strategy, 120 .vop_nsymlink = hammer_vop_nsymlink, 121 .vop_nwhiteout = hammer_vop_nwhiteout, 122 .vop_ioctl = hammer_vop_ioctl, 123 .vop_mountctl = hammer_vop_mountctl 124 }; 125 126 struct vop_ops hammer_spec_vops = { 127 .vop_default = spec_vnoperate, 128 .vop_fsync = hammer_vop_fsync, 129 .vop_read = hammer_vop_specread, 130 .vop_write = hammer_vop_specwrite, 131 .vop_access = hammer_vop_access, 132 .vop_close = hammer_vop_specclose, 133 .vop_getattr = hammer_vop_getattr, 134 .vop_inactive = hammer_vop_inactive, 135 .vop_reclaim = hammer_vop_reclaim, 136 .vop_setattr = hammer_vop_setattr 137 }; 138 139 struct vop_ops hammer_fifo_vops = { 140 .vop_default = fifo_vnoperate, 141 .vop_fsync = hammer_vop_fsync, 142 .vop_read = hammer_vop_fiforead, 143 .vop_write = hammer_vop_fifowrite, 144 .vop_access = hammer_vop_access, 145 .vop_close = hammer_vop_fifoclose, 146 .vop_getattr = hammer_vop_getattr, 147 .vop_inactive = hammer_vop_inactive, 148 .vop_reclaim = hammer_vop_reclaim, 149 .vop_setattr = hammer_vop_setattr 150 }; 151 152 static int hammer_dounlink(struct nchandle *nch, struct vnode *dvp, 153 struct ucred *cred, int flags); 154 static int hammer_vop_strategy_read(struct vop_strategy_args *ap); 155 static int hammer_vop_strategy_write(struct vop_strategy_args *ap); 156 157 #if 0 158 static 159 int 160 hammer_vop_vnoperate(struct vop_generic_args *) 161 { 162 return (VOCALL(&hammer_vnode_vops, ap)); 163 } 164 #endif 165 166 /* 167 * hammer_vop_fsync { vp, waitfor } 168 */ 169 static 170 int 171 hammer_vop_fsync(struct vop_fsync_args *ap) 172 { 173 hammer_inode_t ip; 174 int error; 175 176 ip = VTOI(ap->a_vp); 177 error = hammer_sync_inode(ip, ap->a_waitfor, 0); 178 return (error); 179 } 180 181 /* 182 * hammer_vop_read { vp, uio, ioflag, cred } 183 */ 184 static 185 int 186 hammer_vop_read(struct vop_read_args *ap) 187 { 188 struct hammer_transaction trans; 189 hammer_inode_t ip; 190 off_t offset; 191 struct buf *bp; 192 struct uio *uio; 193 int error; 194 int n; 195 int seqcount; 196 197 if (ap->a_vp->v_type != VREG) 198 return (EINVAL); 199 ip = VTOI(ap->a_vp); 200 error = 0; 201 seqcount = ap->a_ioflag >> 16; 202 203 hammer_start_transaction(&trans, ip->hmp); 204 205 /* 206 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache. 207 */ 208 uio = ap->a_uio; 209 while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_rec.ino_size) { 210 offset = uio->uio_offset & HAMMER_BUFMASK; 211 #if 0 212 error = cluster_read(ap->a_vp, ip->ino_rec.ino_size, 213 uio->uio_offset - offset, HAMMER_BUFSIZE, 214 MAXBSIZE, seqcount, &bp); 215 #endif 216 error = bread(ap->a_vp, uio->uio_offset - offset, 217 HAMMER_BUFSIZE, &bp); 218 if (error) { 219 brelse(bp); 220 break; 221 } 222 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */ 223 n = HAMMER_BUFSIZE - offset; 224 if (n > uio->uio_resid) 225 n = uio->uio_resid; 226 if (n > ip->ino_rec.ino_size - uio->uio_offset) 227 n = (int)(ip->ino_rec.ino_size - uio->uio_offset); 228 error = uiomove((char *)bp->b_data + offset, n, uio); 229 if (error) { 230 bqrelse(bp); 231 break; 232 } 233 if ((ip->flags & HAMMER_INODE_RO) == 0 && 234 (ip->hmp->mp->mnt_flag & MNT_NOATIME) == 0) { 235 ip->ino_rec.ino_atime = trans.tid; 236 hammer_modify_inode(&trans, ip, HAMMER_INODE_ITIMES); 237 } 238 bqrelse(bp); 239 } 240 hammer_commit_transaction(&trans); 241 return (error); 242 } 243 244 /* 245 * hammer_vop_write { vp, uio, ioflag, cred } 246 */ 247 static 248 int 249 hammer_vop_write(struct vop_write_args *ap) 250 { 251 struct hammer_transaction trans; 252 struct hammer_inode *ip; 253 struct uio *uio; 254 off_t offset; 255 struct buf *bp; 256 int error; 257 int n; 258 int flags; 259 260 if (ap->a_vp->v_type != VREG) 261 return (EINVAL); 262 ip = VTOI(ap->a_vp); 263 error = 0; 264 265 if (ip->flags & HAMMER_INODE_RO) 266 return (EROFS); 267 268 /* 269 * Create a transaction to cover the operations we perform. 270 */ 271 hammer_start_transaction(&trans, ip->hmp); 272 uio = ap->a_uio; 273 274 /* 275 * Check append mode 276 */ 277 if (ap->a_ioflag & IO_APPEND) 278 uio->uio_offset = ip->ino_rec.ino_size; 279 280 /* 281 * Check for illegal write offsets. Valid range is 0...2^63-1 282 */ 283 if (uio->uio_offset < 0 || uio->uio_offset + uio->uio_resid <= 0) { 284 hammer_commit_transaction(&trans); 285 return (EFBIG); 286 } 287 288 /* 289 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache. 290 */ 291 while (uio->uio_resid > 0) { 292 int fixsize = 0; 293 294 offset = uio->uio_offset & HAMMER_BUFMASK; 295 n = HAMMER_BUFSIZE - offset; 296 if (n > uio->uio_resid) 297 n = uio->uio_resid; 298 if (uio->uio_offset + n > ip->ino_rec.ino_size) { 299 vnode_pager_setsize(ap->a_vp, uio->uio_offset + n); 300 fixsize = 1; 301 } 302 303 if (uio->uio_segflg == UIO_NOCOPY) { 304 /* 305 * Issuing a write with the same data backing the 306 * buffer. Instantiate the buffer to collect the 307 * backing vm pages, then read-in any missing bits. 308 * 309 * This case is used by vop_stdputpages(). 310 */ 311 bp = getblk(ap->a_vp, uio->uio_offset - offset, 312 HAMMER_BUFSIZE, GETBLK_BHEAVY, 0); 313 if ((bp->b_flags & B_CACHE) == 0) { 314 bqrelse(bp); 315 error = bread(ap->a_vp, 316 uio->uio_offset - offset, 317 HAMMER_BUFSIZE, &bp); 318 } 319 } else if (offset == 0 && uio->uio_resid >= HAMMER_BUFSIZE) { 320 /* 321 * entirely overwrite the buffer 322 */ 323 bp = getblk(ap->a_vp, uio->uio_offset - offset, 324 HAMMER_BUFSIZE, GETBLK_BHEAVY, 0); 325 } else if (offset == 0 && uio->uio_offset >= ip->ino_rec.ino_size) { 326 /* 327 * XXX 328 */ 329 bp = getblk(ap->a_vp, uio->uio_offset - offset, 330 HAMMER_BUFSIZE, GETBLK_BHEAVY, 0); 331 vfs_bio_clrbuf(bp); 332 } else { 333 /* 334 * Partial overwrite, read in any missing bits then 335 * replace the portion being written. 336 */ 337 error = bread(ap->a_vp, uio->uio_offset - offset, 338 HAMMER_BUFSIZE, &bp); 339 if (error == 0) 340 bheavy(bp); 341 } 342 if (error == 0) 343 error = uiomove((char *)bp->b_data + offset, n, uio); 344 345 /* 346 * If we screwed up we have to undo any VM size changes we 347 * made. 348 */ 349 if (error) { 350 brelse(bp); 351 if (fixsize) { 352 vtruncbuf(ap->a_vp, ip->ino_rec.ino_size, 353 HAMMER_BUFSIZE); 354 } 355 break; 356 } 357 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */ 358 if (ip->ino_rec.ino_size < uio->uio_offset) { 359 ip->ino_rec.ino_size = uio->uio_offset; 360 flags = HAMMER_INODE_RDIRTY; 361 vnode_pager_setsize(ap->a_vp, ip->ino_rec.ino_size); 362 } else { 363 flags = 0; 364 } 365 ip->ino_rec.ino_mtime = trans.tid; 366 flags |= HAMMER_INODE_ITIMES | HAMMER_INODE_BUFS; 367 hammer_modify_inode(&trans, ip, flags); 368 369 /* 370 * The file write must be tagged with the same TID as the 371 * inode, for consistency in case the inode changed size. 372 * This guarantees the on-disk data records will have a 373 * TID <= the inode TID representing the size change. 374 * 375 * If a prior write has not yet flushed, retain its TID. 376 */ 377 if (bp->b_tid == 0) 378 bp->b_tid = ip->last_tid; 379 380 if (ap->a_ioflag & IO_SYNC) { 381 bwrite(bp); 382 } else if (ap->a_ioflag & IO_DIRECT) { 383 bawrite(bp); 384 } else if ((ap->a_ioflag >> 16) > 1 && 385 (uio->uio_offset & HAMMER_BUFMASK) == 0) { 386 /* 387 * If seqcount indicates sequential operation and 388 * we just finished filling a buffer, push it out 389 * now to prevent the buffer cache from becoming 390 * too full, which would trigger non-optimal 391 * flushes. 392 */ 393 bawrite(bp); 394 } else { 395 bdwrite(bp); 396 } 397 } 398 if (error) 399 hammer_abort_transaction(&trans); 400 else 401 hammer_commit_transaction(&trans); 402 return (error); 403 } 404 405 /* 406 * hammer_vop_access { vp, mode, cred } 407 */ 408 static 409 int 410 hammer_vop_access(struct vop_access_args *ap) 411 { 412 struct hammer_inode *ip = VTOI(ap->a_vp); 413 uid_t uid; 414 gid_t gid; 415 int error; 416 417 uid = hammer_to_unix_xid(&ip->ino_data.uid); 418 gid = hammer_to_unix_xid(&ip->ino_data.gid); 419 420 error = vop_helper_access(ap, uid, gid, ip->ino_data.mode, 421 ip->ino_data.uflags); 422 return (error); 423 } 424 425 /* 426 * hammer_vop_advlock { vp, id, op, fl, flags } 427 */ 428 static 429 int 430 hammer_vop_advlock(struct vop_advlock_args *ap) 431 { 432 struct hammer_inode *ip = VTOI(ap->a_vp); 433 434 return (lf_advlock(ap, &ip->advlock, ip->ino_rec.ino_size)); 435 } 436 437 /* 438 * hammer_vop_close { vp, fflag } 439 */ 440 static 441 int 442 hammer_vop_close(struct vop_close_args *ap) 443 { 444 return (vop_stdclose(ap)); 445 } 446 447 /* 448 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap } 449 * 450 * The operating system has already ensured that the directory entry 451 * does not exist and done all appropriate namespace locking. 452 */ 453 static 454 int 455 hammer_vop_ncreate(struct vop_ncreate_args *ap) 456 { 457 struct hammer_transaction trans; 458 struct hammer_inode *dip; 459 struct hammer_inode *nip; 460 struct nchandle *nch; 461 int error; 462 463 nch = ap->a_nch; 464 dip = VTOI(ap->a_dvp); 465 466 if (dip->flags & HAMMER_INODE_RO) 467 return (EROFS); 468 469 /* 470 * Create a transaction to cover the operations we perform. 471 */ 472 hammer_start_transaction(&trans, dip->hmp); 473 474 /* 475 * Create a new filesystem object of the requested type. The 476 * returned inode will be referenced but not locked. 477 */ 478 479 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip); 480 if (error) 481 kprintf("hammer_create_inode error %d\n", error); 482 if (error) { 483 hammer_abort_transaction(&trans); 484 *ap->a_vpp = NULL; 485 return (error); 486 } 487 488 /* 489 * Add the new filesystem object to the directory. This will also 490 * bump the inode's link count. 491 */ 492 error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip); 493 if (error) 494 kprintf("hammer_ip_add_directory error %d\n", error); 495 496 /* 497 * Finish up. 498 */ 499 if (error) { 500 hammer_rel_inode(nip, 0); 501 hammer_abort_transaction(&trans); 502 *ap->a_vpp = NULL; 503 } else { 504 hammer_commit_transaction(&trans); 505 error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp); 506 hammer_rel_inode(nip, 0); 507 if (error == 0) { 508 cache_setunresolved(ap->a_nch); 509 cache_setvp(ap->a_nch, *ap->a_vpp); 510 } 511 } 512 return (error); 513 } 514 515 /* 516 * hammer_vop_getattr { vp, vap } 517 */ 518 static 519 int 520 hammer_vop_getattr(struct vop_getattr_args *ap) 521 { 522 struct hammer_inode *ip = VTOI(ap->a_vp); 523 struct vattr *vap = ap->a_vap; 524 525 #if 0 526 if (cache_check_fsmid_vp(ap->a_vp, &ip->fsmid) && 527 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0 && 528 ip->obj_asof == XXX 529 ) { 530 /* LAZYMOD XXX */ 531 } 532 hammer_itimes(ap->a_vp); 533 #endif 534 535 vap->va_fsid = ip->hmp->fsid_udev; 536 vap->va_fileid = ip->ino_rec.base.base.obj_id; 537 vap->va_mode = ip->ino_data.mode; 538 vap->va_nlink = ip->ino_rec.ino_nlinks; 539 vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid); 540 vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid); 541 vap->va_rmajor = 0; 542 vap->va_rminor = 0; 543 vap->va_size = ip->ino_rec.ino_size; 544 hammer_to_timespec(ip->ino_rec.ino_atime, &vap->va_atime); 545 hammer_to_timespec(ip->ino_rec.ino_mtime, &vap->va_mtime); 546 hammer_to_timespec(ip->ino_data.ctime, &vap->va_ctime); 547 vap->va_flags = ip->ino_data.uflags; 548 vap->va_gen = 1; /* hammer inums are unique for all time */ 549 vap->va_blocksize = 32768; /* XXX - extract from root volume */ 550 vap->va_bytes = ip->ino_rec.ino_size; 551 vap->va_type = hammer_get_vnode_type(ip->ino_rec.base.base.obj_type); 552 vap->va_filerev = 0; /* XXX */ 553 /* mtime uniquely identifies any adjustments made to the file */ 554 vap->va_fsmid = ip->ino_rec.ino_mtime; 555 vap->va_uid_uuid = ip->ino_data.uid; 556 vap->va_gid_uuid = ip->ino_data.gid; 557 vap->va_fsid_uuid = ip->hmp->fsid; 558 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID | 559 VA_FSID_UUID_VALID; 560 561 switch (ip->ino_rec.base.base.obj_type) { 562 case HAMMER_OBJTYPE_CDEV: 563 case HAMMER_OBJTYPE_BDEV: 564 vap->va_rmajor = ip->ino_data.rmajor; 565 vap->va_rminor = ip->ino_data.rminor; 566 break; 567 default: 568 break; 569 } 570 571 return(0); 572 } 573 574 /* 575 * hammer_vop_nresolve { nch, dvp, cred } 576 * 577 * Locate the requested directory entry. 578 */ 579 static 580 int 581 hammer_vop_nresolve(struct vop_nresolve_args *ap) 582 { 583 struct namecache *ncp; 584 hammer_inode_t dip; 585 hammer_inode_t ip; 586 hammer_tid_t asof; 587 struct hammer_cursor cursor; 588 union hammer_record_ondisk *rec; 589 struct vnode *vp; 590 int64_t namekey; 591 int error; 592 int i; 593 int nlen; 594 int flags; 595 u_int64_t obj_id; 596 597 /* 598 * Misc initialization, plus handle as-of name extensions. Look for 599 * the '@@' extension. Note that as-of files and directories cannot 600 * be modified. 601 */ 602 dip = VTOI(ap->a_dvp); 603 ncp = ap->a_nch->ncp; 604 asof = dip->obj_asof; 605 nlen = ncp->nc_nlen; 606 flags = dip->flags; 607 608 for (i = 0; i < nlen; ++i) { 609 if (ncp->nc_name[i] == '@' && ncp->nc_name[i+1] == '@') { 610 asof = hammer_str_to_tid(ncp->nc_name + i + 2); 611 flags |= HAMMER_INODE_RO; 612 break; 613 } 614 } 615 nlen = i; 616 617 /* 618 * If there is no path component the time extension is relative to 619 * dip. 620 */ 621 if (nlen == 0) { 622 ip = hammer_get_inode(dip->hmp, &dip->cache[1], dip->obj_id, 623 asof, flags, &error); 624 if (error == 0) { 625 error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp); 626 hammer_rel_inode(ip, 0); 627 } else { 628 vp = NULL; 629 } 630 if (error == 0) { 631 vn_unlock(vp); 632 cache_setvp(ap->a_nch, vp); 633 vrele(vp); 634 } 635 return(error); 636 } 637 638 /* 639 * Calculate the namekey and setup the key range for the scan. This 640 * works kinda like a chained hash table where the lower 32 bits 641 * of the namekey synthesize the chain. 642 * 643 * The key range is inclusive of both key_beg and key_end. 644 */ 645 namekey = hammer_directory_namekey(ncp->nc_name, nlen); 646 647 error = hammer_init_cursor_hmp(&cursor, &dip->cache[0], dip->hmp); 648 cursor.key_beg.obj_id = dip->obj_id; 649 cursor.key_beg.key = namekey; 650 cursor.key_beg.create_tid = 0; 651 cursor.key_beg.delete_tid = 0; 652 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY; 653 cursor.key_beg.obj_type = 0; 654 655 cursor.key_end = cursor.key_beg; 656 cursor.key_end.key |= 0xFFFFFFFFULL; 657 cursor.asof = asof; 658 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; 659 660 /* 661 * Scan all matching records (the chain), locate the one matching 662 * the requested path component. 663 * 664 * The hammer_ip_*() functions merge in-memory records with on-disk 665 * records for the purposes of the search. 666 */ 667 if (error == 0) 668 error = hammer_ip_first(&cursor, dip); 669 670 rec = NULL; 671 obj_id = 0; 672 673 while (error == 0) { 674 error = hammer_ip_resolve_data(&cursor); 675 if (error) 676 break; 677 rec = cursor.record; 678 if (nlen == rec->entry.base.data_len && 679 bcmp(ncp->nc_name, cursor.data, nlen) == 0) { 680 obj_id = rec->entry.obj_id; 681 break; 682 } 683 error = hammer_ip_next(&cursor); 684 } 685 hammer_done_cursor(&cursor); 686 if (error == 0) { 687 ip = hammer_get_inode(dip->hmp, &dip->cache[1], 688 obj_id, asof, flags, &error); 689 if (error == 0) { 690 error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp); 691 hammer_rel_inode(ip, 0); 692 } else { 693 vp = NULL; 694 } 695 if (error == 0) { 696 vn_unlock(vp); 697 cache_setvp(ap->a_nch, vp); 698 vrele(vp); 699 } 700 } else if (error == ENOENT) { 701 cache_setvp(ap->a_nch, NULL); 702 } 703 return (error); 704 } 705 706 /* 707 * hammer_vop_nlookupdotdot { dvp, vpp, cred } 708 * 709 * Locate the parent directory of a directory vnode. 710 * 711 * dvp is referenced but not locked. *vpp must be returned referenced and 712 * locked. A parent_obj_id of 0 does not necessarily indicate that we are 713 * at the root, instead it could indicate that the directory we were in was 714 * removed. 715 * 716 * NOTE: as-of sequences are not linked into the directory structure. If 717 * we are at the root with a different asof then the mount point, reload 718 * the same directory with the mount point's asof. I'm not sure what this 719 * will do to NFS. We encode ASOF stamps in NFS file handles so it might not 720 * get confused, but it hasn't been tested. 721 */ 722 static 723 int 724 hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap) 725 { 726 struct hammer_inode *dip; 727 struct hammer_inode *ip; 728 int64_t parent_obj_id; 729 hammer_tid_t asof; 730 int error; 731 732 dip = VTOI(ap->a_dvp); 733 asof = dip->obj_asof; 734 parent_obj_id = dip->ino_data.parent_obj_id; 735 736 if (parent_obj_id == 0) { 737 if (dip->obj_id == HAMMER_OBJID_ROOT && 738 asof != dip->hmp->asof) { 739 parent_obj_id = dip->obj_id; 740 asof = dip->hmp->asof; 741 *ap->a_fakename = kmalloc(19, M_TEMP, M_WAITOK); 742 ksnprintf(*ap->a_fakename, 19, "0x%016llx", 743 dip->obj_asof); 744 } else { 745 *ap->a_vpp = NULL; 746 return ENOENT; 747 } 748 } 749 750 ip = hammer_get_inode(dip->hmp, &dip->cache[1], parent_obj_id, 751 asof, dip->flags, &error); 752 if (ip == NULL) { 753 *ap->a_vpp = NULL; 754 return(error); 755 } 756 error = hammer_get_vnode(ip, LK_EXCLUSIVE, ap->a_vpp); 757 hammer_rel_inode(ip, 0); 758 return (error); 759 } 760 761 /* 762 * hammer_vop_nlink { nch, dvp, vp, cred } 763 */ 764 static 765 int 766 hammer_vop_nlink(struct vop_nlink_args *ap) 767 { 768 struct hammer_transaction trans; 769 struct hammer_inode *dip; 770 struct hammer_inode *ip; 771 struct nchandle *nch; 772 int error; 773 774 nch = ap->a_nch; 775 dip = VTOI(ap->a_dvp); 776 ip = VTOI(ap->a_vp); 777 778 if (dip->flags & HAMMER_INODE_RO) 779 return (EROFS); 780 if (ip->flags & HAMMER_INODE_RO) 781 return (EROFS); 782 783 /* 784 * Create a transaction to cover the operations we perform. 785 */ 786 hammer_start_transaction(&trans, dip->hmp); 787 788 /* 789 * Add the filesystem object to the directory. Note that neither 790 * dip nor ip are referenced or locked, but their vnodes are 791 * referenced. This function will bump the inode's link count. 792 */ 793 error = hammer_ip_add_directory(&trans, dip, nch->ncp, ip); 794 795 /* 796 * Finish up. 797 */ 798 if (error) { 799 hammer_abort_transaction(&trans); 800 } else { 801 cache_setunresolved(nch); 802 cache_setvp(nch, ap->a_vp); 803 hammer_commit_transaction(&trans); 804 } 805 return (error); 806 } 807 808 /* 809 * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap } 810 * 811 * The operating system has already ensured that the directory entry 812 * does not exist and done all appropriate namespace locking. 813 */ 814 static 815 int 816 hammer_vop_nmkdir(struct vop_nmkdir_args *ap) 817 { 818 struct hammer_transaction trans; 819 struct hammer_inode *dip; 820 struct hammer_inode *nip; 821 struct nchandle *nch; 822 int error; 823 824 nch = ap->a_nch; 825 dip = VTOI(ap->a_dvp); 826 827 if (dip->flags & HAMMER_INODE_RO) 828 return (EROFS); 829 830 /* 831 * Create a transaction to cover the operations we perform. 832 */ 833 hammer_start_transaction(&trans, dip->hmp); 834 835 /* 836 * Create a new filesystem object of the requested type. The 837 * returned inode will be referenced but not locked. 838 */ 839 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip); 840 if (error) 841 kprintf("hammer_mkdir error %d\n", error); 842 if (error) { 843 hammer_abort_transaction(&trans); 844 *ap->a_vpp = NULL; 845 return (error); 846 } 847 848 /* 849 * Add the new filesystem object to the directory. This will also 850 * bump the inode's link count. 851 */ 852 error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip); 853 if (error) 854 kprintf("hammer_mkdir (add) error %d\n", error); 855 856 /* 857 * Finish up. 858 */ 859 if (error) { 860 hammer_rel_inode(nip, 0); 861 hammer_abort_transaction(&trans); 862 *ap->a_vpp = NULL; 863 } else { 864 hammer_commit_transaction(&trans); 865 error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp); 866 hammer_rel_inode(nip, 0); 867 if (error == 0) { 868 cache_setunresolved(ap->a_nch); 869 cache_setvp(ap->a_nch, *ap->a_vpp); 870 } 871 } 872 return (error); 873 } 874 875 /* 876 * hammer_vop_nmknod { nch, dvp, vpp, cred, vap } 877 * 878 * The operating system has already ensured that the directory entry 879 * does not exist and done all appropriate namespace locking. 880 */ 881 static 882 int 883 hammer_vop_nmknod(struct vop_nmknod_args *ap) 884 { 885 struct hammer_transaction trans; 886 struct hammer_inode *dip; 887 struct hammer_inode *nip; 888 struct nchandle *nch; 889 int error; 890 891 nch = ap->a_nch; 892 dip = VTOI(ap->a_dvp); 893 894 if (dip->flags & HAMMER_INODE_RO) 895 return (EROFS); 896 897 /* 898 * Create a transaction to cover the operations we perform. 899 */ 900 hammer_start_transaction(&trans, dip->hmp); 901 902 /* 903 * Create a new filesystem object of the requested type. The 904 * returned inode will be referenced but not locked. 905 */ 906 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip); 907 if (error) { 908 hammer_abort_transaction(&trans); 909 *ap->a_vpp = NULL; 910 return (error); 911 } 912 913 /* 914 * Add the new filesystem object to the directory. This will also 915 * bump the inode's link count. 916 */ 917 error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip); 918 919 /* 920 * Finish up. 921 */ 922 if (error) { 923 hammer_rel_inode(nip, 0); 924 hammer_abort_transaction(&trans); 925 *ap->a_vpp = NULL; 926 } else { 927 hammer_commit_transaction(&trans); 928 error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp); 929 hammer_rel_inode(nip, 0); 930 if (error == 0) { 931 cache_setunresolved(ap->a_nch); 932 cache_setvp(ap->a_nch, *ap->a_vpp); 933 } 934 } 935 return (error); 936 } 937 938 /* 939 * hammer_vop_open { vp, mode, cred, fp } 940 */ 941 static 942 int 943 hammer_vop_open(struct vop_open_args *ap) 944 { 945 if ((ap->a_mode & FWRITE) && (VTOI(ap->a_vp)->flags & HAMMER_INODE_RO)) 946 return (EROFS); 947 948 return(vop_stdopen(ap)); 949 } 950 951 /* 952 * hammer_vop_pathconf { vp, name, retval } 953 */ 954 static 955 int 956 hammer_vop_pathconf(struct vop_pathconf_args *ap) 957 { 958 return EOPNOTSUPP; 959 } 960 961 /* 962 * hammer_vop_print { vp } 963 */ 964 static 965 int 966 hammer_vop_print(struct vop_print_args *ap) 967 { 968 return EOPNOTSUPP; 969 } 970 971 /* 972 * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies } 973 */ 974 static 975 int 976 hammer_vop_readdir(struct vop_readdir_args *ap) 977 { 978 struct hammer_cursor cursor; 979 struct hammer_inode *ip; 980 struct uio *uio; 981 hammer_record_ondisk_t rec; 982 hammer_base_elm_t base; 983 int error; 984 int cookie_index; 985 int ncookies; 986 off_t *cookies; 987 off_t saveoff; 988 int r; 989 990 ip = VTOI(ap->a_vp); 991 uio = ap->a_uio; 992 saveoff = uio->uio_offset; 993 994 if (ap->a_ncookies) { 995 ncookies = uio->uio_resid / 16 + 1; 996 if (ncookies > 1024) 997 ncookies = 1024; 998 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK); 999 cookie_index = 0; 1000 } else { 1001 ncookies = -1; 1002 cookies = NULL; 1003 cookie_index = 0; 1004 } 1005 1006 /* 1007 * Handle artificial entries 1008 */ 1009 error = 0; 1010 if (saveoff == 0) { 1011 r = vop_write_dirent(&error, uio, ip->obj_id, DT_DIR, 1, "."); 1012 if (r) 1013 goto done; 1014 if (cookies) 1015 cookies[cookie_index] = saveoff; 1016 ++saveoff; 1017 ++cookie_index; 1018 if (cookie_index == ncookies) 1019 goto done; 1020 } 1021 if (saveoff == 1) { 1022 if (ip->ino_data.parent_obj_id) { 1023 r = vop_write_dirent(&error, uio, 1024 ip->ino_data.parent_obj_id, 1025 DT_DIR, 2, ".."); 1026 } else { 1027 r = vop_write_dirent(&error, uio, 1028 ip->obj_id, DT_DIR, 2, ".."); 1029 } 1030 if (r) 1031 goto done; 1032 if (cookies) 1033 cookies[cookie_index] = saveoff; 1034 ++saveoff; 1035 ++cookie_index; 1036 if (cookie_index == ncookies) 1037 goto done; 1038 } 1039 1040 /* 1041 * Key range (begin and end inclusive) to scan. Directory keys 1042 * directly translate to a 64 bit 'seek' position. 1043 */ 1044 hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp); 1045 cursor.key_beg.obj_id = ip->obj_id; 1046 cursor.key_beg.create_tid = 0; 1047 cursor.key_beg.delete_tid = 0; 1048 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY; 1049 cursor.key_beg.obj_type = 0; 1050 cursor.key_beg.key = saveoff; 1051 1052 cursor.key_end = cursor.key_beg; 1053 cursor.key_end.key = HAMMER_MAX_KEY; 1054 cursor.asof = ip->obj_asof; 1055 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; 1056 1057 error = hammer_ip_first(&cursor, ip); 1058 1059 while (error == 0) { 1060 error = hammer_ip_resolve_record_and_data(&cursor); 1061 if (error) 1062 break; 1063 rec = cursor.record; 1064 base = &rec->base.base; 1065 saveoff = base->key; 1066 1067 if (base->obj_id != ip->obj_id) 1068 panic("readdir: bad record at %p", cursor.node); 1069 1070 r = vop_write_dirent( 1071 &error, uio, rec->entry.obj_id, 1072 hammer_get_dtype(rec->entry.base.base.obj_type), 1073 rec->entry.base.data_len, 1074 (void *)cursor.data); 1075 if (r) 1076 break; 1077 ++saveoff; 1078 if (cookies) 1079 cookies[cookie_index] = base->key; 1080 ++cookie_index; 1081 if (cookie_index == ncookies) 1082 break; 1083 error = hammer_ip_next(&cursor); 1084 } 1085 hammer_done_cursor(&cursor); 1086 1087 done: 1088 if (ap->a_eofflag) 1089 *ap->a_eofflag = (error == ENOENT); 1090 uio->uio_offset = saveoff; 1091 if (error && cookie_index == 0) { 1092 if (error == ENOENT) 1093 error = 0; 1094 if (cookies) { 1095 kfree(cookies, M_TEMP); 1096 *ap->a_ncookies = 0; 1097 *ap->a_cookies = NULL; 1098 } 1099 } else { 1100 if (error == ENOENT) 1101 error = 0; 1102 if (cookies) { 1103 *ap->a_ncookies = cookie_index; 1104 *ap->a_cookies = cookies; 1105 } 1106 } 1107 return(error); 1108 } 1109 1110 /* 1111 * hammer_vop_readlink { vp, uio, cred } 1112 */ 1113 static 1114 int 1115 hammer_vop_readlink(struct vop_readlink_args *ap) 1116 { 1117 struct hammer_cursor cursor; 1118 struct hammer_inode *ip; 1119 int error; 1120 1121 ip = VTOI(ap->a_vp); 1122 hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp); 1123 1124 /* 1125 * Key range (begin and end inclusive) to scan. Directory keys 1126 * directly translate to a 64 bit 'seek' position. 1127 */ 1128 cursor.key_beg.obj_id = ip->obj_id; 1129 cursor.key_beg.create_tid = 0; 1130 cursor.key_beg.delete_tid = 0; 1131 cursor.key_beg.rec_type = HAMMER_RECTYPE_FIX; 1132 cursor.key_beg.obj_type = 0; 1133 cursor.key_beg.key = HAMMER_FIXKEY_SYMLINK; 1134 cursor.asof = ip->obj_asof; 1135 cursor.flags |= HAMMER_CURSOR_ASOF; 1136 1137 error = hammer_ip_lookup(&cursor, ip); 1138 if (error == 0) { 1139 error = hammer_ip_resolve_data(&cursor); 1140 if (error == 0) { 1141 error = uiomove((char *)cursor.data, 1142 cursor.record->base.data_len, 1143 ap->a_uio); 1144 } 1145 } 1146 hammer_done_cursor(&cursor); 1147 return(error); 1148 } 1149 1150 /* 1151 * hammer_vop_nremove { nch, dvp, cred } 1152 */ 1153 static 1154 int 1155 hammer_vop_nremove(struct vop_nremove_args *ap) 1156 { 1157 return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, 0)); 1158 } 1159 1160 /* 1161 * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred } 1162 */ 1163 static 1164 int 1165 hammer_vop_nrename(struct vop_nrename_args *ap) 1166 { 1167 struct hammer_transaction trans; 1168 struct namecache *fncp; 1169 struct namecache *tncp; 1170 struct hammer_inode *fdip; 1171 struct hammer_inode *tdip; 1172 struct hammer_inode *ip; 1173 struct hammer_cursor cursor; 1174 union hammer_record_ondisk *rec; 1175 int64_t namekey; 1176 int error; 1177 1178 fdip = VTOI(ap->a_fdvp); 1179 tdip = VTOI(ap->a_tdvp); 1180 fncp = ap->a_fnch->ncp; 1181 tncp = ap->a_tnch->ncp; 1182 ip = VTOI(fncp->nc_vp); 1183 KKASSERT(ip != NULL); 1184 1185 if (fdip->flags & HAMMER_INODE_RO) 1186 return (EROFS); 1187 if (tdip->flags & HAMMER_INODE_RO) 1188 return (EROFS); 1189 if (ip->flags & HAMMER_INODE_RO) 1190 return (EROFS); 1191 1192 hammer_start_transaction(&trans, fdip->hmp); 1193 1194 /* 1195 * Remove tncp from the target directory and then link ip as 1196 * tncp. XXX pass trans to dounlink 1197 * 1198 * Force the inode sync-time to match the transaction so it is 1199 * in-sync with the creation of the target directory entry. 1200 */ 1201 error = hammer_dounlink(ap->a_tnch, ap->a_tdvp, ap->a_cred, 0); 1202 if (error == 0 || error == ENOENT) { 1203 error = hammer_ip_add_directory(&trans, tdip, tncp, ip); 1204 if (error == 0) { 1205 ip->ino_data.parent_obj_id = tdip->obj_id; 1206 hammer_modify_inode(&trans, ip, 1207 HAMMER_INODE_DDIRTY | HAMMER_INODE_TIDLOCKED); 1208 } 1209 } 1210 if (error) 1211 goto failed; /* XXX */ 1212 1213 /* 1214 * Locate the record in the originating directory and remove it. 1215 * 1216 * Calculate the namekey and setup the key range for the scan. This 1217 * works kinda like a chained hash table where the lower 32 bits 1218 * of the namekey synthesize the chain. 1219 * 1220 * The key range is inclusive of both key_beg and key_end. 1221 */ 1222 namekey = hammer_directory_namekey(fncp->nc_name, fncp->nc_nlen); 1223 retry: 1224 hammer_init_cursor_hmp(&cursor, &fdip->cache[0], fdip->hmp); 1225 cursor.key_beg.obj_id = fdip->obj_id; 1226 cursor.key_beg.key = namekey; 1227 cursor.key_beg.create_tid = 0; 1228 cursor.key_beg.delete_tid = 0; 1229 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY; 1230 cursor.key_beg.obj_type = 0; 1231 1232 cursor.key_end = cursor.key_beg; 1233 cursor.key_end.key |= 0xFFFFFFFFULL; 1234 cursor.asof = fdip->obj_asof; 1235 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; 1236 1237 /* 1238 * Scan all matching records (the chain), locate the one matching 1239 * the requested path component. 1240 * 1241 * The hammer_ip_*() functions merge in-memory records with on-disk 1242 * records for the purposes of the search. 1243 */ 1244 error = hammer_ip_first(&cursor, fdip); 1245 while (error == 0) { 1246 if (hammer_ip_resolve_data(&cursor) != 0) 1247 break; 1248 rec = cursor.record; 1249 if (fncp->nc_nlen == rec->entry.base.data_len && 1250 bcmp(fncp->nc_name, cursor.data, fncp->nc_nlen) == 0) { 1251 break; 1252 } 1253 error = hammer_ip_next(&cursor); 1254 } 1255 1256 /* 1257 * If all is ok we have to get the inode so we can adjust nlinks. 1258 * 1259 * WARNING: hammer_ip_del_directory() may have to terminate the 1260 * cursor to avoid a recursion. It's ok to call hammer_done_cursor() 1261 * twice. 1262 */ 1263 if (error == 0) 1264 error = hammer_ip_del_directory(&trans, &cursor, fdip, ip); 1265 hammer_done_cursor(&cursor); 1266 if (error == 0) 1267 cache_rename(ap->a_fnch, ap->a_tnch); 1268 if (error == EDEADLK) 1269 goto retry; 1270 failed: 1271 if (error == 0) { 1272 hammer_commit_transaction(&trans); 1273 } else { 1274 hammer_abort_transaction(&trans); 1275 } 1276 return (error); 1277 } 1278 1279 /* 1280 * hammer_vop_nrmdir { nch, dvp, cred } 1281 */ 1282 static 1283 int 1284 hammer_vop_nrmdir(struct vop_nrmdir_args *ap) 1285 { 1286 return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, 0)); 1287 } 1288 1289 /* 1290 * hammer_vop_setattr { vp, vap, cred } 1291 */ 1292 static 1293 int 1294 hammer_vop_setattr(struct vop_setattr_args *ap) 1295 { 1296 struct hammer_transaction trans; 1297 struct vattr *vap; 1298 struct hammer_inode *ip; 1299 int modflags; 1300 int error; 1301 int truncating; 1302 int64_t aligned_size; 1303 u_int32_t flags; 1304 uuid_t uuid; 1305 1306 vap = ap->a_vap; 1307 ip = ap->a_vp->v_data; 1308 modflags = 0; 1309 1310 if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) 1311 return(EROFS); 1312 if (ip->flags & HAMMER_INODE_RO) 1313 return (EROFS); 1314 1315 hammer_start_transaction(&trans, ip->hmp); 1316 error = 0; 1317 1318 if (vap->va_flags != VNOVAL) { 1319 flags = ip->ino_data.uflags; 1320 error = vop_helper_setattr_flags(&flags, vap->va_flags, 1321 hammer_to_unix_xid(&ip->ino_data.uid), 1322 ap->a_cred); 1323 if (error == 0) { 1324 if (ip->ino_data.uflags != flags) { 1325 ip->ino_data.uflags = flags; 1326 modflags |= HAMMER_INODE_DDIRTY; 1327 } 1328 if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) { 1329 error = 0; 1330 goto done; 1331 } 1332 } 1333 goto done; 1334 } 1335 if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) { 1336 error = EPERM; 1337 goto done; 1338 } 1339 if (vap->va_uid != (uid_t)VNOVAL) { 1340 hammer_guid_to_uuid(&uuid, vap->va_uid); 1341 if (bcmp(&uuid, &ip->ino_data.uid, sizeof(uuid)) != 0) { 1342 ip->ino_data.uid = uuid; 1343 modflags |= HAMMER_INODE_DDIRTY; 1344 } 1345 } 1346 if (vap->va_gid != (uid_t)VNOVAL) { 1347 hammer_guid_to_uuid(&uuid, vap->va_gid); 1348 if (bcmp(&uuid, &ip->ino_data.gid, sizeof(uuid)) != 0) { 1349 ip->ino_data.gid = uuid; 1350 modflags |= HAMMER_INODE_DDIRTY; 1351 } 1352 } 1353 while (vap->va_size != VNOVAL && ip->ino_rec.ino_size != vap->va_size) { 1354 switch(ap->a_vp->v_type) { 1355 case VREG: 1356 if (vap->va_size == ip->ino_rec.ino_size) 1357 break; 1358 if (vap->va_size < ip->ino_rec.ino_size) { 1359 vtruncbuf(ap->a_vp, vap->va_size, 1360 HAMMER_BUFSIZE); 1361 truncating = 1; 1362 } else { 1363 vnode_pager_setsize(ap->a_vp, vap->va_size); 1364 truncating = 0; 1365 } 1366 ip->ino_rec.ino_size = vap->va_size; 1367 modflags |= HAMMER_INODE_RDIRTY; 1368 aligned_size = (vap->va_size + HAMMER_BUFMASK) & 1369 ~(int64_t)HAMMER_BUFMASK; 1370 1371 if (truncating) { 1372 error = hammer_ip_delete_range(&trans, ip, 1373 aligned_size, 1374 0x7FFFFFFFFFFFFFFFLL); 1375 } 1376 /* 1377 * If truncating we have to clean out a portion of 1378 * the last block on-disk. 1379 */ 1380 if (truncating && error == 0 && 1381 vap->va_size < aligned_size) { 1382 struct buf *bp; 1383 int offset; 1384 1385 offset = vap->va_size & HAMMER_BUFMASK; 1386 error = bread(ap->a_vp, 1387 aligned_size - HAMMER_BUFSIZE, 1388 HAMMER_BUFSIZE, &bp); 1389 if (error == 0) { 1390 bzero(bp->b_data + offset, 1391 HAMMER_BUFSIZE - offset); 1392 bdwrite(bp); 1393 } else { 1394 brelse(bp); 1395 } 1396 } 1397 break; 1398 case VDATABASE: 1399 error = hammer_ip_delete_range(&trans, ip, 1400 vap->va_size, 1401 0x7FFFFFFFFFFFFFFFLL); 1402 ip->ino_rec.ino_size = vap->va_size; 1403 modflags |= HAMMER_INODE_RDIRTY; 1404 break; 1405 default: 1406 error = EINVAL; 1407 goto done; 1408 } 1409 break; 1410 } 1411 if (vap->va_atime.tv_sec != VNOVAL) { 1412 ip->ino_rec.ino_atime = 1413 hammer_timespec_to_transid(&vap->va_atime); 1414 modflags |= HAMMER_INODE_ITIMES; 1415 } 1416 if (vap->va_mtime.tv_sec != VNOVAL) { 1417 ip->ino_rec.ino_mtime = 1418 hammer_timespec_to_transid(&vap->va_mtime); 1419 modflags |= HAMMER_INODE_ITIMES; 1420 } 1421 if (vap->va_mode != (mode_t)VNOVAL) { 1422 if (ip->ino_data.mode != vap->va_mode) { 1423 ip->ino_data.mode = vap->va_mode; 1424 modflags |= HAMMER_INODE_DDIRTY; 1425 } 1426 } 1427 done: 1428 if (error) { 1429 hammer_abort_transaction(&trans); 1430 } else { 1431 hammer_modify_inode(&trans, ip, modflags); 1432 hammer_commit_transaction(&trans); 1433 } 1434 return (error); 1435 } 1436 1437 /* 1438 * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target } 1439 */ 1440 static 1441 int 1442 hammer_vop_nsymlink(struct vop_nsymlink_args *ap) 1443 { 1444 struct hammer_transaction trans; 1445 struct hammer_inode *dip; 1446 struct hammer_inode *nip; 1447 struct nchandle *nch; 1448 hammer_record_t record; 1449 int error; 1450 int bytes; 1451 1452 ap->a_vap->va_type = VLNK; 1453 1454 nch = ap->a_nch; 1455 dip = VTOI(ap->a_dvp); 1456 1457 if (dip->flags & HAMMER_INODE_RO) 1458 return (EROFS); 1459 1460 /* 1461 * Create a transaction to cover the operations we perform. 1462 */ 1463 hammer_start_transaction(&trans, dip->hmp); 1464 1465 /* 1466 * Create a new filesystem object of the requested type. The 1467 * returned inode will be referenced but not locked. 1468 */ 1469 1470 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip); 1471 if (error) { 1472 hammer_abort_transaction(&trans); 1473 *ap->a_vpp = NULL; 1474 return (error); 1475 } 1476 1477 /* 1478 * Add the new filesystem object to the directory. This will also 1479 * bump the inode's link count. 1480 */ 1481 error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip); 1482 1483 /* 1484 * Add a record representing the symlink. symlink stores the link 1485 * as pure data, not a string, and is no \0 terminated. 1486 */ 1487 if (error == 0) { 1488 record = hammer_alloc_mem_record(nip); 1489 bytes = strlen(ap->a_target); 1490 1491 record->rec.base.base.key = HAMMER_FIXKEY_SYMLINK; 1492 record->rec.base.base.rec_type = HAMMER_RECTYPE_FIX; 1493 record->rec.base.data_len = bytes; 1494 record->data = (void *)ap->a_target; 1495 /* will be reallocated by routine below */ 1496 error = hammer_ip_add_record(&trans, record); 1497 1498 /* 1499 * Set the file size to the length of the link. 1500 */ 1501 if (error == 0) { 1502 nip->ino_rec.ino_size = bytes; 1503 hammer_modify_inode(&trans, nip, HAMMER_INODE_RDIRTY); 1504 } 1505 } 1506 1507 /* 1508 * Finish up. 1509 */ 1510 if (error) { 1511 hammer_rel_inode(nip, 0); 1512 hammer_abort_transaction(&trans); 1513 *ap->a_vpp = NULL; 1514 } else { 1515 hammer_commit_transaction(&trans); 1516 error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp); 1517 hammer_rel_inode(nip, 0); 1518 if (error == 0) { 1519 cache_setunresolved(ap->a_nch); 1520 cache_setvp(ap->a_nch, *ap->a_vpp); 1521 } 1522 } 1523 return (error); 1524 } 1525 1526 /* 1527 * hammer_vop_nwhiteout { nch, dvp, cred, flags } 1528 */ 1529 static 1530 int 1531 hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap) 1532 { 1533 return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, ap->a_flags)); 1534 } 1535 1536 /* 1537 * hammer_vop_ioctl { vp, command, data, fflag, cred } 1538 */ 1539 static 1540 int 1541 hammer_vop_ioctl(struct vop_ioctl_args *ap) 1542 { 1543 struct hammer_inode *ip = ap->a_vp->v_data; 1544 1545 return(hammer_ioctl(ip, ap->a_command, ap->a_data, 1546 ap->a_fflag, ap->a_cred)); 1547 } 1548 1549 static 1550 int 1551 hammer_vop_mountctl(struct vop_mountctl_args *ap) 1552 { 1553 struct mount *mp; 1554 int error; 1555 1556 mp = ap->a_head.a_ops->head.vv_mount; 1557 1558 switch(ap->a_op) { 1559 case MOUNTCTL_SET_EXPORT: 1560 if (ap->a_ctllen != sizeof(struct export_args)) 1561 error = EINVAL; 1562 error = hammer_vfs_export(mp, ap->a_op, 1563 (const struct export_args *)ap->a_ctl); 1564 break; 1565 default: 1566 error = journal_mountctl(ap); 1567 break; 1568 } 1569 return(error); 1570 } 1571 1572 /* 1573 * hammer_vop_strategy { vp, bio } 1574 * 1575 * Strategy call, used for regular file read & write only. Note that the 1576 * bp may represent a cluster. 1577 * 1578 * To simplify operation and allow better optimizations in the future, 1579 * this code does not make any assumptions with regards to buffer alignment 1580 * or size. 1581 */ 1582 static 1583 int 1584 hammer_vop_strategy(struct vop_strategy_args *ap) 1585 { 1586 struct buf *bp; 1587 int error; 1588 1589 bp = ap->a_bio->bio_buf; 1590 1591 switch(bp->b_cmd) { 1592 case BUF_CMD_READ: 1593 error = hammer_vop_strategy_read(ap); 1594 break; 1595 case BUF_CMD_WRITE: 1596 error = hammer_vop_strategy_write(ap); 1597 break; 1598 default: 1599 error = EINVAL; 1600 break; 1601 } 1602 bp->b_error = error; 1603 if (error) 1604 bp->b_flags |= B_ERROR; 1605 biodone(ap->a_bio); 1606 return (error); 1607 } 1608 1609 /* 1610 * Read from a regular file. Iterate the related records and fill in the 1611 * BIO/BUF. Gaps are zero-filled. 1612 * 1613 * The support code in hammer_object.c should be used to deal with mixed 1614 * in-memory and on-disk records. 1615 * 1616 * XXX atime update 1617 */ 1618 static 1619 int 1620 hammer_vop_strategy_read(struct vop_strategy_args *ap) 1621 { 1622 struct hammer_inode *ip = ap->a_vp->v_data; 1623 struct hammer_cursor cursor; 1624 hammer_record_ondisk_t rec; 1625 hammer_base_elm_t base; 1626 struct bio *bio; 1627 struct buf *bp; 1628 int64_t rec_offset; 1629 int64_t ran_end; 1630 int64_t tmp64; 1631 int error; 1632 int boff; 1633 int roff; 1634 int n; 1635 1636 bio = ap->a_bio; 1637 bp = bio->bio_buf; 1638 1639 hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp); 1640 1641 /* 1642 * Key range (begin and end inclusive) to scan. Note that the key's 1643 * stored in the actual records represent BASE+LEN, not BASE. The 1644 * first record containing bio_offset will have a key > bio_offset. 1645 */ 1646 cursor.key_beg.obj_id = ip->obj_id; 1647 cursor.key_beg.create_tid = 0; 1648 cursor.key_beg.delete_tid = 0; 1649 cursor.key_beg.obj_type = 0; 1650 cursor.key_beg.key = bio->bio_offset + 1; 1651 cursor.asof = ip->obj_asof; 1652 cursor.flags |= HAMMER_CURSOR_ASOF | HAMMER_CURSOR_DATAEXTOK; 1653 1654 cursor.key_end = cursor.key_beg; 1655 if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) { 1656 cursor.key_beg.rec_type = HAMMER_RECTYPE_DB; 1657 cursor.key_end.rec_type = HAMMER_RECTYPE_DB; 1658 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; 1659 } else { 1660 ran_end = bio->bio_offset + bp->b_bufsize; 1661 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; 1662 cursor.key_end.rec_type = HAMMER_RECTYPE_DATA; 1663 tmp64 = ran_end + MAXPHYS + 1; /* work-around GCC-4 bug */ 1664 if (tmp64 < ran_end) 1665 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; 1666 else 1667 cursor.key_end.key = ran_end + MAXPHYS + 1; 1668 } 1669 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 1670 1671 error = hammer_ip_first(&cursor, ip); 1672 boff = 0; 1673 1674 while (error == 0) { 1675 error = hammer_ip_resolve_data(&cursor); 1676 if (error) 1677 break; 1678 rec = cursor.record; 1679 base = &rec->base.base; 1680 1681 rec_offset = base->key - rec->data.base.data_len; 1682 1683 /* 1684 * Calculate the gap, if any, and zero-fill it. 1685 */ 1686 n = (int)(rec_offset - (bio->bio_offset + boff)); 1687 if (n > 0) { 1688 if (n > bp->b_bufsize - boff) 1689 n = bp->b_bufsize - boff; 1690 bzero((char *)bp->b_data + boff, n); 1691 boff += n; 1692 n = 0; 1693 } 1694 1695 /* 1696 * Calculate the data offset in the record and the number 1697 * of bytes we can copy. 1698 * 1699 * Note there is a degenerate case here where boff may 1700 * already be at bp->b_bufsize. 1701 */ 1702 roff = -n; 1703 n = rec->data.base.data_len - roff; 1704 KKASSERT(n > 0); 1705 if (n > bp->b_bufsize - boff) 1706 n = bp->b_bufsize - boff; 1707 bcopy((char *)cursor.data + roff, 1708 (char *)bp->b_data + boff, n); 1709 boff += n; 1710 if (boff == bp->b_bufsize) 1711 break; 1712 error = hammer_ip_next(&cursor); 1713 } 1714 hammer_done_cursor(&cursor); 1715 1716 /* 1717 * There may have been a gap after the last record 1718 */ 1719 if (error == ENOENT) 1720 error = 0; 1721 if (error == 0 && boff != bp->b_bufsize) { 1722 KKASSERT(boff < bp->b_bufsize); 1723 bzero((char *)bp->b_data + boff, bp->b_bufsize - boff); 1724 /* boff = bp->b_bufsize; */ 1725 } 1726 bp->b_resid = 0; 1727 return(error); 1728 } 1729 1730 /* 1731 * Write to a regular file. Iterate the related records and mark for 1732 * deletion. If existing edge records (left and right side) overlap our 1733 * write they have to be marked deleted and new records created, usually 1734 * referencing a portion of the original data. Then add a record to 1735 * represent the buffer. 1736 * 1737 * The support code in hammer_object.c should be used to deal with mixed 1738 * in-memory and on-disk records. 1739 */ 1740 static 1741 int 1742 hammer_vop_strategy_write(struct vop_strategy_args *ap) 1743 { 1744 struct hammer_transaction trans; 1745 hammer_inode_t ip; 1746 struct bio *bio; 1747 struct buf *bp; 1748 int error; 1749 1750 bio = ap->a_bio; 1751 bp = bio->bio_buf; 1752 ip = ap->a_vp->v_data; 1753 1754 if (ip->flags & HAMMER_INODE_RO) 1755 return (EROFS); 1756 1757 /* 1758 * Start a transaction using the TID stored with the bp. 1759 */ 1760 KKASSERT(bp->b_tid != 0); 1761 hammer_start_transaction_tid(&trans, ip->hmp, bp->b_tid); 1762 1763 /* 1764 * Delete any records overlapping our range. This function will 1765 * (eventually) properly truncate partial overlaps. 1766 */ 1767 if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) { 1768 error = hammer_ip_delete_range(&trans, ip, bio->bio_offset, 1769 bio->bio_offset); 1770 } else { 1771 error = hammer_ip_delete_range(&trans, ip, bio->bio_offset, 1772 bio->bio_offset + 1773 bp->b_bufsize - 1); 1774 } 1775 1776 /* 1777 * Add a single record to cover the write 1778 */ 1779 if (error == 0) { 1780 error = hammer_ip_sync_data(&trans, ip, bio->bio_offset, 1781 bp->b_data, bp->b_bufsize); 1782 } 1783 1784 /* 1785 * If an error occured abort the transaction 1786 */ 1787 if (error) { 1788 /* XXX undo deletion */ 1789 hammer_abort_transaction(&trans); 1790 bp->b_resid = bp->b_bufsize; 1791 } else { 1792 hammer_commit_transaction(&trans); 1793 bp->b_resid = 0; 1794 bp->b_tid = 0; 1795 } 1796 return(error); 1797 } 1798 1799 /* 1800 * dounlink - disconnect a directory entry 1801 * 1802 * XXX whiteout support not really in yet 1803 */ 1804 static int 1805 hammer_dounlink(struct nchandle *nch, struct vnode *dvp, struct ucred *cred, 1806 int flags) 1807 { 1808 struct hammer_transaction trans; 1809 struct namecache *ncp; 1810 hammer_inode_t dip; 1811 hammer_inode_t ip; 1812 hammer_record_ondisk_t rec; 1813 struct hammer_cursor cursor; 1814 int64_t namekey; 1815 int error; 1816 1817 /* 1818 * Calculate the namekey and setup the key range for the scan. This 1819 * works kinda like a chained hash table where the lower 32 bits 1820 * of the namekey synthesize the chain. 1821 * 1822 * The key range is inclusive of both key_beg and key_end. 1823 */ 1824 dip = VTOI(dvp); 1825 ncp = nch->ncp; 1826 1827 if (dip->flags & HAMMER_INODE_RO) 1828 return (EROFS); 1829 1830 hammer_start_transaction(&trans, dip->hmp); 1831 1832 namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen); 1833 retry: 1834 hammer_init_cursor_hmp(&cursor, &dip->cache[0], dip->hmp); 1835 cursor.key_beg.obj_id = dip->obj_id; 1836 cursor.key_beg.key = namekey; 1837 cursor.key_beg.create_tid = 0; 1838 cursor.key_beg.delete_tid = 0; 1839 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY; 1840 cursor.key_beg.obj_type = 0; 1841 1842 cursor.key_end = cursor.key_beg; 1843 cursor.key_end.key |= 0xFFFFFFFFULL; 1844 cursor.asof = dip->obj_asof; 1845 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; 1846 1847 /* 1848 * Scan all matching records (the chain), locate the one matching 1849 * the requested path component. info->last_error contains the 1850 * error code on search termination and could be 0, ENOENT, or 1851 * something else. 1852 * 1853 * The hammer_ip_*() functions merge in-memory records with on-disk 1854 * records for the purposes of the search. 1855 */ 1856 error = hammer_ip_first(&cursor, dip); 1857 while (error == 0) { 1858 error = hammer_ip_resolve_data(&cursor); 1859 if (error) 1860 break; 1861 rec = cursor.record; 1862 if (ncp->nc_nlen == rec->entry.base.data_len && 1863 bcmp(ncp->nc_name, cursor.data, ncp->nc_nlen) == 0) { 1864 break; 1865 } 1866 error = hammer_ip_next(&cursor); 1867 } 1868 1869 /* 1870 * If all is ok we have to get the inode so we can adjust nlinks. 1871 * 1872 * If the target is a directory, it must be empty. 1873 */ 1874 if (error == 0) { 1875 ip = hammer_get_inode(dip->hmp, &dip->cache[1], 1876 rec->entry.obj_id, 1877 dip->hmp->asof, 0, &error); 1878 if (error == ENOENT) { 1879 kprintf("obj_id %016llx\n", rec->entry.obj_id); 1880 Debugger("ENOENT unlinking object that should exist, cont to sync"); 1881 hammer_sync_hmp(dip->hmp, MNT_NOWAIT); 1882 Debugger("ENOENT - sync done"); 1883 } 1884 if (error == 0 && ip->ino_rec.base.base.obj_type == 1885 HAMMER_OBJTYPE_DIRECTORY) { 1886 error = hammer_ip_check_directory_empty(&trans, ip); 1887 } 1888 /* 1889 * WARNING: hammer_ip_del_directory() may have to terminate 1890 * the cursor to avoid a lock recursion. It's ok to call 1891 * hammer_done_cursor() twice. 1892 */ 1893 if (error == 0) 1894 error = hammer_ip_del_directory(&trans, &cursor, dip, ip); 1895 if (error == 0) { 1896 cache_setunresolved(nch); 1897 cache_setvp(nch, NULL); 1898 /* XXX locking */ 1899 if (ip->vp) 1900 cache_inval_vp(ip->vp, CINV_DESTROY); 1901 } 1902 hammer_rel_inode(ip, 0); 1903 } 1904 hammer_done_cursor(&cursor); 1905 if (error == EDEADLK) 1906 goto retry; 1907 1908 if (error == 0) 1909 hammer_commit_transaction(&trans); 1910 else 1911 hammer_abort_transaction(&trans); 1912 return (error); 1913 } 1914 1915 /************************************************************************ 1916 * FIFO AND SPECFS OPS * 1917 ************************************************************************ 1918 * 1919 */ 1920 1921 static int 1922 hammer_vop_fifoclose (struct vop_close_args *ap) 1923 { 1924 /* XXX update itimes */ 1925 return (VOCALL(&fifo_vnode_vops, &ap->a_head)); 1926 } 1927 1928 static int 1929 hammer_vop_fiforead (struct vop_read_args *ap) 1930 { 1931 int error; 1932 1933 error = VOCALL(&fifo_vnode_vops, &ap->a_head); 1934 /* XXX update access time */ 1935 return (error); 1936 } 1937 1938 static int 1939 hammer_vop_fifowrite (struct vop_write_args *ap) 1940 { 1941 int error; 1942 1943 error = VOCALL(&fifo_vnode_vops, &ap->a_head); 1944 /* XXX update access time */ 1945 return (error); 1946 } 1947 1948 static int 1949 hammer_vop_specclose (struct vop_close_args *ap) 1950 { 1951 /* XXX update itimes */ 1952 return (VOCALL(&spec_vnode_vops, &ap->a_head)); 1953 } 1954 1955 static int 1956 hammer_vop_specread (struct vop_read_args *ap) 1957 { 1958 /* XXX update access time */ 1959 return (VOCALL(&spec_vnode_vops, &ap->a_head)); 1960 } 1961 1962 static int 1963 hammer_vop_specwrite (struct vop_write_args *ap) 1964 { 1965 /* XXX update last change time */ 1966 return (VOCALL(&spec_vnode_vops, &ap->a_head)); 1967 } 1968 1969