1 /* 2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.102 2008/10/16 17:24:16 dillon Exp $ 35 */ 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/kernel.h> 40 #include <sys/fcntl.h> 41 #include <sys/namecache.h> 42 #include <sys/vnode.h> 43 #include <sys/lockf.h> 44 #include <sys/event.h> 45 #include <sys/stat.h> 46 #include <sys/dirent.h> 47 #include <sys/file.h> 48 #include <vm/vm_extern.h> 49 #include <vfs/fifofs/fifo.h> 50 #include "hammer.h" 51 52 /* 53 * USERFS VNOPS 54 */ 55 /*static int hammer_vop_vnoperate(struct vop_generic_args *);*/ 56 static int hammer_vop_fsync(struct vop_fsync_args *); 57 static int hammer_vop_read(struct vop_read_args *); 58 static int hammer_vop_write(struct vop_write_args *); 59 static int hammer_vop_access(struct vop_access_args *); 60 static int hammer_vop_advlock(struct vop_advlock_args *); 61 static int hammer_vop_close(struct vop_close_args *); 62 static int hammer_vop_ncreate(struct vop_ncreate_args *); 63 static int hammer_vop_getattr(struct vop_getattr_args *); 64 static int hammer_vop_nresolve(struct vop_nresolve_args *); 65 static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *); 66 static int hammer_vop_nlink(struct vop_nlink_args *); 67 static int hammer_vop_nmkdir(struct vop_nmkdir_args *); 68 static int hammer_vop_nmknod(struct vop_nmknod_args *); 69 static int hammer_vop_open(struct vop_open_args *); 70 static int hammer_vop_print(struct vop_print_args *); 71 static int hammer_vop_readdir(struct vop_readdir_args *); 72 static int hammer_vop_readlink(struct vop_readlink_args *); 73 static int hammer_vop_nremove(struct vop_nremove_args *); 74 static int hammer_vop_nrename(struct vop_nrename_args *); 75 static int hammer_vop_nrmdir(struct vop_nrmdir_args *); 76 static int hammer_vop_markatime(struct vop_markatime_args *); 77 static int hammer_vop_setattr(struct vop_setattr_args *); 78 static int hammer_vop_strategy(struct vop_strategy_args *); 79 static int hammer_vop_bmap(struct vop_bmap_args *ap); 80 static int hammer_vop_nsymlink(struct vop_nsymlink_args *); 81 static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *); 82 static int hammer_vop_ioctl(struct vop_ioctl_args *); 83 static int hammer_vop_mountctl(struct vop_mountctl_args *); 84 static int hammer_vop_kqfilter (struct vop_kqfilter_args *); 85 86 static int hammer_vop_fifoclose (struct vop_close_args *); 87 static int hammer_vop_fiforead (struct vop_read_args *); 88 static int hammer_vop_fifowrite (struct vop_write_args *); 89 static int hammer_vop_fifokqfilter (struct vop_kqfilter_args *); 90 91 struct vop_ops hammer_vnode_vops = { 92 .vop_default = vop_defaultop, 93 .vop_fsync = hammer_vop_fsync, 94 .vop_getpages = vop_stdgetpages, 95 .vop_putpages = vop_stdputpages, 96 .vop_read = hammer_vop_read, 97 .vop_write = hammer_vop_write, 98 .vop_access = hammer_vop_access, 99 .vop_advlock = hammer_vop_advlock, 100 .vop_close = hammer_vop_close, 101 .vop_ncreate = hammer_vop_ncreate, 102 .vop_getattr = hammer_vop_getattr, 103 .vop_inactive = hammer_vop_inactive, 104 .vop_reclaim = hammer_vop_reclaim, 105 .vop_nresolve = hammer_vop_nresolve, 106 .vop_nlookupdotdot = hammer_vop_nlookupdotdot, 107 .vop_nlink = hammer_vop_nlink, 108 .vop_nmkdir = hammer_vop_nmkdir, 109 .vop_nmknod = hammer_vop_nmknod, 110 .vop_open = hammer_vop_open, 111 .vop_pathconf = vop_stdpathconf, 112 .vop_print = hammer_vop_print, 113 .vop_readdir = hammer_vop_readdir, 114 .vop_readlink = hammer_vop_readlink, 115 .vop_nremove = hammer_vop_nremove, 116 .vop_nrename = hammer_vop_nrename, 117 .vop_nrmdir = hammer_vop_nrmdir, 118 .vop_markatime = hammer_vop_markatime, 119 .vop_setattr = hammer_vop_setattr, 120 .vop_bmap = hammer_vop_bmap, 121 .vop_strategy = hammer_vop_strategy, 122 .vop_nsymlink = hammer_vop_nsymlink, 123 .vop_nwhiteout = hammer_vop_nwhiteout, 124 .vop_ioctl = hammer_vop_ioctl, 125 .vop_mountctl = hammer_vop_mountctl, 126 .vop_kqfilter = hammer_vop_kqfilter 127 }; 128 129 struct vop_ops hammer_spec_vops = { 130 .vop_default = vop_defaultop, 131 .vop_fsync = hammer_vop_fsync, 132 .vop_read = vop_stdnoread, 133 .vop_write = vop_stdnowrite, 134 .vop_access = hammer_vop_access, 135 .vop_close = hammer_vop_close, 136 .vop_markatime = hammer_vop_markatime, 137 .vop_getattr = hammer_vop_getattr, 138 .vop_inactive = hammer_vop_inactive, 139 .vop_reclaim = hammer_vop_reclaim, 140 .vop_setattr = hammer_vop_setattr 141 }; 142 143 struct vop_ops hammer_fifo_vops = { 144 .vop_default = fifo_vnoperate, 145 .vop_fsync = hammer_vop_fsync, 146 .vop_read = hammer_vop_fiforead, 147 .vop_write = hammer_vop_fifowrite, 148 .vop_access = hammer_vop_access, 149 .vop_close = hammer_vop_fifoclose, 150 .vop_markatime = hammer_vop_markatime, 151 .vop_getattr = hammer_vop_getattr, 152 .vop_inactive = hammer_vop_inactive, 153 .vop_reclaim = hammer_vop_reclaim, 154 .vop_setattr = hammer_vop_setattr, 155 .vop_kqfilter = hammer_vop_fifokqfilter 156 }; 157 158 static __inline 159 void 160 hammer_knote(struct vnode *vp, int flags) 161 { 162 if (flags) 163 KNOTE(&vp->v_pollinfo.vpi_selinfo.si_note, flags); 164 } 165 166 #ifdef DEBUG_TRUNCATE 167 struct hammer_inode *HammerTruncIp; 168 #endif 169 170 static int hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch, 171 struct vnode *dvp, struct ucred *cred, 172 int flags, int isdir); 173 static int hammer_vop_strategy_read(struct vop_strategy_args *ap); 174 static int hammer_vop_strategy_write(struct vop_strategy_args *ap); 175 176 #if 0 177 static 178 int 179 hammer_vop_vnoperate(struct vop_generic_args *) 180 { 181 return (VOCALL(&hammer_vnode_vops, ap)); 182 } 183 #endif 184 185 /* 186 * hammer_vop_fsync { vp, waitfor } 187 * 188 * fsync() an inode to disk and wait for it to be completely committed 189 * such that the information would not be undone if a crash occured after 190 * return. 191 * 192 * NOTE: HAMMER's fsync()'s are going to remain expensive until we implement 193 * a REDO log. A sysctl is provided to relax HAMMER's fsync() 194 * operation. 195 * 196 * Ultimately the combination of a REDO log and use of fast storage 197 * to front-end cluster caches will make fsync fast, but it aint 198 * here yet. And, in anycase, we need real transactional 199 * all-or-nothing features which are not restricted to a single file. 200 */ 201 static 202 int 203 hammer_vop_fsync(struct vop_fsync_args *ap) 204 { 205 hammer_inode_t ip = VTOI(ap->a_vp); 206 int waitfor = ap->a_waitfor; 207 208 /* 209 * Fsync rule relaxation (default disabled) 210 */ 211 if (ap->a_flags & VOP_FSYNC_SYSCALL) { 212 switch(hammer_fsync_mode) { 213 case 0: 214 /* full semantics */ 215 break; 216 case 1: 217 /* asynchronous */ 218 if (waitfor == MNT_WAIT) 219 waitfor = MNT_NOWAIT; 220 break; 221 case 2: 222 /* synchronous fsync on close */ 223 ip->flags |= HAMMER_INODE_CLOSESYNC; 224 return(0); 225 case 3: 226 /* asynchronous fsync on close */ 227 ip->flags |= HAMMER_INODE_CLOSEASYNC; 228 return(0); 229 default: 230 /* ignore the fsync() system call */ 231 return(0); 232 } 233 } 234 235 /* 236 * Go do it 237 */ 238 ++hammer_count_fsyncs; 239 vfsync(ap->a_vp, waitfor, 1, NULL, NULL); 240 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); 241 if (waitfor == MNT_WAIT) { 242 vn_unlock(ap->a_vp); 243 hammer_wait_inode(ip); 244 vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY); 245 } 246 return (ip->error); 247 } 248 249 /* 250 * hammer_vop_read { vp, uio, ioflag, cred } 251 * 252 * MPALMOSTSAFE 253 */ 254 static 255 int 256 hammer_vop_read(struct vop_read_args *ap) 257 { 258 struct hammer_transaction trans; 259 hammer_inode_t ip; 260 off_t offset; 261 struct buf *bp; 262 struct uio *uio; 263 int error; 264 int n; 265 int seqcount; 266 int ioseqcount; 267 int blksize; 268 int got_mplock; 269 int bigread; 270 271 if (ap->a_vp->v_type != VREG) 272 return (EINVAL); 273 ip = VTOI(ap->a_vp); 274 error = 0; 275 uio = ap->a_uio; 276 277 /* 278 * Allow the UIO's size to override the sequential heuristic. 279 */ 280 blksize = hammer_blocksize(uio->uio_offset); 281 seqcount = (uio->uio_resid + (blksize - 1)) / blksize; 282 ioseqcount = ap->a_ioflag >> 16; 283 if (seqcount < ioseqcount) 284 seqcount = ioseqcount; 285 286 /* 287 * Temporary hack until more of HAMMER can be made MPSAFE. 288 */ 289 #ifdef SMP 290 if (curthread->td_mpcount) { 291 got_mplock = -1; 292 hammer_start_transaction(&trans, ip->hmp); 293 } else { 294 got_mplock = 0; 295 } 296 #else 297 hammer_start_transaction(&trans, ip->hmp); 298 got_mplock = -1; 299 #endif 300 301 /* 302 * If reading or writing a huge amount of data we have to break 303 * atomicy and allow the operation to be interrupted by a signal 304 * or it can DOS the machine. 305 */ 306 bigread = (uio->uio_resid > 100 * 1024 * 1024); 307 308 /* 309 * Access the data typically in HAMMER_BUFSIZE blocks via the 310 * buffer cache, but HAMMER may use a variable block size based 311 * on the offset. 312 * 313 * XXX Temporary hack, delay the start transaction while we remain 314 * MPSAFE. NOTE: ino_data.size cannot change while vnode is 315 * locked-shared. 316 */ 317 while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_data.size) { 318 int64_t base_offset; 319 int64_t file_limit; 320 321 blksize = hammer_blocksize(uio->uio_offset); 322 offset = (int)uio->uio_offset & (blksize - 1); 323 base_offset = uio->uio_offset - offset; 324 325 if (bigread && (error = hammer_signal_check(ip->hmp)) != 0) 326 break; 327 328 /* 329 * MPSAFE 330 */ 331 bp = getcacheblk(ap->a_vp, base_offset); 332 if (bp) { 333 error = 0; 334 goto skip; 335 } 336 337 /* 338 * MPUNSAFE 339 */ 340 if (got_mplock == 0) { 341 got_mplock = 1; 342 get_mplock(); 343 hammer_start_transaction(&trans, ip->hmp); 344 } 345 346 if (hammer_cluster_enable) { 347 /* 348 * Use file_limit to prevent cluster_read() from 349 * creating buffers of the wrong block size past 350 * the demarc. 351 */ 352 file_limit = ip->ino_data.size; 353 if (base_offset < HAMMER_XDEMARC && 354 file_limit > HAMMER_XDEMARC) { 355 file_limit = HAMMER_XDEMARC; 356 } 357 error = cluster_read(ap->a_vp, 358 file_limit, base_offset, 359 blksize, MAXPHYS, 360 seqcount, &bp); 361 } else { 362 error = bread(ap->a_vp, base_offset, blksize, &bp); 363 } 364 if (error) { 365 kprintf("error %d\n", error); 366 brelse(bp); 367 break; 368 } 369 skip: 370 371 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */ 372 n = blksize - offset; 373 if (n > uio->uio_resid) 374 n = uio->uio_resid; 375 if (n > ip->ino_data.size - uio->uio_offset) 376 n = (int)(ip->ino_data.size - uio->uio_offset); 377 error = uiomove((char *)bp->b_data + offset, n, uio); 378 379 /* data has a lower priority then meta-data */ 380 bp->b_flags |= B_AGE; 381 bqrelse(bp); 382 if (error) 383 break; 384 hammer_stats_file_read += n; 385 } 386 387 /* 388 * XXX only update the atime if we had to get the MP lock. 389 * XXX hack hack hack, fixme. 390 */ 391 if (got_mplock) { 392 if ((ip->flags & HAMMER_INODE_RO) == 0 && 393 (ip->hmp->mp->mnt_flag & MNT_NOATIME) == 0) { 394 ip->ino_data.atime = trans.time; 395 hammer_modify_inode(ip, HAMMER_INODE_ATIME); 396 } 397 hammer_done_transaction(&trans); 398 if (got_mplock > 0) 399 rel_mplock(); 400 } 401 return (error); 402 } 403 404 /* 405 * hammer_vop_write { vp, uio, ioflag, cred } 406 */ 407 static 408 int 409 hammer_vop_write(struct vop_write_args *ap) 410 { 411 struct hammer_transaction trans; 412 struct hammer_inode *ip; 413 hammer_mount_t hmp; 414 struct uio *uio; 415 int offset; 416 off_t base_offset; 417 struct buf *bp; 418 int kflags; 419 int error; 420 int n; 421 int flags; 422 int seqcount; 423 int bigwrite; 424 425 if (ap->a_vp->v_type != VREG) 426 return (EINVAL); 427 ip = VTOI(ap->a_vp); 428 hmp = ip->hmp; 429 error = 0; 430 kflags = 0; 431 seqcount = ap->a_ioflag >> 16; 432 433 if (ip->flags & HAMMER_INODE_RO) 434 return (EROFS); 435 436 /* 437 * Create a transaction to cover the operations we perform. 438 */ 439 hammer_start_transaction(&trans, hmp); 440 uio = ap->a_uio; 441 442 /* 443 * Check append mode 444 */ 445 if (ap->a_ioflag & IO_APPEND) 446 uio->uio_offset = ip->ino_data.size; 447 448 /* 449 * Check for illegal write offsets. Valid range is 0...2^63-1. 450 * 451 * NOTE: the base_off assignment is required to work around what 452 * I consider to be a GCC-4 optimization bug. 453 */ 454 if (uio->uio_offset < 0) { 455 hammer_done_transaction(&trans); 456 return (EFBIG); 457 } 458 base_offset = uio->uio_offset + uio->uio_resid; /* work around gcc-4 */ 459 if (uio->uio_resid > 0 && base_offset <= uio->uio_offset) { 460 hammer_done_transaction(&trans); 461 return (EFBIG); 462 } 463 464 /* 465 * If reading or writing a huge amount of data we have to break 466 * atomicy and allow the operation to be interrupted by a signal 467 * or it can DOS the machine. 468 */ 469 bigwrite = (uio->uio_resid > 100 * 1024 * 1024); 470 471 /* 472 * Access the data typically in HAMMER_BUFSIZE blocks via the 473 * buffer cache, but HAMMER may use a variable block size based 474 * on the offset. 475 */ 476 while (uio->uio_resid > 0) { 477 int fixsize = 0; 478 int blksize; 479 int blkmask; 480 481 if ((error = hammer_checkspace(hmp, HAMMER_CHKSPC_WRITE)) != 0) 482 break; 483 if (bigwrite && (error = hammer_signal_check(hmp)) != 0) 484 break; 485 486 blksize = hammer_blocksize(uio->uio_offset); 487 488 /* 489 * Do not allow HAMMER to blow out the buffer cache. Very 490 * large UIOs can lockout other processes due to bwillwrite() 491 * mechanics. 492 * 493 * The hammer inode is not locked during these operations. 494 * The vnode is locked which can interfere with the pageout 495 * daemon for non-UIO_NOCOPY writes but should not interfere 496 * with the buffer cache. Even so, we cannot afford to 497 * allow the pageout daemon to build up too many dirty buffer 498 * cache buffers. 499 * 500 * Only call this if we aren't being recursively called from 501 * a virtual disk device (vn), else we may deadlock. 502 */ 503 if ((ap->a_ioflag & IO_RECURSE) == 0) 504 bwillwrite(blksize); 505 506 /* 507 * Control the number of pending records associated with 508 * this inode. If too many have accumulated start a 509 * flush. Try to maintain a pipeline with the flusher. 510 */ 511 if (ip->rsv_recs >= hammer_limit_inode_recs) { 512 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); 513 } 514 if (ip->rsv_recs >= hammer_limit_inode_recs * 2) { 515 while (ip->rsv_recs >= hammer_limit_inode_recs) { 516 tsleep(&ip->rsv_recs, 0, "hmrwww", hz); 517 } 518 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); 519 } 520 521 #if 0 522 /* 523 * Do not allow HAMMER to blow out system memory by 524 * accumulating too many records. Records are so well 525 * decoupled from the buffer cache that it is possible 526 * for userland to push data out to the media via 527 * direct-write, but build up the records queued to the 528 * backend faster then the backend can flush them out. 529 * HAMMER has hit its write limit but the frontend has 530 * no pushback to slow it down. 531 */ 532 if (hmp->rsv_recs > hammer_limit_recs / 2) { 533 /* 534 * Get the inode on the flush list 535 */ 536 if (ip->rsv_recs >= 64) 537 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); 538 else if (ip->rsv_recs >= 16) 539 hammer_flush_inode(ip, 0); 540 541 /* 542 * Keep the flusher going if the system keeps 543 * queueing records. 544 */ 545 delta = hmp->count_newrecords - 546 hmp->last_newrecords; 547 if (delta < 0 || delta > hammer_limit_recs / 2) { 548 hmp->last_newrecords = hmp->count_newrecords; 549 hammer_sync_hmp(hmp, MNT_NOWAIT); 550 } 551 552 /* 553 * If we have gotten behind start slowing 554 * down the writers. 555 */ 556 delta = (hmp->rsv_recs - hammer_limit_recs) * 557 hz / hammer_limit_recs; 558 if (delta > 0) 559 tsleep(&trans, 0, "hmrslo", delta); 560 } 561 #endif 562 563 /* 564 * Calculate the blocksize at the current offset and figure 565 * out how much we can actually write. 566 */ 567 blkmask = blksize - 1; 568 offset = (int)uio->uio_offset & blkmask; 569 base_offset = uio->uio_offset & ~(int64_t)blkmask; 570 n = blksize - offset; 571 if (n > uio->uio_resid) 572 n = uio->uio_resid; 573 if (uio->uio_offset + n > ip->ino_data.size) { 574 vnode_pager_setsize(ap->a_vp, uio->uio_offset + n); 575 fixsize = 1; 576 kflags |= NOTE_EXTEND; 577 } 578 579 if (uio->uio_segflg == UIO_NOCOPY) { 580 /* 581 * Issuing a write with the same data backing the 582 * buffer. Instantiate the buffer to collect the 583 * backing vm pages, then read-in any missing bits. 584 * 585 * This case is used by vop_stdputpages(). 586 */ 587 bp = getblk(ap->a_vp, base_offset, 588 blksize, GETBLK_BHEAVY, 0); 589 if ((bp->b_flags & B_CACHE) == 0) { 590 bqrelse(bp); 591 error = bread(ap->a_vp, base_offset, 592 blksize, &bp); 593 } 594 } else if (offset == 0 && uio->uio_resid >= blksize) { 595 /* 596 * Even though we are entirely overwriting the buffer 597 * we may still have to zero it out to avoid a 598 * mmap/write visibility issue. 599 */ 600 bp = getblk(ap->a_vp, base_offset, blksize, GETBLK_BHEAVY, 0); 601 if ((bp->b_flags & B_CACHE) == 0) 602 vfs_bio_clrbuf(bp); 603 } else if (base_offset >= ip->ino_data.size) { 604 /* 605 * If the base offset of the buffer is beyond the 606 * file EOF, we don't have to issue a read. 607 */ 608 bp = getblk(ap->a_vp, base_offset, 609 blksize, GETBLK_BHEAVY, 0); 610 vfs_bio_clrbuf(bp); 611 } else { 612 /* 613 * Partial overwrite, read in any missing bits then 614 * replace the portion being written. 615 */ 616 error = bread(ap->a_vp, base_offset, blksize, &bp); 617 if (error == 0) 618 bheavy(bp); 619 } 620 if (error == 0) { 621 error = uiomove((char *)bp->b_data + offset, 622 n, uio); 623 } 624 625 /* 626 * If we screwed up we have to undo any VM size changes we 627 * made. 628 */ 629 if (error) { 630 brelse(bp); 631 if (fixsize) { 632 vtruncbuf(ap->a_vp, ip->ino_data.size, 633 hammer_blocksize(ip->ino_data.size)); 634 } 635 break; 636 } 637 kflags |= NOTE_WRITE; 638 hammer_stats_file_write += n; 639 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */ 640 if (ip->ino_data.size < uio->uio_offset) { 641 ip->ino_data.size = uio->uio_offset; 642 flags = HAMMER_INODE_DDIRTY; 643 vnode_pager_setsize(ap->a_vp, ip->ino_data.size); 644 } else { 645 flags = 0; 646 } 647 ip->ino_data.mtime = trans.time; 648 flags |= HAMMER_INODE_MTIME | HAMMER_INODE_BUFS; 649 hammer_modify_inode(ip, flags); 650 651 /* 652 * Once we dirty the buffer any cached zone-X offset 653 * becomes invalid. HAMMER NOTE: no-history mode cannot 654 * allow overwriting over the same data sector unless 655 * we provide UNDOs for the old data, which we don't. 656 */ 657 bp->b_bio2.bio_offset = NOOFFSET; 658 659 /* 660 * Final buffer disposition. 661 * 662 * Because meta-data updates are deferred, HAMMER is 663 * especially sensitive to excessive bdwrite()s because 664 * the I/O stream is not broken up by disk reads. So the 665 * buffer cache simply cannot keep up. 666 * 667 * WARNING! blksize is variable. cluster_write() is 668 * expected to not blow up if it encounters buffers that 669 * do not match the passed blksize. 670 * 671 * NOTE! Hammer shouldn't need to bawrite()/cluster_write(). 672 * The ip->rsv_recs check should burst-flush the data. 673 * If we queue it immediately the buf could be left 674 * locked on the device queue for a very long time. 675 */ 676 bp->b_flags |= B_AGE; 677 if (ap->a_ioflag & IO_SYNC) { 678 bwrite(bp); 679 } else if (ap->a_ioflag & IO_DIRECT) { 680 bawrite(bp); 681 } else { 682 #if 0 683 if (offset + n == blksize) { 684 if (hammer_cluster_enable == 0 || 685 (ap->a_vp->v_mount->mnt_flag & MNT_NOCLUSTERW)) { 686 bawrite(bp); 687 } else { 688 cluster_write(bp, ip->ino_data.size, 689 blksize, seqcount); 690 } 691 } else { 692 #endif 693 bdwrite(bp); 694 } 695 } 696 hammer_done_transaction(&trans); 697 hammer_knote(ap->a_vp, kflags); 698 return (error); 699 } 700 701 /* 702 * hammer_vop_access { vp, mode, cred } 703 */ 704 static 705 int 706 hammer_vop_access(struct vop_access_args *ap) 707 { 708 struct hammer_inode *ip = VTOI(ap->a_vp); 709 uid_t uid; 710 gid_t gid; 711 int error; 712 713 ++hammer_stats_file_iopsr; 714 uid = hammer_to_unix_xid(&ip->ino_data.uid); 715 gid = hammer_to_unix_xid(&ip->ino_data.gid); 716 717 error = vop_helper_access(ap, uid, gid, ip->ino_data.mode, 718 ip->ino_data.uflags); 719 return (error); 720 } 721 722 /* 723 * hammer_vop_advlock { vp, id, op, fl, flags } 724 */ 725 static 726 int 727 hammer_vop_advlock(struct vop_advlock_args *ap) 728 { 729 hammer_inode_t ip = VTOI(ap->a_vp); 730 731 return (lf_advlock(ap, &ip->advlock, ip->ino_data.size)); 732 } 733 734 /* 735 * hammer_vop_close { vp, fflag } 736 * 737 * We can only sync-on-close for normal closes. 738 */ 739 static 740 int 741 hammer_vop_close(struct vop_close_args *ap) 742 { 743 struct vnode *vp = ap->a_vp; 744 hammer_inode_t ip = VTOI(vp); 745 int waitfor; 746 747 if (ip->flags & (HAMMER_INODE_CLOSESYNC|HAMMER_INODE_CLOSEASYNC)) { 748 if (vn_islocked(vp) == LK_EXCLUSIVE && 749 (vp->v_flag & (VINACTIVE|VRECLAIMED)) == 0) { 750 if (ip->flags & HAMMER_INODE_CLOSESYNC) 751 waitfor = MNT_WAIT; 752 else 753 waitfor = MNT_NOWAIT; 754 ip->flags &= ~(HAMMER_INODE_CLOSESYNC | 755 HAMMER_INODE_CLOSEASYNC); 756 VOP_FSYNC(vp, MNT_NOWAIT, waitfor); 757 } 758 } 759 return (vop_stdclose(ap)); 760 } 761 762 /* 763 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap } 764 * 765 * The operating system has already ensured that the directory entry 766 * does not exist and done all appropriate namespace locking. 767 */ 768 static 769 int 770 hammer_vop_ncreate(struct vop_ncreate_args *ap) 771 { 772 struct hammer_transaction trans; 773 struct hammer_inode *dip; 774 struct hammer_inode *nip; 775 struct nchandle *nch; 776 int error; 777 778 nch = ap->a_nch; 779 dip = VTOI(ap->a_dvp); 780 781 if (dip->flags & HAMMER_INODE_RO) 782 return (EROFS); 783 if ((error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_CREATE)) != 0) 784 return (error); 785 786 /* 787 * Create a transaction to cover the operations we perform. 788 */ 789 hammer_start_transaction(&trans, dip->hmp); 790 ++hammer_stats_file_iopsw; 791 792 /* 793 * Create a new filesystem object of the requested type. The 794 * returned inode will be referenced and shared-locked to prevent 795 * it from being moved to the flusher. 796 */ 797 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, 798 dip, nch->ncp->nc_name, nch->ncp->nc_nlen, 799 NULL, &nip); 800 if (error) { 801 hkprintf("hammer_create_inode error %d\n", error); 802 hammer_done_transaction(&trans); 803 *ap->a_vpp = NULL; 804 return (error); 805 } 806 807 /* 808 * Add the new filesystem object to the directory. This will also 809 * bump the inode's link count. 810 */ 811 error = hammer_ip_add_directory(&trans, dip, 812 nch->ncp->nc_name, nch->ncp->nc_nlen, 813 nip); 814 if (error) 815 hkprintf("hammer_ip_add_directory error %d\n", error); 816 817 /* 818 * Finish up. 819 */ 820 if (error) { 821 hammer_rel_inode(nip, 0); 822 hammer_done_transaction(&trans); 823 *ap->a_vpp = NULL; 824 } else { 825 error = hammer_get_vnode(nip, ap->a_vpp); 826 hammer_done_transaction(&trans); 827 hammer_rel_inode(nip, 0); 828 if (error == 0) { 829 cache_setunresolved(ap->a_nch); 830 cache_setvp(ap->a_nch, *ap->a_vpp); 831 } 832 hammer_knote(ap->a_dvp, NOTE_WRITE); 833 } 834 return (error); 835 } 836 837 /* 838 * hammer_vop_getattr { vp, vap } 839 * 840 * Retrieve an inode's attribute information. When accessing inodes 841 * historically we fake the atime field to ensure consistent results. 842 * The atime field is stored in the B-Tree element and allowed to be 843 * updated without cycling the element. 844 * 845 * MPSAFE 846 */ 847 static 848 int 849 hammer_vop_getattr(struct vop_getattr_args *ap) 850 { 851 struct hammer_inode *ip = VTOI(ap->a_vp); 852 struct vattr *vap = ap->a_vap; 853 854 /* 855 * We want the fsid to be different when accessing a filesystem 856 * with different as-of's so programs like diff don't think 857 * the files are the same. 858 * 859 * We also want the fsid to be the same when comparing snapshots, 860 * or when comparing mirrors (which might be backed by different 861 * physical devices). HAMMER fsids are based on the PFS's 862 * shared_uuid field. 863 * 864 * XXX there is a chance of collision here. The va_fsid reported 865 * by stat is different from the more involved fsid used in the 866 * mount structure. 867 */ 868 ++hammer_stats_file_iopsr; 869 hammer_lock_sh(&ip->lock); 870 vap->va_fsid = ip->pfsm->fsid_udev ^ (u_int32_t)ip->obj_asof ^ 871 (u_int32_t)(ip->obj_asof >> 32); 872 873 vap->va_fileid = ip->ino_leaf.base.obj_id; 874 vap->va_mode = ip->ino_data.mode; 875 vap->va_nlink = ip->ino_data.nlinks; 876 vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid); 877 vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid); 878 vap->va_rmajor = 0; 879 vap->va_rminor = 0; 880 vap->va_size = ip->ino_data.size; 881 882 /* 883 * Special case for @@PFS softlinks. The actual size of the 884 * expanded softlink is "@@0x%016llx:%05d" == 26 bytes. 885 * or for MAX_TID is "@@-1:%05d" == 10 bytes. 886 */ 887 if (ip->ino_data.obj_type == HAMMER_OBJTYPE_SOFTLINK && 888 ip->ino_data.size == 10 && 889 ip->obj_asof == HAMMER_MAX_TID && 890 ip->obj_localization == 0 && 891 strncmp(ip->ino_data.ext.symlink, "@@PFS", 5) == 0) { 892 if (ip->pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) 893 vap->va_size = 26; 894 else 895 vap->va_size = 10; 896 } 897 898 /* 899 * We must provide a consistent atime and mtime for snapshots 900 * so people can do a 'tar cf - ... | md5' on them and get 901 * consistent results. 902 */ 903 if (ip->flags & HAMMER_INODE_RO) { 904 hammer_time_to_timespec(ip->ino_data.ctime, &vap->va_atime); 905 hammer_time_to_timespec(ip->ino_data.ctime, &vap->va_mtime); 906 } else { 907 hammer_time_to_timespec(ip->ino_data.atime, &vap->va_atime); 908 hammer_time_to_timespec(ip->ino_data.mtime, &vap->va_mtime); 909 } 910 hammer_time_to_timespec(ip->ino_data.ctime, &vap->va_ctime); 911 vap->va_flags = ip->ino_data.uflags; 912 vap->va_gen = 1; /* hammer inums are unique for all time */ 913 vap->va_blocksize = HAMMER_BUFSIZE; 914 if (ip->ino_data.size >= HAMMER_XDEMARC) { 915 vap->va_bytes = (ip->ino_data.size + HAMMER_XBUFMASK64) & 916 ~HAMMER_XBUFMASK64; 917 } else if (ip->ino_data.size > HAMMER_BUFSIZE / 2) { 918 vap->va_bytes = (ip->ino_data.size + HAMMER_BUFMASK64) & 919 ~HAMMER_BUFMASK64; 920 } else { 921 vap->va_bytes = (ip->ino_data.size + 15) & ~15; 922 } 923 924 vap->va_type = hammer_get_vnode_type(ip->ino_data.obj_type); 925 vap->va_filerev = 0; /* XXX */ 926 /* mtime uniquely identifies any adjustments made to the file XXX */ 927 vap->va_fsmid = ip->ino_data.mtime; 928 vap->va_uid_uuid = ip->ino_data.uid; 929 vap->va_gid_uuid = ip->ino_data.gid; 930 vap->va_fsid_uuid = ip->hmp->fsid; 931 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID | 932 VA_FSID_UUID_VALID; 933 934 switch (ip->ino_data.obj_type) { 935 case HAMMER_OBJTYPE_CDEV: 936 case HAMMER_OBJTYPE_BDEV: 937 vap->va_rmajor = ip->ino_data.rmajor; 938 vap->va_rminor = ip->ino_data.rminor; 939 break; 940 default: 941 break; 942 } 943 hammer_unlock(&ip->lock); 944 return(0); 945 } 946 947 /* 948 * hammer_vop_nresolve { nch, dvp, cred } 949 * 950 * Locate the requested directory entry. 951 */ 952 static 953 int 954 hammer_vop_nresolve(struct vop_nresolve_args *ap) 955 { 956 struct hammer_transaction trans; 957 struct namecache *ncp; 958 hammer_inode_t dip; 959 hammer_inode_t ip; 960 hammer_tid_t asof; 961 struct hammer_cursor cursor; 962 struct vnode *vp; 963 int64_t namekey; 964 int error; 965 int i; 966 int nlen; 967 int flags; 968 int ispfs; 969 int64_t obj_id; 970 u_int32_t localization; 971 u_int32_t max_iterations; 972 973 /* 974 * Misc initialization, plus handle as-of name extensions. Look for 975 * the '@@' extension. Note that as-of files and directories cannot 976 * be modified. 977 */ 978 dip = VTOI(ap->a_dvp); 979 ncp = ap->a_nch->ncp; 980 asof = dip->obj_asof; 981 localization = dip->obj_localization; /* for code consistency */ 982 nlen = ncp->nc_nlen; 983 flags = dip->flags & HAMMER_INODE_RO; 984 ispfs = 0; 985 986 hammer_simple_transaction(&trans, dip->hmp); 987 ++hammer_stats_file_iopsr; 988 989 for (i = 0; i < nlen; ++i) { 990 if (ncp->nc_name[i] == '@' && ncp->nc_name[i+1] == '@') { 991 error = hammer_str_to_tid(ncp->nc_name + i + 2, 992 &ispfs, &asof, &localization); 993 if (error != 0) { 994 i = nlen; 995 break; 996 } 997 if (asof != HAMMER_MAX_TID) 998 flags |= HAMMER_INODE_RO; 999 break; 1000 } 1001 } 1002 nlen = i; 1003 1004 /* 1005 * If this is a PFS softlink we dive into the PFS 1006 */ 1007 if (ispfs && nlen == 0) { 1008 ip = hammer_get_inode(&trans, dip, HAMMER_OBJID_ROOT, 1009 asof, localization, 1010 flags, &error); 1011 if (error == 0) { 1012 error = hammer_get_vnode(ip, &vp); 1013 hammer_rel_inode(ip, 0); 1014 } else { 1015 vp = NULL; 1016 } 1017 if (error == 0) { 1018 vn_unlock(vp); 1019 cache_setvp(ap->a_nch, vp); 1020 vrele(vp); 1021 } 1022 goto done; 1023 } 1024 1025 /* 1026 * If there is no path component the time extension is relative to dip. 1027 * e.g. "fubar/@@<snapshot>" 1028 * 1029 * "." is handled by the kernel, but ".@@<snapshot>" is not. 1030 * e.g. "fubar/.@@<snapshot>" 1031 * 1032 * ".." is handled by the kernel. We do not currently handle 1033 * "..@<snapshot>". 1034 */ 1035 if (nlen == 0 || (nlen == 1 && ncp->nc_name[0] == '.')) { 1036 ip = hammer_get_inode(&trans, dip, dip->obj_id, 1037 asof, dip->obj_localization, 1038 flags, &error); 1039 if (error == 0) { 1040 error = hammer_get_vnode(ip, &vp); 1041 hammer_rel_inode(ip, 0); 1042 } else { 1043 vp = NULL; 1044 } 1045 if (error == 0) { 1046 vn_unlock(vp); 1047 cache_setvp(ap->a_nch, vp); 1048 vrele(vp); 1049 } 1050 goto done; 1051 } 1052 1053 /* 1054 * Calculate the namekey and setup the key range for the scan. This 1055 * works kinda like a chained hash table where the lower 32 bits 1056 * of the namekey synthesize the chain. 1057 * 1058 * The key range is inclusive of both key_beg and key_end. 1059 */ 1060 namekey = hammer_directory_namekey(dip, ncp->nc_name, nlen, 1061 &max_iterations); 1062 1063 error = hammer_init_cursor(&trans, &cursor, &dip->cache[1], dip); 1064 cursor.key_beg.localization = dip->obj_localization + 1065 hammer_dir_localization(dip); 1066 cursor.key_beg.obj_id = dip->obj_id; 1067 cursor.key_beg.key = namekey; 1068 cursor.key_beg.create_tid = 0; 1069 cursor.key_beg.delete_tid = 0; 1070 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY; 1071 cursor.key_beg.obj_type = 0; 1072 1073 cursor.key_end = cursor.key_beg; 1074 cursor.key_end.key += max_iterations; 1075 cursor.asof = asof; 1076 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; 1077 1078 /* 1079 * Scan all matching records (the chain), locate the one matching 1080 * the requested path component. 1081 * 1082 * The hammer_ip_*() functions merge in-memory records with on-disk 1083 * records for the purposes of the search. 1084 */ 1085 obj_id = 0; 1086 localization = HAMMER_DEF_LOCALIZATION; 1087 1088 if (error == 0) { 1089 error = hammer_ip_first(&cursor); 1090 while (error == 0) { 1091 error = hammer_ip_resolve_data(&cursor); 1092 if (error) 1093 break; 1094 if (nlen == cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF && 1095 bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) { 1096 obj_id = cursor.data->entry.obj_id; 1097 localization = cursor.data->entry.localization; 1098 break; 1099 } 1100 error = hammer_ip_next(&cursor); 1101 } 1102 } 1103 hammer_done_cursor(&cursor); 1104 1105 /* 1106 * Lookup the obj_id. This should always succeed. If it does not 1107 * the filesystem may be damaged and we return a dummy inode. 1108 */ 1109 if (error == 0) { 1110 ip = hammer_get_inode(&trans, dip, obj_id, 1111 asof, localization, 1112 flags, &error); 1113 if (error == ENOENT) { 1114 kprintf("HAMMER: WARNING: Missing " 1115 "inode for dirent \"%s\"\n" 1116 "\tobj_id = %016llx, asof=%016llx, lo=%08x\n", 1117 ncp->nc_name, 1118 (long long)obj_id, (long long)asof, 1119 localization); 1120 error = 0; 1121 ip = hammer_get_dummy_inode(&trans, dip, obj_id, 1122 asof, localization, 1123 flags, &error); 1124 } 1125 if (error == 0) { 1126 error = hammer_get_vnode(ip, &vp); 1127 hammer_rel_inode(ip, 0); 1128 } else { 1129 vp = NULL; 1130 } 1131 if (error == 0) { 1132 vn_unlock(vp); 1133 cache_setvp(ap->a_nch, vp); 1134 vrele(vp); 1135 } 1136 } else if (error == ENOENT) { 1137 cache_setvp(ap->a_nch, NULL); 1138 } 1139 done: 1140 hammer_done_transaction(&trans); 1141 return (error); 1142 } 1143 1144 /* 1145 * hammer_vop_nlookupdotdot { dvp, vpp, cred } 1146 * 1147 * Locate the parent directory of a directory vnode. 1148 * 1149 * dvp is referenced but not locked. *vpp must be returned referenced and 1150 * locked. A parent_obj_id of 0 does not necessarily indicate that we are 1151 * at the root, instead it could indicate that the directory we were in was 1152 * removed. 1153 * 1154 * NOTE: as-of sequences are not linked into the directory structure. If 1155 * we are at the root with a different asof then the mount point, reload 1156 * the same directory with the mount point's asof. I'm not sure what this 1157 * will do to NFS. We encode ASOF stamps in NFS file handles so it might not 1158 * get confused, but it hasn't been tested. 1159 */ 1160 static 1161 int 1162 hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap) 1163 { 1164 struct hammer_transaction trans; 1165 struct hammer_inode *dip; 1166 struct hammer_inode *ip; 1167 int64_t parent_obj_id; 1168 u_int32_t parent_obj_localization; 1169 hammer_tid_t asof; 1170 int error; 1171 1172 dip = VTOI(ap->a_dvp); 1173 asof = dip->obj_asof; 1174 1175 /* 1176 * Whos are parent? This could be the root of a pseudo-filesystem 1177 * whos parent is in another localization domain. 1178 */ 1179 parent_obj_id = dip->ino_data.parent_obj_id; 1180 if (dip->obj_id == HAMMER_OBJID_ROOT) 1181 parent_obj_localization = dip->ino_data.ext.obj.parent_obj_localization; 1182 else 1183 parent_obj_localization = dip->obj_localization; 1184 1185 if (parent_obj_id == 0) { 1186 if (dip->obj_id == HAMMER_OBJID_ROOT && 1187 asof != dip->hmp->asof) { 1188 parent_obj_id = dip->obj_id; 1189 asof = dip->hmp->asof; 1190 *ap->a_fakename = kmalloc(19, M_TEMP, M_WAITOK); 1191 ksnprintf(*ap->a_fakename, 19, "0x%016llx", 1192 (long long)dip->obj_asof); 1193 } else { 1194 *ap->a_vpp = NULL; 1195 return ENOENT; 1196 } 1197 } 1198 1199 hammer_simple_transaction(&trans, dip->hmp); 1200 ++hammer_stats_file_iopsr; 1201 1202 ip = hammer_get_inode(&trans, dip, parent_obj_id, 1203 asof, parent_obj_localization, 1204 dip->flags, &error); 1205 if (ip) { 1206 error = hammer_get_vnode(ip, ap->a_vpp); 1207 hammer_rel_inode(ip, 0); 1208 } else { 1209 *ap->a_vpp = NULL; 1210 } 1211 hammer_done_transaction(&trans); 1212 return (error); 1213 } 1214 1215 /* 1216 * hammer_vop_nlink { nch, dvp, vp, cred } 1217 */ 1218 static 1219 int 1220 hammer_vop_nlink(struct vop_nlink_args *ap) 1221 { 1222 struct hammer_transaction trans; 1223 struct hammer_inode *dip; 1224 struct hammer_inode *ip; 1225 struct nchandle *nch; 1226 int error; 1227 1228 if (ap->a_dvp->v_mount != ap->a_vp->v_mount) 1229 return(EXDEV); 1230 1231 nch = ap->a_nch; 1232 dip = VTOI(ap->a_dvp); 1233 ip = VTOI(ap->a_vp); 1234 1235 if (dip->obj_localization != ip->obj_localization) 1236 return(EXDEV); 1237 1238 if (dip->flags & HAMMER_INODE_RO) 1239 return (EROFS); 1240 if (ip->flags & HAMMER_INODE_RO) 1241 return (EROFS); 1242 if ((error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_CREATE)) != 0) 1243 return (error); 1244 1245 /* 1246 * Create a transaction to cover the operations we perform. 1247 */ 1248 hammer_start_transaction(&trans, dip->hmp); 1249 ++hammer_stats_file_iopsw; 1250 1251 /* 1252 * Add the filesystem object to the directory. Note that neither 1253 * dip nor ip are referenced or locked, but their vnodes are 1254 * referenced. This function will bump the inode's link count. 1255 */ 1256 error = hammer_ip_add_directory(&trans, dip, 1257 nch->ncp->nc_name, nch->ncp->nc_nlen, 1258 ip); 1259 1260 /* 1261 * Finish up. 1262 */ 1263 if (error == 0) { 1264 cache_setunresolved(nch); 1265 cache_setvp(nch, ap->a_vp); 1266 } 1267 hammer_done_transaction(&trans); 1268 hammer_knote(ap->a_vp, NOTE_LINK); 1269 hammer_knote(ap->a_dvp, NOTE_WRITE); 1270 return (error); 1271 } 1272 1273 /* 1274 * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap } 1275 * 1276 * The operating system has already ensured that the directory entry 1277 * does not exist and done all appropriate namespace locking. 1278 */ 1279 static 1280 int 1281 hammer_vop_nmkdir(struct vop_nmkdir_args *ap) 1282 { 1283 struct hammer_transaction trans; 1284 struct hammer_inode *dip; 1285 struct hammer_inode *nip; 1286 struct nchandle *nch; 1287 int error; 1288 1289 nch = ap->a_nch; 1290 dip = VTOI(ap->a_dvp); 1291 1292 if (dip->flags & HAMMER_INODE_RO) 1293 return (EROFS); 1294 if ((error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_CREATE)) != 0) 1295 return (error); 1296 1297 /* 1298 * Create a transaction to cover the operations we perform. 1299 */ 1300 hammer_start_transaction(&trans, dip->hmp); 1301 ++hammer_stats_file_iopsw; 1302 1303 /* 1304 * Create a new filesystem object of the requested type. The 1305 * returned inode will be referenced but not locked. 1306 */ 1307 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, 1308 dip, nch->ncp->nc_name, nch->ncp->nc_nlen, 1309 NULL, &nip); 1310 if (error) { 1311 hkprintf("hammer_mkdir error %d\n", error); 1312 hammer_done_transaction(&trans); 1313 *ap->a_vpp = NULL; 1314 return (error); 1315 } 1316 /* 1317 * Add the new filesystem object to the directory. This will also 1318 * bump the inode's link count. 1319 */ 1320 error = hammer_ip_add_directory(&trans, dip, 1321 nch->ncp->nc_name, nch->ncp->nc_nlen, 1322 nip); 1323 if (error) 1324 hkprintf("hammer_mkdir (add) error %d\n", error); 1325 1326 /* 1327 * Finish up. 1328 */ 1329 if (error) { 1330 hammer_rel_inode(nip, 0); 1331 *ap->a_vpp = NULL; 1332 } else { 1333 error = hammer_get_vnode(nip, ap->a_vpp); 1334 hammer_rel_inode(nip, 0); 1335 if (error == 0) { 1336 cache_setunresolved(ap->a_nch); 1337 cache_setvp(ap->a_nch, *ap->a_vpp); 1338 } 1339 } 1340 hammer_done_transaction(&trans); 1341 if (error == 0) 1342 hammer_knote(ap->a_dvp, NOTE_WRITE | NOTE_LINK); 1343 return (error); 1344 } 1345 1346 /* 1347 * hammer_vop_nmknod { nch, dvp, vpp, cred, vap } 1348 * 1349 * The operating system has already ensured that the directory entry 1350 * does not exist and done all appropriate namespace locking. 1351 */ 1352 static 1353 int 1354 hammer_vop_nmknod(struct vop_nmknod_args *ap) 1355 { 1356 struct hammer_transaction trans; 1357 struct hammer_inode *dip; 1358 struct hammer_inode *nip; 1359 struct nchandle *nch; 1360 int error; 1361 1362 nch = ap->a_nch; 1363 dip = VTOI(ap->a_dvp); 1364 1365 if (dip->flags & HAMMER_INODE_RO) 1366 return (EROFS); 1367 if ((error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_CREATE)) != 0) 1368 return (error); 1369 1370 /* 1371 * Create a transaction to cover the operations we perform. 1372 */ 1373 hammer_start_transaction(&trans, dip->hmp); 1374 ++hammer_stats_file_iopsw; 1375 1376 /* 1377 * Create a new filesystem object of the requested type. The 1378 * returned inode will be referenced but not locked. 1379 * 1380 * If mknod specifies a directory a pseudo-fs is created. 1381 */ 1382 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, 1383 dip, nch->ncp->nc_name, nch->ncp->nc_nlen, 1384 NULL, &nip); 1385 if (error) { 1386 hammer_done_transaction(&trans); 1387 *ap->a_vpp = NULL; 1388 return (error); 1389 } 1390 1391 /* 1392 * Add the new filesystem object to the directory. This will also 1393 * bump the inode's link count. 1394 */ 1395 error = hammer_ip_add_directory(&trans, dip, 1396 nch->ncp->nc_name, nch->ncp->nc_nlen, 1397 nip); 1398 1399 /* 1400 * Finish up. 1401 */ 1402 if (error) { 1403 hammer_rel_inode(nip, 0); 1404 *ap->a_vpp = NULL; 1405 } else { 1406 error = hammer_get_vnode(nip, ap->a_vpp); 1407 hammer_rel_inode(nip, 0); 1408 if (error == 0) { 1409 cache_setunresolved(ap->a_nch); 1410 cache_setvp(ap->a_nch, *ap->a_vpp); 1411 } 1412 } 1413 hammer_done_transaction(&trans); 1414 if (error == 0) 1415 hammer_knote(ap->a_dvp, NOTE_WRITE); 1416 return (error); 1417 } 1418 1419 /* 1420 * hammer_vop_open { vp, mode, cred, fp } 1421 */ 1422 static 1423 int 1424 hammer_vop_open(struct vop_open_args *ap) 1425 { 1426 hammer_inode_t ip; 1427 1428 ++hammer_stats_file_iopsr; 1429 ip = VTOI(ap->a_vp); 1430 1431 if ((ap->a_mode & FWRITE) && (ip->flags & HAMMER_INODE_RO)) 1432 return (EROFS); 1433 return(vop_stdopen(ap)); 1434 } 1435 1436 /* 1437 * hammer_vop_print { vp } 1438 */ 1439 static 1440 int 1441 hammer_vop_print(struct vop_print_args *ap) 1442 { 1443 return EOPNOTSUPP; 1444 } 1445 1446 /* 1447 * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies } 1448 */ 1449 static 1450 int 1451 hammer_vop_readdir(struct vop_readdir_args *ap) 1452 { 1453 struct hammer_transaction trans; 1454 struct hammer_cursor cursor; 1455 struct hammer_inode *ip; 1456 struct uio *uio; 1457 hammer_base_elm_t base; 1458 int error; 1459 int cookie_index; 1460 int ncookies; 1461 off_t *cookies; 1462 off_t saveoff; 1463 int r; 1464 int dtype; 1465 1466 ++hammer_stats_file_iopsr; 1467 ip = VTOI(ap->a_vp); 1468 uio = ap->a_uio; 1469 saveoff = uio->uio_offset; 1470 1471 if (ap->a_ncookies) { 1472 ncookies = uio->uio_resid / 16 + 1; 1473 if (ncookies > 1024) 1474 ncookies = 1024; 1475 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK); 1476 cookie_index = 0; 1477 } else { 1478 ncookies = -1; 1479 cookies = NULL; 1480 cookie_index = 0; 1481 } 1482 1483 hammer_simple_transaction(&trans, ip->hmp); 1484 1485 /* 1486 * Handle artificial entries 1487 * 1488 * It should be noted that the minimum value for a directory 1489 * hash key on-media is 0x0000000100000000, so we can use anything 1490 * less then that to represent our 'special' key space. 1491 */ 1492 error = 0; 1493 if (saveoff == 0) { 1494 r = vop_write_dirent(&error, uio, ip->obj_id, DT_DIR, 1, "."); 1495 if (r) 1496 goto done; 1497 if (cookies) 1498 cookies[cookie_index] = saveoff; 1499 ++saveoff; 1500 ++cookie_index; 1501 if (cookie_index == ncookies) 1502 goto done; 1503 } 1504 if (saveoff == 1) { 1505 if (ip->ino_data.parent_obj_id) { 1506 r = vop_write_dirent(&error, uio, 1507 ip->ino_data.parent_obj_id, 1508 DT_DIR, 2, ".."); 1509 } else { 1510 r = vop_write_dirent(&error, uio, 1511 ip->obj_id, DT_DIR, 2, ".."); 1512 } 1513 if (r) 1514 goto done; 1515 if (cookies) 1516 cookies[cookie_index] = saveoff; 1517 ++saveoff; 1518 ++cookie_index; 1519 if (cookie_index == ncookies) 1520 goto done; 1521 } 1522 1523 /* 1524 * Key range (begin and end inclusive) to scan. Directory keys 1525 * directly translate to a 64 bit 'seek' position. 1526 */ 1527 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip); 1528 cursor.key_beg.localization = ip->obj_localization + 1529 hammer_dir_localization(ip); 1530 cursor.key_beg.obj_id = ip->obj_id; 1531 cursor.key_beg.create_tid = 0; 1532 cursor.key_beg.delete_tid = 0; 1533 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY; 1534 cursor.key_beg.obj_type = 0; 1535 cursor.key_beg.key = saveoff; 1536 1537 cursor.key_end = cursor.key_beg; 1538 cursor.key_end.key = HAMMER_MAX_KEY; 1539 cursor.asof = ip->obj_asof; 1540 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; 1541 1542 error = hammer_ip_first(&cursor); 1543 1544 while (error == 0) { 1545 error = hammer_ip_resolve_data(&cursor); 1546 if (error) 1547 break; 1548 base = &cursor.leaf->base; 1549 saveoff = base->key; 1550 KKASSERT(cursor.leaf->data_len > HAMMER_ENTRY_NAME_OFF); 1551 1552 if (base->obj_id != ip->obj_id) 1553 panic("readdir: bad record at %p", cursor.node); 1554 1555 /* 1556 * Convert pseudo-filesystems into softlinks 1557 */ 1558 dtype = hammer_get_dtype(cursor.leaf->base.obj_type); 1559 r = vop_write_dirent( 1560 &error, uio, cursor.data->entry.obj_id, 1561 dtype, 1562 cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF , 1563 (void *)cursor.data->entry.name); 1564 if (r) 1565 break; 1566 ++saveoff; 1567 if (cookies) 1568 cookies[cookie_index] = base->key; 1569 ++cookie_index; 1570 if (cookie_index == ncookies) 1571 break; 1572 error = hammer_ip_next(&cursor); 1573 } 1574 hammer_done_cursor(&cursor); 1575 1576 done: 1577 hammer_done_transaction(&trans); 1578 1579 if (ap->a_eofflag) 1580 *ap->a_eofflag = (error == ENOENT); 1581 uio->uio_offset = saveoff; 1582 if (error && cookie_index == 0) { 1583 if (error == ENOENT) 1584 error = 0; 1585 if (cookies) { 1586 kfree(cookies, M_TEMP); 1587 *ap->a_ncookies = 0; 1588 *ap->a_cookies = NULL; 1589 } 1590 } else { 1591 if (error == ENOENT) 1592 error = 0; 1593 if (cookies) { 1594 *ap->a_ncookies = cookie_index; 1595 *ap->a_cookies = cookies; 1596 } 1597 } 1598 return(error); 1599 } 1600 1601 /* 1602 * hammer_vop_readlink { vp, uio, cred } 1603 */ 1604 static 1605 int 1606 hammer_vop_readlink(struct vop_readlink_args *ap) 1607 { 1608 struct hammer_transaction trans; 1609 struct hammer_cursor cursor; 1610 struct hammer_inode *ip; 1611 char buf[32]; 1612 u_int32_t localization; 1613 hammer_pseudofs_inmem_t pfsm; 1614 int error; 1615 1616 ip = VTOI(ap->a_vp); 1617 1618 /* 1619 * Shortcut if the symlink data was stuffed into ino_data. 1620 * 1621 * Also expand special "@@PFS%05d" softlinks (expansion only 1622 * occurs for non-historical (current) accesses made from the 1623 * primary filesystem). 1624 */ 1625 if (ip->ino_data.size <= HAMMER_INODE_BASESYMLEN) { 1626 char *ptr; 1627 int bytes; 1628 1629 ptr = ip->ino_data.ext.symlink; 1630 bytes = (int)ip->ino_data.size; 1631 if (bytes == 10 && 1632 ip->obj_asof == HAMMER_MAX_TID && 1633 ip->obj_localization == 0 && 1634 strncmp(ptr, "@@PFS", 5) == 0) { 1635 hammer_simple_transaction(&trans, ip->hmp); 1636 bcopy(ptr + 5, buf, 5); 1637 buf[5] = 0; 1638 localization = strtoul(buf, NULL, 10) << 16; 1639 pfsm = hammer_load_pseudofs(&trans, localization, 1640 &error); 1641 if (error == 0) { 1642 if (pfsm->pfsd.mirror_flags & 1643 HAMMER_PFSD_SLAVE) { 1644 /* vap->va_size == 26 */ 1645 ksnprintf(buf, sizeof(buf), 1646 "@@0x%016llx:%05d", 1647 (long long)pfsm->pfsd.sync_end_tid, 1648 localization >> 16); 1649 } else { 1650 /* vap->va_size == 10 */ 1651 ksnprintf(buf, sizeof(buf), 1652 "@@-1:%05d", 1653 localization >> 16); 1654 #if 0 1655 ksnprintf(buf, sizeof(buf), 1656 "@@0x%016llx:%05d", 1657 (long long)HAMMER_MAX_TID, 1658 localization >> 16); 1659 #endif 1660 } 1661 ptr = buf; 1662 bytes = strlen(buf); 1663 } 1664 if (pfsm) 1665 hammer_rel_pseudofs(trans.hmp, pfsm); 1666 hammer_done_transaction(&trans); 1667 } 1668 error = uiomove(ptr, bytes, ap->a_uio); 1669 return(error); 1670 } 1671 1672 /* 1673 * Long version 1674 */ 1675 hammer_simple_transaction(&trans, ip->hmp); 1676 ++hammer_stats_file_iopsr; 1677 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip); 1678 1679 /* 1680 * Key range (begin and end inclusive) to scan. Directory keys 1681 * directly translate to a 64 bit 'seek' position. 1682 */ 1683 cursor.key_beg.localization = ip->obj_localization + 1684 HAMMER_LOCALIZE_MISC; 1685 cursor.key_beg.obj_id = ip->obj_id; 1686 cursor.key_beg.create_tid = 0; 1687 cursor.key_beg.delete_tid = 0; 1688 cursor.key_beg.rec_type = HAMMER_RECTYPE_FIX; 1689 cursor.key_beg.obj_type = 0; 1690 cursor.key_beg.key = HAMMER_FIXKEY_SYMLINK; 1691 cursor.asof = ip->obj_asof; 1692 cursor.flags |= HAMMER_CURSOR_ASOF; 1693 1694 error = hammer_ip_lookup(&cursor); 1695 if (error == 0) { 1696 error = hammer_ip_resolve_data(&cursor); 1697 if (error == 0) { 1698 KKASSERT(cursor.leaf->data_len >= 1699 HAMMER_SYMLINK_NAME_OFF); 1700 error = uiomove(cursor.data->symlink.name, 1701 cursor.leaf->data_len - 1702 HAMMER_SYMLINK_NAME_OFF, 1703 ap->a_uio); 1704 } 1705 } 1706 hammer_done_cursor(&cursor); 1707 hammer_done_transaction(&trans); 1708 return(error); 1709 } 1710 1711 /* 1712 * hammer_vop_nremove { nch, dvp, cred } 1713 */ 1714 static 1715 int 1716 hammer_vop_nremove(struct vop_nremove_args *ap) 1717 { 1718 struct hammer_transaction trans; 1719 struct hammer_inode *dip; 1720 int error; 1721 1722 dip = VTOI(ap->a_dvp); 1723 1724 if (hammer_nohistory(dip) == 0 && 1725 (error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_REMOVE)) != 0) { 1726 return (error); 1727 } 1728 1729 hammer_start_transaction(&trans, dip->hmp); 1730 ++hammer_stats_file_iopsw; 1731 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0, 0); 1732 hammer_done_transaction(&trans); 1733 if (error == 0) 1734 hammer_knote(ap->a_dvp, NOTE_WRITE); 1735 return (error); 1736 } 1737 1738 /* 1739 * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred } 1740 */ 1741 static 1742 int 1743 hammer_vop_nrename(struct vop_nrename_args *ap) 1744 { 1745 struct hammer_transaction trans; 1746 struct namecache *fncp; 1747 struct namecache *tncp; 1748 struct hammer_inode *fdip; 1749 struct hammer_inode *tdip; 1750 struct hammer_inode *ip; 1751 struct hammer_cursor cursor; 1752 int64_t namekey; 1753 u_int32_t max_iterations; 1754 int nlen, error; 1755 1756 if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount) 1757 return(EXDEV); 1758 if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount) 1759 return(EXDEV); 1760 1761 fdip = VTOI(ap->a_fdvp); 1762 tdip = VTOI(ap->a_tdvp); 1763 fncp = ap->a_fnch->ncp; 1764 tncp = ap->a_tnch->ncp; 1765 ip = VTOI(fncp->nc_vp); 1766 KKASSERT(ip != NULL); 1767 1768 if (fdip->obj_localization != tdip->obj_localization) 1769 return(EXDEV); 1770 if (fdip->obj_localization != ip->obj_localization) 1771 return(EXDEV); 1772 1773 if (fdip->flags & HAMMER_INODE_RO) 1774 return (EROFS); 1775 if (tdip->flags & HAMMER_INODE_RO) 1776 return (EROFS); 1777 if (ip->flags & HAMMER_INODE_RO) 1778 return (EROFS); 1779 if ((error = hammer_checkspace(fdip->hmp, HAMMER_CHKSPC_CREATE)) != 0) 1780 return (error); 1781 1782 hammer_start_transaction(&trans, fdip->hmp); 1783 ++hammer_stats_file_iopsw; 1784 1785 /* 1786 * Remove tncp from the target directory and then link ip as 1787 * tncp. XXX pass trans to dounlink 1788 * 1789 * Force the inode sync-time to match the transaction so it is 1790 * in-sync with the creation of the target directory entry. 1791 */ 1792 error = hammer_dounlink(&trans, ap->a_tnch, ap->a_tdvp, 1793 ap->a_cred, 0, -1); 1794 if (error == 0 || error == ENOENT) { 1795 error = hammer_ip_add_directory(&trans, tdip, 1796 tncp->nc_name, tncp->nc_nlen, 1797 ip); 1798 if (error == 0) { 1799 ip->ino_data.parent_obj_id = tdip->obj_id; 1800 ip->ino_data.ctime = trans.time; 1801 hammer_modify_inode(ip, HAMMER_INODE_DDIRTY); 1802 } 1803 } 1804 if (error) 1805 goto failed; /* XXX */ 1806 1807 /* 1808 * Locate the record in the originating directory and remove it. 1809 * 1810 * Calculate the namekey and setup the key range for the scan. This 1811 * works kinda like a chained hash table where the lower 32 bits 1812 * of the namekey synthesize the chain. 1813 * 1814 * The key range is inclusive of both key_beg and key_end. 1815 */ 1816 namekey = hammer_directory_namekey(fdip, fncp->nc_name, fncp->nc_nlen, 1817 &max_iterations); 1818 retry: 1819 hammer_init_cursor(&trans, &cursor, &fdip->cache[1], fdip); 1820 cursor.key_beg.localization = fdip->obj_localization + 1821 hammer_dir_localization(fdip); 1822 cursor.key_beg.obj_id = fdip->obj_id; 1823 cursor.key_beg.key = namekey; 1824 cursor.key_beg.create_tid = 0; 1825 cursor.key_beg.delete_tid = 0; 1826 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY; 1827 cursor.key_beg.obj_type = 0; 1828 1829 cursor.key_end = cursor.key_beg; 1830 cursor.key_end.key += max_iterations; 1831 cursor.asof = fdip->obj_asof; 1832 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; 1833 1834 /* 1835 * Scan all matching records (the chain), locate the one matching 1836 * the requested path component. 1837 * 1838 * The hammer_ip_*() functions merge in-memory records with on-disk 1839 * records for the purposes of the search. 1840 */ 1841 error = hammer_ip_first(&cursor); 1842 while (error == 0) { 1843 if (hammer_ip_resolve_data(&cursor) != 0) 1844 break; 1845 nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF; 1846 KKASSERT(nlen > 0); 1847 if (fncp->nc_nlen == nlen && 1848 bcmp(fncp->nc_name, cursor.data->entry.name, nlen) == 0) { 1849 break; 1850 } 1851 error = hammer_ip_next(&cursor); 1852 } 1853 1854 /* 1855 * If all is ok we have to get the inode so we can adjust nlinks. 1856 * 1857 * WARNING: hammer_ip_del_directory() may have to terminate the 1858 * cursor to avoid a recursion. It's ok to call hammer_done_cursor() 1859 * twice. 1860 */ 1861 if (error == 0) 1862 error = hammer_ip_del_directory(&trans, &cursor, fdip, ip); 1863 1864 /* 1865 * XXX A deadlock here will break rename's atomicy for the purposes 1866 * of crash recovery. 1867 */ 1868 if (error == EDEADLK) { 1869 hammer_done_cursor(&cursor); 1870 goto retry; 1871 } 1872 1873 /* 1874 * Cleanup and tell the kernel that the rename succeeded. 1875 */ 1876 hammer_done_cursor(&cursor); 1877 if (error == 0) { 1878 cache_rename(ap->a_fnch, ap->a_tnch); 1879 hammer_knote(ap->a_fdvp, NOTE_WRITE); 1880 hammer_knote(ap->a_tdvp, NOTE_WRITE); 1881 if (ip->vp) 1882 hammer_knote(ip->vp, NOTE_RENAME); 1883 } 1884 1885 failed: 1886 hammer_done_transaction(&trans); 1887 return (error); 1888 } 1889 1890 /* 1891 * hammer_vop_nrmdir { nch, dvp, cred } 1892 */ 1893 static 1894 int 1895 hammer_vop_nrmdir(struct vop_nrmdir_args *ap) 1896 { 1897 struct hammer_transaction trans; 1898 struct hammer_inode *dip; 1899 int error; 1900 1901 dip = VTOI(ap->a_dvp); 1902 1903 if (hammer_nohistory(dip) == 0 && 1904 (error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_REMOVE)) != 0) { 1905 return (error); 1906 } 1907 1908 hammer_start_transaction(&trans, dip->hmp); 1909 ++hammer_stats_file_iopsw; 1910 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0, 1); 1911 hammer_done_transaction(&trans); 1912 if (error == 0) 1913 hammer_knote(ap->a_dvp, NOTE_WRITE | NOTE_LINK); 1914 return (error); 1915 } 1916 1917 /* 1918 * hammer_vop_markatime { vp, cred } 1919 */ 1920 static 1921 int 1922 hammer_vop_markatime(struct vop_markatime_args *ap) 1923 { 1924 struct hammer_transaction trans; 1925 struct hammer_inode *ip; 1926 1927 ip = VTOI(ap->a_vp); 1928 if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) 1929 return (EROFS); 1930 if (ip->flags & HAMMER_INODE_RO) 1931 return (EROFS); 1932 if (ip->hmp->mp->mnt_flag & MNT_NOATIME) 1933 return (0); 1934 hammer_start_transaction(&trans, ip->hmp); 1935 ++hammer_stats_file_iopsw; 1936 1937 ip->ino_data.atime = trans.time; 1938 hammer_modify_inode(ip, HAMMER_INODE_ATIME); 1939 hammer_done_transaction(&trans); 1940 hammer_knote(ap->a_vp, NOTE_ATTRIB); 1941 return (0); 1942 } 1943 1944 /* 1945 * hammer_vop_setattr { vp, vap, cred } 1946 */ 1947 static 1948 int 1949 hammer_vop_setattr(struct vop_setattr_args *ap) 1950 { 1951 struct hammer_transaction trans; 1952 struct vattr *vap; 1953 struct hammer_inode *ip; 1954 int modflags; 1955 int error; 1956 int truncating; 1957 int blksize; 1958 int kflags; 1959 int64_t aligned_size; 1960 u_int32_t flags; 1961 1962 vap = ap->a_vap; 1963 ip = ap->a_vp->v_data; 1964 modflags = 0; 1965 kflags = 0; 1966 1967 if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) 1968 return(EROFS); 1969 if (ip->flags & HAMMER_INODE_RO) 1970 return (EROFS); 1971 if (hammer_nohistory(ip) == 0 && 1972 (error = hammer_checkspace(ip->hmp, HAMMER_CHKSPC_REMOVE)) != 0) { 1973 return (error); 1974 } 1975 1976 hammer_start_transaction(&trans, ip->hmp); 1977 ++hammer_stats_file_iopsw; 1978 error = 0; 1979 1980 if (vap->va_flags != VNOVAL) { 1981 flags = ip->ino_data.uflags; 1982 error = vop_helper_setattr_flags(&flags, vap->va_flags, 1983 hammer_to_unix_xid(&ip->ino_data.uid), 1984 ap->a_cred); 1985 if (error == 0) { 1986 if (ip->ino_data.uflags != flags) { 1987 ip->ino_data.uflags = flags; 1988 ip->ino_data.ctime = trans.time; 1989 modflags |= HAMMER_INODE_DDIRTY; 1990 kflags |= NOTE_ATTRIB; 1991 } 1992 if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) { 1993 error = 0; 1994 goto done; 1995 } 1996 } 1997 goto done; 1998 } 1999 if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) { 2000 error = EPERM; 2001 goto done; 2002 } 2003 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 2004 mode_t cur_mode = ip->ino_data.mode; 2005 uid_t cur_uid = hammer_to_unix_xid(&ip->ino_data.uid); 2006 gid_t cur_gid = hammer_to_unix_xid(&ip->ino_data.gid); 2007 uuid_t uuid_uid; 2008 uuid_t uuid_gid; 2009 2010 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid, 2011 ap->a_cred, 2012 &cur_uid, &cur_gid, &cur_mode); 2013 if (error == 0) { 2014 hammer_guid_to_uuid(&uuid_uid, cur_uid); 2015 hammer_guid_to_uuid(&uuid_gid, cur_gid); 2016 if (bcmp(&uuid_uid, &ip->ino_data.uid, 2017 sizeof(uuid_uid)) || 2018 bcmp(&uuid_gid, &ip->ino_data.gid, 2019 sizeof(uuid_gid)) || 2020 ip->ino_data.mode != cur_mode 2021 ) { 2022 ip->ino_data.uid = uuid_uid; 2023 ip->ino_data.gid = uuid_gid; 2024 ip->ino_data.mode = cur_mode; 2025 ip->ino_data.ctime = trans.time; 2026 modflags |= HAMMER_INODE_DDIRTY; 2027 } 2028 kflags |= NOTE_ATTRIB; 2029 } 2030 } 2031 while (vap->va_size != VNOVAL && ip->ino_data.size != vap->va_size) { 2032 switch(ap->a_vp->v_type) { 2033 case VREG: 2034 if (vap->va_size == ip->ino_data.size) 2035 break; 2036 /* 2037 * XXX break atomicy, we can deadlock the backend 2038 * if we do not release the lock. Probably not a 2039 * big deal here. 2040 */ 2041 blksize = hammer_blocksize(vap->va_size); 2042 if (vap->va_size < ip->ino_data.size) { 2043 vtruncbuf(ap->a_vp, vap->va_size, blksize); 2044 truncating = 1; 2045 kflags |= NOTE_WRITE; 2046 } else { 2047 vnode_pager_setsize(ap->a_vp, vap->va_size); 2048 truncating = 0; 2049 kflags |= NOTE_WRITE | NOTE_EXTEND; 2050 } 2051 ip->ino_data.size = vap->va_size; 2052 ip->ino_data.mtime = trans.time; 2053 modflags |= HAMMER_INODE_MTIME | HAMMER_INODE_DDIRTY; 2054 2055 /* 2056 * on-media truncation is cached in the inode until 2057 * the inode is synchronized. 2058 */ 2059 if (truncating) { 2060 hammer_ip_frontend_trunc(ip, vap->va_size); 2061 #ifdef DEBUG_TRUNCATE 2062 if (HammerTruncIp == NULL) 2063 HammerTruncIp = ip; 2064 #endif 2065 if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) { 2066 ip->flags |= HAMMER_INODE_TRUNCATED; 2067 ip->trunc_off = vap->va_size; 2068 #ifdef DEBUG_TRUNCATE 2069 if (ip == HammerTruncIp) 2070 kprintf("truncate1 %016llx\n", 2071 (long long)ip->trunc_off); 2072 #endif 2073 } else if (ip->trunc_off > vap->va_size) { 2074 ip->trunc_off = vap->va_size; 2075 #ifdef DEBUG_TRUNCATE 2076 if (ip == HammerTruncIp) 2077 kprintf("truncate2 %016llx\n", 2078 (long long)ip->trunc_off); 2079 #endif 2080 } else { 2081 #ifdef DEBUG_TRUNCATE 2082 if (ip == HammerTruncIp) 2083 kprintf("truncate3 %016llx (ignored)\n", 2084 (long long)vap->va_size); 2085 #endif 2086 } 2087 } 2088 2089 /* 2090 * If truncating we have to clean out a portion of 2091 * the last block on-disk. We do this in the 2092 * front-end buffer cache. 2093 */ 2094 aligned_size = (vap->va_size + (blksize - 1)) & 2095 ~(int64_t)(blksize - 1); 2096 if (truncating && vap->va_size < aligned_size) { 2097 struct buf *bp; 2098 int offset; 2099 2100 aligned_size -= blksize; 2101 2102 offset = (int)vap->va_size & (blksize - 1); 2103 error = bread(ap->a_vp, aligned_size, 2104 blksize, &bp); 2105 hammer_ip_frontend_trunc(ip, aligned_size); 2106 if (error == 0) { 2107 bzero(bp->b_data + offset, 2108 blksize - offset); 2109 /* must de-cache direct-io offset */ 2110 bp->b_bio2.bio_offset = NOOFFSET; 2111 bdwrite(bp); 2112 } else { 2113 kprintf("ERROR %d\n", error); 2114 brelse(bp); 2115 } 2116 } 2117 break; 2118 case VDATABASE: 2119 if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) { 2120 ip->flags |= HAMMER_INODE_TRUNCATED; 2121 ip->trunc_off = vap->va_size; 2122 } else if (ip->trunc_off > vap->va_size) { 2123 ip->trunc_off = vap->va_size; 2124 } 2125 hammer_ip_frontend_trunc(ip, vap->va_size); 2126 ip->ino_data.size = vap->va_size; 2127 ip->ino_data.mtime = trans.time; 2128 modflags |= HAMMER_INODE_MTIME | HAMMER_INODE_DDIRTY; 2129 kflags |= NOTE_ATTRIB; 2130 break; 2131 default: 2132 error = EINVAL; 2133 goto done; 2134 } 2135 break; 2136 } 2137 if (vap->va_atime.tv_sec != VNOVAL) { 2138 ip->ino_data.atime = hammer_timespec_to_time(&vap->va_atime); 2139 modflags |= HAMMER_INODE_ATIME; 2140 kflags |= NOTE_ATTRIB; 2141 } 2142 if (vap->va_mtime.tv_sec != VNOVAL) { 2143 ip->ino_data.mtime = hammer_timespec_to_time(&vap->va_mtime); 2144 modflags |= HAMMER_INODE_MTIME; 2145 kflags |= NOTE_ATTRIB; 2146 } 2147 if (vap->va_mode != (mode_t)VNOVAL) { 2148 mode_t cur_mode = ip->ino_data.mode; 2149 uid_t cur_uid = hammer_to_unix_xid(&ip->ino_data.uid); 2150 gid_t cur_gid = hammer_to_unix_xid(&ip->ino_data.gid); 2151 2152 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred, 2153 cur_uid, cur_gid, &cur_mode); 2154 if (error == 0 && ip->ino_data.mode != cur_mode) { 2155 ip->ino_data.mode = cur_mode; 2156 ip->ino_data.ctime = trans.time; 2157 modflags |= HAMMER_INODE_DDIRTY; 2158 kflags |= NOTE_ATTRIB; 2159 } 2160 } 2161 done: 2162 if (error == 0) 2163 hammer_modify_inode(ip, modflags); 2164 hammer_done_transaction(&trans); 2165 hammer_knote(ap->a_vp, kflags); 2166 return (error); 2167 } 2168 2169 /* 2170 * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target } 2171 */ 2172 static 2173 int 2174 hammer_vop_nsymlink(struct vop_nsymlink_args *ap) 2175 { 2176 struct hammer_transaction trans; 2177 struct hammer_inode *dip; 2178 struct hammer_inode *nip; 2179 struct nchandle *nch; 2180 hammer_record_t record; 2181 int error; 2182 int bytes; 2183 2184 ap->a_vap->va_type = VLNK; 2185 2186 nch = ap->a_nch; 2187 dip = VTOI(ap->a_dvp); 2188 2189 if (dip->flags & HAMMER_INODE_RO) 2190 return (EROFS); 2191 if ((error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_CREATE)) != 0) 2192 return (error); 2193 2194 /* 2195 * Create a transaction to cover the operations we perform. 2196 */ 2197 hammer_start_transaction(&trans, dip->hmp); 2198 ++hammer_stats_file_iopsw; 2199 2200 /* 2201 * Create a new filesystem object of the requested type. The 2202 * returned inode will be referenced but not locked. 2203 */ 2204 2205 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, 2206 dip, nch->ncp->nc_name, nch->ncp->nc_nlen, 2207 NULL, &nip); 2208 if (error) { 2209 hammer_done_transaction(&trans); 2210 *ap->a_vpp = NULL; 2211 return (error); 2212 } 2213 2214 /* 2215 * Add a record representing the symlink. symlink stores the link 2216 * as pure data, not a string, and is no \0 terminated. 2217 */ 2218 if (error == 0) { 2219 bytes = strlen(ap->a_target); 2220 2221 if (bytes <= HAMMER_INODE_BASESYMLEN) { 2222 bcopy(ap->a_target, nip->ino_data.ext.symlink, bytes); 2223 } else { 2224 record = hammer_alloc_mem_record(nip, bytes); 2225 record->type = HAMMER_MEM_RECORD_GENERAL; 2226 2227 record->leaf.base.localization = nip->obj_localization + 2228 HAMMER_LOCALIZE_MISC; 2229 record->leaf.base.key = HAMMER_FIXKEY_SYMLINK; 2230 record->leaf.base.rec_type = HAMMER_RECTYPE_FIX; 2231 record->leaf.data_len = bytes; 2232 KKASSERT(HAMMER_SYMLINK_NAME_OFF == 0); 2233 bcopy(ap->a_target, record->data->symlink.name, bytes); 2234 error = hammer_ip_add_record(&trans, record); 2235 } 2236 2237 /* 2238 * Set the file size to the length of the link. 2239 */ 2240 if (error == 0) { 2241 nip->ino_data.size = bytes; 2242 hammer_modify_inode(nip, HAMMER_INODE_DDIRTY); 2243 } 2244 } 2245 if (error == 0) 2246 error = hammer_ip_add_directory(&trans, dip, nch->ncp->nc_name, 2247 nch->ncp->nc_nlen, nip); 2248 2249 /* 2250 * Finish up. 2251 */ 2252 if (error) { 2253 hammer_rel_inode(nip, 0); 2254 *ap->a_vpp = NULL; 2255 } else { 2256 error = hammer_get_vnode(nip, ap->a_vpp); 2257 hammer_rel_inode(nip, 0); 2258 if (error == 0) { 2259 cache_setunresolved(ap->a_nch); 2260 cache_setvp(ap->a_nch, *ap->a_vpp); 2261 hammer_knote(ap->a_dvp, NOTE_WRITE); 2262 } 2263 } 2264 hammer_done_transaction(&trans); 2265 return (error); 2266 } 2267 2268 /* 2269 * hammer_vop_nwhiteout { nch, dvp, cred, flags } 2270 */ 2271 static 2272 int 2273 hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap) 2274 { 2275 struct hammer_transaction trans; 2276 struct hammer_inode *dip; 2277 int error; 2278 2279 dip = VTOI(ap->a_dvp); 2280 2281 if (hammer_nohistory(dip) == 0 && 2282 (error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_CREATE)) != 0) { 2283 return (error); 2284 } 2285 2286 hammer_start_transaction(&trans, dip->hmp); 2287 ++hammer_stats_file_iopsw; 2288 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, 2289 ap->a_cred, ap->a_flags, -1); 2290 hammer_done_transaction(&trans); 2291 2292 return (error); 2293 } 2294 2295 /* 2296 * hammer_vop_ioctl { vp, command, data, fflag, cred } 2297 */ 2298 static 2299 int 2300 hammer_vop_ioctl(struct vop_ioctl_args *ap) 2301 { 2302 struct hammer_inode *ip = ap->a_vp->v_data; 2303 2304 ++hammer_stats_file_iopsr; 2305 return(hammer_ioctl(ip, ap->a_command, ap->a_data, 2306 ap->a_fflag, ap->a_cred)); 2307 } 2308 2309 static 2310 int 2311 hammer_vop_mountctl(struct vop_mountctl_args *ap) 2312 { 2313 static const struct mountctl_opt extraopt[] = { 2314 { HMNT_NOHISTORY, "nohistory" }, 2315 { HMNT_MASTERID, "master" }, 2316 { 0, NULL} 2317 2318 }; 2319 struct hammer_mount *hmp; 2320 struct mount *mp; 2321 int usedbytes; 2322 int error; 2323 2324 error = 0; 2325 usedbytes = 0; 2326 mp = ap->a_head.a_ops->head.vv_mount; 2327 KKASSERT(mp->mnt_data != NULL); 2328 hmp = (struct hammer_mount *)mp->mnt_data; 2329 2330 switch(ap->a_op) { 2331 2332 case MOUNTCTL_SET_EXPORT: 2333 if (ap->a_ctllen != sizeof(struct export_args)) 2334 error = EINVAL; 2335 else 2336 error = hammer_vfs_export(mp, ap->a_op, 2337 (const struct export_args *)ap->a_ctl); 2338 break; 2339 case MOUNTCTL_MOUNTFLAGS: 2340 { 2341 /* 2342 * Call standard mountctl VOP function 2343 * so we get user mount flags. 2344 */ 2345 error = vop_stdmountctl(ap); 2346 if (error) 2347 break; 2348 2349 usedbytes = *ap->a_res; 2350 2351 if (usedbytes > 0 && usedbytes < ap->a_buflen) { 2352 usedbytes += vfs_flagstostr(hmp->hflags, extraopt, ap->a_buf, 2353 ap->a_buflen - usedbytes, 2354 &error); 2355 } 2356 2357 *ap->a_res += usedbytes; 2358 break; 2359 } 2360 default: 2361 error = vop_stdmountctl(ap); 2362 break; 2363 } 2364 return(error); 2365 } 2366 2367 /* 2368 * hammer_vop_strategy { vp, bio } 2369 * 2370 * Strategy call, used for regular file read & write only. Note that the 2371 * bp may represent a cluster. 2372 * 2373 * To simplify operation and allow better optimizations in the future, 2374 * this code does not make any assumptions with regards to buffer alignment 2375 * or size. 2376 */ 2377 static 2378 int 2379 hammer_vop_strategy(struct vop_strategy_args *ap) 2380 { 2381 struct buf *bp; 2382 int error; 2383 2384 bp = ap->a_bio->bio_buf; 2385 2386 switch(bp->b_cmd) { 2387 case BUF_CMD_READ: 2388 error = hammer_vop_strategy_read(ap); 2389 break; 2390 case BUF_CMD_WRITE: 2391 error = hammer_vop_strategy_write(ap); 2392 break; 2393 default: 2394 bp->b_error = error = EINVAL; 2395 bp->b_flags |= B_ERROR; 2396 biodone(ap->a_bio); 2397 break; 2398 } 2399 return (error); 2400 } 2401 2402 /* 2403 * Read from a regular file. Iterate the related records and fill in the 2404 * BIO/BUF. Gaps are zero-filled. 2405 * 2406 * The support code in hammer_object.c should be used to deal with mixed 2407 * in-memory and on-disk records. 2408 * 2409 * NOTE: Can be called from the cluster code with an oversized buf. 2410 * 2411 * XXX atime update 2412 */ 2413 static 2414 int 2415 hammer_vop_strategy_read(struct vop_strategy_args *ap) 2416 { 2417 struct hammer_transaction trans; 2418 struct hammer_inode *ip; 2419 struct hammer_inode *dip; 2420 struct hammer_cursor cursor; 2421 hammer_base_elm_t base; 2422 hammer_off_t disk_offset; 2423 struct bio *bio; 2424 struct bio *nbio; 2425 struct buf *bp; 2426 int64_t rec_offset; 2427 int64_t ran_end; 2428 int64_t tmp64; 2429 int error; 2430 int boff; 2431 int roff; 2432 int n; 2433 2434 bio = ap->a_bio; 2435 bp = bio->bio_buf; 2436 ip = ap->a_vp->v_data; 2437 2438 /* 2439 * The zone-2 disk offset may have been set by the cluster code via 2440 * a BMAP operation, or else should be NOOFFSET. 2441 * 2442 * Checking the high bits for a match against zone-2 should suffice. 2443 */ 2444 nbio = push_bio(bio); 2445 if ((nbio->bio_offset & HAMMER_OFF_ZONE_MASK) == 2446 HAMMER_ZONE_LARGE_DATA) { 2447 error = hammer_io_direct_read(ip->hmp, nbio, NULL); 2448 return (error); 2449 } 2450 2451 /* 2452 * Well, that sucked. Do it the hard way. If all the stars are 2453 * aligned we may still be able to issue a direct-read. 2454 */ 2455 hammer_simple_transaction(&trans, ip->hmp); 2456 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip); 2457 2458 /* 2459 * Key range (begin and end inclusive) to scan. Note that the key's 2460 * stored in the actual records represent BASE+LEN, not BASE. The 2461 * first record containing bio_offset will have a key > bio_offset. 2462 */ 2463 cursor.key_beg.localization = ip->obj_localization + 2464 HAMMER_LOCALIZE_MISC; 2465 cursor.key_beg.obj_id = ip->obj_id; 2466 cursor.key_beg.create_tid = 0; 2467 cursor.key_beg.delete_tid = 0; 2468 cursor.key_beg.obj_type = 0; 2469 cursor.key_beg.key = bio->bio_offset + 1; 2470 cursor.asof = ip->obj_asof; 2471 cursor.flags |= HAMMER_CURSOR_ASOF; 2472 2473 cursor.key_end = cursor.key_beg; 2474 KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE); 2475 #if 0 2476 if (ip->ino_data.obj_type == HAMMER_OBJTYPE_DBFILE) { 2477 cursor.key_beg.rec_type = HAMMER_RECTYPE_DB; 2478 cursor.key_end.rec_type = HAMMER_RECTYPE_DB; 2479 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; 2480 } else 2481 #endif 2482 { 2483 ran_end = bio->bio_offset + bp->b_bufsize; 2484 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; 2485 cursor.key_end.rec_type = HAMMER_RECTYPE_DATA; 2486 tmp64 = ran_end + MAXPHYS + 1; /* work-around GCC-4 bug */ 2487 if (tmp64 < ran_end) 2488 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; 2489 else 2490 cursor.key_end.key = ran_end + MAXPHYS + 1; 2491 } 2492 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 2493 2494 error = hammer_ip_first(&cursor); 2495 boff = 0; 2496 2497 while (error == 0) { 2498 /* 2499 * Get the base file offset of the record. The key for 2500 * data records is (base + bytes) rather then (base). 2501 */ 2502 base = &cursor.leaf->base; 2503 rec_offset = base->key - cursor.leaf->data_len; 2504 2505 /* 2506 * Calculate the gap, if any, and zero-fill it. 2507 * 2508 * n is the offset of the start of the record verses our 2509 * current seek offset in the bio. 2510 */ 2511 n = (int)(rec_offset - (bio->bio_offset + boff)); 2512 if (n > 0) { 2513 if (n > bp->b_bufsize - boff) 2514 n = bp->b_bufsize - boff; 2515 bzero((char *)bp->b_data + boff, n); 2516 boff += n; 2517 n = 0; 2518 } 2519 2520 /* 2521 * Calculate the data offset in the record and the number 2522 * of bytes we can copy. 2523 * 2524 * There are two degenerate cases. First, boff may already 2525 * be at bp->b_bufsize. Secondly, the data offset within 2526 * the record may exceed the record's size. 2527 */ 2528 roff = -n; 2529 rec_offset += roff; 2530 n = cursor.leaf->data_len - roff; 2531 if (n <= 0) { 2532 kprintf("strategy_read: bad n=%d roff=%d\n", n, roff); 2533 n = 0; 2534 } else if (n > bp->b_bufsize - boff) { 2535 n = bp->b_bufsize - boff; 2536 } 2537 2538 /* 2539 * Deal with cached truncations. This cool bit of code 2540 * allows truncate()/ftruncate() to avoid having to sync 2541 * the file. 2542 * 2543 * If the frontend is truncated then all backend records are 2544 * subject to the frontend's truncation. 2545 * 2546 * If the backend is truncated then backend records on-disk 2547 * (but not in-memory) are subject to the backend's 2548 * truncation. In-memory records owned by the backend 2549 * represent data written after the truncation point on the 2550 * backend and must not be truncated. 2551 * 2552 * Truncate operations deal with frontend buffer cache 2553 * buffers and frontend-owned in-memory records synchronously. 2554 */ 2555 if (ip->flags & HAMMER_INODE_TRUNCATED) { 2556 if (hammer_cursor_ondisk(&cursor) || 2557 cursor.iprec->flush_state == HAMMER_FST_FLUSH) { 2558 if (ip->trunc_off <= rec_offset) 2559 n = 0; 2560 else if (ip->trunc_off < rec_offset + n) 2561 n = (int)(ip->trunc_off - rec_offset); 2562 } 2563 } 2564 if (ip->sync_flags & HAMMER_INODE_TRUNCATED) { 2565 if (hammer_cursor_ondisk(&cursor)) { 2566 if (ip->sync_trunc_off <= rec_offset) 2567 n = 0; 2568 else if (ip->sync_trunc_off < rec_offset + n) 2569 n = (int)(ip->sync_trunc_off - rec_offset); 2570 } 2571 } 2572 2573 /* 2574 * Try to issue a direct read into our bio if possible, 2575 * otherwise resolve the element data into a hammer_buffer 2576 * and copy. 2577 * 2578 * The buffer on-disk should be zerod past any real 2579 * truncation point, but may not be for any synthesized 2580 * truncation point from above. 2581 */ 2582 disk_offset = cursor.leaf->data_offset + roff; 2583 if (boff == 0 && n == bp->b_bufsize && 2584 hammer_cursor_ondisk(&cursor) && 2585 (disk_offset & HAMMER_BUFMASK) == 0) { 2586 KKASSERT((disk_offset & HAMMER_OFF_ZONE_MASK) == 2587 HAMMER_ZONE_LARGE_DATA); 2588 nbio->bio_offset = disk_offset; 2589 error = hammer_io_direct_read(trans.hmp, nbio, 2590 cursor.leaf); 2591 goto done; 2592 } else if (n) { 2593 error = hammer_ip_resolve_data(&cursor); 2594 if (error == 0) { 2595 bcopy((char *)cursor.data + roff, 2596 (char *)bp->b_data + boff, n); 2597 } 2598 } 2599 if (error) 2600 break; 2601 2602 /* 2603 * Iterate until we have filled the request. 2604 */ 2605 boff += n; 2606 if (boff == bp->b_bufsize) 2607 break; 2608 error = hammer_ip_next(&cursor); 2609 } 2610 2611 /* 2612 * There may have been a gap after the last record 2613 */ 2614 if (error == ENOENT) 2615 error = 0; 2616 if (error == 0 && boff != bp->b_bufsize) { 2617 KKASSERT(boff < bp->b_bufsize); 2618 bzero((char *)bp->b_data + boff, bp->b_bufsize - boff); 2619 /* boff = bp->b_bufsize; */ 2620 } 2621 bp->b_resid = 0; 2622 bp->b_error = error; 2623 if (error) 2624 bp->b_flags |= B_ERROR; 2625 biodone(ap->a_bio); 2626 2627 done: 2628 /* 2629 * Cache the b-tree node for the last data read in cache[1]. 2630 * 2631 * If we hit the file EOF then also cache the node in the 2632 * governing director's cache[3], it will be used to initialize 2633 * the inode's cache[1] for any inodes looked up via the directory. 2634 * 2635 * This doesn't reduce disk accesses since the B-Tree chain is 2636 * likely cached, but it does reduce cpu overhead when looking 2637 * up file offsets for cpdup/tar/cpio style iterations. 2638 */ 2639 if (cursor.node) 2640 hammer_cache_node(&ip->cache[1], cursor.node); 2641 if (ran_end >= ip->ino_data.size) { 2642 dip = hammer_find_inode(&trans, ip->ino_data.parent_obj_id, 2643 ip->obj_asof, ip->obj_localization); 2644 if (dip) { 2645 hammer_cache_node(&dip->cache[3], cursor.node); 2646 hammer_rel_inode(dip, 0); 2647 } 2648 } 2649 hammer_done_cursor(&cursor); 2650 hammer_done_transaction(&trans); 2651 return(error); 2652 } 2653 2654 /* 2655 * BMAP operation - used to support cluster_read() only. 2656 * 2657 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb) 2658 * 2659 * This routine may return EOPNOTSUPP if the opration is not supported for 2660 * the specified offset. The contents of the pointer arguments do not 2661 * need to be initialized in that case. 2662 * 2663 * If a disk address is available and properly aligned return 0 with 2664 * *doffsetp set to the zone-2 address, and *runp / *runb set appropriately 2665 * to the run-length relative to that offset. Callers may assume that 2666 * *doffsetp is valid if 0 is returned, even if *runp is not sufficiently 2667 * large, so return EOPNOTSUPP if it is not sufficiently large. 2668 */ 2669 static 2670 int 2671 hammer_vop_bmap(struct vop_bmap_args *ap) 2672 { 2673 struct hammer_transaction trans; 2674 struct hammer_inode *ip; 2675 struct hammer_cursor cursor; 2676 hammer_base_elm_t base; 2677 int64_t rec_offset; 2678 int64_t ran_end; 2679 int64_t tmp64; 2680 int64_t base_offset; 2681 int64_t base_disk_offset; 2682 int64_t last_offset; 2683 hammer_off_t last_disk_offset; 2684 hammer_off_t disk_offset; 2685 int rec_len; 2686 int error; 2687 int blksize; 2688 2689 ++hammer_stats_file_iopsr; 2690 ip = ap->a_vp->v_data; 2691 2692 /* 2693 * We can only BMAP regular files. We can't BMAP database files, 2694 * directories, etc. 2695 */ 2696 if (ip->ino_data.obj_type != HAMMER_OBJTYPE_REGFILE) 2697 return(EOPNOTSUPP); 2698 2699 /* 2700 * bmap is typically called with runp/runb both NULL when used 2701 * for writing. We do not support BMAP for writing atm. 2702 */ 2703 if (ap->a_cmd != BUF_CMD_READ) 2704 return(EOPNOTSUPP); 2705 2706 /* 2707 * Scan the B-Tree to acquire blockmap addresses, then translate 2708 * to raw addresses. 2709 */ 2710 hammer_simple_transaction(&trans, ip->hmp); 2711 #if 0 2712 kprintf("bmap_beg %016llx ip->cache %p\n", 2713 (long long)ap->a_loffset, ip->cache[1]); 2714 #endif 2715 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip); 2716 2717 /* 2718 * Key range (begin and end inclusive) to scan. Note that the key's 2719 * stored in the actual records represent BASE+LEN, not BASE. The 2720 * first record containing bio_offset will have a key > bio_offset. 2721 */ 2722 cursor.key_beg.localization = ip->obj_localization + 2723 HAMMER_LOCALIZE_MISC; 2724 cursor.key_beg.obj_id = ip->obj_id; 2725 cursor.key_beg.create_tid = 0; 2726 cursor.key_beg.delete_tid = 0; 2727 cursor.key_beg.obj_type = 0; 2728 if (ap->a_runb) 2729 cursor.key_beg.key = ap->a_loffset - MAXPHYS + 1; 2730 else 2731 cursor.key_beg.key = ap->a_loffset + 1; 2732 if (cursor.key_beg.key < 0) 2733 cursor.key_beg.key = 0; 2734 cursor.asof = ip->obj_asof; 2735 cursor.flags |= HAMMER_CURSOR_ASOF; 2736 2737 cursor.key_end = cursor.key_beg; 2738 KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE); 2739 2740 ran_end = ap->a_loffset + MAXPHYS; 2741 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; 2742 cursor.key_end.rec_type = HAMMER_RECTYPE_DATA; 2743 tmp64 = ran_end + MAXPHYS + 1; /* work-around GCC-4 bug */ 2744 if (tmp64 < ran_end) 2745 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; 2746 else 2747 cursor.key_end.key = ran_end + MAXPHYS + 1; 2748 2749 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 2750 2751 error = hammer_ip_first(&cursor); 2752 base_offset = last_offset = 0; 2753 base_disk_offset = last_disk_offset = 0; 2754 2755 while (error == 0) { 2756 /* 2757 * Get the base file offset of the record. The key for 2758 * data records is (base + bytes) rather then (base). 2759 * 2760 * NOTE: rec_offset + rec_len may exceed the end-of-file. 2761 * The extra bytes should be zero on-disk and the BMAP op 2762 * should still be ok. 2763 */ 2764 base = &cursor.leaf->base; 2765 rec_offset = base->key - cursor.leaf->data_len; 2766 rec_len = cursor.leaf->data_len; 2767 2768 /* 2769 * Incorporate any cached truncation. 2770 * 2771 * NOTE: Modifications to rec_len based on synthesized 2772 * truncation points remove the guarantee that any extended 2773 * data on disk is zero (since the truncations may not have 2774 * taken place on-media yet). 2775 */ 2776 if (ip->flags & HAMMER_INODE_TRUNCATED) { 2777 if (hammer_cursor_ondisk(&cursor) || 2778 cursor.iprec->flush_state == HAMMER_FST_FLUSH) { 2779 if (ip->trunc_off <= rec_offset) 2780 rec_len = 0; 2781 else if (ip->trunc_off < rec_offset + rec_len) 2782 rec_len = (int)(ip->trunc_off - rec_offset); 2783 } 2784 } 2785 if (ip->sync_flags & HAMMER_INODE_TRUNCATED) { 2786 if (hammer_cursor_ondisk(&cursor)) { 2787 if (ip->sync_trunc_off <= rec_offset) 2788 rec_len = 0; 2789 else if (ip->sync_trunc_off < rec_offset + rec_len) 2790 rec_len = (int)(ip->sync_trunc_off - rec_offset); 2791 } 2792 } 2793 2794 /* 2795 * Accumulate information. If we have hit a discontiguous 2796 * block reset base_offset unless we are already beyond the 2797 * requested offset. If we are, that's it, we stop. 2798 */ 2799 if (error) 2800 break; 2801 if (hammer_cursor_ondisk(&cursor)) { 2802 disk_offset = cursor.leaf->data_offset; 2803 if (rec_offset != last_offset || 2804 disk_offset != last_disk_offset) { 2805 if (rec_offset > ap->a_loffset) 2806 break; 2807 base_offset = rec_offset; 2808 base_disk_offset = disk_offset; 2809 } 2810 last_offset = rec_offset + rec_len; 2811 last_disk_offset = disk_offset + rec_len; 2812 } 2813 error = hammer_ip_next(&cursor); 2814 } 2815 2816 #if 0 2817 kprintf("BMAP %016llx: %016llx - %016llx\n", 2818 (long long)ap->a_loffset, 2819 (long long)base_offset, 2820 (long long)last_offset); 2821 kprintf("BMAP %16s: %016llx - %016llx\n", "", 2822 (long long)base_disk_offset, 2823 (long long)last_disk_offset); 2824 #endif 2825 2826 if (cursor.node) { 2827 hammer_cache_node(&ip->cache[1], cursor.node); 2828 #if 0 2829 kprintf("bmap_end2 %016llx ip->cache %p\n", 2830 (long long)ap->a_loffset, ip->cache[1]); 2831 #endif 2832 } 2833 hammer_done_cursor(&cursor); 2834 hammer_done_transaction(&trans); 2835 2836 /* 2837 * If we couldn't find any records or the records we did find were 2838 * all behind the requested offset, return failure. A forward 2839 * truncation can leave a hole w/ no on-disk records. 2840 */ 2841 if (last_offset == 0 || last_offset < ap->a_loffset) 2842 return (EOPNOTSUPP); 2843 2844 /* 2845 * Figure out the block size at the requested offset and adjust 2846 * our limits so the cluster_read() does not create inappropriately 2847 * sized buffer cache buffers. 2848 */ 2849 blksize = hammer_blocksize(ap->a_loffset); 2850 if (hammer_blocksize(base_offset) != blksize) { 2851 base_offset = hammer_blockdemarc(base_offset, ap->a_loffset); 2852 } 2853 if (last_offset != ap->a_loffset && 2854 hammer_blocksize(last_offset - 1) != blksize) { 2855 last_offset = hammer_blockdemarc(ap->a_loffset, 2856 last_offset - 1); 2857 } 2858 2859 /* 2860 * Returning EOPNOTSUPP simply prevents the direct-IO optimization 2861 * from occuring. 2862 */ 2863 disk_offset = base_disk_offset + (ap->a_loffset - base_offset); 2864 2865 if ((disk_offset & HAMMER_OFF_ZONE_MASK) != HAMMER_ZONE_LARGE_DATA) { 2866 /* 2867 * Only large-data zones can be direct-IOd 2868 */ 2869 error = EOPNOTSUPP; 2870 } else if ((disk_offset & HAMMER_BUFMASK) || 2871 (last_offset - ap->a_loffset) < blksize) { 2872 /* 2873 * doffsetp is not aligned or the forward run size does 2874 * not cover a whole buffer, disallow the direct I/O. 2875 */ 2876 error = EOPNOTSUPP; 2877 } else { 2878 /* 2879 * We're good. 2880 */ 2881 *ap->a_doffsetp = disk_offset; 2882 if (ap->a_runb) { 2883 *ap->a_runb = ap->a_loffset - base_offset; 2884 KKASSERT(*ap->a_runb >= 0); 2885 } 2886 if (ap->a_runp) { 2887 *ap->a_runp = last_offset - ap->a_loffset; 2888 KKASSERT(*ap->a_runp >= 0); 2889 } 2890 error = 0; 2891 } 2892 return(error); 2893 } 2894 2895 /* 2896 * Write to a regular file. Because this is a strategy call the OS is 2897 * trying to actually get data onto the media. 2898 */ 2899 static 2900 int 2901 hammer_vop_strategy_write(struct vop_strategy_args *ap) 2902 { 2903 hammer_record_t record; 2904 hammer_mount_t hmp; 2905 hammer_inode_t ip; 2906 struct bio *bio; 2907 struct buf *bp; 2908 int blksize; 2909 int bytes; 2910 int error; 2911 2912 bio = ap->a_bio; 2913 bp = bio->bio_buf; 2914 ip = ap->a_vp->v_data; 2915 hmp = ip->hmp; 2916 2917 blksize = hammer_blocksize(bio->bio_offset); 2918 KKASSERT(bp->b_bufsize == blksize); 2919 2920 if (ip->flags & HAMMER_INODE_RO) { 2921 bp->b_error = EROFS; 2922 bp->b_flags |= B_ERROR; 2923 biodone(ap->a_bio); 2924 return(EROFS); 2925 } 2926 2927 /* 2928 * Interlock with inode destruction (no in-kernel or directory 2929 * topology visibility). If we queue new IO while trying to 2930 * destroy the inode we can deadlock the vtrunc call in 2931 * hammer_inode_unloadable_check(). 2932 * 2933 * Besides, there's no point flushing a bp associated with an 2934 * inode that is being destroyed on-media and has no kernel 2935 * references. 2936 */ 2937 if ((ip->flags | ip->sync_flags) & 2938 (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) { 2939 bp->b_resid = 0; 2940 biodone(ap->a_bio); 2941 return(0); 2942 } 2943 2944 /* 2945 * Reserve space and issue a direct-write from the front-end. 2946 * NOTE: The direct_io code will hammer_bread/bcopy smaller 2947 * allocations. 2948 * 2949 * An in-memory record will be installed to reference the storage 2950 * until the flusher can get to it. 2951 * 2952 * Since we own the high level bio the front-end will not try to 2953 * do a direct-read until the write completes. 2954 * 2955 * NOTE: The only time we do not reserve a full-sized buffers 2956 * worth of data is if the file is small. We do not try to 2957 * allocate a fragment (from the small-data zone) at the end of 2958 * an otherwise large file as this can lead to wildly separated 2959 * data. 2960 */ 2961 KKASSERT((bio->bio_offset & HAMMER_BUFMASK) == 0); 2962 KKASSERT(bio->bio_offset < ip->ino_data.size); 2963 if (bio->bio_offset || ip->ino_data.size > HAMMER_BUFSIZE / 2) 2964 bytes = bp->b_bufsize; 2965 else 2966 bytes = ((int)ip->ino_data.size + 15) & ~15; 2967 2968 record = hammer_ip_add_bulk(ip, bio->bio_offset, bp->b_data, 2969 bytes, &error); 2970 if (record) { 2971 hammer_io_direct_write(hmp, record, bio); 2972 if (ip->rsv_recs > 1 && hmp->rsv_recs > hammer_limit_recs) 2973 hammer_flush_inode(ip, 0); 2974 } else { 2975 bp->b_bio2.bio_offset = NOOFFSET; 2976 bp->b_error = error; 2977 bp->b_flags |= B_ERROR; 2978 biodone(ap->a_bio); 2979 } 2980 return(error); 2981 } 2982 2983 /* 2984 * dounlink - disconnect a directory entry 2985 * 2986 * XXX whiteout support not really in yet 2987 */ 2988 static int 2989 hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch, 2990 struct vnode *dvp, struct ucred *cred, 2991 int flags, int isdir) 2992 { 2993 struct namecache *ncp; 2994 hammer_inode_t dip; 2995 hammer_inode_t ip; 2996 struct hammer_cursor cursor; 2997 int64_t namekey; 2998 u_int32_t max_iterations; 2999 int nlen, error; 3000 3001 /* 3002 * Calculate the namekey and setup the key range for the scan. This 3003 * works kinda like a chained hash table where the lower 32 bits 3004 * of the namekey synthesize the chain. 3005 * 3006 * The key range is inclusive of both key_beg and key_end. 3007 */ 3008 dip = VTOI(dvp); 3009 ncp = nch->ncp; 3010 3011 if (dip->flags & HAMMER_INODE_RO) 3012 return (EROFS); 3013 3014 namekey = hammer_directory_namekey(dip, ncp->nc_name, ncp->nc_nlen, 3015 &max_iterations); 3016 retry: 3017 hammer_init_cursor(trans, &cursor, &dip->cache[1], dip); 3018 cursor.key_beg.localization = dip->obj_localization + 3019 hammer_dir_localization(dip); 3020 cursor.key_beg.obj_id = dip->obj_id; 3021 cursor.key_beg.key = namekey; 3022 cursor.key_beg.create_tid = 0; 3023 cursor.key_beg.delete_tid = 0; 3024 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY; 3025 cursor.key_beg.obj_type = 0; 3026 3027 cursor.key_end = cursor.key_beg; 3028 cursor.key_end.key += max_iterations; 3029 cursor.asof = dip->obj_asof; 3030 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; 3031 3032 /* 3033 * Scan all matching records (the chain), locate the one matching 3034 * the requested path component. info->last_error contains the 3035 * error code on search termination and could be 0, ENOENT, or 3036 * something else. 3037 * 3038 * The hammer_ip_*() functions merge in-memory records with on-disk 3039 * records for the purposes of the search. 3040 */ 3041 error = hammer_ip_first(&cursor); 3042 3043 while (error == 0) { 3044 error = hammer_ip_resolve_data(&cursor); 3045 if (error) 3046 break; 3047 nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF; 3048 KKASSERT(nlen > 0); 3049 if (ncp->nc_nlen == nlen && 3050 bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) { 3051 break; 3052 } 3053 error = hammer_ip_next(&cursor); 3054 } 3055 3056 /* 3057 * If all is ok we have to get the inode so we can adjust nlinks. 3058 * To avoid a deadlock with the flusher we must release the inode 3059 * lock on the directory when acquiring the inode for the entry. 3060 * 3061 * If the target is a directory, it must be empty. 3062 */ 3063 if (error == 0) { 3064 hammer_unlock(&cursor.ip->lock); 3065 ip = hammer_get_inode(trans, dip, cursor.data->entry.obj_id, 3066 dip->hmp->asof, 3067 cursor.data->entry.localization, 3068 0, &error); 3069 hammer_lock_sh(&cursor.ip->lock); 3070 if (error == ENOENT) { 3071 kprintf("HAMMER: WARNING: Removing " 3072 "dirent w/missing inode \"%s\"\n" 3073 "\tobj_id = %016llx\n", 3074 ncp->nc_name, 3075 (long long)cursor.data->entry.obj_id); 3076 error = 0; 3077 } 3078 3079 /* 3080 * If isdir >= 0 we validate that the entry is or is not a 3081 * directory. If isdir < 0 we don't care. 3082 */ 3083 if (error == 0 && isdir >= 0 && ip) { 3084 if (isdir && 3085 ip->ino_data.obj_type != HAMMER_OBJTYPE_DIRECTORY) { 3086 error = ENOTDIR; 3087 } else if (isdir == 0 && 3088 ip->ino_data.obj_type == HAMMER_OBJTYPE_DIRECTORY) { 3089 error = EISDIR; 3090 } 3091 } 3092 3093 /* 3094 * If we are trying to remove a directory the directory must 3095 * be empty. 3096 * 3097 * The check directory code can loop and deadlock/retry. Our 3098 * own cursor's node locks must be released to avoid a 3-way 3099 * deadlock with the flusher if the check directory code 3100 * blocks. 3101 * 3102 * If any changes whatsoever have been made to the cursor 3103 * set EDEADLK and retry. 3104 * 3105 * WARNING: See warnings in hammer_unlock_cursor() 3106 * function. 3107 */ 3108 if (error == 0 && ip && ip->ino_data.obj_type == 3109 HAMMER_OBJTYPE_DIRECTORY) { 3110 hammer_unlock_cursor(&cursor); 3111 error = hammer_ip_check_directory_empty(trans, ip); 3112 hammer_lock_cursor(&cursor); 3113 if (cursor.flags & HAMMER_CURSOR_RETEST) { 3114 kprintf("HAMMER: Warning: avoided deadlock " 3115 "on rmdir '%s'\n", 3116 ncp->nc_name); 3117 error = EDEADLK; 3118 } 3119 } 3120 3121 /* 3122 * Delete the directory entry. 3123 * 3124 * WARNING: hammer_ip_del_directory() may have to terminate 3125 * the cursor to avoid a deadlock. It is ok to call 3126 * hammer_done_cursor() twice. 3127 */ 3128 if (error == 0) { 3129 error = hammer_ip_del_directory(trans, &cursor, 3130 dip, ip); 3131 } 3132 hammer_done_cursor(&cursor); 3133 if (error == 0) { 3134 cache_setunresolved(nch); 3135 cache_setvp(nch, NULL); 3136 /* XXX locking */ 3137 if (ip && ip->vp) { 3138 hammer_knote(ip->vp, NOTE_DELETE); 3139 cache_inval_vp(ip->vp, CINV_DESTROY); 3140 } 3141 } 3142 if (ip) 3143 hammer_rel_inode(ip, 0); 3144 } else { 3145 hammer_done_cursor(&cursor); 3146 } 3147 if (error == EDEADLK) 3148 goto retry; 3149 3150 return (error); 3151 } 3152 3153 /************************************************************************ 3154 * FIFO AND SPECFS OPS * 3155 ************************************************************************ 3156 * 3157 */ 3158 3159 static int 3160 hammer_vop_fifoclose (struct vop_close_args *ap) 3161 { 3162 /* XXX update itimes */ 3163 return (VOCALL(&fifo_vnode_vops, &ap->a_head)); 3164 } 3165 3166 static int 3167 hammer_vop_fiforead (struct vop_read_args *ap) 3168 { 3169 int error; 3170 3171 error = VOCALL(&fifo_vnode_vops, &ap->a_head); 3172 /* XXX update access time */ 3173 return (error); 3174 } 3175 3176 static int 3177 hammer_vop_fifowrite (struct vop_write_args *ap) 3178 { 3179 int error; 3180 3181 error = VOCALL(&fifo_vnode_vops, &ap->a_head); 3182 /* XXX update access time */ 3183 return (error); 3184 } 3185 3186 static 3187 int 3188 hammer_vop_fifokqfilter(struct vop_kqfilter_args *ap) 3189 { 3190 int error; 3191 3192 error = VOCALL(&fifo_vnode_vops, &ap->a_head); 3193 if (error) 3194 error = hammer_vop_kqfilter(ap); 3195 return(error); 3196 } 3197 3198 /************************************************************************ 3199 * KQFILTER OPS * 3200 ************************************************************************ 3201 * 3202 */ 3203 static void filt_hammerdetach(struct knote *kn); 3204 static int filt_hammerread(struct knote *kn, long hint); 3205 static int filt_hammerwrite(struct knote *kn, long hint); 3206 static int filt_hammervnode(struct knote *kn, long hint); 3207 3208 static struct filterops hammerread_filtops = 3209 { 1, NULL, filt_hammerdetach, filt_hammerread }; 3210 static struct filterops hammerwrite_filtops = 3211 { 1, NULL, filt_hammerdetach, filt_hammerwrite }; 3212 static struct filterops hammervnode_filtops = 3213 { 1, NULL, filt_hammerdetach, filt_hammervnode }; 3214 3215 static 3216 int 3217 hammer_vop_kqfilter(struct vop_kqfilter_args *ap) 3218 { 3219 struct vnode *vp = ap->a_vp; 3220 struct knote *kn = ap->a_kn; 3221 lwkt_tokref vlock; 3222 3223 switch (kn->kn_filter) { 3224 case EVFILT_READ: 3225 kn->kn_fop = &hammerread_filtops; 3226 break; 3227 case EVFILT_WRITE: 3228 kn->kn_fop = &hammerwrite_filtops; 3229 break; 3230 case EVFILT_VNODE: 3231 kn->kn_fop = &hammervnode_filtops; 3232 break; 3233 default: 3234 return (1); 3235 } 3236 3237 kn->kn_hook = (caddr_t)vp; 3238 3239 lwkt_gettoken(&vlock, &vp->v_token); 3240 SLIST_INSERT_HEAD(&vp->v_pollinfo.vpi_selinfo.si_note, kn, kn_selnext); 3241 lwkt_reltoken(&vlock); 3242 3243 return(0); 3244 } 3245 3246 static void 3247 filt_hammerdetach(struct knote *kn) 3248 { 3249 struct vnode *vp = (void *)kn->kn_hook; 3250 lwkt_tokref vlock; 3251 3252 lwkt_gettoken(&vlock, &vp->v_token); 3253 SLIST_REMOVE(&vp->v_pollinfo.vpi_selinfo.si_note, 3254 kn, knote, kn_selnext); 3255 lwkt_reltoken(&vlock); 3256 } 3257 3258 static int 3259 filt_hammerread(struct knote *kn, long hint) 3260 { 3261 struct vnode *vp = (void *)kn->kn_hook; 3262 hammer_inode_t ip = VTOI(vp); 3263 3264 if (hint == NOTE_REVOKE) { 3265 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 3266 return(1); 3267 } 3268 kn->kn_data = ip->ino_data.size - kn->kn_fp->f_offset; 3269 return (kn->kn_data != 0); 3270 } 3271 3272 static int 3273 filt_hammerwrite(struct knote *kn, long hint) 3274 { 3275 if (hint == NOTE_REVOKE) 3276 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 3277 kn->kn_data = 0; 3278 return (1); 3279 } 3280 3281 static int 3282 filt_hammervnode(struct knote *kn, long hint) 3283 { 3284 if (kn->kn_sfflags & hint) 3285 kn->kn_fflags |= hint; 3286 if (hint == NOTE_REVOKE) { 3287 kn->kn_flags |= EV_EOF; 3288 return (1); 3289 } 3290 return (kn->kn_fflags != 0); 3291 } 3292 3293