1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2017 Nexenta Systems, Inc. All rights reserved. 14 */ 15 16 #include <sys/types.h> 17 #include <sys/param.h> 18 #include <sys/systm.h> 19 #include <sys/t_lock.h> 20 #include <sys/errno.h> 21 #include <sys/cred.h> 22 #include <sys/user.h> 23 #include <sys/uio.h> 24 #include <sys/file.h> 25 #include <sys/pathname.h> 26 #include <sys/vfs.h> 27 #include <sys/vnode.h> 28 #include <sys/stat.h> 29 #include <sys/mode.h> 30 #include <sys/kmem.h> 31 #include <sys/cmn_err.h> 32 #include <sys/debug.h> 33 #include <sys/atomic.h> 34 #include <sys/acl.h> 35 #include <sys/filio.h> 36 #include <sys/flock.h> 37 #include <sys/nbmlock.h> 38 #include <sys/fcntl.h> 39 #include <sys/poll.h> 40 #include <sys/time.h> 41 42 #include <errno.h> 43 #include <fcntl.h> 44 #include <unistd.h> 45 46 #include "vncache.h" 47 48 #define O_RWMASK (O_WRONLY | O_RDWR) /* == 3 */ 49 50 int fop_shrlock_enable = 0; 51 52 int stat_to_vattr(const struct stat *, vattr_t *); 53 int fop__getxvattr(vnode_t *, xvattr_t *); 54 int fop__setxvattr(vnode_t *, xvattr_t *); 55 56 static void fake_inactive_xattrdir(vnode_t *); 57 58 /* ARGSUSED */ 59 int 60 fop_open( 61 vnode_t **vpp, 62 int mode, 63 cred_t *cr, 64 caller_context_t *ct) 65 { 66 67 if ((*vpp)->v_type == VREG) { 68 if (mode & FREAD) 69 atomic_add_32(&((*vpp)->v_rdcnt), 1); 70 if (mode & FWRITE) 71 atomic_add_32(&((*vpp)->v_wrcnt), 1); 72 } 73 74 /* call to ->vop_open was here */ 75 76 return (0); 77 } 78 79 /* ARGSUSED */ 80 int 81 fop_close( 82 vnode_t *vp, 83 int flag, 84 int count, 85 offset_t offset, 86 cred_t *cr, 87 caller_context_t *ct) 88 { 89 90 /* call to ->vop_close was here */ 91 92 /* 93 * Check passed in count to handle possible dups. Vnode counts are only 94 * kept on regular files 95 */ 96 if ((vp->v_type == VREG) && (count == 1)) { 97 if (flag & FREAD) { 98 ASSERT(vp->v_rdcnt > 0); 99 atomic_add_32(&(vp->v_rdcnt), -1); 100 } 101 if (flag & FWRITE) { 102 ASSERT(vp->v_wrcnt > 0); 103 atomic_add_32(&(vp->v_wrcnt), -1); 104 } 105 } 106 return (0); 107 } 108 109 /* ARGSUSED */ 110 int 111 fop_read( 112 vnode_t *vp, 113 uio_t *uio, 114 int ioflag, 115 cred_t *cr, 116 caller_context_t *ct) 117 { 118 struct stat st; 119 struct iovec *iov; 120 ssize_t resid; 121 size_t cnt; 122 int n; 123 124 /* 125 * If that caller asks for read beyond end of file, 126 * that causes the pread call to block. (Ugh!) 127 * Get the file size and return what we can. 128 */ 129 (void) fstat(vp->v_fd, &st); 130 resid = uio->uio_resid; 131 if ((uio->uio_loffset + resid) > st.st_size) 132 resid = st.st_size - uio->uio_loffset; 133 134 while (resid > 0) { 135 136 ASSERT(uio->uio_iovcnt > 0); 137 iov = uio->uio_iov; 138 139 if (iov->iov_len == 0) { 140 uio->uio_iov++; 141 uio->uio_iovcnt--; 142 continue; 143 } 144 cnt = iov->iov_len; 145 if (cnt > resid) 146 cnt = resid; 147 148 n = pread(vp->v_fd, iov->iov_base, cnt, uio->uio_loffset); 149 if (n < 0) 150 return (errno); 151 152 iov->iov_base += n; 153 iov->iov_len -= n; 154 155 uio->uio_resid -= n; 156 uio->uio_loffset += n; 157 158 resid -= n; 159 } 160 161 return (0); 162 } 163 164 /* ARGSUSED */ 165 int 166 fop_write( 167 vnode_t *vp, 168 uio_t *uio, 169 int ioflag, 170 cred_t *cr, 171 caller_context_t *ct) 172 { 173 struct iovec *iov; 174 size_t cnt; 175 int n; 176 177 while (uio->uio_resid > 0) { 178 179 ASSERT(uio->uio_iovcnt > 0); 180 iov = uio->uio_iov; 181 182 if (iov->iov_len == 0) { 183 uio->uio_iov++; 184 uio->uio_iovcnt--; 185 continue; 186 } 187 cnt = iov->iov_len; 188 if (cnt > uio->uio_resid) 189 cnt = uio->uio_resid; 190 191 n = pwrite(vp->v_fd, iov->iov_base, iov->iov_len, 192 uio->uio_loffset); 193 if (n < 0) 194 return (errno); 195 196 iov->iov_base += n; 197 iov->iov_len -= n; 198 199 uio->uio_resid -= n; 200 uio->uio_loffset += n; 201 } 202 203 if (ioflag == FSYNC) { 204 (void) fsync(vp->v_fd); 205 } 206 207 return (0); 208 } 209 210 /* ARGSUSED */ 211 int 212 fop_ioctl( 213 vnode_t *vp, 214 int cmd, 215 intptr_t arg, 216 int flag, 217 cred_t *cr, 218 int *rvalp, 219 caller_context_t *ct) 220 { 221 off64_t off; 222 int rv, whence; 223 224 switch (cmd) { 225 case _FIO_SEEK_DATA: 226 case _FIO_SEEK_HOLE: 227 whence = (cmd == _FIO_SEEK_DATA) ? SEEK_DATA : SEEK_HOLE; 228 bcopy((void *)arg, &off, sizeof (off)); 229 off = lseek(vp->v_fd, off, whence); 230 if (off == (off64_t)-1) { 231 rv = errno; 232 } else { 233 bcopy(&off, (void *)arg, sizeof (off)); 234 rv = 0; 235 } 236 break; 237 238 default: 239 rv = ENOTTY; 240 break; 241 } 242 243 return (rv); 244 } 245 246 /* ARGSUSED */ 247 int 248 fop_setfl( 249 vnode_t *vp, 250 int oflags, 251 int nflags, 252 cred_t *cr, 253 caller_context_t *ct) 254 { 255 /* allow any flags? See fs_setfl */ 256 return (0); 257 } 258 259 /* ARGSUSED */ 260 int 261 fop_getattr( 262 vnode_t *vp, 263 vattr_t *vap, 264 int flags, 265 cred_t *cr, 266 caller_context_t *ct) 267 { 268 int error; 269 struct stat st; 270 271 if (fstat(vp->v_fd, &st) == -1) 272 return (errno); 273 error = stat_to_vattr(&st, vap); 274 275 if (vap->va_mask & AT_XVATTR) 276 (void) fop__getxvattr(vp, (xvattr_t *)vap); 277 278 return (error); 279 } 280 281 /* ARGSUSED */ 282 int 283 fop_setattr( 284 vnode_t *vp, 285 vattr_t *vap, 286 int flags, 287 cred_t *cr, 288 caller_context_t *ct) 289 { 290 timespec_t times[2]; 291 int err; 292 293 if (vap->va_mask & AT_SIZE) { 294 if (ftruncate(vp->v_fd, vap->va_size) == -1) { 295 err = errno; 296 if (err == EBADF) 297 err = EACCES; 298 return (err); 299 } 300 } 301 302 /* AT_MODE or anything else? */ 303 304 if (vap->va_mask & AT_XVATTR) 305 (void) fop__setxvattr(vp, (xvattr_t *)vap); 306 307 if (vap->va_mask & (AT_ATIME | AT_MTIME)) { 308 if (vap->va_mask & AT_ATIME) { 309 times[0] = vap->va_atime; 310 } else { 311 times[0].tv_sec = 0; 312 times[0].tv_nsec = UTIME_OMIT; 313 } 314 if (vap->va_mask & AT_MTIME) { 315 times[1] = vap->va_mtime; 316 } else { 317 times[1].tv_sec = 0; 318 times[1].tv_nsec = UTIME_OMIT; 319 } 320 321 (void) futimens(vp->v_fd, times); 322 } 323 324 return (0); 325 } 326 327 /* ARGSUSED */ 328 int 329 fop_access( 330 vnode_t *vp, 331 int mode, 332 int flags, 333 cred_t *cr, 334 caller_context_t *ct) 335 { 336 return (0); 337 } 338 339 /* 340 * Conceptually like xattr_dir_lookup() 341 */ 342 static int 343 fake_lookup_xattrdir( 344 vnode_t *dvp, 345 vnode_t **vpp) 346 { 347 int len, fd; 348 int omode = O_RDWR | O_NOFOLLOW; 349 vnode_t *vp; 350 351 *vpp = NULL; 352 353 if (dvp->v_type != VDIR && dvp->v_type != VREG) 354 return (EINVAL); 355 356 /* 357 * If we're already in sysattr space, don't allow creation 358 * of another level of sysattrs. 359 */ 360 if (dvp->v_flag & V_SYSATTR) 361 return (EINVAL); 362 363 mutex_enter(&dvp->v_lock); 364 if (dvp->v_xattrdir != NULL) { 365 *vpp = dvp->v_xattrdir; 366 VN_HOLD(*vpp); 367 mutex_exit(&dvp->v_lock); 368 return (0); 369 } 370 mutex_exit(&dvp->v_lock); 371 372 omode = O_RDONLY|O_XATTR; 373 fd = openat(dvp->v_fd, ".", omode); 374 if (fd < 0) 375 return (errno); 376 377 vp = vn_alloc(KM_SLEEP); 378 vp->v_fd = fd; 379 vp->v_flag = V_XATTRDIR|V_SYSATTR; 380 vp->v_type = VDIR; 381 vp->v_vfsp = dvp->v_vfsp; 382 383 /* Set v_path to parent path + "/@" (like NFS) */ 384 len = strlen(dvp->v_path) + 3; 385 vp->v_path = kmem_alloc(len, KM_SLEEP); 386 (void) snprintf(vp->v_path, len, "%s/@", dvp->v_path); 387 388 /* 389 * Keep a pointer to the parent and a hold on it. 390 * Both are cleaned up in fake_inactive_xattrdir 391 */ 392 vp->v_data = dvp; 393 vn_hold(dvp); 394 395 mutex_enter(&dvp->v_lock); 396 if (dvp->v_xattrdir == NULL) { 397 *vpp = dvp->v_xattrdir = vp; 398 mutex_exit(&dvp->v_lock); 399 } else { 400 *vpp = dvp->v_xattrdir; 401 mutex_exit(&dvp->v_lock); 402 fake_inactive_xattrdir(vp); 403 } 404 405 return (0); 406 } 407 408 /* ARGSUSED */ 409 int 410 fop_lookup( 411 vnode_t *dvp, 412 char *name, 413 vnode_t **vpp, 414 pathname_t *pnp, 415 int flags, 416 vnode_t *rdir, 417 cred_t *cr, 418 caller_context_t *ct, 419 int *deflags, /* Returned per-dirent flags */ 420 pathname_t *ppnp) /* Returned case-preserved name in directory */ 421 { 422 int fd; 423 int omode = O_RDWR | O_NOFOLLOW; 424 vnode_t *vp; 425 struct stat st; 426 427 if (flags & LOOKUP_XATTR) 428 return (fake_lookup_xattrdir(dvp, vpp)); 429 430 /* 431 * If lookup is for "", just return dvp. 432 */ 433 if (name[0] == '\0') { 434 vn_hold(dvp); 435 *vpp = dvp; 436 return (0); 437 } 438 439 if (fstatat(dvp->v_fd, name, &st, AT_SYMLINK_NOFOLLOW) == -1) 440 return (errno); 441 442 vp = vncache_lookup(&st); 443 if (vp != NULL) { 444 /* lookup gave us a hold */ 445 *vpp = vp; 446 return (0); 447 } 448 449 if (S_ISDIR(st.st_mode)) 450 omode = O_RDONLY | O_NOFOLLOW; 451 452 again: 453 fd = openat(dvp->v_fd, name, omode, 0); 454 if (fd < 0) { 455 if ((omode & O_RWMASK) == O_RDWR) { 456 omode &= ~O_RWMASK; 457 omode |= O_RDONLY; 458 goto again; 459 } 460 return (errno); 461 } 462 463 if (fstat(fd, &st) == -1) { 464 (void) close(fd); 465 return (errno); 466 } 467 468 vp = vncache_enter(&st, dvp, name, fd); 469 470 *vpp = vp; 471 return (0); 472 } 473 474 /* ARGSUSED */ 475 int 476 fop_create( 477 vnode_t *dvp, 478 char *name, 479 vattr_t *vap, 480 vcexcl_t excl, 481 int mode, 482 vnode_t **vpp, 483 cred_t *cr, 484 int flags, 485 caller_context_t *ct, 486 vsecattr_t *vsecp) /* ACL to set during create */ 487 { 488 struct stat st; 489 vnode_t *vp; 490 int err, fd, omode; 491 492 /* 493 * If creating "", just return dvp. 494 */ 495 if (name[0] == '\0') { 496 vn_hold(dvp); 497 *vpp = dvp; 498 return (0); 499 } 500 501 err = fstatat(dvp->v_fd, name, &st, AT_SYMLINK_NOFOLLOW); 502 if (err != 0) 503 err = errno; 504 505 vp = NULL; 506 if (err == 0) { 507 /* The file already exists. */ 508 if (excl == EXCL) 509 return (EEXIST); 510 511 vp = vncache_lookup(&st); 512 /* vp gained a hold */ 513 } 514 515 if (vp == NULL) { 516 /* 517 * Open it. (may or may not exist) 518 */ 519 omode = O_RDWR | O_CREAT | O_NOFOLLOW; 520 if (excl == EXCL) 521 omode |= O_EXCL; 522 open_again: 523 fd = openat(dvp->v_fd, name, omode, mode); 524 if (fd < 0) { 525 if ((omode & O_RWMASK) == O_RDWR) { 526 omode &= ~O_RWMASK; 527 omode |= O_RDONLY; 528 goto open_again; 529 } 530 return (errno); 531 } 532 (void) fstat(fd, &st); 533 534 vp = vncache_enter(&st, dvp, name, fd); 535 /* vp has its initial hold */ 536 } 537 538 /* Should have the vp now. */ 539 if (vp == NULL) 540 return (EFAULT); 541 542 if (vp->v_type == VDIR && vap->va_type != VDIR) { 543 vn_rele(vp); 544 return (EISDIR); 545 } 546 if (vp->v_type != VDIR && vap->va_type == VDIR) { 547 vn_rele(vp); 548 return (ENOTDIR); 549 } 550 551 /* 552 * Might need to set attributes. 553 */ 554 (void) fop_setattr(vp, vap, 0, cr, ct); 555 556 *vpp = vp; 557 return (0); 558 } 559 560 /* ARGSUSED */ 561 int 562 fop_remove( 563 vnode_t *dvp, 564 char *name, 565 cred_t *cr, 566 caller_context_t *ct, 567 int flags) 568 { 569 570 if (unlinkat(dvp->v_fd, name, 0)) 571 return (errno); 572 573 return (0); 574 } 575 576 /* ARGSUSED */ 577 int 578 fop_link( 579 vnode_t *to_dvp, 580 vnode_t *fr_vp, 581 char *to_name, 582 cred_t *cr, 583 caller_context_t *ct, 584 int flags) 585 { 586 int err; 587 588 /* 589 * Would prefer to specify "from" as the combination: 590 * (fr_vp->v_fd, NULL) but linkat does not permit it. 591 */ 592 err = linkat(AT_FDCWD, fr_vp->v_path, to_dvp->v_fd, to_name, 593 AT_SYMLINK_FOLLOW); 594 if (err == -1) 595 err = errno; 596 597 return (err); 598 } 599 600 /* ARGSUSED */ 601 int 602 fop_rename( 603 vnode_t *from_dvp, 604 char *from_name, 605 vnode_t *to_dvp, 606 char *to_name, 607 cred_t *cr, 608 caller_context_t *ct, 609 int flags) 610 { 611 struct stat st; 612 vnode_t *vp; 613 int err; 614 615 if (fstatat(from_dvp->v_fd, from_name, &st, 616 AT_SYMLINK_NOFOLLOW) == -1) 617 return (errno); 618 619 vp = vncache_lookup(&st); 620 if (vp == NULL) 621 return (ENOENT); 622 623 err = renameat(from_dvp->v_fd, from_name, to_dvp->v_fd, to_name); 624 if (err == -1) 625 err = errno; 626 else 627 vncache_renamed(vp, to_dvp, to_name); 628 629 vn_rele(vp); 630 631 return (err); 632 } 633 634 /* ARGSUSED */ 635 int 636 fop_mkdir( 637 vnode_t *dvp, 638 char *name, 639 vattr_t *vap, 640 vnode_t **vpp, 641 cred_t *cr, 642 caller_context_t *ct, 643 int flags, 644 vsecattr_t *vsecp) /* ACL to set during create */ 645 { 646 struct stat st; 647 int err, fd; 648 649 mode_t mode = vap->va_mode & 0777; 650 651 if (mkdirat(dvp->v_fd, name, mode) == -1) 652 return (errno); 653 654 if ((fd = openat(dvp->v_fd, name, O_RDONLY)) == -1) 655 return (errno); 656 if (fstat(fd, &st) == -1) { 657 err = errno; 658 (void) close(fd); 659 return (err); 660 } 661 662 *vpp = vncache_enter(&st, dvp, name, fd); 663 664 /* 665 * Might need to set attributes. 666 */ 667 (void) fop_setattr(*vpp, vap, 0, cr, ct); 668 669 return (0); 670 } 671 672 /* ARGSUSED */ 673 int 674 fop_rmdir( 675 vnode_t *dvp, 676 char *name, 677 vnode_t *cdir, 678 cred_t *cr, 679 caller_context_t *ct, 680 int flags) 681 { 682 683 if (unlinkat(dvp->v_fd, name, AT_REMOVEDIR) == -1) 684 return (errno); 685 686 return (0); 687 } 688 689 /* ARGSUSED */ 690 int 691 fop_readdir( 692 vnode_t *vp, 693 uio_t *uiop, 694 cred_t *cr, 695 int *eofp, 696 caller_context_t *ct, 697 int flags) 698 { 699 struct iovec *iov; 700 int cnt; 701 int error = 0; 702 int fd = vp->v_fd; 703 704 if (eofp) { 705 *eofp = 0; 706 } 707 708 error = lseek(fd, uiop->uio_loffset, SEEK_SET); 709 if (error == -1) 710 return (errno); 711 712 ASSERT(uiop->uio_iovcnt > 0); 713 iov = uiop->uio_iov; 714 if (iov->iov_len < sizeof (struct dirent)) 715 return (EINVAL); 716 717 /* LINTED E_BAD_PTR_CAST_ALIGN */ 718 cnt = getdents(fd, (struct dirent *)(uiop->uio_iov->iov_base), 719 uiop->uio_resid); 720 if (cnt == -1) 721 return (errno); 722 if (cnt == 0) { 723 if (eofp) { 724 *eofp = 1; 725 } 726 return (ENOENT); 727 } 728 729 iov->iov_base += cnt; 730 iov->iov_len -= cnt; 731 uiop->uio_resid -= cnt; 732 uiop->uio_loffset = lseek(fd, 0LL, SEEK_CUR); 733 734 return (0); 735 } 736 737 /* ARGSUSED */ 738 int 739 fop_symlink( 740 vnode_t *dvp, 741 char *linkname, 742 vattr_t *vap, 743 char *target, 744 cred_t *cr, 745 caller_context_t *ct, 746 int flags) 747 { 748 return (ENOSYS); 749 } 750 751 /* ARGSUSED */ 752 int 753 fop_readlink( 754 vnode_t *vp, 755 uio_t *uiop, 756 cred_t *cr, 757 caller_context_t *ct) 758 { 759 return (ENOSYS); 760 } 761 762 /* ARGSUSED */ 763 int 764 fop_fsync( 765 vnode_t *vp, 766 int syncflag, 767 cred_t *cr, 768 caller_context_t *ct) 769 { 770 771 if (fsync(vp->v_fd) == -1) 772 return (errno); 773 774 return (0); 775 } 776 777 /* ARGSUSED */ 778 void 779 fop_inactive( 780 vnode_t *vp, 781 cred_t *cr, 782 caller_context_t *ct) 783 { 784 if (vp->v_flag & V_XATTRDIR) { 785 fake_inactive_xattrdir(vp); 786 } else { 787 vncache_inactive(vp); 788 } 789 } 790 791 /* 792 * The special xattr directories are not in the vncache AVL, but 793 * hang off the parent's v_xattrdir field. When vn_rele finds 794 * an xattr dir at v_count == 1 it calls here, but until we 795 * take locks on both the parent and the xattrdir, we don't 796 * know if we're really at the last reference. So in here we 797 * take both locks, re-check the count, and either bail out 798 * or proceed with "inactive" vnode cleanup. Part of that 799 * cleanup includes releasing the hold on the parent and 800 * clearing the parent's v_xattrdir field, which were 801 * setup in fake_lookup_xattrdir() 802 */ 803 static void 804 fake_inactive_xattrdir(vnode_t *vp) 805 { 806 vnode_t *dvp = vp->v_data; /* parent */ 807 mutex_enter(&dvp->v_lock); 808 mutex_enter(&vp->v_lock); 809 if (vp->v_count > 1) { 810 /* new ref. via v_xattrdir */ 811 mutex_exit(&vp->v_lock); 812 mutex_exit(&dvp->v_lock); 813 return; 814 } 815 ASSERT(dvp->v_xattrdir == vp); 816 dvp->v_xattrdir = NULL; 817 mutex_exit(&vp->v_lock); 818 mutex_exit(&dvp->v_lock); 819 vn_rele(dvp); 820 vn_free(vp); 821 } 822 823 /* ARGSUSED */ 824 int 825 fop_fid( 826 vnode_t *vp, 827 fid_t *fidp, 828 caller_context_t *ct) 829 { 830 return (ENOSYS); 831 } 832 833 /* ARGSUSED */ 834 int 835 fop_rwlock( 836 vnode_t *vp, 837 int write_lock, 838 caller_context_t *ct) 839 { 840 /* See: fs_rwlock */ 841 return (-1); 842 } 843 844 /* ARGSUSED */ 845 void 846 fop_rwunlock( 847 vnode_t *vp, 848 int write_lock, 849 caller_context_t *ct) 850 { 851 /* See: fs_rwunlock */ 852 } 853 854 /* ARGSUSED */ 855 int 856 fop_seek( 857 vnode_t *vp, 858 offset_t ooff, 859 offset_t *noffp, 860 caller_context_t *ct) 861 { 862 return (ENOSYS); 863 } 864 865 /* ARGSUSED */ 866 int 867 fop_cmp( 868 vnode_t *vp1, 869 vnode_t *vp2, 870 caller_context_t *ct) 871 { 872 /* See fs_cmp */ 873 return (vncache_cmp(vp1, vp2)); 874 } 875 876 /* ARGSUSED */ 877 int 878 fop_frlock( 879 vnode_t *vp, 880 int cmd, 881 flock64_t *bfp, 882 int flag, 883 offset_t offset, 884 struct flk_callback *flk_cbp, 885 cred_t *cr, 886 caller_context_t *ct) 887 { 888 #if defined(_LP64) 889 offset_t maxoffset = INT64_MAX; 890 #elif defined(_ILP32) 891 /* 892 * Sadly, the fcntl API enforces 32-bit offsets, 893 * even though we have _FILE_OFFSET_BITS=64 894 */ 895 offset_t maxoffset = INT32_MAX; 896 #else 897 #error "unsupported env." 898 #endif 899 900 /* See fs_frlock */ 901 902 switch (cmd) { 903 case F_GETLK: 904 case F_SETLK_NBMAND: 905 case F_SETLK: 906 case F_SETLKW: 907 break; 908 default: 909 return (EINVAL); 910 } 911 912 /* We only get SEEK_SET ranges here. */ 913 if (bfp->l_whence != 0) 914 return (EINVAL); 915 916 /* 917 * One limitation of using fcntl(2) F_SETLK etc is that 918 * the real kernel limits the offsets we can use. 919 * (Maybe the fcntl API should loosen that up?) 920 * See syscall/fcntl.c:flock_check() 921 * 922 * Here in libfksmbsrv we can just ignore such locks, 923 * or ignore the part that extends beyond maxoffset. 924 * The SMB layer still keeps track of such locks for 925 * conflict detection, so not reflecting such locks 926 * into the real FS layer is OK. Note: this may 927 * modify the pased bfp->l_len. 928 */ 929 if (bfp->l_start < 0 || bfp->l_start > maxoffset) 930 return (0); 931 if (bfp->l_len < 0 || bfp->l_len > maxoffset) 932 return (0); 933 if (bfp->l_len > (maxoffset - bfp->l_start + 1)) 934 bfp->l_len = (maxoffset - bfp->l_start + 1); 935 936 if (fcntl(vp->v_fd, cmd, bfp) == -1) 937 return (errno); 938 939 return (0); 940 } 941 942 /* ARGSUSED */ 943 int 944 fop_space( 945 vnode_t *vp, 946 int cmd, 947 flock64_t *bfp, 948 int flag, 949 offset_t offset, 950 cred_t *cr, 951 caller_context_t *ct) 952 { 953 /* See fs_frlock */ 954 955 switch (cmd) { 956 case F_ALLOCSP: 957 case F_FREESP: 958 break; 959 default: 960 return (EINVAL); 961 } 962 963 if (fcntl(vp->v_fd, cmd, bfp) == -1) 964 return (errno); 965 966 return (0); 967 } 968 969 /* ARGSUSED */ 970 int 971 fop_realvp( 972 vnode_t *vp, 973 vnode_t **vpp, 974 caller_context_t *ct) 975 { 976 return (ENOSYS); 977 } 978 979 /* ARGSUSED */ 980 int 981 fop_getpage( 982 vnode_t *vp, 983 offset_t off, 984 size_t len, 985 uint_t *protp, 986 struct page **plarr, 987 size_t plsz, 988 struct seg *seg, 989 caddr_t addr, 990 enum seg_rw rw, 991 cred_t *cr, 992 caller_context_t *ct) 993 { 994 return (ENOSYS); 995 } 996 997 /* ARGSUSED */ 998 int 999 fop_putpage( 1000 vnode_t *vp, 1001 offset_t off, 1002 size_t len, 1003 int flags, 1004 cred_t *cr, 1005 caller_context_t *ct) 1006 { 1007 return (ENOSYS); 1008 } 1009 1010 /* ARGSUSED */ 1011 int 1012 fop_map( 1013 vnode_t *vp, 1014 offset_t off, 1015 struct as *as, 1016 caddr_t *addrp, 1017 size_t len, 1018 uchar_t prot, 1019 uchar_t maxprot, 1020 uint_t flags, 1021 cred_t *cr, 1022 caller_context_t *ct) 1023 { 1024 return (ENOSYS); 1025 } 1026 1027 /* ARGSUSED */ 1028 int 1029 fop_addmap( 1030 vnode_t *vp, 1031 offset_t off, 1032 struct as *as, 1033 caddr_t addr, 1034 size_t len, 1035 uchar_t prot, 1036 uchar_t maxprot, 1037 uint_t flags, 1038 cred_t *cr, 1039 caller_context_t *ct) 1040 { 1041 return (ENOSYS); 1042 } 1043 1044 /* ARGSUSED */ 1045 int 1046 fop_delmap( 1047 vnode_t *vp, 1048 offset_t off, 1049 struct as *as, 1050 caddr_t addr, 1051 size_t len, 1052 uint_t prot, 1053 uint_t maxprot, 1054 uint_t flags, 1055 cred_t *cr, 1056 caller_context_t *ct) 1057 { 1058 return (ENOSYS); 1059 } 1060 1061 /* ARGSUSED */ 1062 int 1063 fop_poll( 1064 vnode_t *vp, 1065 short events, 1066 int anyyet, 1067 short *reventsp, 1068 struct pollhead **phpp, 1069 caller_context_t *ct) 1070 { 1071 *reventsp = 0; 1072 if (events & POLLIN) 1073 *reventsp |= POLLIN; 1074 if (events & POLLRDNORM) 1075 *reventsp |= POLLRDNORM; 1076 if (events & POLLRDBAND) 1077 *reventsp |= POLLRDBAND; 1078 if (events & POLLOUT) 1079 *reventsp |= POLLOUT; 1080 if (events & POLLWRBAND) 1081 *reventsp |= POLLWRBAND; 1082 *phpp = NULL; /* or fake_pollhead? */ 1083 1084 return (0); 1085 } 1086 1087 /* ARGSUSED */ 1088 int 1089 fop_dump( 1090 vnode_t *vp, 1091 caddr_t addr, 1092 offset_t lbdn, 1093 offset_t dblks, 1094 caller_context_t *ct) 1095 { 1096 return (ENOSYS); 1097 } 1098 1099 /* 1100 * See fs_pathconf 1101 */ 1102 /* ARGSUSED */ 1103 int 1104 fop_pathconf( 1105 vnode_t *vp, 1106 int cmd, 1107 ulong_t *valp, 1108 cred_t *cr, 1109 caller_context_t *ct) 1110 { 1111 register ulong_t val; 1112 register int error = 0; 1113 1114 switch (cmd) { 1115 1116 case _PC_LINK_MAX: 1117 val = MAXLINK; 1118 break; 1119 1120 case _PC_MAX_CANON: 1121 val = MAX_CANON; 1122 break; 1123 1124 case _PC_MAX_INPUT: 1125 val = MAX_INPUT; 1126 break; 1127 1128 case _PC_NAME_MAX: 1129 val = MAXNAMELEN; 1130 break; 1131 1132 case _PC_PATH_MAX: 1133 case _PC_SYMLINK_MAX: 1134 val = MAXPATHLEN; 1135 break; 1136 1137 case _PC_PIPE_BUF: 1138 val = PIPE_BUF; 1139 break; 1140 1141 case _PC_NO_TRUNC: 1142 val = (ulong_t)-1; 1143 break; 1144 1145 case _PC_VDISABLE: 1146 val = _POSIX_VDISABLE; 1147 break; 1148 1149 case _PC_CHOWN_RESTRICTED: 1150 val = 1; /* chown restricted enabled */ 1151 break; 1152 1153 case _PC_FILESIZEBITS: 1154 val = (ulong_t)-1; /* large file support */ 1155 break; 1156 1157 case _PC_ACL_ENABLED: 1158 val = _ACL_ACE_ENABLED; 1159 break; 1160 1161 case _PC_CASE_BEHAVIOR: 1162 val = _CASE_SENSITIVE; 1163 break; 1164 1165 case _PC_SATTR_ENABLED: 1166 case _PC_SATTR_EXISTS: 1167 val = 0; 1168 break; 1169 1170 case _PC_ACCESS_FILTERING: 1171 val = 0; 1172 break; 1173 1174 default: 1175 error = EINVAL; 1176 break; 1177 } 1178 1179 if (error == 0) 1180 *valp = val; 1181 return (error); 1182 } 1183 1184 /* ARGSUSED */ 1185 int 1186 fop_pageio( 1187 vnode_t *vp, 1188 struct page *pp, 1189 u_offset_t io_off, 1190 size_t io_len, 1191 int flags, 1192 cred_t *cr, 1193 caller_context_t *ct) 1194 { 1195 return (ENOSYS); 1196 } 1197 1198 /* ARGSUSED */ 1199 int 1200 fop_dumpctl( 1201 vnode_t *vp, 1202 int action, 1203 offset_t *blkp, 1204 caller_context_t *ct) 1205 { 1206 return (ENOSYS); 1207 } 1208 1209 /* ARGSUSED */ 1210 void 1211 fop_dispose( 1212 vnode_t *vp, 1213 struct page *pp, 1214 int flag, 1215 int dn, 1216 cred_t *cr, 1217 caller_context_t *ct) 1218 { 1219 } 1220 1221 /* ARGSUSED */ 1222 int 1223 fop_setsecattr( 1224 vnode_t *vp, 1225 vsecattr_t *vsap, 1226 int flag, 1227 cred_t *cr, 1228 caller_context_t *ct) 1229 { 1230 return (0); 1231 } 1232 1233 /* 1234 * Fake up just enough of this so we can test get/set SDs. 1235 */ 1236 /* ARGSUSED */ 1237 int 1238 fop_getsecattr( 1239 vnode_t *vp, 1240 vsecattr_t *vsecattr, 1241 int flag, 1242 cred_t *cr, 1243 caller_context_t *ct) 1244 { 1245 1246 vsecattr->vsa_aclcnt = 0; 1247 vsecattr->vsa_aclentsz = 0; 1248 vsecattr->vsa_aclentp = NULL; 1249 vsecattr->vsa_dfaclcnt = 0; /* Default ACLs are not fabricated */ 1250 vsecattr->vsa_dfaclentp = NULL; 1251 1252 if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) { 1253 aclent_t *aclentp; 1254 size_t aclsize; 1255 1256 aclsize = sizeof (aclent_t); 1257 vsecattr->vsa_aclcnt = 1; 1258 vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP); 1259 aclentp = vsecattr->vsa_aclentp; 1260 1261 aclentp->a_type = OTHER_OBJ; 1262 aclentp->a_perm = 0777; 1263 aclentp->a_id = (gid_t)-1; 1264 aclentp++; 1265 } else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) { 1266 ace_t *acl; 1267 1268 acl = kmem_alloc(sizeof (ace_t), KM_SLEEP); 1269 acl->a_who = (uint32_t)-1; 1270 acl->a_type = ACE_ACCESS_ALLOWED_ACE_TYPE; 1271 acl->a_flags = ACE_EVERYONE; 1272 acl->a_access_mask = ACE_MODIFY_PERMS; 1273 1274 vsecattr->vsa_aclentp = (void *)acl; 1275 vsecattr->vsa_aclcnt = 1; 1276 vsecattr->vsa_aclentsz = sizeof (ace_t); 1277 } 1278 1279 return (0); 1280 } 1281 1282 /* ARGSUSED */ 1283 int 1284 fop_shrlock( 1285 vnode_t *vp, 1286 int cmd, 1287 struct shrlock *shr, 1288 int flag, 1289 cred_t *cr, 1290 caller_context_t *ct) 1291 { 1292 1293 switch (cmd) { 1294 case F_SHARE: 1295 case F_SHARE_NBMAND: 1296 case F_UNSHARE: 1297 break; 1298 default: 1299 return (EINVAL); 1300 } 1301 1302 if (!fop_shrlock_enable) 1303 return (0); 1304 1305 if (fcntl(vp->v_fd, cmd, shr) == -1) 1306 return (errno); 1307 1308 return (0); 1309 } 1310 1311 /* ARGSUSED */ 1312 int 1313 fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm, 1314 caller_context_t *ct) 1315 { 1316 return (ENOSYS); 1317 } 1318 1319 /* ARGSUSED */ 1320 int 1321 fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *uiop, cred_t *cr, 1322 caller_context_t *ct) 1323 { 1324 return (ENOSYS); 1325 } 1326 1327 /* ARGSUSED */ 1328 int 1329 fop_retzcbuf(vnode_t *vp, xuio_t *uiop, cred_t *cr, caller_context_t *ct) 1330 { 1331 return (ENOSYS); 1332 } 1333 1334 1335 /* 1336 * *************************************************************** 1337 * other VOP support 1338 */ 1339 1340 /* 1341 * Convert stat(2) formats to vnode types and vice versa. (Knows about 1342 * numerical order of S_IFMT and vnode types.) 1343 */ 1344 enum vtype iftovt_tab[] = { 1345 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 1346 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON 1347 }; 1348 1349 ushort_t vttoif_tab[] = { 1350 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 1351 S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0 1352 }; 1353 1354 /* 1355 * stat_to_vattr() 1356 * 1357 * Convert from a stat structure to an vattr structure 1358 * Note: only set fields according to va_mask 1359 */ 1360 1361 int 1362 stat_to_vattr(const struct stat *st, vattr_t *vap) 1363 { 1364 1365 if (vap->va_mask & AT_TYPE) 1366 vap->va_type = IFTOVT(st->st_mode); 1367 1368 if (vap->va_mask & AT_MODE) 1369 vap->va_mode = st->st_mode; 1370 1371 if (vap->va_mask & AT_UID) 1372 vap->va_uid = st->st_uid; 1373 1374 if (vap->va_mask & AT_GID) 1375 vap->va_gid = st->st_gid; 1376 1377 if (vap->va_mask & AT_FSID) 1378 vap->va_fsid = st->st_dev; 1379 1380 if (vap->va_mask & AT_NODEID) 1381 vap->va_nodeid = st->st_ino; 1382 1383 if (vap->va_mask & AT_NLINK) 1384 vap->va_nlink = st->st_nlink; 1385 1386 if (vap->va_mask & AT_SIZE) 1387 vap->va_size = (u_offset_t)st->st_size; 1388 1389 if (vap->va_mask & AT_ATIME) { 1390 vap->va_atime.tv_sec = st->st_atim.tv_sec; 1391 vap->va_atime.tv_nsec = st->st_atim.tv_nsec; 1392 } 1393 1394 if (vap->va_mask & AT_MTIME) { 1395 vap->va_mtime.tv_sec = st->st_mtim.tv_sec; 1396 vap->va_mtime.tv_nsec = st->st_mtim.tv_nsec; 1397 } 1398 1399 if (vap->va_mask & AT_CTIME) { 1400 vap->va_ctime.tv_sec = st->st_ctim.tv_sec; 1401 vap->va_ctime.tv_nsec = st->st_ctim.tv_nsec; 1402 } 1403 1404 if (vap->va_mask & AT_RDEV) 1405 vap->va_rdev = st->st_rdev; 1406 1407 if (vap->va_mask & AT_BLKSIZE) 1408 vap->va_blksize = (uint_t)st->st_blksize; 1409 1410 1411 if (vap->va_mask & AT_NBLOCKS) 1412 vap->va_nblocks = (u_longlong_t)st->st_blocks; 1413 1414 if (vap->va_mask & AT_SEQ) 1415 vap->va_seq = 0; 1416 1417 return (0); 1418 } 1419 1420 /* ARGSUSED */ 1421 void 1422 flk_init_callback(flk_callback_t *flk_cb, 1423 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata) 1424 { 1425 } 1426 1427 void 1428 vn_hold(vnode_t *vp) 1429 { 1430 mutex_enter(&vp->v_lock); 1431 vp->v_count++; 1432 mutex_exit(&vp->v_lock); 1433 } 1434 1435 void 1436 vn_rele(vnode_t *vp) 1437 { 1438 VERIFY3U(vp->v_count, !=, 0); 1439 mutex_enter(&vp->v_lock); 1440 if (vp->v_count == 1) { 1441 mutex_exit(&vp->v_lock); 1442 fop_inactive(vp, NULL, NULL); 1443 } else { 1444 vp->v_count--; 1445 mutex_exit(&vp->v_lock); 1446 } 1447 } 1448 1449 int 1450 vn_has_other_opens( 1451 vnode_t *vp, 1452 v_mode_t mode) 1453 { 1454 1455 switch (mode) { 1456 case V_WRITE: 1457 if (vp->v_wrcnt > 1) 1458 return (V_TRUE); 1459 break; 1460 case V_RDORWR: 1461 if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1)) 1462 return (V_TRUE); 1463 break; 1464 case V_RDANDWR: 1465 if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1)) 1466 return (V_TRUE); 1467 break; 1468 case V_READ: 1469 if (vp->v_rdcnt > 1) 1470 return (V_TRUE); 1471 break; 1472 } 1473 1474 return (V_FALSE); 1475 } 1476 1477 /* 1478 * vn_is_opened() checks whether a particular file is opened and 1479 * whether the open is for read and/or write. 1480 * 1481 * Vnode counts are only kept on regular files (v_type=VREG). 1482 */ 1483 int 1484 vn_is_opened( 1485 vnode_t *vp, 1486 v_mode_t mode) 1487 { 1488 1489 ASSERT(vp != NULL); 1490 1491 switch (mode) { 1492 case V_WRITE: 1493 if (vp->v_wrcnt) 1494 return (V_TRUE); 1495 break; 1496 case V_RDANDWR: 1497 if (vp->v_rdcnt && vp->v_wrcnt) 1498 return (V_TRUE); 1499 break; 1500 case V_RDORWR: 1501 if (vp->v_rdcnt || vp->v_wrcnt) 1502 return (V_TRUE); 1503 break; 1504 case V_READ: 1505 if (vp->v_rdcnt) 1506 return (V_TRUE); 1507 break; 1508 } 1509 1510 return (V_FALSE); 1511 } 1512 1513 /* 1514 * vn_is_mapped() checks whether a particular file is mapped and whether 1515 * the file is mapped read and/or write. 1516 */ 1517 /* ARGSUSED */ 1518 int 1519 vn_is_mapped( 1520 vnode_t *vp, 1521 v_mode_t mode) 1522 { 1523 return (V_FALSE); 1524 } 1525