/* Source: illumos-gate usr/src/uts/common/syscall/rw.c (revision 81c3d085) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2020, Joyent, Inc.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#include <sys/param.h>
#include <sys/isa_defs.h>
#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/sysmacros.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/cpuvar.h>
#include <sys/uio.h>
#include <sys/debug.h>
#include <sys/rctl.h>
#include <sys/nbmlock.h>
#include <sys/limits.h>

/*
 * Default upper bound, in bytes, on the size of a read() request that is
 * still flagged UIO_COPY_CACHED; read() marks larger requests
 * UIO_COPY_DEFAULT instead.
 */
#define	COPYOUT_MAX_CACHE	(1<<17)		/* 128K */

/*
 * Run-time copy of COPYOUT_MAX_CACHE.  Kept as a global variable (rather
 * than using the macro directly) so it's patchable.
 */
size_t copyout_max_cached = COPYOUT_MAX_CACHE;

/*
 * read, write, pread, pwrite, readv, and writev syscalls.
 *
 * 64-bit open:	all opens are large file opens.
 * Large Files: the behaviour of read depends on whether the fd
 *		corresponds to large open or not.
 * 32-bit open:	FOFFMAX flag not set.
 *		read until MAXOFF32_T - 1 and read at MAXOFF32_T returns
 *		EOVERFLOW if count is non-zero and if size of file
 *		is > MAXOFF32_T. If size of file is <= MAXOFF32_T read
 *		at >= MAXOFF32_T returns EOF.
 */

727c478bd9Sstevel@tonic-gate /*
737c478bd9Sstevel@tonic-gate  * Native system call
747c478bd9Sstevel@tonic-gate  */
757c478bd9Sstevel@tonic-gate ssize_t
read(int fdes,void * cbuf,size_t count)767c478bd9Sstevel@tonic-gate read(int fdes, void *cbuf, size_t count)
777c478bd9Sstevel@tonic-gate {
787c478bd9Sstevel@tonic-gate 	struct uio auio;
797c478bd9Sstevel@tonic-gate 	struct iovec aiov;
807c478bd9Sstevel@tonic-gate 	file_t *fp;
817c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
827c478bd9Sstevel@tonic-gate 	struct cpu *cp;
837c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
847c478bd9Sstevel@tonic-gate 	ssize_t cnt, bcount;
857c478bd9Sstevel@tonic-gate 	int error = 0;
867c478bd9Sstevel@tonic-gate 	u_offset_t fileoff;
877c478bd9Sstevel@tonic-gate 	int in_crit = 0;
887c478bd9Sstevel@tonic-gate 
897c478bd9Sstevel@tonic-gate 	if ((cnt = (ssize_t)count) < 0)
907c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
917c478bd9Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL)
927c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
937c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & FREAD) == 0) {
947c478bd9Sstevel@tonic-gate 		error = EBADF;
957c478bd9Sstevel@tonic-gate 		goto out;
967c478bd9Sstevel@tonic-gate 	}
977c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
987c478bd9Sstevel@tonic-gate 
997c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG && cnt == 0) {
1007c478bd9Sstevel@tonic-gate 		goto out;
1017c478bd9Sstevel@tonic-gate 	}
1027c478bd9Sstevel@tonic-gate 
1037c478bd9Sstevel@tonic-gate 	rwflag = 0;
1047c478bd9Sstevel@tonic-gate 	aiov.iov_base = cbuf;
1057c478bd9Sstevel@tonic-gate 	aiov.iov_len = cnt;
1067c478bd9Sstevel@tonic-gate 
1077c478bd9Sstevel@tonic-gate 	/*
1087c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
1097c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with write() calls.
1107c478bd9Sstevel@tonic-gate 	 */
1117c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
1127c478bd9Sstevel@tonic-gate 		int svmand;
1137c478bd9Sstevel@tonic-gate 
1147c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
1157c478bd9Sstevel@tonic-gate 		in_crit = 1;
1167c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
1177c478bd9Sstevel@tonic-gate 		if (error != 0)
1187c478bd9Sstevel@tonic-gate 			goto out;
119da6c28aaSamw 		if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand,
120da6c28aaSamw 		    NULL)) {
1217c478bd9Sstevel@tonic-gate 			error = EACCES;
1227c478bd9Sstevel@tonic-gate 			goto out;
1237c478bd9Sstevel@tonic-gate 		}
1247c478bd9Sstevel@tonic-gate 	}
1257c478bd9Sstevel@tonic-gate 
1267c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
1277c478bd9Sstevel@tonic-gate 
1287c478bd9Sstevel@tonic-gate 	/*
1297c478bd9Sstevel@tonic-gate 	 * We do the following checks inside VOP_RWLOCK so as to
1307c478bd9Sstevel@tonic-gate 	 * prevent file size from changing while these checks are
1317c478bd9Sstevel@tonic-gate 	 * being done. Also, we load fp's offset to the local
1327c478bd9Sstevel@tonic-gate 	 * variable fileoff because we can have a parallel lseek
1337c478bd9Sstevel@tonic-gate 	 * going on (f_offset is not protected by any lock) which
1347c478bd9Sstevel@tonic-gate 	 * could change f_offset. We need to see the value only
1357c478bd9Sstevel@tonic-gate 	 * once here and take a decision. Seeing it more than once
1367c478bd9Sstevel@tonic-gate 	 * can lead to incorrect functionality.
1377c478bd9Sstevel@tonic-gate 	 */
1387c478bd9Sstevel@tonic-gate 
1397c478bd9Sstevel@tonic-gate 	fileoff = (u_offset_t)fp->f_offset;
1407c478bd9Sstevel@tonic-gate 	if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) {
1417c478bd9Sstevel@tonic-gate 		struct vattr va;
1427c478bd9Sstevel@tonic-gate 		va.va_mask = AT_SIZE;
143da6c28aaSamw 		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL)))  {
1447c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
1457c478bd9Sstevel@tonic-gate 			goto out;
1467c478bd9Sstevel@tonic-gate 		}
1477c478bd9Sstevel@tonic-gate 		if (fileoff >= va.va_size) {
1487c478bd9Sstevel@tonic-gate 			cnt = 0;
1497c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
1507c478bd9Sstevel@tonic-gate 			goto out;
1517c478bd9Sstevel@tonic-gate 		} else {
1527c478bd9Sstevel@tonic-gate 			error = EOVERFLOW;
1537c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
1547c478bd9Sstevel@tonic-gate 			goto out;
1557c478bd9Sstevel@tonic-gate 		}
1567c478bd9Sstevel@tonic-gate 	}
1577c478bd9Sstevel@tonic-gate 	if ((vp->v_type == VREG) &&
1587c478bd9Sstevel@tonic-gate 	    (fileoff + cnt > OFFSET_MAX(fp))) {
1597c478bd9Sstevel@tonic-gate 		cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1607c478bd9Sstevel@tonic-gate 	}
1617c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
1627c478bd9Sstevel@tonic-gate 	auio.uio_iov = &aiov;
1637c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = 1;
1647c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount = cnt;
1657c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
1667c478bd9Sstevel@tonic-gate 	auio.uio_llimit = MAXOFFSET_T;
1677c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
1687c478bd9Sstevel@tonic-gate 	/*
1697c478bd9Sstevel@tonic-gate 	 * Only use bypass caches when the count is large enough
1707c478bd9Sstevel@tonic-gate 	 */
17113506d1eSmaybee 	if (bcount <= copyout_max_cached)
1727c478bd9Sstevel@tonic-gate 		auio.uio_extflg = UIO_COPY_CACHED;
1737c478bd9Sstevel@tonic-gate 	else
1747c478bd9Sstevel@tonic-gate 		auio.uio_extflg = UIO_COPY_DEFAULT;
1757c478bd9Sstevel@tonic-gate 
1767c478bd9Sstevel@tonic-gate 	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1777c478bd9Sstevel@tonic-gate 
1787c478bd9Sstevel@tonic-gate 	/* If read sync is not asked for, filter sync flags */
1797c478bd9Sstevel@tonic-gate 	if ((ioflag & FRSYNC) == 0)
1807c478bd9Sstevel@tonic-gate 		ioflag &= ~(FSYNC|FDSYNC);
1817c478bd9Sstevel@tonic-gate 	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1827c478bd9Sstevel@tonic-gate 	cnt -= auio.uio_resid;
1837c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
1847c478bd9Sstevel@tonic-gate 	cp = CPU;
1857c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, sysread, 1);
1867c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt);
1877c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
1887c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
1897c478bd9Sstevel@tonic-gate 
1907c478bd9Sstevel@tonic-gate 	if (vp->v_type == VFIFO)	/* Backward compatibility */
1917c478bd9Sstevel@tonic-gate 		fp->f_offset = cnt;
1927c478bd9Sstevel@tonic-gate 	else if (((fp->f_flag & FAPPEND) == 0) ||
1937c478bd9Sstevel@tonic-gate 	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
1947c478bd9Sstevel@tonic-gate 		fp->f_offset = auio.uio_loffset;
1957c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
1967c478bd9Sstevel@tonic-gate 
1977c478bd9Sstevel@tonic-gate 	if (error == EINTR && cnt != 0)
1987c478bd9Sstevel@tonic-gate 		error = 0;
1997c478bd9Sstevel@tonic-gate out:
2007c478bd9Sstevel@tonic-gate 	if (in_crit)
2017c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
2027c478bd9Sstevel@tonic-gate 	releasef(fdes);
2037c478bd9Sstevel@tonic-gate 	if (error)
2047c478bd9Sstevel@tonic-gate 		return (set_errno(error));
2057c478bd9Sstevel@tonic-gate 	return (cnt);
2067c478bd9Sstevel@tonic-gate }
2077c478bd9Sstevel@tonic-gate 
2087c478bd9Sstevel@tonic-gate /*
2097c478bd9Sstevel@tonic-gate  * Native system call
2107c478bd9Sstevel@tonic-gate  */
2117c478bd9Sstevel@tonic-gate ssize_t
write(int fdes,void * cbuf,size_t count)2127c478bd9Sstevel@tonic-gate write(int fdes, void *cbuf, size_t count)
2137c478bd9Sstevel@tonic-gate {
2147c478bd9Sstevel@tonic-gate 	struct uio auio;
2157c478bd9Sstevel@tonic-gate 	struct iovec aiov;
2167c478bd9Sstevel@tonic-gate 	file_t *fp;
2177c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
2187c478bd9Sstevel@tonic-gate 	struct cpu *cp;
2197c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
2207c478bd9Sstevel@tonic-gate 	ssize_t cnt, bcount;
2217c478bd9Sstevel@tonic-gate 	int error = 0;
2227c478bd9Sstevel@tonic-gate 	u_offset_t fileoff;
2237c478bd9Sstevel@tonic-gate 	int in_crit = 0;
2247c478bd9Sstevel@tonic-gate 
2257c478bd9Sstevel@tonic-gate 	if ((cnt = (ssize_t)count) < 0)
2267c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
2277c478bd9Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL)
2287c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
2297c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & FWRITE) == 0) {
2307c478bd9Sstevel@tonic-gate 		error = EBADF;
2317c478bd9Sstevel@tonic-gate 		goto out;
2327c478bd9Sstevel@tonic-gate 	}
2337c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
2347c478bd9Sstevel@tonic-gate 
2357c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG && cnt == 0) {
2367c478bd9Sstevel@tonic-gate 		goto out;
2377c478bd9Sstevel@tonic-gate 	}
2387c478bd9Sstevel@tonic-gate 
2397c478bd9Sstevel@tonic-gate 	rwflag = 1;
2407c478bd9Sstevel@tonic-gate 	aiov.iov_base = cbuf;
2417c478bd9Sstevel@tonic-gate 	aiov.iov_len = cnt;
2427c478bd9Sstevel@tonic-gate 
2437c478bd9Sstevel@tonic-gate 	/*
2447c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
2457c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with ufs.
2467c478bd9Sstevel@tonic-gate 	 */
2477c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
2487c478bd9Sstevel@tonic-gate 		int svmand;
2497c478bd9Sstevel@tonic-gate 
2507c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
2517c478bd9Sstevel@tonic-gate 		in_crit = 1;
2527c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
2537c478bd9Sstevel@tonic-gate 		if (error != 0)
2547c478bd9Sstevel@tonic-gate 			goto out;
255da6c28aaSamw 		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand,
256da6c28aaSamw 		    NULL)) {
2577c478bd9Sstevel@tonic-gate 			error = EACCES;
2587c478bd9Sstevel@tonic-gate 			goto out;
2597c478bd9Sstevel@tonic-gate 		}
2607c478bd9Sstevel@tonic-gate 	}
2617c478bd9Sstevel@tonic-gate 
2627c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
2637c478bd9Sstevel@tonic-gate 
2647c478bd9Sstevel@tonic-gate 	fileoff = fp->f_offset;
2657c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG) {
2667c478bd9Sstevel@tonic-gate 
2677c478bd9Sstevel@tonic-gate 		/*
2687c478bd9Sstevel@tonic-gate 		 * We raise psignal if write for >0 bytes causes
2697c478bd9Sstevel@tonic-gate 		 * it to exceed the ulimit.
2707c478bd9Sstevel@tonic-gate 		 */
2717c478bd9Sstevel@tonic-gate 		if (fileoff >= curproc->p_fsz_ctl) {
2727c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
2737c478bd9Sstevel@tonic-gate 
2747c478bd9Sstevel@tonic-gate 			mutex_enter(&curproc->p_lock);
2757c478bd9Sstevel@tonic-gate 			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
2767c478bd9Sstevel@tonic-gate 			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
2777c478bd9Sstevel@tonic-gate 			mutex_exit(&curproc->p_lock);
2787c478bd9Sstevel@tonic-gate 
2797c478bd9Sstevel@tonic-gate 			error = EFBIG;
2807c478bd9Sstevel@tonic-gate 			goto out;
2817c478bd9Sstevel@tonic-gate 		}
2827c478bd9Sstevel@tonic-gate 		/*
2837c478bd9Sstevel@tonic-gate 		 * We return EFBIG if write is done at an offset
2847c478bd9Sstevel@tonic-gate 		 * greater than the offset maximum for this file structure.
2857c478bd9Sstevel@tonic-gate 		 */
2867c478bd9Sstevel@tonic-gate 
2877c478bd9Sstevel@tonic-gate 		if (fileoff >= OFFSET_MAX(fp)) {
2887c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
2897c478bd9Sstevel@tonic-gate 			error = EFBIG;
2907c478bd9Sstevel@tonic-gate 			goto out;
2917c478bd9Sstevel@tonic-gate 		}
2927c478bd9Sstevel@tonic-gate 		/*
2937c478bd9Sstevel@tonic-gate 		 * Limit the bytes to be written  upto offset maximum for
2947c478bd9Sstevel@tonic-gate 		 * this open file structure.
2957c478bd9Sstevel@tonic-gate 		 */
2967c478bd9Sstevel@tonic-gate 		if (fileoff + cnt > OFFSET_MAX(fp))
2977c478bd9Sstevel@tonic-gate 			cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
2987c478bd9Sstevel@tonic-gate 	}
2997c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
3007c478bd9Sstevel@tonic-gate 	auio.uio_iov = &aiov;
3017c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = 1;
3027c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount = cnt;
3037c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
3047c478bd9Sstevel@tonic-gate 	auio.uio_llimit = curproc->p_fsz_ctl;
3057c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
3067c478bd9Sstevel@tonic-gate 	auio.uio_extflg = UIO_COPY_DEFAULT;
3077c478bd9Sstevel@tonic-gate 
3087c478bd9Sstevel@tonic-gate 	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
3097c478bd9Sstevel@tonic-gate 
3107c478bd9Sstevel@tonic-gate 	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
3117c478bd9Sstevel@tonic-gate 	cnt -= auio.uio_resid;
3127c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
3137c478bd9Sstevel@tonic-gate 	cp = CPU;
3147c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
3157c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt);
3167c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
3177c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
3187c478bd9Sstevel@tonic-gate 
3197c478bd9Sstevel@tonic-gate 	if (vp->v_type == VFIFO)	/* Backward compatibility */
3207c478bd9Sstevel@tonic-gate 		fp->f_offset = cnt;
3217c478bd9Sstevel@tonic-gate 	else if (((fp->f_flag & FAPPEND) == 0) ||
3227c478bd9Sstevel@tonic-gate 	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
3237c478bd9Sstevel@tonic-gate 		fp->f_offset = auio.uio_loffset;
3247c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
3257c478bd9Sstevel@tonic-gate 
3267c478bd9Sstevel@tonic-gate 	if (error == EINTR && cnt != 0)
3277c478bd9Sstevel@tonic-gate 		error = 0;
3287c478bd9Sstevel@tonic-gate out:
3297c478bd9Sstevel@tonic-gate 	if (in_crit)
3307c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
3317c478bd9Sstevel@tonic-gate 	releasef(fdes);
3327c478bd9Sstevel@tonic-gate 	if (error)
3337c478bd9Sstevel@tonic-gate 		return (set_errno(error));
3347c478bd9Sstevel@tonic-gate 	return (cnt);
3357c478bd9Sstevel@tonic-gate }
3367c478bd9Sstevel@tonic-gate 
3377c478bd9Sstevel@tonic-gate ssize_t
pread(int fdes,void * cbuf,size_t count,off_t offset)3387c478bd9Sstevel@tonic-gate pread(int fdes, void *cbuf, size_t count, off_t offset)
3397c478bd9Sstevel@tonic-gate {
3407c478bd9Sstevel@tonic-gate 	struct uio auio;
3417c478bd9Sstevel@tonic-gate 	struct iovec aiov;
3427c478bd9Sstevel@tonic-gate 	file_t *fp;
3437c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
3447c478bd9Sstevel@tonic-gate 	struct cpu *cp;
3457c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
3467c478bd9Sstevel@tonic-gate 	ssize_t bcount;
3477c478bd9Sstevel@tonic-gate 	int error = 0;
3487c478bd9Sstevel@tonic-gate 	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
3497c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
3507c478bd9Sstevel@tonic-gate 	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
3517c478bd9Sstevel@tonic-gate 	    MAXOFF32_T : MAXOFFSET_T;
3527c478bd9Sstevel@tonic-gate #else
3537c478bd9Sstevel@tonic-gate 	const u_offset_t maxoff = MAXOFF32_T;
3547c478bd9Sstevel@tonic-gate #endif
3557c478bd9Sstevel@tonic-gate 	int in_crit = 0;
3567c478bd9Sstevel@tonic-gate 
3577c478bd9Sstevel@tonic-gate 	if ((bcount = (ssize_t)count) < 0)
3587c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
3597c478bd9Sstevel@tonic-gate 
3607c478bd9Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL)
3617c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
3627c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
3637c478bd9Sstevel@tonic-gate 		error = EBADF;
3647c478bd9Sstevel@tonic-gate 		goto out;
3657c478bd9Sstevel@tonic-gate 	}
3667c478bd9Sstevel@tonic-gate 
3677c478bd9Sstevel@tonic-gate 	rwflag = 0;
3687c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
3697c478bd9Sstevel@tonic-gate 
3707c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG) {
3717c478bd9Sstevel@tonic-gate 
3727c478bd9Sstevel@tonic-gate 		if (bcount == 0)
3737c478bd9Sstevel@tonic-gate 			goto out;
3747c478bd9Sstevel@tonic-gate 
3757c478bd9Sstevel@tonic-gate 		/*
3767c478bd9Sstevel@tonic-gate 		 * Return EINVAL if an invalid offset comes to pread.
3777c478bd9Sstevel@tonic-gate 		 * Negative offset from user will cause this error.
3787c478bd9Sstevel@tonic-gate 		 */
3797c478bd9Sstevel@tonic-gate 
3807c478bd9Sstevel@tonic-gate 		if (fileoff > maxoff) {
3817c478bd9Sstevel@tonic-gate 			error = EINVAL;
3827c478bd9Sstevel@tonic-gate 			goto out;
3837c478bd9Sstevel@tonic-gate 		}
3847c478bd9Sstevel@tonic-gate 		/*
3857c478bd9Sstevel@tonic-gate 		 * Limit offset such that we don't read or write
3867c478bd9Sstevel@tonic-gate 		 * a file beyond the maximum offset representable in
3877c478bd9Sstevel@tonic-gate 		 * an off_t structure.
3887c478bd9Sstevel@tonic-gate 		 */
3897c478bd9Sstevel@tonic-gate 		if (fileoff + bcount > maxoff)
3907c478bd9Sstevel@tonic-gate 			bcount = (ssize_t)((offset_t)maxoff - fileoff);
3917c478bd9Sstevel@tonic-gate 	} else if (vp->v_type == VFIFO) {
3927c478bd9Sstevel@tonic-gate 		error = ESPIPE;
3937c478bd9Sstevel@tonic-gate 		goto out;
3947c478bd9Sstevel@tonic-gate 	}
3957c478bd9Sstevel@tonic-gate 
3967c478bd9Sstevel@tonic-gate 	/*
3977c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
3987c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with ufs.
3997c478bd9Sstevel@tonic-gate 	 */
4007c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
4017c478bd9Sstevel@tonic-gate 		int svmand;
4027c478bd9Sstevel@tonic-gate 
4037c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
4047c478bd9Sstevel@tonic-gate 		in_crit = 1;
4057c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
4067c478bd9Sstevel@tonic-gate 		if (error != 0)
4077c478bd9Sstevel@tonic-gate 			goto out;
408da6c28aaSamw 		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
409da6c28aaSamw 		    NULL)) {
4107c478bd9Sstevel@tonic-gate 			error = EACCES;
4117c478bd9Sstevel@tonic-gate 			goto out;
4127c478bd9Sstevel@tonic-gate 		}
4137c478bd9Sstevel@tonic-gate 	}
4147c478bd9Sstevel@tonic-gate 
4157c478bd9Sstevel@tonic-gate 	aiov.iov_base = cbuf;
4167c478bd9Sstevel@tonic-gate 	aiov.iov_len = bcount;
4177c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
4187c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) {
4197c478bd9Sstevel@tonic-gate 		struct vattr va;
4207c478bd9Sstevel@tonic-gate 		va.va_mask = AT_SIZE;
421da6c28aaSamw 		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
4227c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
4237c478bd9Sstevel@tonic-gate 			goto out;
4247c478bd9Sstevel@tonic-gate 		}
4257c478bd9Sstevel@tonic-gate 		VOP_RWUNLOCK(vp, rwflag, NULL);
4267c478bd9Sstevel@tonic-gate 
4277c478bd9Sstevel@tonic-gate 		/*
4287c478bd9Sstevel@tonic-gate 		 * We have to return EOF if fileoff is >= file size.
4297c478bd9Sstevel@tonic-gate 		 */
4307c478bd9Sstevel@tonic-gate 		if (fileoff >= va.va_size) {
4317c478bd9Sstevel@tonic-gate 			bcount = 0;
4327c478bd9Sstevel@tonic-gate 			goto out;
4337c478bd9Sstevel@tonic-gate 		}
4347c478bd9Sstevel@tonic-gate 
4357c478bd9Sstevel@tonic-gate 		/*
4367c478bd9Sstevel@tonic-gate 		 * File is greater than or equal to maxoff and therefore
4377c478bd9Sstevel@tonic-gate 		 * we return EOVERFLOW.
4387c478bd9Sstevel@tonic-gate 		 */
4397c478bd9Sstevel@tonic-gate 		error = EOVERFLOW;
4407c478bd9Sstevel@tonic-gate 		goto out;
4417c478bd9Sstevel@tonic-gate 	}
4427c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
4437c478bd9Sstevel@tonic-gate 	auio.uio_iov = &aiov;
4447c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = 1;
4457c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount;
4467c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
4477c478bd9Sstevel@tonic-gate 	auio.uio_llimit = MAXOFFSET_T;
4487c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
4497c478bd9Sstevel@tonic-gate 	auio.uio_extflg = UIO_COPY_CACHED;
4507c478bd9Sstevel@tonic-gate 
4517c478bd9Sstevel@tonic-gate 	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
4527c478bd9Sstevel@tonic-gate 
4537c478bd9Sstevel@tonic-gate 	/* If read sync is not asked for, filter sync flags */
4547c478bd9Sstevel@tonic-gate 	if ((ioflag & FRSYNC) == 0)
4557c478bd9Sstevel@tonic-gate 		ioflag &= ~(FSYNC|FDSYNC);
4567c478bd9Sstevel@tonic-gate 	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
4577c478bd9Sstevel@tonic-gate 	bcount -= auio.uio_resid;
4587c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
4597c478bd9Sstevel@tonic-gate 	cp = CPU;
4607c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, sysread, 1);
4617c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
4627c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
4637c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
4647c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
4657c478bd9Sstevel@tonic-gate 
4667c478bd9Sstevel@tonic-gate 	if (error == EINTR && bcount != 0)
4677c478bd9Sstevel@tonic-gate 		error = 0;
4687c478bd9Sstevel@tonic-gate out:
4697c478bd9Sstevel@tonic-gate 	if (in_crit)
4707c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
4717c478bd9Sstevel@tonic-gate 	releasef(fdes);
4727c478bd9Sstevel@tonic-gate 	if (error)
4737c478bd9Sstevel@tonic-gate 		return (set_errno(error));
4747c478bd9Sstevel@tonic-gate 	return (bcount);
4757c478bd9Sstevel@tonic-gate }
4767c478bd9Sstevel@tonic-gate 
4777c478bd9Sstevel@tonic-gate ssize_t
pwrite(int fdes,void * cbuf,size_t count,off_t offset)4787c478bd9Sstevel@tonic-gate pwrite(int fdes, void *cbuf, size_t count, off_t offset)
4797c478bd9Sstevel@tonic-gate {
4807c478bd9Sstevel@tonic-gate 	struct uio auio;
4817c478bd9Sstevel@tonic-gate 	struct iovec aiov;
4827c478bd9Sstevel@tonic-gate 	file_t *fp;
4837c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
4847c478bd9Sstevel@tonic-gate 	struct cpu *cp;
4857c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
4867c478bd9Sstevel@tonic-gate 	ssize_t bcount;
4877c478bd9Sstevel@tonic-gate 	int error = 0;
4887c478bd9Sstevel@tonic-gate 	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
4897c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
4907c478bd9Sstevel@tonic-gate 	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
4917c478bd9Sstevel@tonic-gate 	    MAXOFF32_T : MAXOFFSET_T;
4927c478bd9Sstevel@tonic-gate #else
4937c478bd9Sstevel@tonic-gate 	const u_offset_t maxoff = MAXOFF32_T;
4947c478bd9Sstevel@tonic-gate #endif
4957c478bd9Sstevel@tonic-gate 	int in_crit = 0;
4967c478bd9Sstevel@tonic-gate 
4977c478bd9Sstevel@tonic-gate 	if ((bcount = (ssize_t)count) < 0)
4987c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
4997c478bd9Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL)
5007c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
5017c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
5027c478bd9Sstevel@tonic-gate 		error = EBADF;
5037c478bd9Sstevel@tonic-gate 		goto out;
5047c478bd9Sstevel@tonic-gate 	}
5057c478bd9Sstevel@tonic-gate 
5067c478bd9Sstevel@tonic-gate 	rwflag = 1;
5077c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
5087c478bd9Sstevel@tonic-gate 
5097c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG) {
5107c478bd9Sstevel@tonic-gate 
5117c478bd9Sstevel@tonic-gate 		if (bcount == 0)
5127c478bd9Sstevel@tonic-gate 			goto out;
5137c478bd9Sstevel@tonic-gate 
5147c478bd9Sstevel@tonic-gate 		/*
5157c478bd9Sstevel@tonic-gate 		 * return EINVAL for offsets that cannot be
5167c478bd9Sstevel@tonic-gate 		 * represented in an off_t.
5177c478bd9Sstevel@tonic-gate 		 */
5187c478bd9Sstevel@tonic-gate 		if (fileoff > maxoff) {
5197c478bd9Sstevel@tonic-gate 			error = EINVAL;
5207c478bd9Sstevel@tonic-gate 			goto out;
5217c478bd9Sstevel@tonic-gate 		}
5227c478bd9Sstevel@tonic-gate 		/*
5237c478bd9Sstevel@tonic-gate 		 * Take appropriate action if we are trying to write above the
5247c478bd9Sstevel@tonic-gate 		 * resource limit.
5257c478bd9Sstevel@tonic-gate 		 */
5267c478bd9Sstevel@tonic-gate 		if (fileoff >= curproc->p_fsz_ctl) {
5277c478bd9Sstevel@tonic-gate 			mutex_enter(&curproc->p_lock);
5287c478bd9Sstevel@tonic-gate 			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
5297c478bd9Sstevel@tonic-gate 			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
5307c478bd9Sstevel@tonic-gate 			mutex_exit(&curproc->p_lock);
5317c478bd9Sstevel@tonic-gate 
5327c478bd9Sstevel@tonic-gate 			error = EFBIG;
5337c478bd9Sstevel@tonic-gate 			goto out;
5347c478bd9Sstevel@tonic-gate 		}
5357c478bd9Sstevel@tonic-gate 		/*
5367c478bd9Sstevel@tonic-gate 		 * Don't allow pwrite to cause file sizes to exceed
5377c478bd9Sstevel@tonic-gate 		 * maxoff.
5387c478bd9Sstevel@tonic-gate 		 */
5397c478bd9Sstevel@tonic-gate 		if (fileoff == maxoff) {
5407c478bd9Sstevel@tonic-gate 			error = EFBIG;
5417c478bd9Sstevel@tonic-gate 			goto out;
5427c478bd9Sstevel@tonic-gate 		}
5437c478bd9Sstevel@tonic-gate 		if (fileoff + count > maxoff)
5447c478bd9Sstevel@tonic-gate 			bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
5457c478bd9Sstevel@tonic-gate 	} else if (vp->v_type == VFIFO) {
5467c478bd9Sstevel@tonic-gate 		error = ESPIPE;
5477c478bd9Sstevel@tonic-gate 		goto out;
5487c478bd9Sstevel@tonic-gate 	}
5497c478bd9Sstevel@tonic-gate 
5507c478bd9Sstevel@tonic-gate 	/*
5517c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
5527c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with ufs.
5537c478bd9Sstevel@tonic-gate 	 */
5547c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
5557c478bd9Sstevel@tonic-gate 		int svmand;
5567c478bd9Sstevel@tonic-gate 
5577c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
5587c478bd9Sstevel@tonic-gate 		in_crit = 1;
5597c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
5607c478bd9Sstevel@tonic-gate 		if (error != 0)
5617c478bd9Sstevel@tonic-gate 			goto out;
562da6c28aaSamw 		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
563da6c28aaSamw 		    NULL)) {
5647c478bd9Sstevel@tonic-gate 			error = EACCES;
5657c478bd9Sstevel@tonic-gate 			goto out;
5667c478bd9Sstevel@tonic-gate 		}
5677c478bd9Sstevel@tonic-gate 	}
5687c478bd9Sstevel@tonic-gate 
5697c478bd9Sstevel@tonic-gate 	aiov.iov_base = cbuf;
5707c478bd9Sstevel@tonic-gate 	aiov.iov_len = bcount;
5717c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
5727c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
5737c478bd9Sstevel@tonic-gate 	auio.uio_iov = &aiov;
5747c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = 1;
5757c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount;
5767c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
5777c478bd9Sstevel@tonic-gate 	auio.uio_llimit = curproc->p_fsz_ctl;
5787c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
5797c478bd9Sstevel@tonic-gate 	auio.uio_extflg = UIO_COPY_CACHED;
5807c478bd9Sstevel@tonic-gate 
5814d86dd30Sraf 	/*
5824d86dd30Sraf 	 * The SUSv4 POSIX specification states:
5834d86dd30Sraf 	 *	The pwrite() function shall be equivalent to write(), except
5844d86dd30Sraf 	 *	that it writes into a given position and does not change
5854d86dd30Sraf 	 *	the file offset (regardless of whether O_APPEND is set).
5864d86dd30Sraf 	 * To make this be true, we omit the FAPPEND flag from ioflag.
5874d86dd30Sraf 	 */
5884d86dd30Sraf 	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
5897c478bd9Sstevel@tonic-gate 
5907c478bd9Sstevel@tonic-gate 	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
5917c478bd9Sstevel@tonic-gate 	bcount -= auio.uio_resid;
5927c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
5937c478bd9Sstevel@tonic-gate 	cp = CPU;
5947c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
5957c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
5967c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
5977c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
5987c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
5997c478bd9Sstevel@tonic-gate 
6007c478bd9Sstevel@tonic-gate 	if (error == EINTR && bcount != 0)
6017c478bd9Sstevel@tonic-gate 		error = 0;
6027c478bd9Sstevel@tonic-gate out:
6037c478bd9Sstevel@tonic-gate 	if (in_crit)
6047c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
6057c478bd9Sstevel@tonic-gate 	releasef(fdes);
6067c478bd9Sstevel@tonic-gate 	if (error)
6077c478bd9Sstevel@tonic-gate 		return (set_errno(error));
6087c478bd9Sstevel@tonic-gate 	return (bcount);
6097c478bd9Sstevel@tonic-gate }
6107c478bd9Sstevel@tonic-gate 
6117c478bd9Sstevel@tonic-gate ssize_t
readv(int fdes,struct iovec * iovp,int iovcnt)6127c478bd9Sstevel@tonic-gate readv(int fdes, struct iovec *iovp, int iovcnt)
6137c478bd9Sstevel@tonic-gate {
6147c478bd9Sstevel@tonic-gate 	struct uio auio;
6158e935259SBryan Cantrill 	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
6168e935259SBryan Cantrill 	int aiovlen = 0;
6177c478bd9Sstevel@tonic-gate 	file_t *fp;
6187c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
6197c478bd9Sstevel@tonic-gate 	struct cpu *cp;
6207c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
6217c478bd9Sstevel@tonic-gate 	ssize_t count, bcount;
6227c478bd9Sstevel@tonic-gate 	int error = 0;
6237c478bd9Sstevel@tonic-gate 	int i;
6247c478bd9Sstevel@tonic-gate 	u_offset_t fileoff;
6257c478bd9Sstevel@tonic-gate 	int in_crit = 0;
6267c478bd9Sstevel@tonic-gate 
6278e935259SBryan Cantrill 	if (iovcnt <= 0 || iovcnt > IOV_MAX)
6287c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
6297c478bd9Sstevel@tonic-gate 
6308e935259SBryan Cantrill 	if (iovcnt > IOV_MAX_STACK) {
6318e935259SBryan Cantrill 		aiovlen = iovcnt * sizeof (iovec_t);
6328e935259SBryan Cantrill 		aiov = kmem_alloc(aiovlen, KM_SLEEP);
6338e935259SBryan Cantrill 	}
6348e935259SBryan Cantrill 
6357c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
6367c478bd9Sstevel@tonic-gate 	/*
6377c478bd9Sstevel@tonic-gate 	 * 32-bit callers need to have their iovec expanded,
6387c478bd9Sstevel@tonic-gate 	 * while ensuring that they can't move more than 2Gbytes
6397c478bd9Sstevel@tonic-gate 	 * of data in a single call.
6407c478bd9Sstevel@tonic-gate 	 */
6417c478bd9Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_ILP32) {
6428e935259SBryan Cantrill 		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
6438e935259SBryan Cantrill 		int aiov32len;
6447c478bd9Sstevel@tonic-gate 		ssize32_t count32;
6457c478bd9Sstevel@tonic-gate 
6468e935259SBryan Cantrill 		aiov32len = iovcnt * sizeof (iovec32_t);
6478e935259SBryan Cantrill 		if (aiovlen != 0)
6488e935259SBryan Cantrill 			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
6498e935259SBryan Cantrill 
6508e935259SBryan Cantrill 		if (copyin(iovp, aiov32, aiov32len)) {
6518e935259SBryan Cantrill 			if (aiovlen != 0) {
6528e935259SBryan Cantrill 				kmem_free(aiov32, aiov32len);
6538e935259SBryan Cantrill 				kmem_free(aiov, aiovlen);
6548e935259SBryan Cantrill 			}
6557c478bd9Sstevel@tonic-gate 			return (set_errno(EFAULT));
6568e935259SBryan Cantrill 		}
6577c478bd9Sstevel@tonic-gate 
6587c478bd9Sstevel@tonic-gate 		count32 = 0;
6597c478bd9Sstevel@tonic-gate 		for (i = 0; i < iovcnt; i++) {
6607c478bd9Sstevel@tonic-gate 			ssize32_t iovlen32 = aiov32[i].iov_len;
6617c478bd9Sstevel@tonic-gate 			count32 += iovlen32;
6628e935259SBryan Cantrill 			if (iovlen32 < 0 || count32 < 0) {
6638e935259SBryan Cantrill 				if (aiovlen != 0) {
6648e935259SBryan Cantrill 					kmem_free(aiov32, aiov32len);
6658e935259SBryan Cantrill 					kmem_free(aiov, aiovlen);
6668e935259SBryan Cantrill 				}
6677c478bd9Sstevel@tonic-gate 				return (set_errno(EINVAL));
6688e935259SBryan Cantrill 			}
6697c478bd9Sstevel@tonic-gate 			aiov[i].iov_len = iovlen32;
6707c478bd9Sstevel@tonic-gate 			aiov[i].iov_base =
6717c478bd9Sstevel@tonic-gate 			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
6727c478bd9Sstevel@tonic-gate 		}
6738e935259SBryan Cantrill 
6748e935259SBryan Cantrill 		if (aiovlen != 0)
6758e935259SBryan Cantrill 			kmem_free(aiov32, aiov32len);
6767c478bd9Sstevel@tonic-gate 	} else
6777c478bd9Sstevel@tonic-gate #endif
6788e935259SBryan Cantrill 	if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
6798e935259SBryan Cantrill 		if (aiovlen != 0)
6808e935259SBryan Cantrill 			kmem_free(aiov, aiovlen);
6817c478bd9Sstevel@tonic-gate 		return (set_errno(EFAULT));
6828e935259SBryan Cantrill 	}
6837c478bd9Sstevel@tonic-gate 
6847c478bd9Sstevel@tonic-gate 	count = 0;
6857c478bd9Sstevel@tonic-gate 	for (i = 0; i < iovcnt; i++) {
6867c478bd9Sstevel@tonic-gate 		ssize_t iovlen = aiov[i].iov_len;
6877c478bd9Sstevel@tonic-gate 		count += iovlen;
6888e935259SBryan Cantrill 		if (iovlen < 0 || count < 0) {
6898e935259SBryan Cantrill 			if (aiovlen != 0)
6908e935259SBryan Cantrill 				kmem_free(aiov, aiovlen);
6917c478bd9Sstevel@tonic-gate 			return (set_errno(EINVAL));
6927c478bd9Sstevel@tonic-gate 		}
6938e935259SBryan Cantrill 	}
6948e935259SBryan Cantrill 	if ((fp = getf(fdes)) == NULL) {
6958e935259SBryan Cantrill 		if (aiovlen != 0)
6968e935259SBryan Cantrill 			kmem_free(aiov, aiovlen);
6977c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
6988e935259SBryan Cantrill 	}
6997c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & FREAD) == 0) {
7007c478bd9Sstevel@tonic-gate 		error = EBADF;
7017c478bd9Sstevel@tonic-gate 		goto out;
7027c478bd9Sstevel@tonic-gate 	}
7037c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
7047c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG && count == 0) {
7057c478bd9Sstevel@tonic-gate 		goto out;
7067c478bd9Sstevel@tonic-gate 	}
7077c478bd9Sstevel@tonic-gate 
7087c478bd9Sstevel@tonic-gate 	rwflag = 0;
7097c478bd9Sstevel@tonic-gate 
7107c478bd9Sstevel@tonic-gate 	/*
7117c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
7127c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with ufs.
7137c478bd9Sstevel@tonic-gate 	 */
7147c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
7157c478bd9Sstevel@tonic-gate 		int svmand;
7167c478bd9Sstevel@tonic-gate 
7177c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
7187c478bd9Sstevel@tonic-gate 		in_crit = 1;
7197c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
7207c478bd9Sstevel@tonic-gate 		if (error != 0)
7217c478bd9Sstevel@tonic-gate 			goto out;
722da6c28aaSamw 		if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand,
723da6c28aaSamw 		    NULL)) {
7247c478bd9Sstevel@tonic-gate 			error = EACCES;
7257c478bd9Sstevel@tonic-gate 			goto out;
7267c478bd9Sstevel@tonic-gate 		}
7277c478bd9Sstevel@tonic-gate 	}
7287c478bd9Sstevel@tonic-gate 
7297c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
7307c478bd9Sstevel@tonic-gate 	fileoff = fp->f_offset;
7317c478bd9Sstevel@tonic-gate 
7327c478bd9Sstevel@tonic-gate 	/*
7337c478bd9Sstevel@tonic-gate 	 * Behaviour is same as read. Please see comments in read.
7347c478bd9Sstevel@tonic-gate 	 */
7357c478bd9Sstevel@tonic-gate 
7367c478bd9Sstevel@tonic-gate 	if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
7377c478bd9Sstevel@tonic-gate 		struct vattr va;
7387c478bd9Sstevel@tonic-gate 		va.va_mask = AT_SIZE;
739da6c28aaSamw 		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL)))  {
7407c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
7417c478bd9Sstevel@tonic-gate 			goto out;
7427c478bd9Sstevel@tonic-gate 		}
7437c478bd9Sstevel@tonic-gate 		if (fileoff >= va.va_size) {
7447c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
7457c478bd9Sstevel@tonic-gate 			count = 0;
7467c478bd9Sstevel@tonic-gate 			goto out;
7477c478bd9Sstevel@tonic-gate 		} else {
7487c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
7497c478bd9Sstevel@tonic-gate 			error = EOVERFLOW;
7507c478bd9Sstevel@tonic-gate 			goto out;
7517c478bd9Sstevel@tonic-gate 		}
7527c478bd9Sstevel@tonic-gate 	}
7537c478bd9Sstevel@tonic-gate 	if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) {
7547c478bd9Sstevel@tonic-gate 		count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
7557c478bd9Sstevel@tonic-gate 	}
7567c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
7577c478bd9Sstevel@tonic-gate 	auio.uio_iov = aiov;
7587c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = iovcnt;
7597c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount = count;
7607c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
7617c478bd9Sstevel@tonic-gate 	auio.uio_llimit = MAXOFFSET_T;
7627c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
76313506d1eSmaybee 	if (bcount <= copyout_max_cached)
7647c478bd9Sstevel@tonic-gate 		auio.uio_extflg = UIO_COPY_CACHED;
7657c478bd9Sstevel@tonic-gate 	else
7667c478bd9Sstevel@tonic-gate 		auio.uio_extflg = UIO_COPY_DEFAULT;
7677c478bd9Sstevel@tonic-gate 
7687c478bd9Sstevel@tonic-gate 
7697c478bd9Sstevel@tonic-gate 	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
7707c478bd9Sstevel@tonic-gate 
7717c478bd9Sstevel@tonic-gate 	/* If read sync is not asked for, filter sync flags */
7727c478bd9Sstevel@tonic-gate 	if ((ioflag & FRSYNC) == 0)
7737c478bd9Sstevel@tonic-gate 		ioflag &= ~(FSYNC|FDSYNC);
7747c478bd9Sstevel@tonic-gate 	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
7757c478bd9Sstevel@tonic-gate 	count -= auio.uio_resid;
7767c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
7777c478bd9Sstevel@tonic-gate 	cp = CPU;
7787c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, sysread, 1);
7797c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
7807c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
7817c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
7827c478bd9Sstevel@tonic-gate 
7837c478bd9Sstevel@tonic-gate 	if (vp->v_type == VFIFO)	/* Backward compatibility */
7847c478bd9Sstevel@tonic-gate 		fp->f_offset = count;
7857c478bd9Sstevel@tonic-gate 	else if (((fp->f_flag & FAPPEND) == 0) ||
7867c478bd9Sstevel@tonic-gate 	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
7877c478bd9Sstevel@tonic-gate 		fp->f_offset = auio.uio_loffset;
7887c478bd9Sstevel@tonic-gate 
7897c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
7907c478bd9Sstevel@tonic-gate 
7917c478bd9Sstevel@tonic-gate 	if (error == EINTR && count != 0)
7927c478bd9Sstevel@tonic-gate 		error = 0;
7937c478bd9Sstevel@tonic-gate out:
7947c478bd9Sstevel@tonic-gate 	if (in_crit)
7957c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
7967c478bd9Sstevel@tonic-gate 	releasef(fdes);
7978e935259SBryan Cantrill 	if (aiovlen != 0)
7988e935259SBryan Cantrill 		kmem_free(aiov, aiovlen);
7997c478bd9Sstevel@tonic-gate 	if (error)
8007c478bd9Sstevel@tonic-gate 		return (set_errno(error));
8017c478bd9Sstevel@tonic-gate 	return (count);
8027c478bd9Sstevel@tonic-gate }
8037c478bd9Sstevel@tonic-gate 
8047c478bd9Sstevel@tonic-gate ssize_t
writev(int fdes,struct iovec * iovp,int iovcnt)8057c478bd9Sstevel@tonic-gate writev(int fdes, struct iovec *iovp, int iovcnt)
8067c478bd9Sstevel@tonic-gate {
8077c478bd9Sstevel@tonic-gate 	struct uio auio;
8088e935259SBryan Cantrill 	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
8098e935259SBryan Cantrill 	int aiovlen = 0;
8107c478bd9Sstevel@tonic-gate 	file_t *fp;
8117c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
8127c478bd9Sstevel@tonic-gate 	struct cpu *cp;
8137c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
8147c478bd9Sstevel@tonic-gate 	ssize_t count, bcount;
8157c478bd9Sstevel@tonic-gate 	int error = 0;
8167c478bd9Sstevel@tonic-gate 	int i;
8177c478bd9Sstevel@tonic-gate 	u_offset_t fileoff;
8187c478bd9Sstevel@tonic-gate 	int in_crit = 0;
8197c478bd9Sstevel@tonic-gate 
8208e935259SBryan Cantrill 	if (iovcnt <= 0 || iovcnt > IOV_MAX)
8217c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
8227c478bd9Sstevel@tonic-gate 
8238e935259SBryan Cantrill 	if (iovcnt > IOV_MAX_STACK) {
8248e935259SBryan Cantrill 		aiovlen = iovcnt * sizeof (iovec_t);
8258e935259SBryan Cantrill 		aiov = kmem_alloc(aiovlen, KM_SLEEP);
8268e935259SBryan Cantrill 	}
8278e935259SBryan Cantrill 
8287c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
8297c478bd9Sstevel@tonic-gate 	/*
8307c478bd9Sstevel@tonic-gate 	 * 32-bit callers need to have their iovec expanded,
8317c478bd9Sstevel@tonic-gate 	 * while ensuring that they can't move more than 2Gbytes
8327c478bd9Sstevel@tonic-gate 	 * of data in a single call.
8337c478bd9Sstevel@tonic-gate 	 */
8347c478bd9Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_ILP32) {
8358e935259SBryan Cantrill 		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
8368e935259SBryan Cantrill 		int aiov32len;
8377c478bd9Sstevel@tonic-gate 		ssize32_t count32;
8387c478bd9Sstevel@tonic-gate 
8398e935259SBryan Cantrill 		aiov32len = iovcnt * sizeof (iovec32_t);
8408e935259SBryan Cantrill 		if (aiovlen != 0)
8418e935259SBryan Cantrill 			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
8428e935259SBryan Cantrill 
8438e935259SBryan Cantrill 		if (copyin(iovp, aiov32, aiov32len)) {
8448e935259SBryan Cantrill 			if (aiovlen != 0) {
8458e935259SBryan Cantrill 				kmem_free(aiov32, aiov32len);
8468e935259SBryan Cantrill 				kmem_free(aiov, aiovlen);
8478e935259SBryan Cantrill 			}
8487c478bd9Sstevel@tonic-gate 			return (set_errno(EFAULT));
8498e935259SBryan Cantrill 		}
8507c478bd9Sstevel@tonic-gate 
8517c478bd9Sstevel@tonic-gate 		count32 = 0;
8527c478bd9Sstevel@tonic-gate 		for (i = 0; i < iovcnt; i++) {
8537c478bd9Sstevel@tonic-gate 			ssize32_t iovlen = aiov32[i].iov_len;
8547c478bd9Sstevel@tonic-gate 			count32 += iovlen;
8558e935259SBryan Cantrill 			if (iovlen < 0 || count32 < 0) {
8568e935259SBryan Cantrill 				if (aiovlen != 0) {
8578e935259SBryan Cantrill 					kmem_free(aiov32, aiov32len);
8588e935259SBryan Cantrill 					kmem_free(aiov, aiovlen);
8598e935259SBryan Cantrill 				}
8607c478bd9Sstevel@tonic-gate 				return (set_errno(EINVAL));
8618e935259SBryan Cantrill 			}
8627c478bd9Sstevel@tonic-gate 			aiov[i].iov_len = iovlen;
8637c478bd9Sstevel@tonic-gate 			aiov[i].iov_base =
8647c478bd9Sstevel@tonic-gate 			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
8657c478bd9Sstevel@tonic-gate 		}
8668e935259SBryan Cantrill 		if (aiovlen != 0)
8678e935259SBryan Cantrill 			kmem_free(aiov32, aiov32len);
8687c478bd9Sstevel@tonic-gate 	} else
8697c478bd9Sstevel@tonic-gate #endif
8708e935259SBryan Cantrill 	if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
8718e935259SBryan Cantrill 		if (aiovlen != 0)
8728e935259SBryan Cantrill 			kmem_free(aiov, aiovlen);
8737c478bd9Sstevel@tonic-gate 		return (set_errno(EFAULT));
8748e935259SBryan Cantrill 	}
8757c478bd9Sstevel@tonic-gate 
8767c478bd9Sstevel@tonic-gate 	count = 0;
8777c478bd9Sstevel@tonic-gate 	for (i = 0; i < iovcnt; i++) {
8787c478bd9Sstevel@tonic-gate 		ssize_t iovlen = aiov[i].iov_len;
8797c478bd9Sstevel@tonic-gate 		count += iovlen;
8808e935259SBryan Cantrill 		if (iovlen < 0 || count < 0) {
8818e935259SBryan Cantrill 			if (aiovlen != 0)
8828e935259SBryan Cantrill 				kmem_free(aiov, aiovlen);
8837c478bd9Sstevel@tonic-gate 			return (set_errno(EINVAL));
8847c478bd9Sstevel@tonic-gate 		}
8858e935259SBryan Cantrill 	}
8868e935259SBryan Cantrill 	if ((fp = getf(fdes)) == NULL) {
8878e935259SBryan Cantrill 		if (aiovlen != 0)
8888e935259SBryan Cantrill 			kmem_free(aiov, aiovlen);
8897c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
8908e935259SBryan Cantrill 	}
8917c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & FWRITE) == 0) {
8927c478bd9Sstevel@tonic-gate 		error = EBADF;
8937c478bd9Sstevel@tonic-gate 		goto out;
8947c478bd9Sstevel@tonic-gate 	}
8957c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
8967c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG && count == 0) {
8977c478bd9Sstevel@tonic-gate 		goto out;
8987c478bd9Sstevel@tonic-gate 	}
8997c478bd9Sstevel@tonic-gate 
9007c478bd9Sstevel@tonic-gate 	rwflag = 1;
9017c478bd9Sstevel@tonic-gate 
9027c478bd9Sstevel@tonic-gate 	/*
9037c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
9047c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with ufs.
9057c478bd9Sstevel@tonic-gate 	 */
9067c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
9077c478bd9Sstevel@tonic-gate 		int svmand;
9087c478bd9Sstevel@tonic-gate 
9097c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
9107c478bd9Sstevel@tonic-gate 		in_crit = 1;
9117c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
9127c478bd9Sstevel@tonic-gate 		if (error != 0)
9137c478bd9Sstevel@tonic-gate 			goto out;
914da6c28aaSamw 		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand,
915da6c28aaSamw 		    NULL)) {
9167c478bd9Sstevel@tonic-gate 			error = EACCES;
9177c478bd9Sstevel@tonic-gate 			goto out;
9187c478bd9Sstevel@tonic-gate 		}
9197c478bd9Sstevel@tonic-gate 	}
9207c478bd9Sstevel@tonic-gate 
9217c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
9227c478bd9Sstevel@tonic-gate 
9237c478bd9Sstevel@tonic-gate 	fileoff = fp->f_offset;
9247c478bd9Sstevel@tonic-gate 
9257c478bd9Sstevel@tonic-gate 	/*
9267c478bd9Sstevel@tonic-gate 	 * Behaviour is same as write. Please see comments for write.
9277c478bd9Sstevel@tonic-gate 	 */
9287c478bd9Sstevel@tonic-gate 
9297c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG) {
9307c478bd9Sstevel@tonic-gate 		if (fileoff >= curproc->p_fsz_ctl) {
9317c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
9327c478bd9Sstevel@tonic-gate 			mutex_enter(&curproc->p_lock);
9337c478bd9Sstevel@tonic-gate 			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
9347c478bd9Sstevel@tonic-gate 			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
9357c478bd9Sstevel@tonic-gate 			mutex_exit(&curproc->p_lock);
9367c478bd9Sstevel@tonic-gate 			error = EFBIG;
9377c478bd9Sstevel@tonic-gate 			goto out;
9387c478bd9Sstevel@tonic-gate 		}
9397c478bd9Sstevel@tonic-gate 		if (fileoff >= OFFSET_MAX(fp)) {
9407c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
9417c478bd9Sstevel@tonic-gate 			error = EFBIG;
9427c478bd9Sstevel@tonic-gate 			goto out;
9437c478bd9Sstevel@tonic-gate 		}
9447c478bd9Sstevel@tonic-gate 		if (fileoff + count > OFFSET_MAX(fp))
9457c478bd9Sstevel@tonic-gate 			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
9467c478bd9Sstevel@tonic-gate 	}
9477c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
9487c478bd9Sstevel@tonic-gate 	auio.uio_iov = aiov;
9497c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = iovcnt;
9507c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount = count;
9517c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
9527c478bd9Sstevel@tonic-gate 	auio.uio_llimit = curproc->p_fsz_ctl;
9537c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
9547c478bd9Sstevel@tonic-gate 	auio.uio_extflg = UIO_COPY_DEFAULT;
9557c478bd9Sstevel@tonic-gate 
9567c478bd9Sstevel@tonic-gate 	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
9577c478bd9Sstevel@tonic-gate 
9587c478bd9Sstevel@tonic-gate 	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
9597c478bd9Sstevel@tonic-gate 	count -= auio.uio_resid;
9607c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
9617c478bd9Sstevel@tonic-gate 	cp = CPU;
9627c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
9637c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
9647c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
9657c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
9667c478bd9Sstevel@tonic-gate 
9677c478bd9Sstevel@tonic-gate 	if (vp->v_type == VFIFO)	/* Backward compatibility */
9687c478bd9Sstevel@tonic-gate 		fp->f_offset = count;
9697c478bd9Sstevel@tonic-gate 	else if (((fp->f_flag & FAPPEND) == 0) ||
9707c478bd9Sstevel@tonic-gate 	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
9717c478bd9Sstevel@tonic-gate 		fp->f_offset = auio.uio_loffset;
9727c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
9737c478bd9Sstevel@tonic-gate 
9747c478bd9Sstevel@tonic-gate 	if (error == EINTR && count != 0)
9757c478bd9Sstevel@tonic-gate 		error = 0;
9767c478bd9Sstevel@tonic-gate out:
9777c478bd9Sstevel@tonic-gate 	if (in_crit)
9787c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
9797c478bd9Sstevel@tonic-gate 	releasef(fdes);
9808e935259SBryan Cantrill 	if (aiovlen != 0)
9818e935259SBryan Cantrill 		kmem_free(aiov, aiovlen);
9827c478bd9Sstevel@tonic-gate 	if (error)
9837c478bd9Sstevel@tonic-gate 		return (set_errno(error));
9847c478bd9Sstevel@tonic-gate 	return (count);
9857c478bd9Sstevel@tonic-gate }
9867c478bd9Sstevel@tonic-gate 
987fca543caSDJ Hoffman ssize_t
preadv(int fdes,struct iovec * iovp,int iovcnt,off_t offset,off_t extended_offset)988fca543caSDJ Hoffman preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
989fca543caSDJ Hoffman     off_t extended_offset)
990fca543caSDJ Hoffman {
991fca543caSDJ Hoffman 	struct uio auio;
9928e935259SBryan Cantrill 	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
9938e935259SBryan Cantrill 	int aiovlen = 0;
994fca543caSDJ Hoffman 	file_t *fp;
995fca543caSDJ Hoffman 	register vnode_t *vp;
996fca543caSDJ Hoffman 	struct cpu *cp;
997fca543caSDJ Hoffman 	int fflag, ioflag, rwflag;
998fca543caSDJ Hoffman 	ssize_t count, bcount;
999fca543caSDJ Hoffman 	int error = 0;
1000fca543caSDJ Hoffman 	int i;
1001fca543caSDJ Hoffman 
1002*81c3d085SJerry Jelinek 	/*
1003*81c3d085SJerry Jelinek 	 * In a 64-bit kernel, this interface supports native 64-bit
1004*81c3d085SJerry Jelinek 	 * applications as well as 32-bit applications using both standard and
1005*81c3d085SJerry Jelinek 	 * large-file access. For 32-bit large-file aware applications, the
1006*81c3d085SJerry Jelinek 	 * offset is passed as two parameters which are joined into the actual
1007*81c3d085SJerry Jelinek 	 * offset used. The 64-bit libc always passes 0 for the extended_offset.
1008*81c3d085SJerry Jelinek 	 * Note that off_t is a signed value, but the preadv/pwritev API treats
1009*81c3d085SJerry Jelinek 	 * the offset as a position in the file for the operation, so passing
1010*81c3d085SJerry Jelinek 	 * a negative value will likely fail the maximum offset checks below
1011*81c3d085SJerry Jelinek 	 * because we convert it to an unsigned value which will be larger than
1012*81c3d085SJerry Jelinek 	 * the maximum valid offset.
1013*81c3d085SJerry Jelinek 	 */
1014fca543caSDJ Hoffman #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1015fca543caSDJ Hoffman 	u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1016fca543caSDJ Hoffman 	    (u_offset_t)offset;
1017fca543caSDJ Hoffman #else /* _SYSCALL32_IMPL || _ILP32 */
1018fca543caSDJ Hoffman 	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
1019fca543caSDJ Hoffman #endif /* _SYSCALL32_IMPR || _ILP32 */
1020fca543caSDJ Hoffman 
1021fca543caSDJ Hoffman 	int in_crit = 0;
1022fca543caSDJ Hoffman 
10238e935259SBryan Cantrill 	if (iovcnt <= 0 || iovcnt > IOV_MAX)
1024fca543caSDJ Hoffman 		return (set_errno(EINVAL));
1025fca543caSDJ Hoffman 
10268e935259SBryan Cantrill 	if (iovcnt > IOV_MAX_STACK) {
10278e935259SBryan Cantrill 		aiovlen = iovcnt * sizeof (iovec_t);
10288e935259SBryan Cantrill 		aiov = kmem_alloc(aiovlen, KM_SLEEP);
10298e935259SBryan Cantrill 	}
10308e935259SBryan Cantrill 
1031fca543caSDJ Hoffman #ifdef _SYSCALL32_IMPL
1032fca543caSDJ Hoffman 	/*
1033fca543caSDJ Hoffman 	 * 32-bit callers need to have their iovec expanded,
1034fca543caSDJ Hoffman 	 * while ensuring that they can't move more than 2Gbytes
1035fca543caSDJ Hoffman 	 * of data in a single call.
1036fca543caSDJ Hoffman 	 */
1037fca543caSDJ Hoffman 	if (get_udatamodel() == DATAMODEL_ILP32) {
10388e935259SBryan Cantrill 		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
10398e935259SBryan Cantrill 		int aiov32len;
1040fca543caSDJ Hoffman 		ssize32_t count32;
1041fca543caSDJ Hoffman 
10428e935259SBryan Cantrill 		aiov32len = iovcnt * sizeof (iovec32_t);
10438e935259SBryan Cantrill 		if (aiovlen != 0)
10448e935259SBryan Cantrill 			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
10458e935259SBryan Cantrill 
10468e935259SBryan Cantrill 		if (copyin(iovp, aiov32, aiov32len)) {
10478e935259SBryan Cantrill 			if (aiovlen != 0) {
10488e935259SBryan Cantrill 				kmem_free(aiov32, aiov32len);
10498e935259SBryan Cantrill 				kmem_free(aiov, aiovlen);
10508e935259SBryan Cantrill 			}
1051fca543caSDJ Hoffman 			return (set_errno(EFAULT));
10528e935259SBryan Cantrill 		}
1053fca543caSDJ Hoffman 
1054fca543caSDJ Hoffman 		count32 = 0;
1055fca543caSDJ Hoffman 		for (i = 0; i < iovcnt; i++) {
1056fca543caSDJ Hoffman 			ssize32_t iovlen32 = aiov32[i].iov_len;
1057fca543caSDJ Hoffman 			count32 += iovlen32;
10588e935259SBryan Cantrill 			if (iovlen32 < 0 || count32 < 0) {
10598e935259SBryan Cantrill 				if (aiovlen != 0) {
10608e935259SBryan Cantrill 					kmem_free(aiov32, aiov32len);
10618e935259SBryan Cantrill 					kmem_free(aiov, aiovlen);
10628e935259SBryan Cantrill 				}
1063fca543caSDJ Hoffman 				return (set_errno(EINVAL));
10648e935259SBryan Cantrill 			}
1065fca543caSDJ Hoffman 			aiov[i].iov_len = iovlen32;
1066fca543caSDJ Hoffman 			aiov[i].iov_base =
1067fca543caSDJ Hoffman 			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
1068fca543caSDJ Hoffman 		}
10698e935259SBryan Cantrill 		if (aiovlen != 0)
10708e935259SBryan Cantrill 			kmem_free(aiov32, aiov32len);
1071fca543caSDJ Hoffman 	} else
1072fca543caSDJ Hoffman #endif /* _SYSCALL32_IMPL */
10738e935259SBryan Cantrill 		if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
10748e935259SBryan Cantrill 			if (aiovlen != 0)
10758e935259SBryan Cantrill 				kmem_free(aiov, aiovlen);
1076fca543caSDJ Hoffman 			return (set_errno(EFAULT));
10778e935259SBryan Cantrill 		}
1078fca543caSDJ Hoffman 
1079fca543caSDJ Hoffman 	count = 0;
1080fca543caSDJ Hoffman 	for (i = 0; i < iovcnt; i++) {
1081fca543caSDJ Hoffman 		ssize_t iovlen = aiov[i].iov_len;
1082fca543caSDJ Hoffman 		count += iovlen;
10838e935259SBryan Cantrill 		if (iovlen < 0 || count < 0) {
10848e935259SBryan Cantrill 			if (aiovlen != 0)
10858e935259SBryan Cantrill 				kmem_free(aiov, aiovlen);
1086fca543caSDJ Hoffman 			return (set_errno(EINVAL));
1087fca543caSDJ Hoffman 		}
10888e935259SBryan Cantrill 	}
1089fca543caSDJ Hoffman 
1090*81c3d085SJerry Jelinek 	if ((bcount = count) < 0) {
10918e935259SBryan Cantrill 		if (aiovlen != 0)
10928e935259SBryan Cantrill 			kmem_free(aiov, aiovlen);
1093fca543caSDJ Hoffman 		return (set_errno(EINVAL));
10948e935259SBryan Cantrill 	}
10958e935259SBryan Cantrill 	if ((fp = getf(fdes)) == NULL) {
10968e935259SBryan Cantrill 		if (aiovlen != 0)
10978e935259SBryan Cantrill 			kmem_free(aiov, aiovlen);
1098fca543caSDJ Hoffman 		return (set_errno(EBADF));
10998e935259SBryan Cantrill 	}
1100fca543caSDJ Hoffman 	if (((fflag = fp->f_flag) & FREAD) == 0) {
1101fca543caSDJ Hoffman 		error = EBADF;
1102fca543caSDJ Hoffman 		goto out;
1103fca543caSDJ Hoffman 	}
1104fca543caSDJ Hoffman 	vp = fp->f_vnode;
1105fca543caSDJ Hoffman 	rwflag = 0;
1106fca543caSDJ Hoffman 
1107*81c3d085SJerry Jelinek 	/*
1108*81c3d085SJerry Jelinek 	 * Behaviour is same as read(2). Please see comments in read above.
1109*81c3d085SJerry Jelinek 	 */
1110*81c3d085SJerry Jelinek 	if (vp->v_type == VREG) {
1111fca543caSDJ Hoffman 		if (bcount == 0)
1112fca543caSDJ Hoffman 			goto out;
1113fca543caSDJ Hoffman 
1114*81c3d085SJerry Jelinek 		/* Handle offset past maximum offset allowed for file. */
1115*81c3d085SJerry Jelinek 		if (fileoff >= OFFSET_MAX(fp)) {
1116*81c3d085SJerry Jelinek 			struct vattr va;
1117*81c3d085SJerry Jelinek 			va.va_mask = AT_SIZE;
1118*81c3d085SJerry Jelinek 
1119*81c3d085SJerry Jelinek 			error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL);
1120*81c3d085SJerry Jelinek 			if (error == 0)  {
1121*81c3d085SJerry Jelinek 				if (fileoff >= va.va_size) {
1122*81c3d085SJerry Jelinek 					count = 0;
1123*81c3d085SJerry Jelinek 				} else {
1124*81c3d085SJerry Jelinek 					error = EOVERFLOW;
1125*81c3d085SJerry Jelinek 				}
1126*81c3d085SJerry Jelinek 			}
1127fca543caSDJ Hoffman 			goto out;
1128fca543caSDJ Hoffman 		}
1129fca543caSDJ Hoffman 
1130*81c3d085SJerry Jelinek 		ASSERT(bcount == count);
1131*81c3d085SJerry Jelinek 
1132*81c3d085SJerry Jelinek 		/* Note: modified count used in nbl_conflict() call below. */
1133*81c3d085SJerry Jelinek 		if ((fileoff + count) > OFFSET_MAX(fp))
1134*81c3d085SJerry Jelinek 			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1135*81c3d085SJerry Jelinek 
1136fca543caSDJ Hoffman 	} else if (vp->v_type == VFIFO) {
1137fca543caSDJ Hoffman 		error = ESPIPE;
1138fca543caSDJ Hoffman 		goto out;
1139fca543caSDJ Hoffman 	}
1140fca543caSDJ Hoffman 	/*
1141fca543caSDJ Hoffman 	 * We have to enter the critical region before calling VOP_RWLOCK
1142fca543caSDJ Hoffman 	 * to avoid a deadlock with ufs.
1143fca543caSDJ Hoffman 	 */
1144fca543caSDJ Hoffman 	if (nbl_need_check(vp)) {
1145fca543caSDJ Hoffman 		int svmand;
1146fca543caSDJ Hoffman 
1147fca543caSDJ Hoffman 		nbl_start_crit(vp, RW_READER);
1148fca543caSDJ Hoffman 		in_crit = 1;
1149fca543caSDJ Hoffman 		error = nbl_svmand(vp, fp->f_cred, &svmand);
1150fca543caSDJ Hoffman 		if (error != 0)
1151fca543caSDJ Hoffman 			goto out;
1152*81c3d085SJerry Jelinek 		if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, NULL)) {
1153fca543caSDJ Hoffman 			error = EACCES;
1154fca543caSDJ Hoffman 			goto out;
1155fca543caSDJ Hoffman 		}
1156fca543caSDJ Hoffman 	}
1157fca543caSDJ Hoffman 
1158fca543caSDJ Hoffman 	(void) VOP_RWLOCK(vp, rwflag, NULL);
1159fca543caSDJ Hoffman 
1160fca543caSDJ Hoffman 	auio.uio_loffset = fileoff;
1161fca543caSDJ Hoffman 	auio.uio_iov = aiov;
1162fca543caSDJ Hoffman 	auio.uio_iovcnt = iovcnt;
1163fca543caSDJ Hoffman 	auio.uio_resid = bcount = count;
1164fca543caSDJ Hoffman 	auio.uio_segflg = UIO_USERSPACE;
1165fca543caSDJ Hoffman 	auio.uio_llimit = MAXOFFSET_T;
1166fca543caSDJ Hoffman 	auio.uio_fmode = fflag;
1167fca543caSDJ Hoffman 	if (bcount <= copyout_max_cached)
1168fca543caSDJ Hoffman 		auio.uio_extflg = UIO_COPY_CACHED;
1169fca543caSDJ Hoffman 	else
1170fca543caSDJ Hoffman 		auio.uio_extflg = UIO_COPY_DEFAULT;
1171fca543caSDJ Hoffman 
1172fca543caSDJ Hoffman 	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1173fca543caSDJ Hoffman 	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1174fca543caSDJ Hoffman 	count -= auio.uio_resid;
1175fca543caSDJ Hoffman 	CPU_STATS_ENTER_K();
1176fca543caSDJ Hoffman 	cp = CPU;
1177fca543caSDJ Hoffman 	CPU_STATS_ADDQ(cp, sys, sysread, 1);
1178fca543caSDJ Hoffman 	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
1179fca543caSDJ Hoffman 	CPU_STATS_EXIT_K();
1180fca543caSDJ Hoffman 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1181fca543caSDJ Hoffman 
1182fca543caSDJ Hoffman 	VOP_RWUNLOCK(vp, rwflag, NULL);
1183fca543caSDJ Hoffman 
1184fca543caSDJ Hoffman 	if (error == EINTR && count != 0)
1185fca543caSDJ Hoffman 		error = 0;
1186fca543caSDJ Hoffman out:
1187fca543caSDJ Hoffman 	if (in_crit)
1188fca543caSDJ Hoffman 		nbl_end_crit(vp);
1189fca543caSDJ Hoffman 	releasef(fdes);
11908e935259SBryan Cantrill 	if (aiovlen != 0)
11918e935259SBryan Cantrill 		kmem_free(aiov, aiovlen);
1192fca543caSDJ Hoffman 	if (error)
1193fca543caSDJ Hoffman 		return (set_errno(error));
1194fca543caSDJ Hoffman 	return (count);
1195fca543caSDJ Hoffman }
1196fca543caSDJ Hoffman 
1197fca543caSDJ Hoffman ssize_t
pwritev(int fdes,struct iovec * iovp,int iovcnt,off_t offset,off_t extended_offset)1198fca543caSDJ Hoffman pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
1199fca543caSDJ Hoffman     off_t extended_offset)
1200fca543caSDJ Hoffman {
1201fca543caSDJ Hoffman 	struct uio auio;
12028e935259SBryan Cantrill 	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
12038e935259SBryan Cantrill 	int aiovlen = 0;
1204fca543caSDJ Hoffman 	file_t *fp;
1205fca543caSDJ Hoffman 	register vnode_t *vp;
1206fca543caSDJ Hoffman 	struct cpu *cp;
1207fca543caSDJ Hoffman 	int fflag, ioflag, rwflag;
1208fca543caSDJ Hoffman 	ssize_t count, bcount;
1209fca543caSDJ Hoffman 	int error = 0;
1210fca543caSDJ Hoffman 	int i;
1211fca543caSDJ Hoffman 
1212*81c3d085SJerry Jelinek 	/*
1213*81c3d085SJerry Jelinek 	 * See the comment in preadv for how the offset is handled.
1214*81c3d085SJerry Jelinek 	 */
1215fca543caSDJ Hoffman #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1216fca543caSDJ Hoffman 	u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1217fca543caSDJ Hoffman 	    (u_offset_t)offset;
1218fca543caSDJ Hoffman #else /* _SYSCALL32_IMPL || _ILP32 */
1219fca543caSDJ Hoffman 	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
1220fca543caSDJ Hoffman #endif /* _SYSCALL32_IMPR || _ILP32 */
1221fca543caSDJ Hoffman 
1222fca543caSDJ Hoffman 	int in_crit = 0;
1223fca543caSDJ Hoffman 
12248e935259SBryan Cantrill 	if (iovcnt <= 0 || iovcnt > IOV_MAX)
1225fca543caSDJ Hoffman 		return (set_errno(EINVAL));
1226fca543caSDJ Hoffman 
12278e935259SBryan Cantrill 	if (iovcnt > IOV_MAX_STACK) {
12288e935259SBryan Cantrill 		aiovlen = iovcnt * sizeof (iovec_t);
12298e935259SBryan Cantrill 		aiov = kmem_alloc(aiovlen, KM_SLEEP);
12308e935259SBryan Cantrill 	}
12318e935259SBryan Cantrill 
1232fca543caSDJ Hoffman #ifdef _SYSCALL32_IMPL
1233fca543caSDJ Hoffman 	/*
1234fca543caSDJ Hoffman 	 * 32-bit callers need to have their iovec expanded,
1235fca543caSDJ Hoffman 	 * while ensuring that they can't move more than 2Gbytes
1236fca543caSDJ Hoffman 	 * of data in a single call.
1237fca543caSDJ Hoffman 	 */
1238fca543caSDJ Hoffman 	if (get_udatamodel() == DATAMODEL_ILP32) {
12398e935259SBryan Cantrill 		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
12408e935259SBryan Cantrill 		int aiov32len;
1241fca543caSDJ Hoffman 		ssize32_t count32;
1242fca543caSDJ Hoffman 
12438e935259SBryan Cantrill 		aiov32len = iovcnt * sizeof (iovec32_t);
12448e935259SBryan Cantrill 		if (aiovlen != 0)
12458e935259SBryan Cantrill 			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
12468e935259SBryan Cantrill 
12478e935259SBryan Cantrill 		if (copyin(iovp, aiov32, aiov32len)) {
12488e935259SBryan Cantrill 			if (aiovlen != 0) {
12498e935259SBryan Cantrill 				kmem_free(aiov32, aiov32len);
12508e935259SBryan Cantrill 				kmem_free(aiov, aiovlen);
12518e935259SBryan Cantrill 			}
1252fca543caSDJ Hoffman 			return (set_errno(EFAULT));
12538e935259SBryan Cantrill 		}
1254fca543caSDJ Hoffman 
1255fca543caSDJ Hoffman 		count32 = 0;
1256fca543caSDJ Hoffman 		for (i = 0; i < iovcnt; i++) {
1257fca543caSDJ Hoffman 			ssize32_t iovlen32 = aiov32[i].iov_len;
1258fca543caSDJ Hoffman 			count32 += iovlen32;
12598e935259SBryan Cantrill 			if (iovlen32 < 0 || count32 < 0) {
12608e935259SBryan Cantrill 				if (aiovlen != 0) {
12618e935259SBryan Cantrill 					kmem_free(aiov32, aiov32len);
12628e935259SBryan Cantrill 					kmem_free(aiov, aiovlen);
12638e935259SBryan Cantrill 				}
1264fca543caSDJ Hoffman 				return (set_errno(EINVAL));
12658e935259SBryan Cantrill 			}
1266fca543caSDJ Hoffman 			aiov[i].iov_len = iovlen32;
1267fca543caSDJ Hoffman 			aiov[i].iov_base =
1268fca543caSDJ Hoffman 			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
1269fca543caSDJ Hoffman 		}
12708e935259SBryan Cantrill 		if (aiovlen != 0)
12718e935259SBryan Cantrill 			kmem_free(aiov32, aiov32len);
1272fca543caSDJ Hoffman 	} else
1273fca543caSDJ Hoffman #endif /* _SYSCALL32_IMPL */
12748e935259SBryan Cantrill 		if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
12758e935259SBryan Cantrill 			if (aiovlen != 0)
12768e935259SBryan Cantrill 				kmem_free(aiov, aiovlen);
1277fca543caSDJ Hoffman 			return (set_errno(EFAULT));
12788e935259SBryan Cantrill 		}
1279fca543caSDJ Hoffman 
1280fca543caSDJ Hoffman 	count = 0;
1281fca543caSDJ Hoffman 	for (i = 0; i < iovcnt; i++) {
1282fca543caSDJ Hoffman 		ssize_t iovlen = aiov[i].iov_len;
1283fca543caSDJ Hoffman 		count += iovlen;
12848e935259SBryan Cantrill 		if (iovlen < 0 || count < 0) {
12858e935259SBryan Cantrill 			if (aiovlen != 0)
12868e935259SBryan Cantrill 				kmem_free(aiov, aiovlen);
1287fca543caSDJ Hoffman 			return (set_errno(EINVAL));
1288fca543caSDJ Hoffman 		}
12898e935259SBryan Cantrill 	}
1290fca543caSDJ Hoffman 
1291*81c3d085SJerry Jelinek 	if ((bcount = count) < 0) {
12928e935259SBryan Cantrill 		if (aiovlen != 0)
12938e935259SBryan Cantrill 			kmem_free(aiov, aiovlen);
1294fca543caSDJ Hoffman 		return (set_errno(EINVAL));
12958e935259SBryan Cantrill 	}
12968e935259SBryan Cantrill 	if ((fp = getf(fdes)) == NULL) {
12978e935259SBryan Cantrill 		if (aiovlen != 0)
12988e935259SBryan Cantrill 			kmem_free(aiov, aiovlen);
1299fca543caSDJ Hoffman 		return (set_errno(EBADF));
13008e935259SBryan Cantrill 	}
1301fca543caSDJ Hoffman 	if (((fflag = fp->f_flag) & FWRITE) == 0) {
1302fca543caSDJ Hoffman 		error = EBADF;
1303fca543caSDJ Hoffman 		goto out;
1304fca543caSDJ Hoffman 	}
1305fca543caSDJ Hoffman 	vp = fp->f_vnode;
1306fca543caSDJ Hoffman 	rwflag = 1;
1307fca543caSDJ Hoffman 
1308*81c3d085SJerry Jelinek 	/*
1309*81c3d085SJerry Jelinek 	 * The kernel's write(2) code checks OFFSET_MAX and the rctl, and
1310*81c3d085SJerry Jelinek 	 * returns EFBIG when fileoff exceeds either limit. We do the same.
1311*81c3d085SJerry Jelinek 	 */
1312*81c3d085SJerry Jelinek 	if (vp->v_type == VREG) {
1313fca543caSDJ Hoffman 		if (bcount == 0)
1314fca543caSDJ Hoffman 			goto out;
1315fca543caSDJ Hoffman 
1316fca543caSDJ Hoffman 		/*
1317*81c3d085SJerry Jelinek 		 * Don't allow pwritev to cause file size to exceed the proper
1318*81c3d085SJerry Jelinek 		 * offset limit.
1319fca543caSDJ Hoffman 		 */
1320*81c3d085SJerry Jelinek 		if (fileoff >= OFFSET_MAX(fp)) {
1321*81c3d085SJerry Jelinek 			error = EFBIG;
1322fca543caSDJ Hoffman 			goto out;
1323fca543caSDJ Hoffman 		}
1324*81c3d085SJerry Jelinek 
1325fca543caSDJ Hoffman 		/*
1326fca543caSDJ Hoffman 		 * Take appropriate action if we are trying
1327fca543caSDJ Hoffman 		 * to write above the resource limit.
1328fca543caSDJ Hoffman 		 */
1329fca543caSDJ Hoffman 		if (fileoff >= curproc->p_fsz_ctl) {
1330fca543caSDJ Hoffman 			mutex_enter(&curproc->p_lock);
1331fca543caSDJ Hoffman 			/*
1332fca543caSDJ Hoffman 			 * Return value ignored because it lists
1333fca543caSDJ Hoffman 			 * actions taken, but we are in an error case.
1334fca543caSDJ Hoffman 			 * We don't have any actions that depend on
1335fca543caSDJ Hoffman 			 * what could happen in this call, so we ignore
1336fca543caSDJ Hoffman 			 * the return value.
1337fca543caSDJ Hoffman 			 */
1338fca543caSDJ Hoffman 			(void) rctl_action(
1339fca543caSDJ Hoffman 			    rctlproc_legacy[RLIMIT_FSIZE],
1340fca543caSDJ Hoffman 			    curproc->p_rctls, curproc,
1341fca543caSDJ Hoffman 			    RCA_UNSAFE_SIGINFO);
1342fca543caSDJ Hoffman 			mutex_exit(&curproc->p_lock);
1343fca543caSDJ Hoffman 
1344fca543caSDJ Hoffman 			error = EFBIG;
1345fca543caSDJ Hoffman 			goto out;
1346fca543caSDJ Hoffman 		}
1347fca543caSDJ Hoffman 
1348*81c3d085SJerry Jelinek 		ASSERT(bcount == count);
1349*81c3d085SJerry Jelinek 
1350*81c3d085SJerry Jelinek 		/* Note: modified count used in nbl_conflict() call below. */
1351*81c3d085SJerry Jelinek 		if ((fileoff + count) > OFFSET_MAX(fp))
1352*81c3d085SJerry Jelinek 			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1353*81c3d085SJerry Jelinek 
1354fca543caSDJ Hoffman 	} else if (vp->v_type == VFIFO) {
1355fca543caSDJ Hoffman 		error = ESPIPE;
1356fca543caSDJ Hoffman 		goto out;
1357fca543caSDJ Hoffman 	}
1358fca543caSDJ Hoffman 	/*
1359fca543caSDJ Hoffman 	 * We have to enter the critical region before calling VOP_RWLOCK
1360fca543caSDJ Hoffman 	 * to avoid a deadlock with ufs.
1361fca543caSDJ Hoffman 	 */
1362fca543caSDJ Hoffman 	if (nbl_need_check(vp)) {
1363fca543caSDJ Hoffman 		int svmand;
1364fca543caSDJ Hoffman 
1365fca543caSDJ Hoffman 		nbl_start_crit(vp, RW_READER);
1366fca543caSDJ Hoffman 		in_crit = 1;
1367fca543caSDJ Hoffman 		error = nbl_svmand(vp, fp->f_cred, &svmand);
1368fca543caSDJ Hoffman 		if (error != 0)
1369fca543caSDJ Hoffman 			goto out;
1370*81c3d085SJerry Jelinek 		if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, NULL)) {
1371fca543caSDJ Hoffman 			error = EACCES;
1372fca543caSDJ Hoffman 			goto out;
1373fca543caSDJ Hoffman 		}
1374fca543caSDJ Hoffman 	}
1375fca543caSDJ Hoffman 
1376fca543caSDJ Hoffman 	(void) VOP_RWLOCK(vp, rwflag, NULL);
1377fca543caSDJ Hoffman 
1378fca543caSDJ Hoffman 	auio.uio_loffset = fileoff;
1379fca543caSDJ Hoffman 	auio.uio_iov = aiov;
1380fca543caSDJ Hoffman 	auio.uio_iovcnt = iovcnt;
1381fca543caSDJ Hoffman 	auio.uio_resid = bcount = count;
1382fca543caSDJ Hoffman 	auio.uio_segflg = UIO_USERSPACE;
1383fca543caSDJ Hoffman 	auio.uio_llimit = curproc->p_fsz_ctl;
1384fca543caSDJ Hoffman 	auio.uio_fmode = fflag;
1385fca543caSDJ Hoffman 	auio.uio_extflg = UIO_COPY_CACHED;
1386fca543caSDJ Hoffman 	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
1387fca543caSDJ Hoffman 	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
1388fca543caSDJ Hoffman 	count -= auio.uio_resid;
1389fca543caSDJ Hoffman 	CPU_STATS_ENTER_K();
1390fca543caSDJ Hoffman 	cp = CPU;
1391fca543caSDJ Hoffman 	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
1392fca543caSDJ Hoffman 	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
1393fca543caSDJ Hoffman 	CPU_STATS_EXIT_K();
1394fca543caSDJ Hoffman 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1395fca543caSDJ Hoffman 
1396fca543caSDJ Hoffman 	VOP_RWUNLOCK(vp, rwflag, NULL);
1397fca543caSDJ Hoffman 
1398fca543caSDJ Hoffman 	if (error == EINTR && count != 0)
1399fca543caSDJ Hoffman 		error = 0;
1400fca543caSDJ Hoffman out:
1401fca543caSDJ Hoffman 	if (in_crit)
1402fca543caSDJ Hoffman 		nbl_end_crit(vp);
1403fca543caSDJ Hoffman 	releasef(fdes);
14048e935259SBryan Cantrill 	if (aiovlen != 0)
14058e935259SBryan Cantrill 		kmem_free(aiov, aiovlen);
1406fca543caSDJ Hoffman 	if (error)
1407fca543caSDJ Hoffman 		return (set_errno(error));
1408fca543caSDJ Hoffman 	return (count);
1409fca543caSDJ Hoffman }
1410fca543caSDJ Hoffman 
14117c478bd9Sstevel@tonic-gate #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
14127c478bd9Sstevel@tonic-gate 
14137c478bd9Sstevel@tonic-gate /*
14147c478bd9Sstevel@tonic-gate  * This syscall supplies 64-bit file offsets to 32-bit applications only.
14157c478bd9Sstevel@tonic-gate  */
14167c478bd9Sstevel@tonic-gate ssize32_t
pread64(int fdes,void * cbuf,size32_t count,uint32_t offset_1,uint32_t offset_2)14177c478bd9Sstevel@tonic-gate pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
14187c478bd9Sstevel@tonic-gate     uint32_t offset_2)
14197c478bd9Sstevel@tonic-gate {
14207c478bd9Sstevel@tonic-gate 	struct uio auio;
14217c478bd9Sstevel@tonic-gate 	struct iovec aiov;
14227c478bd9Sstevel@tonic-gate 	file_t *fp;
14237c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
14247c478bd9Sstevel@tonic-gate 	struct cpu *cp;
14257c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
14267c478bd9Sstevel@tonic-gate 	ssize_t bcount;
14277c478bd9Sstevel@tonic-gate 	int error = 0;
14287c478bd9Sstevel@tonic-gate 	u_offset_t fileoff;
14297c478bd9Sstevel@tonic-gate 	int in_crit = 0;
14307c478bd9Sstevel@tonic-gate 
14317c478bd9Sstevel@tonic-gate #if defined(_LITTLE_ENDIAN)
14327c478bd9Sstevel@tonic-gate 	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
14337c478bd9Sstevel@tonic-gate #else
14347c478bd9Sstevel@tonic-gate 	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
14357c478bd9Sstevel@tonic-gate #endif
14367c478bd9Sstevel@tonic-gate 
14377c478bd9Sstevel@tonic-gate 	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
14387c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
14397c478bd9Sstevel@tonic-gate 
14407c478bd9Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL)
14417c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
14427c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
14437c478bd9Sstevel@tonic-gate 		error = EBADF;
14447c478bd9Sstevel@tonic-gate 		goto out;
14457c478bd9Sstevel@tonic-gate 	}
14467c478bd9Sstevel@tonic-gate 
14477c478bd9Sstevel@tonic-gate 	rwflag = 0;
14487c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
14497c478bd9Sstevel@tonic-gate 
14507c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG) {
14517c478bd9Sstevel@tonic-gate 
14527c478bd9Sstevel@tonic-gate 		if (bcount == 0)
14537c478bd9Sstevel@tonic-gate 			goto out;
14547c478bd9Sstevel@tonic-gate 
14557c478bd9Sstevel@tonic-gate 		/*
14567c478bd9Sstevel@tonic-gate 		 * Same as pread. See comments in pread.
14577c478bd9Sstevel@tonic-gate 		 */
14587c478bd9Sstevel@tonic-gate 
14597c478bd9Sstevel@tonic-gate 		if (fileoff > MAXOFFSET_T) {
14607c478bd9Sstevel@tonic-gate 			error = EINVAL;
14617c478bd9Sstevel@tonic-gate 			goto out;
14627c478bd9Sstevel@tonic-gate 		}
14637c478bd9Sstevel@tonic-gate 		if (fileoff + bcount > MAXOFFSET_T)
14647c478bd9Sstevel@tonic-gate 			bcount = (ssize_t)(MAXOFFSET_T - fileoff);
14657c478bd9Sstevel@tonic-gate 	} else if (vp->v_type == VFIFO) {
14667c478bd9Sstevel@tonic-gate 		error = ESPIPE;
14677c478bd9Sstevel@tonic-gate 		goto out;
14687c478bd9Sstevel@tonic-gate 	}
14697c478bd9Sstevel@tonic-gate 
14707c478bd9Sstevel@tonic-gate 	/*
14717c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
14727c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with ufs.
14737c478bd9Sstevel@tonic-gate 	 */
14747c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
14757c478bd9Sstevel@tonic-gate 		int svmand;
14767c478bd9Sstevel@tonic-gate 
14777c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
14787c478bd9Sstevel@tonic-gate 		in_crit = 1;
14797c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
14807c478bd9Sstevel@tonic-gate 		if (error != 0)
14817c478bd9Sstevel@tonic-gate 			goto out;
1482da6c28aaSamw 		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
1483da6c28aaSamw 		    NULL)) {
14847c478bd9Sstevel@tonic-gate 			error = EACCES;
14857c478bd9Sstevel@tonic-gate 			goto out;
14867c478bd9Sstevel@tonic-gate 		}
14877c478bd9Sstevel@tonic-gate 	}
14887c478bd9Sstevel@tonic-gate 
14897c478bd9Sstevel@tonic-gate 	aiov.iov_base = cbuf;
14907c478bd9Sstevel@tonic-gate 	aiov.iov_len = bcount;
14917c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
14927c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
14937c478bd9Sstevel@tonic-gate 
14947c478bd9Sstevel@tonic-gate 	/*
14957c478bd9Sstevel@tonic-gate 	 * Note: File size can never be greater than MAXOFFSET_T.
14967c478bd9Sstevel@tonic-gate 	 * If ever we start supporting 128 bit files the code
14977c478bd9Sstevel@tonic-gate 	 * similar to the one in pread at this place should be here.
14987c478bd9Sstevel@tonic-gate 	 * Here we avoid the unnecessary VOP_GETATTR() when we
14997c478bd9Sstevel@tonic-gate 	 * know that fileoff == MAXOFFSET_T implies that it is always
15007c478bd9Sstevel@tonic-gate 	 * greater than or equal to file size.
15017c478bd9Sstevel@tonic-gate 	 */
15027c478bd9Sstevel@tonic-gate 	auio.uio_iov = &aiov;
15037c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = 1;
15047c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount;
15057c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
15067c478bd9Sstevel@tonic-gate 	auio.uio_llimit = MAXOFFSET_T;
15077c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
15087c478bd9Sstevel@tonic-gate 	auio.uio_extflg = UIO_COPY_CACHED;
15097c478bd9Sstevel@tonic-gate 
15107c478bd9Sstevel@tonic-gate 	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
15117c478bd9Sstevel@tonic-gate 
15127c478bd9Sstevel@tonic-gate 	/* If read sync is not asked for, filter sync flags */
15137c478bd9Sstevel@tonic-gate 	if ((ioflag & FRSYNC) == 0)
15147c478bd9Sstevel@tonic-gate 		ioflag &= ~(FSYNC|FDSYNC);
15157c478bd9Sstevel@tonic-gate 	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
15167c478bd9Sstevel@tonic-gate 	bcount -= auio.uio_resid;
15177c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
15187c478bd9Sstevel@tonic-gate 	cp = CPU;
15197c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, sysread, 1);
15207c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
15217c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
15227c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
15237c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
15247c478bd9Sstevel@tonic-gate 
15257c478bd9Sstevel@tonic-gate 	if (error == EINTR && bcount != 0)
15267c478bd9Sstevel@tonic-gate 		error = 0;
15277c478bd9Sstevel@tonic-gate out:
15287c478bd9Sstevel@tonic-gate 	if (in_crit)
15297c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
15307c478bd9Sstevel@tonic-gate 	releasef(fdes);
15317c478bd9Sstevel@tonic-gate 	if (error)
15327c478bd9Sstevel@tonic-gate 		return (set_errno(error));
15337c478bd9Sstevel@tonic-gate 	return (bcount);
15347c478bd9Sstevel@tonic-gate }
15357c478bd9Sstevel@tonic-gate 
15367c478bd9Sstevel@tonic-gate /*
15377c478bd9Sstevel@tonic-gate  * This syscall supplies 64-bit file offsets to 32-bit applications only.
15387c478bd9Sstevel@tonic-gate  */
15397c478bd9Sstevel@tonic-gate ssize32_t
pwrite64(int fdes,void * cbuf,size32_t count,uint32_t offset_1,uint32_t offset_2)15407c478bd9Sstevel@tonic-gate pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
15417c478bd9Sstevel@tonic-gate     uint32_t offset_2)
15427c478bd9Sstevel@tonic-gate {
15437c478bd9Sstevel@tonic-gate 	struct uio auio;
15447c478bd9Sstevel@tonic-gate 	struct iovec aiov;
15457c478bd9Sstevel@tonic-gate 	file_t *fp;
15467c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
15477c478bd9Sstevel@tonic-gate 	struct cpu *cp;
15487c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
15497c478bd9Sstevel@tonic-gate 	ssize_t bcount;
15507c478bd9Sstevel@tonic-gate 	int error = 0;
15517c478bd9Sstevel@tonic-gate 	u_offset_t fileoff;
15527c478bd9Sstevel@tonic-gate 	int in_crit = 0;
15537c478bd9Sstevel@tonic-gate 
15547c478bd9Sstevel@tonic-gate #if defined(_LITTLE_ENDIAN)
15557c478bd9Sstevel@tonic-gate 	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
15567c478bd9Sstevel@tonic-gate #else
15577c478bd9Sstevel@tonic-gate 	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
15587c478bd9Sstevel@tonic-gate #endif
15597c478bd9Sstevel@tonic-gate 
15607c478bd9Sstevel@tonic-gate 	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
15617c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
15627c478bd9Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL)
15637c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
15647c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
15657c478bd9Sstevel@tonic-gate 		error = EBADF;
15667c478bd9Sstevel@tonic-gate 		goto out;
15677c478bd9Sstevel@tonic-gate 	}
15687c478bd9Sstevel@tonic-gate 
15697c478bd9Sstevel@tonic-gate 	rwflag = 1;
15707c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
15717c478bd9Sstevel@tonic-gate 
15727c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG) {
15737c478bd9Sstevel@tonic-gate 
15747c478bd9Sstevel@tonic-gate 		if (bcount == 0)
15757c478bd9Sstevel@tonic-gate 			goto out;
15767c478bd9Sstevel@tonic-gate 
15777c478bd9Sstevel@tonic-gate 		/*
15787c478bd9Sstevel@tonic-gate 		 * See comments in pwrite.
15797c478bd9Sstevel@tonic-gate 		 */
15807c478bd9Sstevel@tonic-gate 		if (fileoff > MAXOFFSET_T) {
15817c478bd9Sstevel@tonic-gate 			error = EINVAL;
15827c478bd9Sstevel@tonic-gate 			goto out;
15837c478bd9Sstevel@tonic-gate 		}
15847c478bd9Sstevel@tonic-gate 		if (fileoff >= curproc->p_fsz_ctl) {
15857c478bd9Sstevel@tonic-gate 			mutex_enter(&curproc->p_lock);
15867c478bd9Sstevel@tonic-gate 			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
15877c478bd9Sstevel@tonic-gate 			    curproc->p_rctls, curproc, RCA_SAFE);
15887c478bd9Sstevel@tonic-gate 			mutex_exit(&curproc->p_lock);
15897c478bd9Sstevel@tonic-gate 			error = EFBIG;
15907c478bd9Sstevel@tonic-gate 			goto out;
15917c478bd9Sstevel@tonic-gate 		}
15927c478bd9Sstevel@tonic-gate 		if (fileoff == MAXOFFSET_T) {
15937c478bd9Sstevel@tonic-gate 			error = EFBIG;
15947c478bd9Sstevel@tonic-gate 			goto out;
15957c478bd9Sstevel@tonic-gate 		}
15967c478bd9Sstevel@tonic-gate 		if (fileoff + bcount > MAXOFFSET_T)
15977c478bd9Sstevel@tonic-gate 			bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff);
15987c478bd9Sstevel@tonic-gate 	} else if (vp->v_type == VFIFO) {
15997c478bd9Sstevel@tonic-gate 		error = ESPIPE;
16007c478bd9Sstevel@tonic-gate 		goto out;
16017c478bd9Sstevel@tonic-gate 	}
16027c478bd9Sstevel@tonic-gate 
16037c478bd9Sstevel@tonic-gate 	/*
16047c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
16057c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with ufs.
16067c478bd9Sstevel@tonic-gate 	 */
16077c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
16087c478bd9Sstevel@tonic-gate 		int svmand;
16097c478bd9Sstevel@tonic-gate 
16107c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
16117c478bd9Sstevel@tonic-gate 		in_crit = 1;
16127c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
16137c478bd9Sstevel@tonic-gate 		if (error != 0)
16147c478bd9Sstevel@tonic-gate 			goto out;
1615da6c28aaSamw 		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
1616da6c28aaSamw 		    NULL)) {
16177c478bd9Sstevel@tonic-gate 			error = EACCES;
16187c478bd9Sstevel@tonic-gate 			goto out;
16197c478bd9Sstevel@tonic-gate 		}
16207c478bd9Sstevel@tonic-gate 	}
16217c478bd9Sstevel@tonic-gate 
16227c478bd9Sstevel@tonic-gate 	aiov.iov_base = cbuf;
16237c478bd9Sstevel@tonic-gate 	aiov.iov_len = bcount;
16247c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
16257c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
16267c478bd9Sstevel@tonic-gate 	auio.uio_iov = &aiov;
16277c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = 1;
16287c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount;
16297c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
16307c478bd9Sstevel@tonic-gate 	auio.uio_llimit = curproc->p_fsz_ctl;
16317c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
16327c478bd9Sstevel@tonic-gate 	auio.uio_extflg = UIO_COPY_CACHED;
16337c478bd9Sstevel@tonic-gate 
16344d86dd30Sraf 	/*
16354d86dd30Sraf 	 * The SUSv4 POSIX specification states:
16364d86dd30Sraf 	 *	The pwrite() function shall be equivalent to write(), except
16374d86dd30Sraf 	 *	that it writes into a given position and does not change
16384d86dd30Sraf 	 *	the file offset (regardless of whether O_APPEND is set).
16394d86dd30Sraf 	 * To make this be true, we omit the FAPPEND flag from ioflag.
16404d86dd30Sraf 	 */
16414d86dd30Sraf 	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
16427c478bd9Sstevel@tonic-gate 
16437c478bd9Sstevel@tonic-gate 	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
16447c478bd9Sstevel@tonic-gate 	bcount -= auio.uio_resid;
16457c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
16467c478bd9Sstevel@tonic-gate 	cp = CPU;
16477c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
16487c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
16497c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
16507c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
16517c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
16527c478bd9Sstevel@tonic-gate 
16537c478bd9Sstevel@tonic-gate 	if (error == EINTR && bcount != 0)
16547c478bd9Sstevel@tonic-gate 		error = 0;
16557c478bd9Sstevel@tonic-gate out:
16567c478bd9Sstevel@tonic-gate 	if (in_crit)
16577c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
16587c478bd9Sstevel@tonic-gate 	releasef(fdes);
16597c478bd9Sstevel@tonic-gate 	if (error)
16607c478bd9Sstevel@tonic-gate 		return (set_errno(error));
16617c478bd9Sstevel@tonic-gate 	return (bcount);
16627c478bd9Sstevel@tonic-gate }
16637c478bd9Sstevel@tonic-gate 
16647c478bd9Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL || _ILP32 */
16657c478bd9Sstevel@tonic-gate 
16667c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
/*
 * Tail-call elimination of xxx32() down to xxx()
 *
 * A number of xxx32 system calls take a len (or count) argument and
 * return a number in the range [0,len] or -1 on error.
 * Given an ssize32_t input len, the downcall xxx() will return
 * a 64-bit value that is -1 or in the range [0,len] which actually
 * is a proper return value for the xxx32 call. So even if the xxx32
 * calls can be considered as returning a ssize32_t, they are currently
 * declared as returning a ssize_t as this enables tail-call elimination.
 *
 * The cast of len (or count) to ssize32_t is needed to ensure we pass
 * down negative input values as such and let the downcall handle error
 * reporting. Functions covered by this comment are:
 *
 * rw.c:           read32, write32, pread32, pwrite32, readv32, writev32.
 * socksyscall.c:  recv32, recvfrom32, send32, sendto32.
 * readlink.c:     readlink32.
 */
16867c478bd9Sstevel@tonic-gate 
16877c478bd9Sstevel@tonic-gate ssize_t
read32(int32_t fdes,caddr32_t cbuf,size32_t count)16887c478bd9Sstevel@tonic-gate read32(int32_t fdes, caddr32_t cbuf, size32_t count)
16897c478bd9Sstevel@tonic-gate {
16907c478bd9Sstevel@tonic-gate 	return (read(fdes,
16917c478bd9Sstevel@tonic-gate 	    (void *)(uintptr_t)cbuf, (ssize32_t)count));
16927c478bd9Sstevel@tonic-gate }
16937c478bd9Sstevel@tonic-gate 
16947c478bd9Sstevel@tonic-gate ssize_t
write32(int32_t fdes,caddr32_t cbuf,size32_t count)16957c478bd9Sstevel@tonic-gate write32(int32_t fdes, caddr32_t cbuf, size32_t count)
16967c478bd9Sstevel@tonic-gate {
16977c478bd9Sstevel@tonic-gate 	return (write(fdes,
16987c478bd9Sstevel@tonic-gate 	    (void *)(uintptr_t)cbuf, (ssize32_t)count));
16997c478bd9Sstevel@tonic-gate }
17007c478bd9Sstevel@tonic-gate 
17017c478bd9Sstevel@tonic-gate ssize_t
pread32(int32_t fdes,caddr32_t cbuf,size32_t count,off32_t offset)17027c478bd9Sstevel@tonic-gate pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
17037c478bd9Sstevel@tonic-gate {
17047c478bd9Sstevel@tonic-gate 	return (pread(fdes,
17057c478bd9Sstevel@tonic-gate 	    (void *)(uintptr_t)cbuf, (ssize32_t)count,
17067c478bd9Sstevel@tonic-gate 	    (off_t)(uint32_t)offset));
17077c478bd9Sstevel@tonic-gate }
17087c478bd9Sstevel@tonic-gate 
17097c478bd9Sstevel@tonic-gate ssize_t
pwrite32(int32_t fdes,caddr32_t cbuf,size32_t count,off32_t offset)17107c478bd9Sstevel@tonic-gate pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
17117c478bd9Sstevel@tonic-gate {
17127c478bd9Sstevel@tonic-gate 	return (pwrite(fdes,
17137c478bd9Sstevel@tonic-gate 	    (void *)(uintptr_t)cbuf, (ssize32_t)count,
17147c478bd9Sstevel@tonic-gate 	    (off_t)(uint32_t)offset));
17157c478bd9Sstevel@tonic-gate }
17167c478bd9Sstevel@tonic-gate 
17177c478bd9Sstevel@tonic-gate ssize_t
readv32(int32_t fdes,caddr32_t iovp,int32_t iovcnt)17187c478bd9Sstevel@tonic-gate readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
17197c478bd9Sstevel@tonic-gate {
17207c478bd9Sstevel@tonic-gate 	return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt));
17217c478bd9Sstevel@tonic-gate }
17227c478bd9Sstevel@tonic-gate 
17237c478bd9Sstevel@tonic-gate ssize_t
writev32(int32_t fdes,caddr32_t iovp,int32_t iovcnt)17247c478bd9Sstevel@tonic-gate writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
17257c478bd9Sstevel@tonic-gate {
17267c478bd9Sstevel@tonic-gate 	return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt));
17277c478bd9Sstevel@tonic-gate }
17287c478bd9Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
1729