xref: /illumos-gate/usr/src/uts/common/fs/ufs/ufs_trans.c (revision 1e13ea4b)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
580d34432Sfrankho  * Common Development and Distribution License (the "License").
680d34432Sfrankho  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*1e13ea4bSvsakar  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
277c478bd9Sstevel@tonic-gate /* All Rights Reserved */
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate /*
307c478bd9Sstevel@tonic-gate  * Portions of this source code were derived from Berkeley 4.3 BSD
317c478bd9Sstevel@tonic-gate  * under license from the Regents of the University of California.
327c478bd9Sstevel@tonic-gate  */
337c478bd9Sstevel@tonic-gate 
347c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
357c478bd9Sstevel@tonic-gate 
367c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
377c478bd9Sstevel@tonic-gate #include <sys/param.h>
387c478bd9Sstevel@tonic-gate #include <sys/types.h>
397c478bd9Sstevel@tonic-gate #include <sys/systm.h>
407c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
417c478bd9Sstevel@tonic-gate #include <sys/uio.h>
427c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
437c478bd9Sstevel@tonic-gate #include <sys/thread.h>
447c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
457c478bd9Sstevel@tonic-gate #include <sys/errno.h>
467c478bd9Sstevel@tonic-gate #include <sys/buf.h>
477c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
487c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_trans.h>
497c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
507c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
517c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fsdir.h>
527c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_quota.h>
537c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_panic.h>
547c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_bio.h>
557c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_log.h>
567c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
577c478bd9Sstevel@tonic-gate #include <sys/file.h>
587c478bd9Sstevel@tonic-gate #include <sys/debug.h>
597c478bd9Sstevel@tonic-gate 
607c478bd9Sstevel@tonic-gate 
617c478bd9Sstevel@tonic-gate extern kmutex_t ufsvfs_mutex;
627c478bd9Sstevel@tonic-gate extern struct ufsvfs *ufs_instances;
637c478bd9Sstevel@tonic-gate 
647c478bd9Sstevel@tonic-gate /*
657c478bd9Sstevel@tonic-gate  * hlock any file systems w/errored logs
667c478bd9Sstevel@tonic-gate  */
677c478bd9Sstevel@tonic-gate int
687c478bd9Sstevel@tonic-gate ufs_trans_hlock()
697c478bd9Sstevel@tonic-gate {
707c478bd9Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp;
717c478bd9Sstevel@tonic-gate 	struct lockfs	lockfs;
727c478bd9Sstevel@tonic-gate 	int		error;
737c478bd9Sstevel@tonic-gate 	int		retry	= 0;
747c478bd9Sstevel@tonic-gate 
757c478bd9Sstevel@tonic-gate 	/*
767c478bd9Sstevel@tonic-gate 	 * find fs's that paniced or have errored logging devices
777c478bd9Sstevel@tonic-gate 	 */
787c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfs_mutex);
797c478bd9Sstevel@tonic-gate 	for (ufsvfsp = ufs_instances; ufsvfsp; ufsvfsp = ufsvfsp->vfs_next) {
807c478bd9Sstevel@tonic-gate 		/*
817c478bd9Sstevel@tonic-gate 		 * not mounted; continue
827c478bd9Sstevel@tonic-gate 		 */
837c478bd9Sstevel@tonic-gate 		if ((ufsvfsp->vfs_vfs == NULL) ||
847c478bd9Sstevel@tonic-gate 		    (ufsvfsp->vfs_validfs == UT_UNMOUNTED))
857c478bd9Sstevel@tonic-gate 			continue;
867c478bd9Sstevel@tonic-gate 		/*
877c478bd9Sstevel@tonic-gate 		 * disallow unmounts (hlock occurs below)
887c478bd9Sstevel@tonic-gate 		 */
897c478bd9Sstevel@tonic-gate 		if (TRANS_ISERROR(ufsvfsp))
907c478bd9Sstevel@tonic-gate 			ufsvfsp->vfs_validfs = UT_HLOCKING;
917c478bd9Sstevel@tonic-gate 	}
927c478bd9Sstevel@tonic-gate 	mutex_exit(&ufsvfs_mutex);
937c478bd9Sstevel@tonic-gate 
947c478bd9Sstevel@tonic-gate 	/*
957c478bd9Sstevel@tonic-gate 	 * hlock the fs's that paniced or have errored logging devices
967c478bd9Sstevel@tonic-gate 	 */
977c478bd9Sstevel@tonic-gate again:
987c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfs_mutex);
997c478bd9Sstevel@tonic-gate 	for (ufsvfsp = ufs_instances; ufsvfsp; ufsvfsp = ufsvfsp->vfs_next)
1007c478bd9Sstevel@tonic-gate 		if (ufsvfsp->vfs_validfs == UT_HLOCKING)
1017c478bd9Sstevel@tonic-gate 			break;
1027c478bd9Sstevel@tonic-gate 	mutex_exit(&ufsvfs_mutex);
1037c478bd9Sstevel@tonic-gate 	if (ufsvfsp == NULL)
1047c478bd9Sstevel@tonic-gate 		return (retry);
1057c478bd9Sstevel@tonic-gate 	/*
1067c478bd9Sstevel@tonic-gate 	 * hlock the file system
1077c478bd9Sstevel@tonic-gate 	 */
1087c478bd9Sstevel@tonic-gate 	(void) ufs_fiolfss(ufsvfsp->vfs_root, &lockfs);
1097c478bd9Sstevel@tonic-gate 	if (!LOCKFS_IS_ELOCK(&lockfs)) {
1107c478bd9Sstevel@tonic-gate 		lockfs.lf_lock = LOCKFS_HLOCK;
1117c478bd9Sstevel@tonic-gate 		lockfs.lf_flags = 0;
1127c478bd9Sstevel@tonic-gate 		lockfs.lf_comlen = 0;
1137c478bd9Sstevel@tonic-gate 		lockfs.lf_comment = NULL;
1147c478bd9Sstevel@tonic-gate 		error = ufs_fiolfs(ufsvfsp->vfs_root, &lockfs, 0);
1157c478bd9Sstevel@tonic-gate 		/*
1167c478bd9Sstevel@tonic-gate 		 * retry after awhile; another app currently doing lockfs
1177c478bd9Sstevel@tonic-gate 		 */
1187c478bd9Sstevel@tonic-gate 		if (error == EBUSY || error == EINVAL)
1197c478bd9Sstevel@tonic-gate 			retry = 1;
1207c478bd9Sstevel@tonic-gate 	} else {
1217c478bd9Sstevel@tonic-gate 		if (ufsfx_get_failure_qlen() > 0) {
1227c478bd9Sstevel@tonic-gate 			if (mutex_tryenter(&ufs_fix.uq_mutex)) {
1237c478bd9Sstevel@tonic-gate 				ufs_fix.uq_lowat = ufs_fix.uq_ne;
1247c478bd9Sstevel@tonic-gate 				cv_broadcast(&ufs_fix.uq_cv);
1257c478bd9Sstevel@tonic-gate 				mutex_exit(&ufs_fix.uq_mutex);
1267c478bd9Sstevel@tonic-gate 			}
1277c478bd9Sstevel@tonic-gate 		}
1287c478bd9Sstevel@tonic-gate 		retry = 1;
1297c478bd9Sstevel@tonic-gate 	}
1307c478bd9Sstevel@tonic-gate 
1317c478bd9Sstevel@tonic-gate 	/*
1327c478bd9Sstevel@tonic-gate 	 * allow unmounts
1337c478bd9Sstevel@tonic-gate 	 */
1347c478bd9Sstevel@tonic-gate 	ufsvfsp->vfs_validfs = UT_MOUNTED;
1357c478bd9Sstevel@tonic-gate 	goto again;
1367c478bd9Sstevel@tonic-gate }
1377c478bd9Sstevel@tonic-gate 
/*
 * Under ufs_hlock.uq_mutex, set ufs_hlock.uq_ne to ufs_hlock.uq_lowat
 * and wake every thread waiting on ufs_hlock.uq_cv.
 *
 * NOTE(review): presumably this is what kicks the thread that ends up
 * calling ufs_trans_hlock(); the waiter is not visible in this file
 * section -- confirm against the ufs_hlock consumer.
 */
/*ARGSUSED*/
void
ufs_trans_onerror()
{
	mutex_enter(&ufs_hlock.uq_mutex);
	ufs_hlock.uq_ne = ufs_hlock.uq_lowat;
	cv_broadcast(&ufs_hlock.uq_cv);
	mutex_exit(&ufs_hlock.uq_mutex);
}
1477c478bd9Sstevel@tonic-gate 
1487c478bd9Sstevel@tonic-gate void
1497c478bd9Sstevel@tonic-gate ufs_trans_sbupdate(struct ufsvfs *ufsvfsp, struct vfs *vfsp, top_t topid)
1507c478bd9Sstevel@tonic-gate {
1517c478bd9Sstevel@tonic-gate 	if (curthread->t_flag & T_DONTBLOCK) {
1527c478bd9Sstevel@tonic-gate 		sbupdate(vfsp);
1537c478bd9Sstevel@tonic-gate 		return;
1547c478bd9Sstevel@tonic-gate 	} else {
1557c478bd9Sstevel@tonic-gate 
1567c478bd9Sstevel@tonic-gate 		if (panicstr && TRANS_ISTRANS(ufsvfsp))
1577c478bd9Sstevel@tonic-gate 			return;
1587c478bd9Sstevel@tonic-gate 
1597c478bd9Sstevel@tonic-gate 		curthread->t_flag |= T_DONTBLOCK;
1607c478bd9Sstevel@tonic-gate 		TRANS_BEGIN_ASYNC(ufsvfsp, topid, TOP_SBUPDATE_SIZE);
1617c478bd9Sstevel@tonic-gate 		sbupdate(vfsp);
1627c478bd9Sstevel@tonic-gate 		TRANS_END_ASYNC(ufsvfsp, topid, TOP_SBUPDATE_SIZE);
1637c478bd9Sstevel@tonic-gate 		curthread->t_flag &= ~T_DONTBLOCK;
1647c478bd9Sstevel@tonic-gate 	}
1657c478bd9Sstevel@tonic-gate }
1667c478bd9Sstevel@tonic-gate 
1677c478bd9Sstevel@tonic-gate void
1687c478bd9Sstevel@tonic-gate ufs_trans_iupdat(struct inode *ip, int waitfor)
1697c478bd9Sstevel@tonic-gate {
1707c478bd9Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp;
1717c478bd9Sstevel@tonic-gate 
1727c478bd9Sstevel@tonic-gate 	if (curthread->t_flag & T_DONTBLOCK) {
1737c478bd9Sstevel@tonic-gate 		rw_enter(&ip->i_contents, RW_READER);
1747c478bd9Sstevel@tonic-gate 		ufs_iupdat(ip, waitfor);
1757c478bd9Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
1767c478bd9Sstevel@tonic-gate 		return;
1777c478bd9Sstevel@tonic-gate 	} else {
1787c478bd9Sstevel@tonic-gate 		ufsvfsp = ip->i_ufsvfs;
1797c478bd9Sstevel@tonic-gate 
1807c478bd9Sstevel@tonic-gate 		if (panicstr && TRANS_ISTRANS(ufsvfsp))
1817c478bd9Sstevel@tonic-gate 			return;
1827c478bd9Sstevel@tonic-gate 
1837c478bd9Sstevel@tonic-gate 		curthread->t_flag |= T_DONTBLOCK;
1847c478bd9Sstevel@tonic-gate 		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_IUPDAT, TOP_IUPDAT_SIZE(ip));
1857c478bd9Sstevel@tonic-gate 		rw_enter(&ip->i_contents, RW_READER);
1867c478bd9Sstevel@tonic-gate 		ufs_iupdat(ip, waitfor);
1877c478bd9Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
1887c478bd9Sstevel@tonic-gate 		TRANS_END_ASYNC(ufsvfsp, TOP_IUPDAT, TOP_IUPDAT_SIZE(ip));
1897c478bd9Sstevel@tonic-gate 		curthread->t_flag &= ~T_DONTBLOCK;
1907c478bd9Sstevel@tonic-gate 	}
1917c478bd9Sstevel@tonic-gate }
1927c478bd9Sstevel@tonic-gate 
1937c478bd9Sstevel@tonic-gate void
1947c478bd9Sstevel@tonic-gate ufs_trans_sbwrite(struct ufsvfs *ufsvfsp, top_t topid)
1957c478bd9Sstevel@tonic-gate {
1967c478bd9Sstevel@tonic-gate 	if (curthread->t_flag & T_DONTBLOCK) {
1977c478bd9Sstevel@tonic-gate 		mutex_enter(&ufsvfsp->vfs_lock);
1987c478bd9Sstevel@tonic-gate 		ufs_sbwrite(ufsvfsp);
1997c478bd9Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
2007c478bd9Sstevel@tonic-gate 		return;
2017c478bd9Sstevel@tonic-gate 	} else {
2027c478bd9Sstevel@tonic-gate 
2037c478bd9Sstevel@tonic-gate 		if (panicstr && TRANS_ISTRANS(ufsvfsp))
2047c478bd9Sstevel@tonic-gate 			return;
2057c478bd9Sstevel@tonic-gate 
2067c478bd9Sstevel@tonic-gate 		curthread->t_flag |= T_DONTBLOCK;
2077c478bd9Sstevel@tonic-gate 		TRANS_BEGIN_ASYNC(ufsvfsp, topid, TOP_SBWRITE_SIZE);
2087c478bd9Sstevel@tonic-gate 		mutex_enter(&ufsvfsp->vfs_lock);
2097c478bd9Sstevel@tonic-gate 		ufs_sbwrite(ufsvfsp);
2107c478bd9Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
2117c478bd9Sstevel@tonic-gate 		TRANS_END_ASYNC(ufsvfsp, topid, TOP_SBWRITE_SIZE);
2127c478bd9Sstevel@tonic-gate 		curthread->t_flag &= ~T_DONTBLOCK;
2137c478bd9Sstevel@tonic-gate 	}
2147c478bd9Sstevel@tonic-gate }
2157c478bd9Sstevel@tonic-gate 
2167c478bd9Sstevel@tonic-gate /*ARGSUSED*/
2177c478bd9Sstevel@tonic-gate int
2187c478bd9Sstevel@tonic-gate ufs_trans_push_si(ufsvfs_t *ufsvfsp, delta_t dtyp, int ignore)
2197c478bd9Sstevel@tonic-gate {
2207c478bd9Sstevel@tonic-gate 	struct fs	*fs;
2217c478bd9Sstevel@tonic-gate 
2227c478bd9Sstevel@tonic-gate 	fs = ufsvfsp->vfs_fs;
2237c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
2247c478bd9Sstevel@tonic-gate 	TRANS_LOG(ufsvfsp, (char *)fs->fs_u.fs_csp,
2257c478bd9Sstevel@tonic-gate 	    ldbtob(fsbtodb(fs, fs->fs_csaddr)), fs->fs_cssize,
2267c478bd9Sstevel@tonic-gate 	    (caddr_t)fs->fs_u.fs_csp, fs->fs_cssize);
2277c478bd9Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
2287c478bd9Sstevel@tonic-gate 	return (0);
2297c478bd9Sstevel@tonic-gate }
2307c478bd9Sstevel@tonic-gate 
2317c478bd9Sstevel@tonic-gate /*ARGSUSED*/
2327c478bd9Sstevel@tonic-gate int
2337c478bd9Sstevel@tonic-gate ufs_trans_push_buf(ufsvfs_t *ufsvfsp, delta_t dtyp, daddr_t bno)
2347c478bd9Sstevel@tonic-gate {
2357c478bd9Sstevel@tonic-gate 	struct buf	*bp;
2367c478bd9Sstevel@tonic-gate 
2377c478bd9Sstevel@tonic-gate 	bp = (struct buf *)UFS_GETBLK(ufsvfsp, ufsvfsp->vfs_dev, bno, 1);
2387c478bd9Sstevel@tonic-gate 	if (bp == NULL)
2397c478bd9Sstevel@tonic-gate 		return (ENOENT);
2407c478bd9Sstevel@tonic-gate 
2417c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_DELWRI) {
2427c478bd9Sstevel@tonic-gate 		/*
2437c478bd9Sstevel@tonic-gate 		 * Do not use brwrite() here since the buffer is already
2447c478bd9Sstevel@tonic-gate 		 * marked for retry or not by the code that called
2457c478bd9Sstevel@tonic-gate 		 * TRANS_BUF().
2467c478bd9Sstevel@tonic-gate 		 */
2477c478bd9Sstevel@tonic-gate 		UFS_BWRITE(ufsvfsp, bp);
2487c478bd9Sstevel@tonic-gate 		return (0);
2497c478bd9Sstevel@tonic-gate 	}
2507c478bd9Sstevel@tonic-gate 	/*
2517c478bd9Sstevel@tonic-gate 	 * If we did not find the real buf for this block above then
2527c478bd9Sstevel@tonic-gate 	 * clear the dev so the buf won't be found by mistake
2537c478bd9Sstevel@tonic-gate 	 * for this block later.  We had to allocate at least a 1 byte
2547c478bd9Sstevel@tonic-gate 	 * buffer to keep brelse happy.
2557c478bd9Sstevel@tonic-gate 	 */
2567c478bd9Sstevel@tonic-gate 	if (bp->b_bufsize == 1) {
2577c478bd9Sstevel@tonic-gate 		bp->b_dev = (o_dev_t)NODEV;
2587c478bd9Sstevel@tonic-gate 		bp->b_edev = NODEV;
2597c478bd9Sstevel@tonic-gate 		bp->b_flags = 0;
2607c478bd9Sstevel@tonic-gate 	}
2617c478bd9Sstevel@tonic-gate 	brelse(bp);
2627c478bd9Sstevel@tonic-gate 	return (ENOENT);
2637c478bd9Sstevel@tonic-gate }
2647c478bd9Sstevel@tonic-gate 
2657c478bd9Sstevel@tonic-gate /*ARGSUSED*/
2667c478bd9Sstevel@tonic-gate int
2677c478bd9Sstevel@tonic-gate ufs_trans_push_inode(ufsvfs_t *ufsvfsp, delta_t dtyp, ino_t ino)
2687c478bd9Sstevel@tonic-gate {
2697c478bd9Sstevel@tonic-gate 	int		error;
2707c478bd9Sstevel@tonic-gate 	struct inode	*ip;
2717c478bd9Sstevel@tonic-gate 
2727c478bd9Sstevel@tonic-gate 	/*
2737c478bd9Sstevel@tonic-gate 	 * Grab the quota lock (if the file system has not been forcibly
2747c478bd9Sstevel@tonic-gate 	 * unmounted).
2757c478bd9Sstevel@tonic-gate 	 */
2767c478bd9Sstevel@tonic-gate 	if (ufsvfsp)
2777c478bd9Sstevel@tonic-gate 		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
2787c478bd9Sstevel@tonic-gate 
2797c478bd9Sstevel@tonic-gate 	error = ufs_iget(ufsvfsp->vfs_vfs, ino, &ip, kcred);
2807c478bd9Sstevel@tonic-gate 
2817c478bd9Sstevel@tonic-gate 	if (ufsvfsp)
2827c478bd9Sstevel@tonic-gate 		rw_exit(&ufsvfsp->vfs_dqrwlock);
2837c478bd9Sstevel@tonic-gate 	if (error)
2847c478bd9Sstevel@tonic-gate 		return (ENOENT);
2857c478bd9Sstevel@tonic-gate 
2867c478bd9Sstevel@tonic-gate 	if (ip->i_flag & (IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG)) {
2877c478bd9Sstevel@tonic-gate 		rw_enter(&ip->i_contents, RW_READER);
2887c478bd9Sstevel@tonic-gate 		ufs_iupdat(ip, 1);
2897c478bd9Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
2907c478bd9Sstevel@tonic-gate 		VN_RELE(ITOV(ip));
2917c478bd9Sstevel@tonic-gate 		return (0);
2927c478bd9Sstevel@tonic-gate 	}
2937c478bd9Sstevel@tonic-gate 	VN_RELE(ITOV(ip));
2947c478bd9Sstevel@tonic-gate 	return (ENOENT);
2957c478bd9Sstevel@tonic-gate }
2967c478bd9Sstevel@tonic-gate 
2977c478bd9Sstevel@tonic-gate #ifdef DEBUG
2987c478bd9Sstevel@tonic-gate /*
2997c478bd9Sstevel@tonic-gate  *	These routines maintain the metadata map (matamap)
3007c478bd9Sstevel@tonic-gate  */
3017c478bd9Sstevel@tonic-gate 
3027c478bd9Sstevel@tonic-gate /*
3037c478bd9Sstevel@tonic-gate  * update the metadata map at mount
3047c478bd9Sstevel@tonic-gate  */
3057c478bd9Sstevel@tonic-gate static int
3067c478bd9Sstevel@tonic-gate ufs_trans_mata_mount_scan(struct inode *ip, void *arg)
3077c478bd9Sstevel@tonic-gate {
3087c478bd9Sstevel@tonic-gate 	/*
3097c478bd9Sstevel@tonic-gate 	 * wrong file system; keep looking
3107c478bd9Sstevel@tonic-gate 	 */
3117c478bd9Sstevel@tonic-gate 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
3127c478bd9Sstevel@tonic-gate 		return (0);
3137c478bd9Sstevel@tonic-gate 
3147c478bd9Sstevel@tonic-gate 	/*
3157c478bd9Sstevel@tonic-gate 	 * load the metadata map
3167c478bd9Sstevel@tonic-gate 	 */
3177c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
3187c478bd9Sstevel@tonic-gate 	ufs_trans_mata_iget(ip);
3197c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
3207c478bd9Sstevel@tonic-gate 	return (0);
3217c478bd9Sstevel@tonic-gate }
3227c478bd9Sstevel@tonic-gate 
3237c478bd9Sstevel@tonic-gate void
3247c478bd9Sstevel@tonic-gate ufs_trans_mata_mount(struct ufsvfs *ufsvfsp)
3257c478bd9Sstevel@tonic-gate {
3267c478bd9Sstevel@tonic-gate 	struct fs	*fs	= ufsvfsp->vfs_fs;
3277c478bd9Sstevel@tonic-gate 	ino_t		ino;
3287c478bd9Sstevel@tonic-gate 	int		i;
3297c478bd9Sstevel@tonic-gate 
3307c478bd9Sstevel@tonic-gate 	/*
3317c478bd9Sstevel@tonic-gate 	 * put static metadata into matamap
3327c478bd9Sstevel@tonic-gate 	 *	superblock
3337c478bd9Sstevel@tonic-gate 	 *	cylinder groups
3347c478bd9Sstevel@tonic-gate 	 *	inode groups
3357c478bd9Sstevel@tonic-gate 	 *	existing inodes
3367c478bd9Sstevel@tonic-gate 	 */
3377c478bd9Sstevel@tonic-gate 	TRANS_MATAADD(ufsvfsp, ldbtob(SBLOCK), fs->fs_sbsize);
3387c478bd9Sstevel@tonic-gate 
3397c478bd9Sstevel@tonic-gate 	for (ino = i = 0; i < fs->fs_ncg; ++i, ino += fs->fs_ipg) {
3407c478bd9Sstevel@tonic-gate 		TRANS_MATAADD(ufsvfsp,
3417c478bd9Sstevel@tonic-gate 		    ldbtob(fsbtodb(fs, cgtod(fs, i))), fs->fs_cgsize);
3427c478bd9Sstevel@tonic-gate 		TRANS_MATAADD(ufsvfsp,
3437c478bd9Sstevel@tonic-gate 		    ldbtob(fsbtodb(fs, itod(fs, ino))),
3447c478bd9Sstevel@tonic-gate 		    fs->fs_ipg * sizeof (struct dinode));
3457c478bd9Sstevel@tonic-gate 	}
3467c478bd9Sstevel@tonic-gate 	(void) ufs_scan_inodes(0, ufs_trans_mata_mount_scan, ufsvfsp, ufsvfsp);
3477c478bd9Sstevel@tonic-gate }
3487c478bd9Sstevel@tonic-gate 
/*
 * Clear the metadata map at umount.  This is a thin wrapper: all of the
 * work is done by top_mataclr() for the given file system.
 */
void
ufs_trans_mata_umount(struct ufsvfs *ufsvfsp)
{
	top_mataclr(ufsvfsp);
}
3577c478bd9Sstevel@tonic-gate 
3587c478bd9Sstevel@tonic-gate /*
3597c478bd9Sstevel@tonic-gate  * summary info (may be extended during growfs test)
3607c478bd9Sstevel@tonic-gate  */
3617c478bd9Sstevel@tonic-gate void
3627c478bd9Sstevel@tonic-gate ufs_trans_mata_si(struct ufsvfs *ufsvfsp, struct fs *fs)
3637c478bd9Sstevel@tonic-gate {
3647c478bd9Sstevel@tonic-gate 	TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, fs->fs_csaddr)),
3657c478bd9Sstevel@tonic-gate 	    fs->fs_cssize);
3667c478bd9Sstevel@tonic-gate }
3677c478bd9Sstevel@tonic-gate 
3687c478bd9Sstevel@tonic-gate /*
3697c478bd9Sstevel@tonic-gate  * scan an allocation block (either inode or true block)
3707c478bd9Sstevel@tonic-gate  */
3717c478bd9Sstevel@tonic-gate static void
3727c478bd9Sstevel@tonic-gate ufs_trans_mata_direct(
3737c478bd9Sstevel@tonic-gate 	struct inode *ip,
3747c478bd9Sstevel@tonic-gate 	daddr_t *fragsp,
3757c478bd9Sstevel@tonic-gate 	daddr32_t *blkp,
3767c478bd9Sstevel@tonic-gate 	unsigned int nblk)
3777c478bd9Sstevel@tonic-gate {
3787c478bd9Sstevel@tonic-gate 	int		i;
3797c478bd9Sstevel@tonic-gate 	daddr_t		frag;
3807c478bd9Sstevel@tonic-gate 	ulong_t		nb;
3817c478bd9Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
3827c478bd9Sstevel@tonic-gate 	struct fs	*fs		= ufsvfsp->vfs_fs;
3837c478bd9Sstevel@tonic-gate 
3847c478bd9Sstevel@tonic-gate 	for (i = 0; i < nblk && *fragsp; ++i, ++blkp)
3857c478bd9Sstevel@tonic-gate 		if ((frag = *blkp) != 0) {
3867c478bd9Sstevel@tonic-gate 			if (*fragsp > fs->fs_frag) {
3877c478bd9Sstevel@tonic-gate 				nb = fs->fs_bsize;
3887c478bd9Sstevel@tonic-gate 				*fragsp -= fs->fs_frag;
3897c478bd9Sstevel@tonic-gate 			} else {
3907c478bd9Sstevel@tonic-gate 				nb = *fragsp * fs->fs_fsize;
3917c478bd9Sstevel@tonic-gate 				*fragsp = 0;
3927c478bd9Sstevel@tonic-gate 			}
3937c478bd9Sstevel@tonic-gate 			TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, frag)), nb);
3947c478bd9Sstevel@tonic-gate 		}
3957c478bd9Sstevel@tonic-gate }
3967c478bd9Sstevel@tonic-gate 
3977c478bd9Sstevel@tonic-gate /*
3987c478bd9Sstevel@tonic-gate  * scan an indirect allocation block (either inode or true block)
3997c478bd9Sstevel@tonic-gate  */
4007c478bd9Sstevel@tonic-gate static void
4017c478bd9Sstevel@tonic-gate ufs_trans_mata_indir(
4027c478bd9Sstevel@tonic-gate 	struct inode *ip,
4037c478bd9Sstevel@tonic-gate 	daddr_t *fragsp,
4047c478bd9Sstevel@tonic-gate 	daddr_t frag,
4057c478bd9Sstevel@tonic-gate 	int level)
4067c478bd9Sstevel@tonic-gate {
4077c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp	= ip->i_ufsvfs;
4087c478bd9Sstevel@tonic-gate 	struct fs *fs = ufsvfsp->vfs_fs;
4097c478bd9Sstevel@tonic-gate 	int ne = fs->fs_bsize / (int)sizeof (daddr32_t);
4107c478bd9Sstevel@tonic-gate 	int i;
4117c478bd9Sstevel@tonic-gate 	struct buf *bp;
4127c478bd9Sstevel@tonic-gate 	daddr32_t *blkp;
4137c478bd9Sstevel@tonic-gate 	o_mode_t ifmt = ip->i_mode & IFMT;
4147c478bd9Sstevel@tonic-gate 
4157c478bd9Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, fsbtodb(fs, frag), fs->fs_bsize);
4167c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR) {
4177c478bd9Sstevel@tonic-gate 		brelse(bp);
4187c478bd9Sstevel@tonic-gate 		return;
4197c478bd9Sstevel@tonic-gate 	}
4207c478bd9Sstevel@tonic-gate 	blkp = bp->b_un.b_daddr;
4217c478bd9Sstevel@tonic-gate 
4227c478bd9Sstevel@tonic-gate 	if (level || (ifmt == IFDIR) || (ifmt == IFSHAD) ||
4237c478bd9Sstevel@tonic-gate 	    (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod))
4247c478bd9Sstevel@tonic-gate 		ufs_trans_mata_direct(ip, fragsp, blkp, ne);
4257c478bd9Sstevel@tonic-gate 
4267c478bd9Sstevel@tonic-gate 	if (level)
4277c478bd9Sstevel@tonic-gate 		for (i = 0; i < ne && *fragsp; ++i, ++blkp)
4287c478bd9Sstevel@tonic-gate 			ufs_trans_mata_indir(ip, fragsp, *blkp, level-1);
4297c478bd9Sstevel@tonic-gate 	brelse(bp);
4307c478bd9Sstevel@tonic-gate }
4317c478bd9Sstevel@tonic-gate 
4327c478bd9Sstevel@tonic-gate /*
4337c478bd9Sstevel@tonic-gate  * put appropriate metadata into matamap for this inode
4347c478bd9Sstevel@tonic-gate  */
4357c478bd9Sstevel@tonic-gate void
4367c478bd9Sstevel@tonic-gate ufs_trans_mata_iget(struct inode *ip)
4377c478bd9Sstevel@tonic-gate {
4387c478bd9Sstevel@tonic-gate 	int		i;
4397c478bd9Sstevel@tonic-gate 	daddr_t		frags	= dbtofsb(ip->i_fs, ip->i_blocks);
4407c478bd9Sstevel@tonic-gate 	o_mode_t	ifmt 	= ip->i_mode & IFMT;
4417c478bd9Sstevel@tonic-gate 
4427c478bd9Sstevel@tonic-gate 	if (frags && ((ifmt == IFDIR) || (ifmt == IFSHAD) ||
4437c478bd9Sstevel@tonic-gate 	    (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod)))
4447c478bd9Sstevel@tonic-gate 		ufs_trans_mata_direct(ip, &frags, &ip->i_db[0], NDADDR);
4457c478bd9Sstevel@tonic-gate 
4467c478bd9Sstevel@tonic-gate 	if (frags)
4477c478bd9Sstevel@tonic-gate 		ufs_trans_mata_direct(ip, &frags, &ip->i_ib[0], NIADDR);
4487c478bd9Sstevel@tonic-gate 
4497c478bd9Sstevel@tonic-gate 	for (i = 0; i < NIADDR && frags; ++i)
4507c478bd9Sstevel@tonic-gate 		if (ip->i_ib[i])
4517c478bd9Sstevel@tonic-gate 			ufs_trans_mata_indir(ip, &frags, ip->i_ib[i], i);
4527c478bd9Sstevel@tonic-gate }
4537c478bd9Sstevel@tonic-gate 
/*
 * Freeing possible metadata (block of user data): remove the nb bytes at
 * offset mof from the metadata map.  This is a thin wrapper around
 * top_matadel().
 */
void
ufs_trans_mata_free(struct ufsvfs *ufsvfsp, offset_t mof, off_t nb)
{
	top_matadel(ufsvfsp, mof, nb);

}
4637c478bd9Sstevel@tonic-gate 
4647c478bd9Sstevel@tonic-gate /*
4657c478bd9Sstevel@tonic-gate  * allocating metadata
4667c478bd9Sstevel@tonic-gate  */
4677c478bd9Sstevel@tonic-gate void
4687c478bd9Sstevel@tonic-gate ufs_trans_mata_alloc(
4697c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp,
4707c478bd9Sstevel@tonic-gate 	struct inode *ip,
4717c478bd9Sstevel@tonic-gate 	daddr_t frag,
4727c478bd9Sstevel@tonic-gate 	ulong_t nb,
4737c478bd9Sstevel@tonic-gate 	int indir)
4747c478bd9Sstevel@tonic-gate {
4757c478bd9Sstevel@tonic-gate 	struct fs	*fs	= ufsvfsp->vfs_fs;
4767c478bd9Sstevel@tonic-gate 	o_mode_t	ifmt 	= ip->i_mode & IFMT;
4777c478bd9Sstevel@tonic-gate 
4787c478bd9Sstevel@tonic-gate 	if (indir || ((ifmt == IFDIR) || (ifmt == IFSHAD) ||
4797c478bd9Sstevel@tonic-gate 	    (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod)))
4807c478bd9Sstevel@tonic-gate 		TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, frag)), nb);
4817c478bd9Sstevel@tonic-gate }
4827c478bd9Sstevel@tonic-gate 
4837c478bd9Sstevel@tonic-gate #endif /* DEBUG */
4847c478bd9Sstevel@tonic-gate 
4857c478bd9Sstevel@tonic-gate /*
4867c478bd9Sstevel@tonic-gate  * ufs_trans_dir is used to declare a directory delta
4877c478bd9Sstevel@tonic-gate  */
4887c478bd9Sstevel@tonic-gate int
4897c478bd9Sstevel@tonic-gate ufs_trans_dir(struct inode *ip, off_t offset)
4907c478bd9Sstevel@tonic-gate {
4917c478bd9Sstevel@tonic-gate 	daddr_t	bn;
4927c478bd9Sstevel@tonic-gate 	int	contig = 0, error;
4937c478bd9Sstevel@tonic-gate 
4947c478bd9Sstevel@tonic-gate 	ASSERT(ip);
4957c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
4967c478bd9Sstevel@tonic-gate 	error = bmap_read(ip, (u_offset_t)offset, &bn, &contig);
4977c478bd9Sstevel@tonic-gate 	if (error || (bn == UFS_HOLE)) {
4987c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "ufs_trans_dir - could not get block"
4997c478bd9Sstevel@tonic-gate 		    " number error = %d bn = %d\n", error, (int)bn);
5007c478bd9Sstevel@tonic-gate 		if (error == 0)	/* treat UFS_HOLE as an I/O error */
5017c478bd9Sstevel@tonic-gate 			error = EIO;
5027c478bd9Sstevel@tonic-gate 		return (error);
5037c478bd9Sstevel@tonic-gate 	}
5047c478bd9Sstevel@tonic-gate 	TRANS_DELTA(ip->i_ufsvfs, ldbtob(bn), DIRBLKSIZ, DT_DIR, 0, 0);
5057c478bd9Sstevel@tonic-gate 	return (error);
5067c478bd9Sstevel@tonic-gate }
5077c478bd9Sstevel@tonic-gate 
5087c478bd9Sstevel@tonic-gate /*ARGSUSED*/
5097c478bd9Sstevel@tonic-gate int
5107c478bd9Sstevel@tonic-gate ufs_trans_push_quota(ufsvfs_t *ufsvfsp, delta_t dtyp, struct dquot *dqp)
5117c478bd9Sstevel@tonic-gate {
5127c478bd9Sstevel@tonic-gate 	/*
5137c478bd9Sstevel@tonic-gate 	 * Lock the quota subsystem (ufsvfsp can be NULL
5147c478bd9Sstevel@tonic-gate 	 * if the DQ_ERROR is set).
5157c478bd9Sstevel@tonic-gate 	 */
5167c478bd9Sstevel@tonic-gate 	if (ufsvfsp)
5177c478bd9Sstevel@tonic-gate 		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
5187c478bd9Sstevel@tonic-gate 	mutex_enter(&dqp->dq_lock);
5197c478bd9Sstevel@tonic-gate 
5207c478bd9Sstevel@tonic-gate 	/*
5217c478bd9Sstevel@tonic-gate 	 * If this transaction has been cancelled by closedq_scan_inode(),
5227c478bd9Sstevel@tonic-gate 	 * then bail out now.  We don't call dqput() in this case because
5237c478bd9Sstevel@tonic-gate 	 * it has already been done.
5247c478bd9Sstevel@tonic-gate 	 */
5257c478bd9Sstevel@tonic-gate 	if ((dqp->dq_flags & DQ_TRANS) == 0) {
5267c478bd9Sstevel@tonic-gate 		mutex_exit(&dqp->dq_lock);
5277c478bd9Sstevel@tonic-gate 		if (ufsvfsp)
5287c478bd9Sstevel@tonic-gate 			rw_exit(&ufsvfsp->vfs_dqrwlock);
5297c478bd9Sstevel@tonic-gate 		return (0);
5307c478bd9Sstevel@tonic-gate 	}
5317c478bd9Sstevel@tonic-gate 
5327c478bd9Sstevel@tonic-gate 	if (dqp->dq_flags & DQ_ERROR) {
5337c478bd9Sstevel@tonic-gate 		/*
5347c478bd9Sstevel@tonic-gate 		 * Paranoia to make sure that there is at least one
5357c478bd9Sstevel@tonic-gate 		 * reference to the dquot struct.  We are done with
5367c478bd9Sstevel@tonic-gate 		 * the dquot (due to an error) so clear logging
5377c478bd9Sstevel@tonic-gate 		 * specific markers.
5387c478bd9Sstevel@tonic-gate 		 */
5397c478bd9Sstevel@tonic-gate 		ASSERT(dqp->dq_cnt >= 1);
5407c478bd9Sstevel@tonic-gate 		dqp->dq_flags &= ~DQ_TRANS;
5417c478bd9Sstevel@tonic-gate 		dqput(dqp);
5427c478bd9Sstevel@tonic-gate 		mutex_exit(&dqp->dq_lock);
5437c478bd9Sstevel@tonic-gate 		if (ufsvfsp)
5447c478bd9Sstevel@tonic-gate 			rw_exit(&ufsvfsp->vfs_dqrwlock);
5457c478bd9Sstevel@tonic-gate 		return (1);
5467c478bd9Sstevel@tonic-gate 	}
5477c478bd9Sstevel@tonic-gate 
5487c478bd9Sstevel@tonic-gate 	if (dqp->dq_flags & (DQ_MOD | DQ_BLKS | DQ_FILES)) {
5497c478bd9Sstevel@tonic-gate 		ASSERT((dqp->dq_mof != UFS_HOLE) && (dqp->dq_mof != 0));
5507c478bd9Sstevel@tonic-gate 		TRANS_LOG(ufsvfsp, (caddr_t)&dqp->dq_dqb,
5517c478bd9Sstevel@tonic-gate 		    dqp->dq_mof, (int)sizeof (struct dqblk), NULL, 0);
5527c478bd9Sstevel@tonic-gate 		/*
5537c478bd9Sstevel@tonic-gate 		 * Paranoia to make sure that there is at least one
5547c478bd9Sstevel@tonic-gate 		 * reference to the dquot struct.  Clear the
5557c478bd9Sstevel@tonic-gate 		 * modification flag because the operation is now in
5567c478bd9Sstevel@tonic-gate 		 * the log.  Also clear the logging specific markers
5577c478bd9Sstevel@tonic-gate 		 * that were set in ufs_trans_quota().
5587c478bd9Sstevel@tonic-gate 		 */
5597c478bd9Sstevel@tonic-gate 		ASSERT(dqp->dq_cnt >= 1);
5607c478bd9Sstevel@tonic-gate 		dqp->dq_flags &= ~(DQ_MOD | DQ_TRANS);
5617c478bd9Sstevel@tonic-gate 		dqput(dqp);
5627c478bd9Sstevel@tonic-gate 	}
5637c478bd9Sstevel@tonic-gate 
5647c478bd9Sstevel@tonic-gate 	/*
5657c478bd9Sstevel@tonic-gate 	 * At this point, the logging specific flag should be clear,
5667c478bd9Sstevel@tonic-gate 	 * but add paranoia just in case something has gone wrong.
5677c478bd9Sstevel@tonic-gate 	 */
5687c478bd9Sstevel@tonic-gate 	ASSERT((dqp->dq_flags & DQ_TRANS) == 0);
5697c478bd9Sstevel@tonic-gate 	mutex_exit(&dqp->dq_lock);
5707c478bd9Sstevel@tonic-gate 	if (ufsvfsp)
5717c478bd9Sstevel@tonic-gate 		rw_exit(&ufsvfsp->vfs_dqrwlock);
5727c478bd9Sstevel@tonic-gate 	return (0);
5737c478bd9Sstevel@tonic-gate }
5747c478bd9Sstevel@tonic-gate 
5757c478bd9Sstevel@tonic-gate /*
5767c478bd9Sstevel@tonic-gate  * ufs_trans_quota take in a uid, allocates the disk space, placing the
5777c478bd9Sstevel@tonic-gate  * quota record into the metamap, then declares the delta.
5787c478bd9Sstevel@tonic-gate  */
5797c478bd9Sstevel@tonic-gate /*ARGSUSED*/
5807c478bd9Sstevel@tonic-gate void
5817c478bd9Sstevel@tonic-gate ufs_trans_quota(struct dquot *dqp)
5827c478bd9Sstevel@tonic-gate {
5837c478bd9Sstevel@tonic-gate 
5847c478bd9Sstevel@tonic-gate 	struct inode	*qip = dqp->dq_ufsvfsp->vfs_qinod;
5857c478bd9Sstevel@tonic-gate 
5867c478bd9Sstevel@tonic-gate 	ASSERT(qip);
5877c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&dqp->dq_lock));
5887c478bd9Sstevel@tonic-gate 	ASSERT(dqp->dq_flags & DQ_MOD);
5897c478bd9Sstevel@tonic-gate 	ASSERT(dqp->dq_mof != 0);
5907c478bd9Sstevel@tonic-gate 	ASSERT(dqp->dq_mof != UFS_HOLE);
5917c478bd9Sstevel@tonic-gate 
5927c478bd9Sstevel@tonic-gate 	/*
5937c478bd9Sstevel@tonic-gate 	 * Mark this dquot to indicate that we are starting a logging
5947c478bd9Sstevel@tonic-gate 	 * file system operation for this dquot.  Also increment the
5957c478bd9Sstevel@tonic-gate 	 * reference count so that the dquot does not get reused while
5967c478bd9Sstevel@tonic-gate 	 * it is on the mapentry_t list.  DQ_TRANS is cleared and the
5977c478bd9Sstevel@tonic-gate 	 * reference count is decremented by ufs_trans_push_quota.
5987c478bd9Sstevel@tonic-gate 	 *
5997c478bd9Sstevel@tonic-gate 	 * If the file system is force-unmounted while there is a
6007c478bd9Sstevel@tonic-gate 	 * pending quota transaction, then closedq_scan_inode() will
6017c478bd9Sstevel@tonic-gate 	 * clear the DQ_TRANS flag and decrement the reference count.
6027c478bd9Sstevel@tonic-gate 	 *
6037c478bd9Sstevel@tonic-gate 	 * Since deltamap_add() drops multiple transactions to the
6047c478bd9Sstevel@tonic-gate 	 * same dq_mof and ufs_trans_push_quota() won't get called,
6057c478bd9Sstevel@tonic-gate 	 * we use DQ_TRANS to prevent repeat transactions from
6067c478bd9Sstevel@tonic-gate 	 * incrementing the reference count (or calling TRANS_DELTA()).
6077c478bd9Sstevel@tonic-gate 	 */
6087c478bd9Sstevel@tonic-gate 	if ((dqp->dq_flags & DQ_TRANS) == 0) {
6097c478bd9Sstevel@tonic-gate 		dqp->dq_flags |= DQ_TRANS;
6107c478bd9Sstevel@tonic-gate 		dqp->dq_cnt++;
6117c478bd9Sstevel@tonic-gate 		TRANS_DELTA(qip->i_ufsvfs, dqp->dq_mof, sizeof (struct dqblk),
6127c478bd9Sstevel@tonic-gate 		    DT_QR, ufs_trans_push_quota, (ulong_t)dqp);
6137c478bd9Sstevel@tonic-gate 	}
6147c478bd9Sstevel@tonic-gate }
6157c478bd9Sstevel@tonic-gate 
6167c478bd9Sstevel@tonic-gate void
6177c478bd9Sstevel@tonic-gate ufs_trans_dqrele(struct dquot *dqp)
6187c478bd9Sstevel@tonic-gate {
6197c478bd9Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp = dqp->dq_ufsvfsp;
6207c478bd9Sstevel@tonic-gate 
6217c478bd9Sstevel@tonic-gate 	curthread->t_flag |= T_DONTBLOCK;
6227c478bd9Sstevel@tonic-gate 	TRANS_BEGIN_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
6237c478bd9Sstevel@tonic-gate 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
6247c478bd9Sstevel@tonic-gate 	dqrele(dqp);
6257c478bd9Sstevel@tonic-gate 	rw_exit(&ufsvfsp->vfs_dqrwlock);
6267c478bd9Sstevel@tonic-gate 	TRANS_END_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
6277c478bd9Sstevel@tonic-gate 	curthread->t_flag &= ~T_DONTBLOCK;
6287c478bd9Sstevel@tonic-gate }
6297c478bd9Sstevel@tonic-gate 
6307c478bd9Sstevel@tonic-gate int ufs_trans_max_resv = TOP_MAX_RESV;	/* will be adjusted for testing */
6317c478bd9Sstevel@tonic-gate long ufs_trans_avgbfree = 0;		/* will be adjusted for testing */
6327c478bd9Sstevel@tonic-gate #define	TRANS_MAX_WRITE	(1024 * 1024)
6337c478bd9Sstevel@tonic-gate size_t ufs_trans_max_resid = TRANS_MAX_WRITE;
6347c478bd9Sstevel@tonic-gate 
6357c478bd9Sstevel@tonic-gate /*
6367c478bd9Sstevel@tonic-gate  * Calculate the log reservation for the given write or truncate
6377c478bd9Sstevel@tonic-gate  */
6387c478bd9Sstevel@tonic-gate static ulong_t
6397c478bd9Sstevel@tonic-gate ufs_log_amt(struct inode *ip, offset_t offset, ssize_t resid, int trunc)
6407c478bd9Sstevel@tonic-gate {
6417c478bd9Sstevel@tonic-gate 	long		ncg, last2blk;
6427c478bd9Sstevel@tonic-gate 	long		niblk		= 0;
6437c478bd9Sstevel@tonic-gate 	u_offset_t	writeend, offblk;
6447c478bd9Sstevel@tonic-gate 	int		resv;
6457c478bd9Sstevel@tonic-gate 	daddr_t		nblk, maxfblk;
6467c478bd9Sstevel@tonic-gate 	long		avgbfree;
6477c478bd9Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
6487c478bd9Sstevel@tonic-gate 	struct fs	*fs		= ufsvfsp->vfs_fs;
6497c478bd9Sstevel@tonic-gate 	long		fni		= NINDIR(fs);
6507c478bd9Sstevel@tonic-gate 	int		bsize		= fs->fs_bsize;
6517c478bd9Sstevel@tonic-gate 
6527c478bd9Sstevel@tonic-gate 	/*
6537c478bd9Sstevel@tonic-gate 	 * Assume that the request will fit in 1 or 2 cg's,
6547c478bd9Sstevel@tonic-gate 	 * resv is the amount of log space to reserve (in bytes).
6557c478bd9Sstevel@tonic-gate 	 */
6567c478bd9Sstevel@tonic-gate 	resv = SIZECG(ip) * 2 + INODESIZE + 1024;
6577c478bd9Sstevel@tonic-gate 
6587c478bd9Sstevel@tonic-gate 	/*
6597c478bd9Sstevel@tonic-gate 	 * get max position of write in fs blocks
6607c478bd9Sstevel@tonic-gate 	 */
6617c478bd9Sstevel@tonic-gate 	writeend = offset + resid;
6627c478bd9Sstevel@tonic-gate 	maxfblk = lblkno(fs, writeend);
6637c478bd9Sstevel@tonic-gate 	offblk = lblkno(fs, offset);
6647c478bd9Sstevel@tonic-gate 	/*
6657c478bd9Sstevel@tonic-gate 	 * request size in fs blocks
6667c478bd9Sstevel@tonic-gate 	 */
6677c478bd9Sstevel@tonic-gate 	nblk = lblkno(fs, blkroundup(fs, resid));
6687c478bd9Sstevel@tonic-gate 	/*
6697c478bd9Sstevel@tonic-gate 	 * Adjust for sparse files
6707c478bd9Sstevel@tonic-gate 	 */
6717c478bd9Sstevel@tonic-gate 	if (trunc)
6727c478bd9Sstevel@tonic-gate 		nblk = MIN(nblk, ip->i_blocks);
6737c478bd9Sstevel@tonic-gate 
6747c478bd9Sstevel@tonic-gate 	/*
6757c478bd9Sstevel@tonic-gate 	 * Adjust avgbfree (for testing)
6767c478bd9Sstevel@tonic-gate 	 */
6777c478bd9Sstevel@tonic-gate 	avgbfree = (ufs_trans_avgbfree) ? 1 : ufsvfsp->vfs_avgbfree + 1;
6787c478bd9Sstevel@tonic-gate 
6797c478bd9Sstevel@tonic-gate 	/*
6807c478bd9Sstevel@tonic-gate 	 * Calculate maximum number of blocks of triple indirect
6817c478bd9Sstevel@tonic-gate 	 * pointers to write.
6827c478bd9Sstevel@tonic-gate 	 */
6837c478bd9Sstevel@tonic-gate 	last2blk = NDADDR + fni + fni * fni;
6847c478bd9Sstevel@tonic-gate 	if (maxfblk > last2blk) {
6857c478bd9Sstevel@tonic-gate 		long nl2ptr;
6867c478bd9Sstevel@tonic-gate 		long n3blk;
6877c478bd9Sstevel@tonic-gate 
6887c478bd9Sstevel@tonic-gate 		if (offblk > last2blk)
6897c478bd9Sstevel@tonic-gate 			n3blk = maxfblk - offblk;
6907c478bd9Sstevel@tonic-gate 		else
6917c478bd9Sstevel@tonic-gate 			n3blk = maxfblk - last2blk;
6927c478bd9Sstevel@tonic-gate 		niblk += roundup(n3blk * sizeof (daddr_t), bsize) / bsize + 1;
6937c478bd9Sstevel@tonic-gate 		nl2ptr = roundup(niblk, fni) / fni + 1;
6947c478bd9Sstevel@tonic-gate 		niblk += roundup(nl2ptr * sizeof (daddr_t), bsize) / bsize + 2;
6957c478bd9Sstevel@tonic-gate 		maxfblk -= n3blk;
6967c478bd9Sstevel@tonic-gate 	}
6977c478bd9Sstevel@tonic-gate 	/*
6987c478bd9Sstevel@tonic-gate 	 * calculate maximum number of blocks of double indirect
6997c478bd9Sstevel@tonic-gate 	 * pointers to write.
7007c478bd9Sstevel@tonic-gate 	 */
7017c478bd9Sstevel@tonic-gate 	if (maxfblk > NDADDR + fni) {
7027c478bd9Sstevel@tonic-gate 		long n2blk;
7037c478bd9Sstevel@tonic-gate 
7047c478bd9Sstevel@tonic-gate 		if (offblk > NDADDR + fni)
7057c478bd9Sstevel@tonic-gate 			n2blk = maxfblk - offblk;
7067c478bd9Sstevel@tonic-gate 		else
7077c478bd9Sstevel@tonic-gate 			n2blk = maxfblk - NDADDR + fni;
7087c478bd9Sstevel@tonic-gate 		niblk += roundup(n2blk * sizeof (daddr_t), bsize) / bsize + 2;
7097c478bd9Sstevel@tonic-gate 		maxfblk -= n2blk;
7107c478bd9Sstevel@tonic-gate 	}
7117c478bd9Sstevel@tonic-gate 	/*
7127c478bd9Sstevel@tonic-gate 	 * Add in indirect pointer block write
7137c478bd9Sstevel@tonic-gate 	 */
7147c478bd9Sstevel@tonic-gate 	if (maxfblk > NDADDR) {
7157c478bd9Sstevel@tonic-gate 		niblk += 1;
7167c478bd9Sstevel@tonic-gate 	}
7177c478bd9Sstevel@tonic-gate 	/*
7187c478bd9Sstevel@tonic-gate 	 * Calculate deltas for indirect pointer writes
7197c478bd9Sstevel@tonic-gate 	 */
7207c478bd9Sstevel@tonic-gate 	resv += niblk * (fs->fs_bsize + sizeof (struct delta));
7217c478bd9Sstevel@tonic-gate 	/*
7227c478bd9Sstevel@tonic-gate 	 * maximum number of cg's needed for request
7237c478bd9Sstevel@tonic-gate 	 */
7247c478bd9Sstevel@tonic-gate 	ncg = nblk / avgbfree;
7257c478bd9Sstevel@tonic-gate 	if (ncg > fs->fs_ncg)
7267c478bd9Sstevel@tonic-gate 		ncg = fs->fs_ncg;
7277c478bd9Sstevel@tonic-gate 
7287c478bd9Sstevel@tonic-gate 	/*
7297c478bd9Sstevel@tonic-gate 	 * maximum amount of log space needed for request
7307c478bd9Sstevel@tonic-gate 	 */
7317c478bd9Sstevel@tonic-gate 	if (ncg > 2)
7327c478bd9Sstevel@tonic-gate 		resv += (ncg - 2) * SIZECG(ip);
7337c478bd9Sstevel@tonic-gate 
7347c478bd9Sstevel@tonic-gate 	return (resv);
7357c478bd9Sstevel@tonic-gate }
7367c478bd9Sstevel@tonic-gate 
7377c478bd9Sstevel@tonic-gate /*
7387c478bd9Sstevel@tonic-gate  * Calculate the amount of log space that needs to be reserved for this
7397c478bd9Sstevel@tonic-gate  * trunc request.  If the amount of log space is too large, then
7407c478bd9Sstevel@tonic-gate  * calculate the the size that the requests needs to be split into.
7417c478bd9Sstevel@tonic-gate  */
742303bf60bSsdebnath void
7437c478bd9Sstevel@tonic-gate ufs_trans_trunc_resv(
7447c478bd9Sstevel@tonic-gate 	struct inode *ip,
7457c478bd9Sstevel@tonic-gate 	u_offset_t length,
7467c478bd9Sstevel@tonic-gate 	int *resvp,
7477c478bd9Sstevel@tonic-gate 	u_offset_t *residp)
7487c478bd9Sstevel@tonic-gate {
7497c478bd9Sstevel@tonic-gate 	ulong_t		resv;
7507c478bd9Sstevel@tonic-gate 	u_offset_t	size, offset, resid;
751*1e13ea4bSvsakar 	int		nchunks, incr;
752*1e13ea4bSvsakar 	int		is_sparse = 0;
7537c478bd9Sstevel@tonic-gate 
7547c478bd9Sstevel@tonic-gate 	/*
7557c478bd9Sstevel@tonic-gate 	 *    *resvp is the amount of log space to reserve (in bytes).
7567c478bd9Sstevel@tonic-gate 	 *    when nonzero, *residp is the number of bytes to truncate.
7577c478bd9Sstevel@tonic-gate 	 */
7587c478bd9Sstevel@tonic-gate 	*residp = 0;
7597c478bd9Sstevel@tonic-gate 
7607c478bd9Sstevel@tonic-gate 	if (length < ip->i_size) {
7617c478bd9Sstevel@tonic-gate 		size = ip->i_size - length;
7627c478bd9Sstevel@tonic-gate 	} else {
7637c478bd9Sstevel@tonic-gate 		resv = SIZECG(ip) * 2 + INODESIZE + 1024;
7647c478bd9Sstevel@tonic-gate 		/*
7657c478bd9Sstevel@tonic-gate 		 * truncate up, doesn't really use much space,
7667c478bd9Sstevel@tonic-gate 		 * the default above should be sufficient.
7677c478bd9Sstevel@tonic-gate 		 */
7687c478bd9Sstevel@tonic-gate 		goto done;
7697c478bd9Sstevel@tonic-gate 	}
7707c478bd9Sstevel@tonic-gate 
771*1e13ea4bSvsakar 	/*
772*1e13ea4bSvsakar 	 * There is no need to split sparse file truncation into
773*1e13ea4bSvsakar 	 * as many chunks as that of regular files.
774*1e13ea4bSvsakar 	 */
775*1e13ea4bSvsakar 	is_sparse = bmap_has_holes(ip);
776*1e13ea4bSvsakar 
7777c478bd9Sstevel@tonic-gate 	offset = length;
7787c478bd9Sstevel@tonic-gate 	resid = size;
7797c478bd9Sstevel@tonic-gate 	nchunks = 1;
780*1e13ea4bSvsakar 	incr = 0;
781*1e13ea4bSvsakar 
782*1e13ea4bSvsakar 	do {
783*1e13ea4bSvsakar 		resv = ufs_log_amt(ip, offset, resid, 1);
7847c478bd9Sstevel@tonic-gate 		/*
785*1e13ea4bSvsakar 		 * If this is the first iteration, set "incr".
7867c478bd9Sstevel@tonic-gate 		 */
787*1e13ea4bSvsakar 		if (!incr) {
788*1e13ea4bSvsakar 			/*
789*1e13ea4bSvsakar 			 * If this request takes too much log space,
790*1e13ea4bSvsakar 			 * it will be split into "nchunks". If this split
791*1e13ea4bSvsakar 			 * is not enough, linearly increment the nchunks in
792*1e13ea4bSvsakar 			 * the next iteration.
793*1e13ea4bSvsakar 			 */
794*1e13ea4bSvsakar 			if (resv > ufs_trans_max_resv && !is_sparse) {
795*1e13ea4bSvsakar 				nchunks = MAX(size/ufs_trans_max_resv, 1);
796*1e13ea4bSvsakar 				incr = nchunks;
797*1e13ea4bSvsakar 			} else {
798*1e13ea4bSvsakar 				incr = 1;
799*1e13ea4bSvsakar 			}
800*1e13ea4bSvsakar 		} else
801*1e13ea4bSvsakar 			nchunks += incr;
802*1e13ea4bSvsakar 		resid = size / nchunks;
803*1e13ea4bSvsakar 		offset = length + (nchunks - 1) * resid;
804*1e13ea4bSvsakar 	} while (resv > ufs_trans_max_resv);
805*1e13ea4bSvsakar 
8067c478bd9Sstevel@tonic-gate 	if (nchunks > 1) {
8077c478bd9Sstevel@tonic-gate 		*residp = resid;
8087c478bd9Sstevel@tonic-gate 	}
8097c478bd9Sstevel@tonic-gate done:
8107c478bd9Sstevel@tonic-gate 	*resvp = resv;
8117c478bd9Sstevel@tonic-gate }
8127c478bd9Sstevel@tonic-gate 
8137c478bd9Sstevel@tonic-gate int
8147c478bd9Sstevel@tonic-gate ufs_trans_itrunc(struct inode *ip, u_offset_t length, int flags, cred_t *cr)
8157c478bd9Sstevel@tonic-gate {
8167c478bd9Sstevel@tonic-gate 	int 		err, issync, resv;
8177c478bd9Sstevel@tonic-gate 	u_offset_t	resid;
8187c478bd9Sstevel@tonic-gate 	int		do_block	= 0;
8197c478bd9Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
8207c478bd9Sstevel@tonic-gate 	struct fs	*fs		= ufsvfsp->vfs_fs;
8217c478bd9Sstevel@tonic-gate 
8227c478bd9Sstevel@tonic-gate 	/*
8237c478bd9Sstevel@tonic-gate 	 * Not logging; just do the trunc
8247c478bd9Sstevel@tonic-gate 	 */
8257c478bd9Sstevel@tonic-gate 	if (!TRANS_ISTRANS(ufsvfsp)) {
8267c478bd9Sstevel@tonic-gate 		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
8277c478bd9Sstevel@tonic-gate 		rw_enter(&ip->i_contents, RW_WRITER);
8287c478bd9Sstevel@tonic-gate 		err = ufs_itrunc(ip, length, flags, cr);
8297c478bd9Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
8307c478bd9Sstevel@tonic-gate 		rw_exit(&ufsvfsp->vfs_dqrwlock);
8317c478bd9Sstevel@tonic-gate 		return (err);
8327c478bd9Sstevel@tonic-gate 	}
8337c478bd9Sstevel@tonic-gate 
8347c478bd9Sstevel@tonic-gate 	/*
8357c478bd9Sstevel@tonic-gate 	 * within the lockfs protocol but *not* part of a transaction
8367c478bd9Sstevel@tonic-gate 	 */
8377c478bd9Sstevel@tonic-gate 	do_block = curthread->t_flag & T_DONTBLOCK;
8387c478bd9Sstevel@tonic-gate 	curthread->t_flag |= T_DONTBLOCK;
8397c478bd9Sstevel@tonic-gate 
8407c478bd9Sstevel@tonic-gate 	/*
8417c478bd9Sstevel@tonic-gate 	 * Trunc the file (in pieces, if necessary)
8427c478bd9Sstevel@tonic-gate 	 */
8437c478bd9Sstevel@tonic-gate again:
8447c478bd9Sstevel@tonic-gate 	ufs_trans_trunc_resv(ip, length, &resv, &resid);
8457c478bd9Sstevel@tonic-gate 	TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ITRUNC, resv);
8467c478bd9Sstevel@tonic-gate 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
8477c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
8487c478bd9Sstevel@tonic-gate 	if (resid) {
8497c478bd9Sstevel@tonic-gate 		/*
8507c478bd9Sstevel@tonic-gate 		 * resid is only set if we have to truncate in chunks
8517c478bd9Sstevel@tonic-gate 		 */
8527c478bd9Sstevel@tonic-gate 		ASSERT(length + resid < ip->i_size);
8537c478bd9Sstevel@tonic-gate 
8547c478bd9Sstevel@tonic-gate 		/*
8557c478bd9Sstevel@tonic-gate 		 * Partially trunc file down to desired size (length).
8567c478bd9Sstevel@tonic-gate 		 * Only retain I_FREE on the last partial trunc.
8577c478bd9Sstevel@tonic-gate 		 * Round up size to a block boundary, to ensure the truncate
8587c478bd9Sstevel@tonic-gate 		 * doesn't have to allocate blocks. This is done both for
8597c478bd9Sstevel@tonic-gate 		 * performance and to fix a bug where if the block can't be
8607c478bd9Sstevel@tonic-gate 		 * allocated then the inode delete fails, but the inode
8617c478bd9Sstevel@tonic-gate 		 * is still freed with attached blocks and non-zero size
8627c478bd9Sstevel@tonic-gate 		 * (bug 4348738).
8637c478bd9Sstevel@tonic-gate 		 */
8647c478bd9Sstevel@tonic-gate 		err = ufs_itrunc(ip, blkroundup(fs, (ip->i_size - resid)),
8657c478bd9Sstevel@tonic-gate 		    flags & ~I_FREE, cr);
8667c478bd9Sstevel@tonic-gate 		ASSERT(ip->i_size != length);
8677c478bd9Sstevel@tonic-gate 	} else
8687c478bd9Sstevel@tonic-gate 		err = ufs_itrunc(ip, length, flags, cr);
8697c478bd9Sstevel@tonic-gate 	if (!do_block)
8707c478bd9Sstevel@tonic-gate 		curthread->t_flag &= ~T_DONTBLOCK;
8717c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
8727c478bd9Sstevel@tonic-gate 	rw_exit(&ufsvfsp->vfs_dqrwlock);
8737c478bd9Sstevel@tonic-gate 	TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ITRUNC, resv);
8747c478bd9Sstevel@tonic-gate 
8757c478bd9Sstevel@tonic-gate 	if ((err == 0) && resid) {
8767c478bd9Sstevel@tonic-gate 		ufsvfsp->vfs_avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
8777c478bd9Sstevel@tonic-gate 		goto again;
8787c478bd9Sstevel@tonic-gate 	}
8797c478bd9Sstevel@tonic-gate 	return (err);
8807c478bd9Sstevel@tonic-gate }
8817c478bd9Sstevel@tonic-gate 
8827c478bd9Sstevel@tonic-gate /*
8837c478bd9Sstevel@tonic-gate  * Fault in the pages of the first n bytes specified by the uio structure.
8847c478bd9Sstevel@tonic-gate  * 1 byte in each page is touched and the uio struct is unmodified.
8857c478bd9Sstevel@tonic-gate  * Any error will terminate the process as this is only a best
8867c478bd9Sstevel@tonic-gate  * attempt to get the pages resident.
8877c478bd9Sstevel@tonic-gate  */
8887c478bd9Sstevel@tonic-gate static void
8897c478bd9Sstevel@tonic-gate ufs_trans_touch(ssize_t n, struct uio *uio)
8907c478bd9Sstevel@tonic-gate {
8917c478bd9Sstevel@tonic-gate 	struct iovec *iov;
8927c478bd9Sstevel@tonic-gate 	ulong_t cnt, incr;
8937c478bd9Sstevel@tonic-gate 	caddr_t p;
8947c478bd9Sstevel@tonic-gate 	uint8_t tmp;
8957c478bd9Sstevel@tonic-gate 
8967c478bd9Sstevel@tonic-gate 	iov = uio->uio_iov;
8977c478bd9Sstevel@tonic-gate 
8987c478bd9Sstevel@tonic-gate 	while (n) {
8997c478bd9Sstevel@tonic-gate 		cnt = MIN(iov->iov_len, n);
9007c478bd9Sstevel@tonic-gate 		if (cnt == 0) {
9017c478bd9Sstevel@tonic-gate 			/* empty iov entry */
9027c478bd9Sstevel@tonic-gate 			iov++;
9037c478bd9Sstevel@tonic-gate 			continue;
9047c478bd9Sstevel@tonic-gate 		}
9057c478bd9Sstevel@tonic-gate 		n -= cnt;
9067c478bd9Sstevel@tonic-gate 		/*
9077c478bd9Sstevel@tonic-gate 		 * touch each page in this segment.
9087c478bd9Sstevel@tonic-gate 		 */
9097c478bd9Sstevel@tonic-gate 		p = iov->iov_base;
9107c478bd9Sstevel@tonic-gate 		while (cnt) {
9117c478bd9Sstevel@tonic-gate 			switch (uio->uio_segflg) {
9127c478bd9Sstevel@tonic-gate 			case UIO_USERSPACE:
9137c478bd9Sstevel@tonic-gate 			case UIO_USERISPACE:
9147c478bd9Sstevel@tonic-gate 				if (fuword8(p, &tmp))
9157c478bd9Sstevel@tonic-gate 					return;
9167c478bd9Sstevel@tonic-gate 				break;
9177c478bd9Sstevel@tonic-gate 			case UIO_SYSSPACE:
9187c478bd9Sstevel@tonic-gate 				if (kcopy(p, &tmp, 1))
9197c478bd9Sstevel@tonic-gate 					return;
9207c478bd9Sstevel@tonic-gate 				break;
9217c478bd9Sstevel@tonic-gate 			}
9227c478bd9Sstevel@tonic-gate 			incr = MIN(cnt, PAGESIZE);
9237c478bd9Sstevel@tonic-gate 			p += incr;
9247c478bd9Sstevel@tonic-gate 			cnt -= incr;
9257c478bd9Sstevel@tonic-gate 		}
9267c478bd9Sstevel@tonic-gate 		/*
9277c478bd9Sstevel@tonic-gate 		 * touch the last byte in case it straddles a page.
9287c478bd9Sstevel@tonic-gate 		 */
9297c478bd9Sstevel@tonic-gate 		p--;
9307c478bd9Sstevel@tonic-gate 		switch (uio->uio_segflg) {
9317c478bd9Sstevel@tonic-gate 		case UIO_USERSPACE:
9327c478bd9Sstevel@tonic-gate 		case UIO_USERISPACE:
9337c478bd9Sstevel@tonic-gate 			if (fuword8(p, &tmp))
9347c478bd9Sstevel@tonic-gate 				return;
9357c478bd9Sstevel@tonic-gate 			break;
9367c478bd9Sstevel@tonic-gate 		case UIO_SYSSPACE:
9377c478bd9Sstevel@tonic-gate 			if (kcopy(p, &tmp, 1))
9387c478bd9Sstevel@tonic-gate 				return;
9397c478bd9Sstevel@tonic-gate 			break;
9407c478bd9Sstevel@tonic-gate 		}
9417c478bd9Sstevel@tonic-gate 		iov++;
9427c478bd9Sstevel@tonic-gate 	}
9437c478bd9Sstevel@tonic-gate }
9447c478bd9Sstevel@tonic-gate 
9457c478bd9Sstevel@tonic-gate /*
9467c478bd9Sstevel@tonic-gate  * Calculate the amount of log space that needs to be reserved for this
9477c478bd9Sstevel@tonic-gate  * write request.  If the amount of log space is too large, then
9487c478bd9Sstevel@tonic-gate  * calculate the size that the requests needs to be split into.
9497c478bd9Sstevel@tonic-gate  * First try fixed chunks of size ufs_trans_max_resid. If that
9507c478bd9Sstevel@tonic-gate  * is too big, iterate down to the largest size that will fit.
9517c478bd9Sstevel@tonic-gate  * Pagein the pages in the first chunk here, so that the pagein is
9527c478bd9Sstevel@tonic-gate  * avoided later when the transaction is open.
9537c478bd9Sstevel@tonic-gate  */
9547c478bd9Sstevel@tonic-gate void
9557c478bd9Sstevel@tonic-gate ufs_trans_write_resv(
9567c478bd9Sstevel@tonic-gate 	struct inode *ip,
9577c478bd9Sstevel@tonic-gate 	struct uio *uio,
9587c478bd9Sstevel@tonic-gate 	int *resvp,
9597c478bd9Sstevel@tonic-gate 	int *residp)
9607c478bd9Sstevel@tonic-gate {
9617c478bd9Sstevel@tonic-gate 	ulong_t		resv;
9627c478bd9Sstevel@tonic-gate 	offset_t	offset;
9637c478bd9Sstevel@tonic-gate 	ssize_t		resid;
9647c478bd9Sstevel@tonic-gate 	int		nchunks;
9657c478bd9Sstevel@tonic-gate 
9667c478bd9Sstevel@tonic-gate 	*residp = 0;
9677c478bd9Sstevel@tonic-gate 	offset = uio->uio_offset;
9687c478bd9Sstevel@tonic-gate 	resid = MIN(uio->uio_resid, ufs_trans_max_resid);
9697c478bd9Sstevel@tonic-gate 	resv = ufs_log_amt(ip, offset, resid, 0);
9707c478bd9Sstevel@tonic-gate 	if (resv <= ufs_trans_max_resv) {
9717c478bd9Sstevel@tonic-gate 		ufs_trans_touch(resid, uio);
9727c478bd9Sstevel@tonic-gate 		if (resid != uio->uio_resid)
9737c478bd9Sstevel@tonic-gate 			*residp = resid;
9747c478bd9Sstevel@tonic-gate 		*resvp = resv;
9757c478bd9Sstevel@tonic-gate 		return;
9767c478bd9Sstevel@tonic-gate 	}
9777c478bd9Sstevel@tonic-gate 
9787c478bd9Sstevel@tonic-gate 	resid = uio->uio_resid;
9797c478bd9Sstevel@tonic-gate 	nchunks = 1;
9807c478bd9Sstevel@tonic-gate 	for (; (resv = ufs_log_amt(ip, offset, resid, 0)) > ufs_trans_max_resv;
9817c478bd9Sstevel@tonic-gate 	    offset = uio->uio_offset + (nchunks - 1) * resid) {
9827c478bd9Sstevel@tonic-gate 		nchunks++;
9837c478bd9Sstevel@tonic-gate 		resid = uio->uio_resid / nchunks;
9847c478bd9Sstevel@tonic-gate 	}
9857c478bd9Sstevel@tonic-gate 	ufs_trans_touch(resid, uio);
9867c478bd9Sstevel@tonic-gate 	/*
9877c478bd9Sstevel@tonic-gate 	 * If this request takes too much log space, it will be split
9887c478bd9Sstevel@tonic-gate 	 */
9897c478bd9Sstevel@tonic-gate 	if (nchunks > 1)
9907c478bd9Sstevel@tonic-gate 		*residp = resid;
9917c478bd9Sstevel@tonic-gate 	*resvp = resv;
9927c478bd9Sstevel@tonic-gate }
9937c478bd9Sstevel@tonic-gate 
9947c478bd9Sstevel@tonic-gate /*
9957c478bd9Sstevel@tonic-gate  * Issue write request.
9967c478bd9Sstevel@tonic-gate  *
9977c478bd9Sstevel@tonic-gate  * Split a large request into smaller chunks.
9987c478bd9Sstevel@tonic-gate  */
9997c478bd9Sstevel@tonic-gate int
10007c478bd9Sstevel@tonic-gate ufs_trans_write(
10017c478bd9Sstevel@tonic-gate 	struct inode *ip,
10027c478bd9Sstevel@tonic-gate 	struct uio *uio,
10037c478bd9Sstevel@tonic-gate 	int ioflag,
10047c478bd9Sstevel@tonic-gate 	cred_t *cr,
10057c478bd9Sstevel@tonic-gate 	int resv,
10067c478bd9Sstevel@tonic-gate 	long resid)
10077c478bd9Sstevel@tonic-gate {
10087c478bd9Sstevel@tonic-gate 	long		realresid;
10097c478bd9Sstevel@tonic-gate 	int		err;
10107c478bd9Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
10117c478bd9Sstevel@tonic-gate 
10127c478bd9Sstevel@tonic-gate 	/*
10137c478bd9Sstevel@tonic-gate 	 * since the write is too big and would "HOG THE LOG" it needs to
10147c478bd9Sstevel@tonic-gate 	 * be broken up and done in pieces.  NOTE, the caller will
10157c478bd9Sstevel@tonic-gate 	 * issue the EOT after the request has been completed
10167c478bd9Sstevel@tonic-gate 	 */
10177c478bd9Sstevel@tonic-gate 	realresid = uio->uio_resid;
10187c478bd9Sstevel@tonic-gate 
10197c478bd9Sstevel@tonic-gate again:
10207c478bd9Sstevel@tonic-gate 	/*
10217c478bd9Sstevel@tonic-gate 	 * Perform partial request (uiomove will update uio for us)
10227c478bd9Sstevel@tonic-gate 	 *	Request is split up into "resid" size chunks until
10237c478bd9Sstevel@tonic-gate 	 *	"realresid" bytes have been transferred.
10247c478bd9Sstevel@tonic-gate 	 */
10257c478bd9Sstevel@tonic-gate 	uio->uio_resid = MIN(resid, realresid);
10267c478bd9Sstevel@tonic-gate 	realresid -= uio->uio_resid;
10277c478bd9Sstevel@tonic-gate 	err = wrip(ip, uio, ioflag, cr);
10287c478bd9Sstevel@tonic-gate 
10297c478bd9Sstevel@tonic-gate 	/*
10307c478bd9Sstevel@tonic-gate 	 * Error or request is done; caller issues final EOT
10317c478bd9Sstevel@tonic-gate 	 */
10327c478bd9Sstevel@tonic-gate 	if (err || uio->uio_resid || (realresid == 0)) {
10337c478bd9Sstevel@tonic-gate 		uio->uio_resid += realresid;
10347c478bd9Sstevel@tonic-gate 		return (err);
10357c478bd9Sstevel@tonic-gate 	}
10367c478bd9Sstevel@tonic-gate 
10377c478bd9Sstevel@tonic-gate 	/*
10387c478bd9Sstevel@tonic-gate 	 * Generate EOT for this part of the request
10397c478bd9Sstevel@tonic-gate 	 */
10407c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
10417c478bd9Sstevel@tonic-gate 	rw_exit(&ufsvfsp->vfs_dqrwlock);
10427c478bd9Sstevel@tonic-gate 	if (ioflag & (FSYNC|FDSYNC)) {
10437c478bd9Sstevel@tonic-gate 		TRANS_END_SYNC(ufsvfsp, err, TOP_WRITE_SYNC, resv);
10447c478bd9Sstevel@tonic-gate 	} else {
10457c478bd9Sstevel@tonic-gate 		TRANS_END_ASYNC(ufsvfsp, TOP_WRITE, resv);
10467c478bd9Sstevel@tonic-gate 	}
10477c478bd9Sstevel@tonic-gate 
10487c478bd9Sstevel@tonic-gate 	/*
10497c478bd9Sstevel@tonic-gate 	 * Make sure the input buffer is resident before starting
10507c478bd9Sstevel@tonic-gate 	 * the next transaction.
10517c478bd9Sstevel@tonic-gate 	 */
10527c478bd9Sstevel@tonic-gate 	ufs_trans_touch(MIN(resid, realresid), uio);
10537c478bd9Sstevel@tonic-gate 
10547c478bd9Sstevel@tonic-gate 	/*
10557c478bd9Sstevel@tonic-gate 	 * Generate BOT for next part of the request
10567c478bd9Sstevel@tonic-gate 	 */
10577c478bd9Sstevel@tonic-gate 	if (ioflag & (FSYNC|FDSYNC)) {
10587c478bd9Sstevel@tonic-gate 		int error;
10597c478bd9Sstevel@tonic-gate 		TRANS_BEGIN_SYNC(ufsvfsp, TOP_WRITE_SYNC, resv, error);
10607c478bd9Sstevel@tonic-gate 		ASSERT(!error);
10617c478bd9Sstevel@tonic-gate 	} else {
10627c478bd9Sstevel@tonic-gate 		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_WRITE, resv);
10637c478bd9Sstevel@tonic-gate 	}
10647c478bd9Sstevel@tonic-gate 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
10657c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
10667c478bd9Sstevel@tonic-gate 	/*
10677c478bd9Sstevel@tonic-gate 	 * Error during EOT (probably device error while writing commit rec)
10687c478bd9Sstevel@tonic-gate 	 */
10697c478bd9Sstevel@tonic-gate 	if (err)
10707c478bd9Sstevel@tonic-gate 		return (err);
10717c478bd9Sstevel@tonic-gate 	goto again;
10727c478bd9Sstevel@tonic-gate }
1073