1 /* $NetBSD: ufs_inode.c,v 1.83 2010/09/01 16:56:19 chs Exp $ */ 2 3 /* 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_inode.c	8.9 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.83 2010/09/01 16:56:19 chs Exp $");

#if defined(_KERNEL_OPT)
#include "opt_ffs.h"
#include "opt_quota.h"
#include "opt_wapbl.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>
#include <sys/kmem.h>

#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_wapbl.h>
#ifdef UFS_DIRHASH
#include <ufs/ufs/dirhash.h>
#endif
#ifdef UFS_EXTATTR
#include <ufs/ufs/extattr.h>
#endif

#include <uvm/uvm.h>

extern int prtactive;

/*
 * Last reference to an inode.  If necessary, write or delete it.
 *
 * VOP_INACTIVE implementation for ufs.  If the inode's link count has
 * dropped to zero (and the file system is writable), the file's blocks
 * are truncated away and the on-disk mode is cleared here; the final
 * inode free itself is deferred to ufs_reclaim().  All metadata changes
 * are wrapped in a WAPBL transaction when journaling is enabled.
 * On return, *ap->a_recycle tells the caller whether the vnode should
 * be recycled immediately.
 */
int
ufs_inactive(void *v)
{
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct mount *transmp;
	mode_t mode;
	int error = 0;
	/*
	 * logged != 0 means a WAPBL transaction is currently open and
	 * must be closed with UFS_WAPBL_END before the normal return.
	 * The "goto out" paths taken after a failed UFS_WAPBL_BEGIN
	 * deliberately skip that END, since no transaction is open then.
	 */
	int logged = 0;

	/* Caller must not already hold the journal lock. */
	UFS_WAPBL_JUNLOCK_ASSERT(vp->v_mount);

	transmp = vp->v_mount;
	fstrans_start(transmp, FSTRANS_LAZY);
	/*
	 * Ignore inodes related to stale file handles.
	 */
	if (ip->i_mode == 0)
		goto out;
	if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
		error = UFS_WAPBL_BEGIN(vp->v_mount);
		if (error)
			goto out;
		logged = 1;
#ifdef QUOTA
		/* Give back the inode's quota allocation. */
		(void)chkiq(ip, -1, NOCRED, 0);
#endif
#ifdef UFS_EXTATTR
		ufs_extattr_vnode_inactive(vp, curlwp);
#endif
		if (ip->i_size != 0) {
			/*
			 * When journaling, only truncate one indirect block
			 * at a time
			 */
			if (vp->v_mount->mnt_wapbl) {
				uint64_t incr = MNINDIR(ip->i_ump) <<
				    vp->v_mount->mnt_fs_bshift; /* Power of 2 */
				uint64_t base = NDADDR <<
				    vp->v_mount->mnt_fs_bshift;
				/*
				 * Peel the file back one indirect-block span
				 * per journal transaction so that a single
				 * transaction never grows unboundedly large.
				 */
				while (!error && ip->i_size > base + incr) {
					/*
					 * round down to next full indirect
					 * block boundary.
					 */
					uint64_t nsize = base +
					    ((ip->i_size - base - 1) &
					    ~(incr - 1));
					error = UFS_TRUNCATE(vp, nsize, 0,
					    NOCRED);
					if (error)
						break;
					/* Commit and start a new transaction. */
					UFS_WAPBL_END(vp->v_mount);
					error = UFS_WAPBL_BEGIN(vp->v_mount);
					if (error)
						goto out;
				}
			}
			if (!error)
				error = UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED);
		}
		/*
		 * Clear the in-core and on-disk mode; i_omode preserves the
		 * old mode for ufs_reclaim()/the cylinder-group free path.
		 */
		DIP_ASSIGN(ip, rdev, 0);
		mode = ip->i_mode;
		ip->i_mode = 0;
		ip->i_omode = mode;
		DIP_ASSIGN(ip, mode, 0);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		/*
		 * Defer final inode free and update to ufs_reclaim().
		 */
	}

	if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) {
		/* Open a transaction here only if one isn't open already. */
		if (!logged++) {
			int err;
			err = UFS_WAPBL_BEGIN(vp->v_mount);
			if (err)
				goto out;
		}
		UFS_UPDATE(vp, NULL, NULL, 0);
	}
	if (logged)
		UFS_WAPBL_END(vp->v_mount);
out:
	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	*ap->a_recycle = (ip->i_mode == 0);
	VOP_UNLOCK(vp);
	fstrans_done(transmp);
	return (error);
}

/*
 * Reclaim an inode so that it can be used for other purposes.
178 */ 179 int 180 ufs_reclaim(struct vnode *vp) 181 { 182 struct inode *ip = VTOI(vp); 183 184 if (prtactive && vp->v_usecount > 1) 185 vprint("ufs_reclaim: pushing active", vp); 186 187 if (!UFS_WAPBL_BEGIN(vp->v_mount)) { 188 UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE); 189 UFS_WAPBL_END(vp->v_mount); 190 } 191 UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE); 192 193 /* 194 * Remove the inode from its hash chain. 195 */ 196 ufs_ihashrem(ip); 197 /* 198 * Purge old data structures associated with the inode. 199 */ 200 cache_purge(vp); 201 if (ip->i_devvp) { 202 vrele(ip->i_devvp); 203 ip->i_devvp = 0; 204 } 205 #ifdef QUOTA 206 ufsquota_free(ip); 207 #endif 208 #ifdef UFS_DIRHASH 209 if (ip->i_dirhash != NULL) 210 ufsdirhash_free(ip); 211 #endif 212 return (0); 213 } 214 215 /* 216 * allocate a range of blocks in a file. 217 * after this function returns, any page entirely contained within the range 218 * will map to invalid data and thus must be overwritten before it is made 219 * accessible to others. 
 */

int
ufs_balloc_range(struct vnode *vp, off_t off, off_t len, kauth_cred_t cred,
    int flags)
{
	off_t neweof;	/* file size after the operation */
	off_t neweob;	/* offset next to the last block after the operation */
	off_t pagestart; /* starting offset of range covered by pgs */
	off_t eob;	/* offset next to allocated blocks */
	struct uvm_object *uobj;
	int i, delta, error, npages;
	int bshift = vp->v_mount->mnt_fs_bshift;
	int bsize = 1 << bshift;	/* file system block size */
	/* pages per fs block (at least one page even if bsize < PAGE_SIZE) */
	int ppb = MAX(bsize >> PAGE_SHIFT, 1);
	struct vm_page **pgs;
	size_t pgssize;
	UVMHIST_FUNC("ufs_balloc_range"); UVMHIST_CALLED(ubchist);
	UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x",
	    vp, off, len, vp->v_size);

	/* Compute the file size after the allocation and round to blocks. */
	neweof = MAX(vp->v_size, off + len);
	GOP_SIZE(vp, neweof, &neweob, 0);

	error = 0;
	uobj = &vp->v_uobj;

	/*
	 * read or create pages covering the range of the allocation and
	 * keep them locked until the new block is allocated, so there
	 * will be no window where the old contents of the new block are
	 * visible to racing threads.
	 */

	pagestart = trunc_page(off) & ~(bsize - 1);
	npages = MIN(ppb, (round_page(neweob) - pagestart) >> PAGE_SHIFT);
	pgssize = npages * sizeof(struct vm_page *);
	pgs = kmem_zalloc(pgssize, KM_SLEEP);

	/*
	 * adjust off to be block-aligned.
	 */

	delta = off & (bsize - 1);
	off -= delta;
	len += delta;

	genfs_node_wrlock(vp);
	mutex_enter(&uobj->vmobjlock);
	error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0,
	    VM_PROT_WRITE, 0, PGO_SYNCIO | PGO_PASTEOF | PGO_NOBLOCKALLOC |
	    PGO_NOTIMESTAMP | PGO_GLOCKHELD);
	if (error) {
		/*
		 * NOTE(review): this path returns without calling
		 * genfs_node_unlock(vp).  That is only correct if
		 * VOP_GETPAGES releases the genfs node lock itself on
		 * failure when PGO_GLOCKHELD is passed -- confirm against
		 * genfs_getpages(); otherwise the write lock is leaked.
		 */
		goto out;
	}
	/*
	 * VOP_GETPAGES with PGO_SYNCIO returns with vmobjlock released,
	 * so re-take it (and the page queue lock) before touching the
	 * pages.  Mark the pages dirty now, while they are still busy,
	 * so the old block contents can never be observed.
	 */
	mutex_enter(&uobj->vmobjlock);
	mutex_enter(&uvm_pageqlock);
	for (i = 0; i < npages; i++) {
		UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0);
		KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
		pgs[i]->flags &= ~PG_CLEAN;
		uvm_pageactivate(pgs[i]);
	}
	mutex_exit(&uvm_pageqlock);
	mutex_exit(&uobj->vmobjlock);

	/*
	 * now allocate the range.
	 */

	error = GOP_ALLOC(vp, off, len, flags, cred);
	genfs_node_unlock(vp);

	/*
	 * clear PG_RDONLY on any pages we are holding
	 * (since they now have backing store) and unbusy them.
	 * Pages not fully covered by the allocation are released on
	 * error so stale, never-written pages don't linger in the cache.
	 */

	GOP_SIZE(vp, off + len, &eob, 0);
	mutex_enter(&uobj->vmobjlock);
	for (i = 0; i < npages; i++) {
		if (off <= pagestart + (i << PAGE_SHIFT) &&
		    pagestart + ((i + 1) << PAGE_SHIFT) <= eob) {
			pgs[i]->flags &= ~PG_RDONLY;
		} else if (error) {
			pgs[i]->flags |= PG_RELEASED;
		}
	}
	if (error) {
		/*
		 * uvm_page_unbusy() frees PG_RELEASED pages, which
		 * requires the page queue lock to be held.
		 */
		mutex_enter(&uvm_pageqlock);
		uvm_page_unbusy(pgs, npages);
		mutex_exit(&uvm_pageqlock);
	} else {
		uvm_page_unbusy(pgs, npages);
	}
	mutex_exit(&uobj->vmobjlock);

 out:
	kmem_free(pgs, pgssize);
	return error;
}