/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
33 * 34 * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.6 2007/11/20 22:55:40 dillon Exp $ 35 */ 36 37 #include "hammer.h" 38 #include <sys/buf.h> 39 #include <sys/buf2.h> 40 41 int 42 hammer_vop_inactive(struct vop_inactive_args *ap) 43 { 44 struct hammer_inode *ip = VTOI(ap->a_vp); 45 46 if (ip == NULL) 47 vrecycle(ap->a_vp); 48 return(0); 49 } 50 51 int 52 hammer_vop_reclaim(struct vop_reclaim_args *ap) 53 { 54 struct hammer_inode *ip; 55 struct vnode *vp; 56 57 vp = ap->a_vp; 58 if ((ip = vp->v_data) != NULL) { 59 vp->v_data = NULL; 60 ip->vp = NULL; 61 hammer_rel_inode(ip, 1); 62 } 63 return(0); 64 } 65 66 /* 67 * Obtain a vnode for the specified inode number. An exclusively locked 68 * vnode is returned. 69 */ 70 int 71 hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) 72 { 73 struct hammer_mount *hmp = (void *)mp->mnt_data; 74 struct hammer_inode *ip; 75 int error; 76 77 /* 78 * Get/allocate the hammer_inode structure. The structure must be 79 * unlocked while we manipulate the related vnode to avoid a 80 * deadlock. 81 */ 82 ip = hammer_get_inode(hmp, ino, &error); 83 if (ip == NULL) { 84 *vpp = NULL; 85 return(error); 86 } 87 error = hammer_get_vnode(ip, LK_EXCLUSIVE, vpp); 88 hammer_rel_inode(ip, 0); 89 return (error); 90 } 91 92 /* 93 * Return a locked vnode for the specified inode. The inode must be 94 * referenced but NOT LOCKED on entry and will remain referenced on 95 * return. 
96 */ 97 int 98 hammer_get_vnode(struct hammer_inode *ip, int lktype, struct vnode **vpp) 99 { 100 struct vnode *vp; 101 int error = 0; 102 103 for (;;) { 104 if ((vp = ip->vp) == NULL) { 105 error = getnewvnode(VT_HAMMER, ip->hmp->mp, vpp, 0, 0); 106 if (error) 107 break; 108 hammer_lock_ex(&ip->lock); 109 if (ip->vp != NULL) { 110 hammer_unlock(&ip->lock); 111 vp->v_type = VBAD; 112 vx_put(vp); 113 continue; 114 } 115 hammer_ref(&ip->lock); 116 vp = *vpp; 117 ip->vp = vp; 118 vp->v_type = hammer_get_vnode_type( 119 ip->ino_rec.base.base.obj_type); 120 vp->v_data = (void *)ip; 121 /* vnode locked by getnewvnode() */ 122 /* make related vnode dirty if inode dirty? */ 123 hammer_unlock(&ip->lock); 124 if (vp->v_type == VREG) 125 vinitvmio(vp, ip->ino_rec.ino_size); 126 break; 127 } 128 129 /* 130 * loop if the vget fails (aka races), or if the vp 131 * no longer matches ip->vp. 132 */ 133 if (vget(vp, LK_EXCLUSIVE) == 0) { 134 if (vp == ip->vp) 135 break; 136 vput(vp); 137 } 138 } 139 *vpp = vp; 140 return(error); 141 } 142 143 /* 144 * Acquire a HAMMER inode. The returned inode is not locked. These functions 145 * do not attach or detach the related vnode (use hammer_get_vnode() for 146 * that). 147 */ 148 struct hammer_inode * 149 hammer_get_inode(struct hammer_mount *hmp, u_int64_t obj_id, int *errorp) 150 { 151 struct hammer_inode_info iinfo; 152 struct hammer_cursor cursor; 153 struct hammer_inode *ip; 154 155 /* 156 * Determine if we already have an inode cached. If we do then 157 * we are golden. 158 */ 159 iinfo.obj_id = obj_id; 160 iinfo.obj_asof = HAMMER_MAX_TID; /* XXX */ 161 loop: 162 ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo); 163 if (ip) { 164 hammer_ref(&ip->lock); 165 *errorp = 0; 166 return(ip); 167 } 168 169 ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO); 170 ip->obj_id = obj_id; 171 ip->obj_asof = iinfo.obj_asof; 172 ip->hmp = hmp; 173 RB_INIT(&ip->rec_tree); 174 175 /* 176 * Locate the on-disk inode. 
177 * If we do not have an inode cached search the HAMMER on-disk B-Tree 178 * for it. 179 */ 180 181 hammer_init_cursor_hmp(&cursor, hmp); 182 cursor.key_beg.obj_id = ip->obj_id; 183 cursor.key_beg.key = 0; 184 cursor.key_beg.create_tid = iinfo.obj_asof; 185 cursor.key_beg.delete_tid = 0; 186 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE; 187 cursor.key_beg.obj_type = 0; 188 cursor.flags = HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_GET_DATA; 189 190 *errorp = hammer_btree_lookup(&cursor); 191 192 /* 193 * On success the B-Tree lookup will hold the appropriate 194 * buffer cache buffers and provide a pointer to the requested 195 * information. Copy the information to the in-memory inode. 196 */ 197 if (*errorp == 0) { 198 ip->ino_rec = cursor.record->inode; 199 ip->ino_data = cursor.data->inode; 200 } 201 hammer_cache_node(cursor.node, &ip->cache); 202 hammer_done_cursor(&cursor); 203 204 /* 205 * On success load the inode's record and data and insert the 206 * inode into the B-Tree. It is possible to race another lookup 207 * insertion of the same inode so deal with that condition too. 208 */ 209 if (*errorp == 0) { 210 hammer_ref(&ip->lock); 211 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) { 212 hammer_uncache_node(&ip->cache); 213 hammer_unref(&ip->lock); 214 kfree(ip, M_HAMMER); 215 goto loop; 216 } 217 } else { 218 kfree(ip, M_HAMMER); 219 ip = NULL; 220 } 221 return (ip); 222 } 223 224 /* 225 * Create a new filesystem object, returning the inode in *ipp. The 226 * returned inode will be referenced but not locked. 227 * 228 * The inode is created in-memory and will be delay-synchronized to the 229 * disk. 
230 */ 231 int 232 hammer_create_inode(hammer_transaction_t trans, struct vattr *vap, 233 struct ucred *cred, hammer_inode_t dip, 234 struct hammer_inode **ipp) 235 { 236 hammer_mount_t hmp; 237 hammer_inode_t ip; 238 uid_t xuid; 239 240 hmp = trans->hmp; 241 ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO); 242 ip->obj_id = hammer_alloc_tid(trans); 243 KKASSERT(ip->obj_id != 0); 244 ip->obj_asof = HAMMER_MAX_TID; /* XXX */ 245 ip->hmp = hmp; 246 ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_RDIRTY | 247 HAMMER_INODE_ITIMES; 248 ip->last_tid = trans->tid; 249 250 RB_INIT(&ip->rec_tree); 251 252 ip->ino_rec.ino_atime = trans->tid; 253 ip->ino_rec.ino_mtime = trans->tid; 254 ip->ino_rec.ino_size = 0; 255 ip->ino_rec.ino_nlinks = 0; 256 /* XXX */ 257 kprintf("rootvol %p ondisk %p\n", hmp->rootvol, hmp->rootvol->ondisk); 258 ip->ino_rec.base.rec_id = hammer_alloc_recid(trans); 259 KKASSERT(ip->ino_rec.base.rec_id != 0); 260 ip->ino_rec.base.base.obj_id = ip->obj_id; 261 ip->ino_rec.base.base.key = 0; 262 ip->ino_rec.base.base.create_tid = trans->tid; 263 ip->ino_rec.base.base.delete_tid = 0; 264 ip->ino_rec.base.base.rec_type = HAMMER_RECTYPE_INODE; 265 ip->ino_rec.base.base.obj_type = hammer_get_obj_type(vap->va_type); 266 267 ip->ino_data.version = HAMMER_INODE_DATA_VERSION; 268 ip->ino_data.mode = vap->va_mode; 269 ip->ino_data.ctime = trans->tid; 270 ip->ino_data.parent_obj_id = (dip) ? dip->ino_rec.base.base.obj_id : 0; 271 272 /* 273 * Calculate default uid/gid and overwrite with information from 274 * the vap. 
275 */ 276 xuid = hammer_to_unix_xid(&dip->ino_data.uid); 277 ip->ino_data.gid = dip->ino_data.gid; 278 xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred, 279 &vap->va_mode); 280 ip->ino_data.mode = vap->va_mode; 281 282 if (vap->va_vaflags & VA_UID_UUID_VALID) 283 ip->ino_data.uid = vap->va_uid_uuid; 284 else if (vap->va_uid != (uid_t)VNOVAL) 285 hammer_guid_to_uuid(&ip->ino_data.uid, xuid); 286 if (vap->va_vaflags & VA_GID_UUID_VALID) 287 ip->ino_data.gid = vap->va_gid_uuid; 288 else if (vap->va_gid != (gid_t)VNOVAL) 289 hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid); 290 291 hammer_ref(&ip->lock); 292 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) { 293 hammer_unref(&ip->lock); 294 panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id); 295 } 296 *ipp = ip; 297 return(0); 298 } 299 300 /* 301 * Release a reference on an inode and unload it if told to flush. 302 */ 303 void 304 hammer_rel_inode(struct hammer_inode *ip, int flush) 305 { 306 hammer_unref(&ip->lock); 307 if (flush || ip->ino_rec.ino_nlinks == 0) 308 ip->flags |= HAMMER_INODE_FLUSH; 309 if (ip->lock.refs == 0 && (ip->flags & HAMMER_INODE_FLUSH)) 310 hammer_unload_inode(ip, NULL); 311 } 312 313 /* 314 * Unload and destroy the specified inode. 315 * 316 * (called via RB_SCAN) 317 */ 318 int 319 hammer_unload_inode(struct hammer_inode *ip, void *data __unused) 320 { 321 KASSERT(ip->lock.refs == 0, 322 ("hammer_unload_inode: %d refs\n", ip->lock.refs)); 323 KKASSERT(ip->vp == NULL); 324 hammer_ref(&ip->lock); 325 326 /* XXX flush inode to disk */ 327 kprintf("flush inode %p\n", ip); 328 329 RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip); 330 331 hammer_uncache_node(&ip->cache); 332 kfree(ip, M_HAMMER); 333 return(0); 334 } 335 336 /* 337 * A transaction has modified an inode, requiring a new record and possibly 338 * also data to be written out. 
339 */ 340 void 341 hammer_modify_inode(struct hammer_transaction *trans, 342 struct hammer_inode *ip, int flags) 343 { 344 ip->flags |= flags; 345 ip->last_tid = trans->tid; 346 } 347 348 /* 349 * Access the filesystem buffer containing the cluster-relative byte 350 * offset, validate the buffer type, load *bufferp and return a 351 * pointer to the requested data. The buffer is reference and locked on 352 * return. 353 * 354 * If buf_type is 0 the buffer is assumed to be a pure-data buffer and 355 * no type or crc check is performed. 356 * 357 * If *bufferp is not NULL on entry it is assumed to contain a locked 358 * and referenced buffer which will then be replaced. 359 * 360 * If the caller is holding another unrelated buffer locked it must be 361 * passed in reorderbuf so we can properly order buffer locks. 362 * 363 * XXX add a flag for the buffer type and check the CRC here XXX 364 */ 365 void * 366 hammer_bread(hammer_cluster_t cluster, int32_t cloff, 367 u_int64_t buf_type, int *errorp, 368 struct hammer_buffer **bufferp) 369 { 370 hammer_buffer_t buffer; 371 int32_t buf_no; 372 int32_t buf_off; 373 374 /* 375 * Load the correct filesystem buffer, replacing *bufferp. 
376 */ 377 buf_no = cloff / HAMMER_BUFSIZE; 378 buffer = *bufferp; 379 if (buffer == NULL || buffer->cluster != cluster || 380 buffer->buf_no != buf_no) { 381 if (buffer) { 382 hammer_unlock(&buffer->io.lock); 383 hammer_rel_buffer(buffer, 0); 384 } 385 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp); 386 *bufferp = buffer; 387 if (buffer == NULL) 388 return(NULL); 389 hammer_lock_ex(&buffer->io.lock); 390 } 391 392 /* 393 * Validate the buffer type 394 */ 395 buf_off = cloff & HAMMER_BUFMASK; 396 if (buf_type) { 397 if (buf_type != buffer->ondisk->head.buf_type) { 398 kprintf("BUFFER HEAD TYPE MISMATCH %llx %llx\n", 399 buf_type, buffer->ondisk->head.buf_type); 400 *errorp = EIO; 401 return(NULL); 402 } 403 if (buf_off < sizeof(buffer->ondisk->head)) { 404 kprintf("BUFFER OFFSET TOO LOW %d\n", buf_off); 405 *errorp = EIO; 406 return(NULL); 407 } 408 } 409 410 /* 411 * Return a pointer to the buffer data. 412 */ 413 *errorp = 0; 414 return((char *)buffer->ondisk + buf_off); 415 } 416 417