xref: /dragonfly/sys/vfs/hammer2/hammer2_vnops.c (revision 65867155)
1 /*
2  * Copyright (c) 2011-2015 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  * 3. Neither the name of The DragonFly Project nor the names of its
20  *    contributors may be used to endorse or promote products derived
21  *    from this software without specific, prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
27  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 /*
37  * Kernel Filesystem interface
38  *
39  * NOTE! local ipdata pointers must be reloaded on any modifying operation
40  *	 to the inode as its underlying chain may have changed.
41  */
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/fcntl.h>
47 #include <sys/buf.h>
48 #include <sys/proc.h>
49 #include <sys/namei.h>
50 #include <sys/mount.h>
51 #include <sys/vnode.h>
52 #include <sys/mountctl.h>
53 #include <sys/dirent.h>
54 #include <sys/uio.h>
55 #include <sys/objcache.h>
56 #include <sys/event.h>
57 #include <sys/file.h>
58 #include <vfs/fifofs/fifo.h>
59 
60 #include "hammer2.h"
61 
62 static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio,
63 				int seqcount);
64 static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
65 				int ioflag, int seqcount);
66 static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
67 static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);
68 
69 struct objcache *cache_xops;
70 
71 static __inline
72 void
73 hammer2_knote(struct vnode *vp, int flags)
74 {
75 	if (flags)
76 		KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
77 }
78 
79 /*
80  * Last reference to a vnode is going away but it is still cached.
81  */
82 static
83 int
84 hammer2_vop_inactive(struct vop_inactive_args *ap)
85 {
86 	hammer2_inode_t *ip;
87 	struct vnode *vp;
88 
89 	LOCKSTART;
90 	vp = ap->a_vp;
91 	ip = VTOI(vp);
92 
93 	/*
94 	 * Degenerate case
95 	 */
96 	if (ip == NULL) {
97 		vrecycle(vp);
98 		LOCKSTOP;
99 		return (0);
100 	}
101 
102 	/*
103 	 * Check for deleted inodes and recycle immediately on the last
104 	 * release.  Be sure to destroy any left-over buffer cache buffers
105 	 * so we do not waste time trying to flush them.
106 	 *
107 	 * Note that deleting the file block chains under the inode chain
108 	 * would just be a waste of energy, so don't do it.
109 	 *
110 	 * WARNING: nvtruncbuf() can only be safely called without the inode
111 	 *	    lock held due to the way our write thread works.
112 	 */
113 	if (ip->flags & HAMMER2_INODE_ISUNLINKED) {
114 		hammer2_key_t lbase;
115 		int nblksize;
116 
117 		/*
118 		 * Destroy any left-over buffer cache buffers so we do not
119 		 * waste time trying to flush them, then recycle the vnode
120 		 * so the unlinked inode can be reclaimed immediately.
121 		 */
122 		nblksize = hammer2_calc_logical(ip, 0, &lbase, NULL);
123 		nvtruncbuf(vp, 0, nblksize, 0, 0);
124 		vrecycle(vp);
125 	}
126 	LOCKSTOP;
127 	return (0);
128 }
129 
130 /*
131  * Reclaim a vnode so that it can be reused; after the inode is
132  * disassociated, the filesystem must manage it alone.
133  */
134 static
135 int
136 hammer2_vop_reclaim(struct vop_reclaim_args *ap)
137 {
138 	hammer2_inode_t *ip;
139 	hammer2_pfs_t *pmp;
140 	struct vnode *vp;
141 
142 	LOCKSTART;
143 	vp = ap->a_vp;
144 	ip = VTOI(vp);
145 	if (ip == NULL) {
146 		LOCKSTOP;
147 		return(0);
148 	}
149 	pmp = ip->pmp;
150 
151 	/*
152 	 * The final close of a deleted file or directory marks it for
153 	 * destruction.  The DELETED flag allows the flusher to shortcut
154 	 * any modified blocks still unflushed (that is, just ignore them).
155 	 *
156 	 * HAMMER2 usually does not try to optimize the freemap by returning
157 	 * deleted blocks to it as it does not usually know how many snapshots
158 	 * might be referencing portions of the file/dir.
159 	 */
160 	vp->v_data = NULL;
161 	ip->vp = NULL;
162 
163 	/*
164 	 * NOTE! We do not attempt to flush chains here, flushing is
165 	 *	 really fragile and could also deadlock.
166 	 */
167 	vclrisdirty(vp);
168 
169 	/*
170 	 * An unlinked inode may have been relinked to the ihidden directory.
171 	 * This occurs if the inode was unlinked while open.  Reclamation of
172 	 * these inodes requires processing we cannot safely do here so add
173 	 * the inode to the sideq in that situation.
174 	 *
175 	 * A modified inode may require chain synchronization which will no
176 	 * longer be driven by a sync or fsync without the vnode, also use
177 	 * the sideq for that.
178 	 *
179 	 * A reclaim can occur at any time so we cannot safely start a
180 	 * transaction to handle reclamation of unlinked files.  Instead,
181 	 * the ip is left with a reference and placed on a linked list and
182 	 * handled later on.
183 	 */
184 
185 	if ((ip->flags & (HAMMER2_INODE_ISUNLINKED |
186 			  HAMMER2_INODE_MODIFIED |
187 			  HAMMER2_INODE_RESIZED)) &&
188 	    (ip->flags & HAMMER2_INODE_ISDELETED) == 0) {
189 		hammer2_inode_sideq_t *ipul;
190 
191 		ipul = kmalloc(sizeof(*ipul), pmp->minode, M_WAITOK | M_ZERO);
192 		ipul->ip = ip;
193 
194 		hammer2_spin_ex(&pmp->list_spin);
195 		if ((ip->flags & HAMMER2_INODE_ONSIDEQ) == 0) {
196 			/* ref -> sideq */
197 			atomic_set_int(&ip->flags, HAMMER2_INODE_ONSIDEQ);
198 			TAILQ_INSERT_TAIL(&pmp->sideq, ipul, entry);
199 			hammer2_spin_unex(&pmp->list_spin);
200 		} else {
201 			hammer2_spin_unex(&pmp->list_spin);
202 			kfree(ipul, pmp->minode);
203 			hammer2_inode_drop(ip);		/* vp ref */
204 		}
205 		/* retain ref from vp for ipul */
206 	} else {
207 		hammer2_inode_drop(ip);			/* vp ref */
208 	}
209 
210 	/*
211 	 * XXX handle background sync when ip dirty, kernel will no longer
212 	 * notify us regarding this inode because there is no longer a
213 	 * vnode attached to it.
214 	 */
215 
216 	LOCKSTOP;
217 	return (0);
218 }
219 
220 static
221 int
222 hammer2_vop_fsync(struct vop_fsync_args *ap)
223 {
224 	hammer2_inode_t *ip;
225 	struct vnode *vp;
226 
227 	LOCKSTART;
228 	vp = ap->a_vp;
229 	ip = VTOI(vp);
230 
231 #if 0
232 	/* XXX can't do this yet */
233 	hammer2_trans_init(ip->pmp, HAMMER2_TRANS_ISFLUSH);
234 	vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
235 #endif
236 	hammer2_trans_init(ip->pmp, 0);
237 	vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
238 
239 	/*
240 	 * Calling chain_flush here creates a lot of duplicative
241 	 * COW operations due to non-optimal vnode ordering.
242 	 *
243 	 * Only do it for an actual fsync() syscall.  The other forms
244 	 * which call this function will eventually call chain_flush
245 	 * on the volume root as a catch-all, which is far more optimal.
246 	 */
247 	hammer2_inode_lock(ip, 0);
248 	if (ip->flags & HAMMER2_INODE_MODIFIED)
249 		hammer2_inode_chain_sync(ip);
250 	hammer2_inode_unlock(ip);
251 	hammer2_trans_done(ip->pmp);
252 
253 	LOCKSTOP;
254 	return (0);
255 }
256 
257 static
258 int
259 hammer2_vop_access(struct vop_access_args *ap)
260 {
261 	hammer2_inode_t *ip = VTOI(ap->a_vp);
262 	uid_t uid;
263 	gid_t gid;
264 	int error;
265 
266 	LOCKSTART;
267 	hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
268 	uid = hammer2_to_unix_xid(&ip->meta.uid);
269 	gid = hammer2_to_unix_xid(&ip->meta.gid);
270 	error = vop_helper_access(ap, uid, gid, ip->meta.mode, ip->meta.uflags);
271 	hammer2_inode_unlock(ip);
272 
273 	LOCKSTOP;
274 	return (error);
275 }
276 
277 static
278 int
279 hammer2_vop_getattr(struct vop_getattr_args *ap)
280 {
281 	hammer2_pfs_t *pmp;
282 	hammer2_inode_t *ip;
283 	struct vnode *vp;
284 	struct vattr *vap;
285 	hammer2_chain_t *chain;
286 	int i;
287 
288 	LOCKSTART;
289 	vp = ap->a_vp;
290 	vap = ap->a_vap;
291 
292 	ip = VTOI(vp);
293 	pmp = ip->pmp;
294 
295 	hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
296 
297 	vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
298 	vap->va_fileid = ip->meta.inum;
299 	vap->va_mode = ip->meta.mode;
300 	vap->va_nlink = ip->meta.nlinks;
301 	vap->va_uid = hammer2_to_unix_xid(&ip->meta.uid);
302 	vap->va_gid = hammer2_to_unix_xid(&ip->meta.gid);
303 	vap->va_rmajor = 0;
304 	vap->va_rminor = 0;
305 	vap->va_size = ip->meta.size;	/* protected by shared lock */
306 	vap->va_blocksize = HAMMER2_PBUFSIZE;
307 	vap->va_flags = ip->meta.uflags;
308 	hammer2_time_to_timespec(ip->meta.ctime, &vap->va_ctime);
309 	hammer2_time_to_timespec(ip->meta.mtime, &vap->va_mtime);
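	/* atime is not supported (see setattr); report mtime in its place */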
310 	hammer2_time_to_timespec(ip->meta.mtime, &vap->va_atime);
311 	vap->va_gen = 1;
312 	vap->va_bytes = 0;
313 	if (ip->meta.type == HAMMER2_OBJTYPE_DIRECTORY) {
314 		/*
315 		 * Can't really calculate directory use sans the files under
316 		 * it, just assume one block for now.
317 		 */
318 		vap->va_bytes += HAMMER2_INODE_BYTES;
319 	} else {
320 		for (i = 0; i < ip->cluster.nchains; ++i) {
321 			if ((chain = ip->cluster.array[i].chain) != NULL) {
322 				if (vap->va_bytes < chain->bref.data_count)
323 					vap->va_bytes = chain->bref.data_count;
324 			}
325 		}
326 	}
327 	vap->va_type = hammer2_get_vtype(ip->meta.type);
328 	vap->va_filerev = 0;
329 	vap->va_uid_uuid = ip->meta.uid;
330 	vap->va_gid_uuid = ip->meta.gid;
331 	vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
332 			  VA_FSID_UUID_VALID;
333 
334 	hammer2_inode_unlock(ip);
335 
336 	LOCKSTOP;
337 	return (0);
338 }
339 
340 static
341 int
342 hammer2_vop_setattr(struct vop_setattr_args *ap)
343 {
344 	hammer2_inode_t *ip;
345 	struct vnode *vp;
346 	struct vattr *vap;
347 	int error;
348 	int kflags = 0;
349 	uint64_t ctime;
350 
351 	LOCKSTART;
352 	vp = ap->a_vp;
353 	vap = ap->a_vap;
354 	hammer2_update_time(&ctime);
355 
356 	ip = VTOI(vp);
357 
358 	if (ip->pmp->ronly) {
359 		LOCKSTOP;
360 		return(EROFS);
361 	}
362 
363 	hammer2_pfs_memory_wait(ip->pmp);
364 	hammer2_trans_init(ip->pmp, 0);
365 	hammer2_inode_lock(ip, 0);
366 	error = 0;
367 
368 	if (vap->va_flags != VNOVAL) {
369 		uint32_t flags;
370 
371 		flags = ip->meta.uflags;
372 		error = vop_helper_setattr_flags(&flags, vap->va_flags,
373 				     hammer2_to_unix_xid(&ip->meta.uid),
374 				     ap->a_cred);
375 		if (error == 0) {
376 			if (ip->meta.uflags != flags) {
377 				hammer2_inode_modify(ip);
378 				ip->meta.uflags = flags;
379 				ip->meta.ctime = ctime;
380 				kflags |= NOTE_ATTRIB;
381 			}
382 			if (ip->meta.uflags & (IMMUTABLE | APPEND)) {
383 				error = 0;
384 				goto done;
385 			}
386 		}
387 		goto done;
388 	}
389 	if (ip->meta.uflags & (IMMUTABLE | APPEND)) {
390 		error = EPERM;
391 		goto done;
392 	}
393 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
394 		mode_t cur_mode = ip->meta.mode;
395 		uid_t cur_uid = hammer2_to_unix_xid(&ip->meta.uid);
396 		gid_t cur_gid = hammer2_to_unix_xid(&ip->meta.gid);
397 		uuid_t uuid_uid;
398 		uuid_t uuid_gid;
399 
400 		error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
401 					 ap->a_cred,
402 					 &cur_uid, &cur_gid, &cur_mode);
403 		if (error == 0) {
404 			hammer2_guid_to_uuid(&uuid_uid, cur_uid);
405 			hammer2_guid_to_uuid(&uuid_gid, cur_gid);
406 			if (bcmp(&uuid_uid, &ip->meta.uid, sizeof(uuid_uid)) ||
407 			    bcmp(&uuid_gid, &ip->meta.gid, sizeof(uuid_gid)) ||
408 			    ip->meta.mode != cur_mode
409 			) {
410 				hammer2_inode_modify(ip);
411 				ip->meta.uid = uuid_uid;
412 				ip->meta.gid = uuid_gid;
413 				ip->meta.mode = cur_mode;
414 				ip->meta.ctime = ctime;
415 			}
416 			kflags |= NOTE_ATTRIB;
417 		}
418 	}
419 
420 	/*
421 	 * Resize the file
422 	 */
423 	if (vap->va_size != VNOVAL && ip->meta.size != vap->va_size) {
424 		switch(vp->v_type) {
425 		case VREG:
426 			if (vap->va_size == ip->meta.size)
427 				break;
428 			if (vap->va_size < ip->meta.size) {
429 				hammer2_mtx_ex(&ip->truncate_lock);
430 				hammer2_truncate_file(ip, vap->va_size);
431 				hammer2_mtx_unlock(&ip->truncate_lock);
432 			} else {
433 				hammer2_extend_file(ip, vap->va_size);
434 			}
435 			hammer2_inode_modify(ip);
436 			ip->meta.mtime = ctime;
437 			break;
438 		default:
439 			error = EINVAL;
440 			goto done;
441 		}
442 	}
443 #if 0
444 	/* atime not supported */
445 	if (vap->va_atime.tv_sec != VNOVAL) {
446 		hammer2_inode_modify(ip);
447 		ip->meta.atime = hammer2_timespec_to_time(&vap->va_atime);
448 		kflags |= NOTE_ATTRIB;
449 	}
450 #endif
451 	if (vap->va_mode != (mode_t)VNOVAL) {
452 		mode_t cur_mode = ip->meta.mode;
453 		uid_t cur_uid = hammer2_to_unix_xid(&ip->meta.uid);
454 		gid_t cur_gid = hammer2_to_unix_xid(&ip->meta.gid);
455 
456 		error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
457 					 cur_uid, cur_gid, &cur_mode);
458 		if (error == 0 && ip->meta.mode != cur_mode) {
459 			hammer2_inode_modify(ip);
460 			ip->meta.mode = cur_mode;
461 			ip->meta.ctime = ctime;
462 			kflags |= NOTE_ATTRIB;
463 		}
464 	}
465 
466 	if (vap->va_mtime.tv_sec != VNOVAL) {
467 		hammer2_inode_modify(ip);
468 		ip->meta.mtime = hammer2_timespec_to_time(&vap->va_mtime);
469 		kflags |= NOTE_ATTRIB;
470 	}
471 
472 done:
473 	/*
474 	 * If a truncation occurred we must call hammer2_inode_chain_sync()
475 	 * now in order to trim the related data chains, otherwise a later
476 	 * expansion can cause havoc.
477 	 *
478 	 * If an extend occurred that changed the DIRECTDATA state, we must
479 	 * call hammer2_inode_chain_sync() now in order to prepare the
480 	 * inode's indirect block table.
481 	 */
482 	if (ip->flags & HAMMER2_INODE_RESIZED)
483 		hammer2_inode_chain_sync(ip);
484 
485 	/*
486 	 * Cleanup.
487 	 */
488 	hammer2_inode_unlock(ip);
489 	hammer2_trans_done(ip->pmp);
490 	hammer2_knote(ip->vp, kflags);
491 
492 	LOCKSTOP;
493 	return (error);
494 }
495 
496 static
497 int
498 hammer2_vop_readdir(struct vop_readdir_args *ap)
499 {
500 	hammer2_xop_readdir_t *xop;
501 	hammer2_blockref_t bref;
502 	hammer2_inode_t *ip;
503 	hammer2_tid_t inum;
504 	hammer2_key_t lkey;
505 	struct uio *uio;
506 	off_t *cookies;
507 	off_t saveoff;
508 	int cookie_index;
509 	int ncookies;
510 	int error;
511 	int eofflag;
512 	int dtype;
513 	int r;
514 
515 	LOCKSTART;
516 	ip = VTOI(ap->a_vp);
517 	uio = ap->a_uio;
518 	saveoff = uio->uio_offset;
519 	eofflag = 0;
520 	error = 0;
521 
522 	/*
523 	 * Setup directory entry cookies if requested
524 	 */
525 	if (ap->a_ncookies) {
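		/* rough estimate: assume at least ~16 bytes per directory entry */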
526 		ncookies = uio->uio_resid / 16 + 1;
527 		if (ncookies > 1024)
528 			ncookies = 1024;
529 		cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
530 	} else {
531 		ncookies = -1;
532 		cookies = NULL;
533 	}
534 	cookie_index = 0;
535 
536 	hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
537 
538 	/*
539 	 * Handle artificial entries.  To ensure that only positive 64 bit
540 	 * quantities are returned to userland we always strip off bit 63.
541 	 * The hash code is designed such that codes 0x0000-0x7FFF are not
542 	 * used, allowing us to use these codes for artificial entries.
543 	 *
544 	 * Entry 0 is used for '.' and entry 1 is used for '..'.  Do not
545 	 * allow '..' to cross the mount point into (e.g.) the super-root.
546 	 */
547 	if (saveoff == 0) {
548 		inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK;
549 		r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, ".");
550 		if (r)
551 			goto done;
552 		if (cookies)
553 			cookies[cookie_index] = saveoff;
554 		++saveoff;
555 		++cookie_index;
556 		if (cookie_index == ncookies)
557 			goto done;
558 	}
559 
560 	if (saveoff == 1) {
561 		/*
562 		 * Be careful with lockorder when accessing ".."
563 		 *
564 		 * (ip is the current dir. xip is the parent dir).
565 		 */
566 		inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK;
567 		if (ip != ip->pmp->iroot)
568 			inum = ip->meta.iparent & HAMMER2_DIRHASH_USERMSK;
569 		r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
570 		if (r)
571 			goto done;
572 		if (cookies)
573 			cookies[cookie_index] = saveoff;
574 		++saveoff;
575 		++cookie_index;
576 		if (cookie_index == ncookies)
577 			goto done;
578 	}
579 
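	/*
	 * Re-set the visibility bit (bit 63) that was stripped from the
	 * offset returned to userland to form the real directory scan key.
	 */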
580 	lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
581 	if (hammer2_debug & 0x0020)
582 		kprintf("readdir: lkey %016jx\n", lkey);
583 	if (error)
584 		goto done;
585 
586 	/*
587 	 * Use XOP for cluster scan.
588 	 *
589 	 * parent is the inode cluster, already locked for us.  Don't
590 	 * double lock shared locks as this will screw up upgrades.
591 	 */
592 	xop = hammer2_xop_alloc(ip, 0);
593 	xop->lkey = lkey;
594 	hammer2_xop_start(&xop->head, hammer2_xop_readdir);
595 
596 	for (;;) {
597 		const hammer2_inode_data_t *ripdata;
598 
599 		error = hammer2_xop_collect(&xop->head, 0);
600 		if (error)
601 			break;
602 		if (cookie_index == ncookies)
603 			break;
604 		if (hammer2_debug & 0x0020)
605 			kprintf("cluster chain %p %p\n",
606 				xop->head.cluster.focus,
607 				(xop->head.cluster.focus ?
608 				 xop->head.cluster.focus->data : (void *)-1));
609 		ripdata = &hammer2_cluster_rdata(&xop->head.cluster)->ipdata;
610 		hammer2_cluster_bref(&xop->head.cluster, &bref);
611 		if (bref.type == HAMMER2_BREF_TYPE_INODE) {
612 			dtype = hammer2_get_dtype(ripdata);
613 			saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
614 			r = vop_write_dirent(&error, uio,
615 					     ripdata->meta.inum &
616 					      HAMMER2_DIRHASH_USERMSK,
617 					     dtype,
618 					     ripdata->meta.name_len,
619 					     ripdata->filename);
620 			if (r)
621 				break;
622 			if (cookies)
623 				cookies[cookie_index] = saveoff;
624 			++cookie_index;
625 		} else {
626 			/* XXX chain error */
627 			kprintf("bad chain type readdir %d\n", bref.type);
628 		}
629 	}
630 	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
631 	if (error == ENOENT) {
632 		error = 0;
633 		eofflag = 1;
634 		saveoff = (hammer2_key_t)-1;
635 	} else {
636 		saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
637 	}
638 done:
639 	hammer2_inode_unlock(ip);
640 	if (ap->a_eofflag)
641 		*ap->a_eofflag = eofflag;
642 	if (hammer2_debug & 0x0020)
643 		kprintf("readdir: done at %016jx\n", saveoff);
644 	uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
645 	if (error && cookie_index == 0) {
646 		if (cookies) {
647 			kfree(cookies, M_TEMP);
648 			*ap->a_ncookies = 0;
649 			*ap->a_cookies = NULL;
650 		}
651 	} else {
652 		if (cookies) {
653 			*ap->a_ncookies = cookie_index;
654 			*ap->a_cookies = cookies;
655 		}
656 	}
657 	LOCKSTOP;
658 	return (error);
659 }
660 
661 /*
662  * hammer2_vop_readlink { vp, uio, cred }
663  */
664 static
665 int
666 hammer2_vop_readlink(struct vop_readlink_args *ap)
667 {
668 	struct vnode *vp;
669 	hammer2_inode_t *ip;
670 	int error;
671 
672 	vp = ap->a_vp;
673 	if (vp->v_type != VLNK)
674 		return (EINVAL);
675 	ip = VTOI(vp);
676 
677 	error = hammer2_read_file(ip, ap->a_uio, 0);
678 	return (error);
679 }
680 
681 static
682 int
683 hammer2_vop_read(struct vop_read_args *ap)
684 {
685 	struct vnode *vp;
686 	hammer2_inode_t *ip;
687 	struct uio *uio;
688 	int error;
689 	int seqcount;
690 	int bigread;
691 
692 	/*
693 	 * Read operations supported on this vnode?
694 	 */
695 	vp = ap->a_vp;
696 	if (vp->v_type != VREG)
697 		return (EINVAL);
698 
699 	/*
700 	 * Misc
701 	 */
702 	ip = VTOI(vp);
703 	uio = ap->a_uio;
704 	error = 0;
705 
706 	seqcount = ap->a_ioflag >> 16;
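	/* the sequential-access heuristic is carried in the upper bits of a_ioflag */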
707 	bigread = (uio->uio_resid > 100 * 1024 * 1024);
708 
709 	error = hammer2_read_file(ip, uio, seqcount);
710 	return (error);
711 }
712 
713 static
714 int
715 hammer2_vop_write(struct vop_write_args *ap)
716 {
717 	hammer2_inode_t *ip;
718 	thread_t td;
719 	struct vnode *vp;
720 	struct uio *uio;
721 	int error;
722 	int seqcount;
723 
724 	/*
725 	 * Write operations supported on this vnode?
726 	 */
727 	vp = ap->a_vp;
728 	if (vp->v_type != VREG)
729 		return (EINVAL);
730 
731 	/*
732 	 * Misc
733 	 */
734 	ip = VTOI(vp);
735 	uio = ap->a_uio;
736 	error = 0;
737 	if (ip->pmp->ronly) {
738 		return (EROFS);
739 	}
740 
741 	seqcount = ap->a_ioflag >> 16;
742 
743 	/*
744 	 * Check resource limit
745 	 */
746 	if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
747 	    uio->uio_offset + uio->uio_resid >
748 	     td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
749 		lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
750 		return (EFBIG);
751 	}
752 
753 	/*
754 	 * The transaction interlocks against flush initiations
755 	 * (note: but will run concurrently with the actual flush).
756 	 */
757 	hammer2_trans_init(ip->pmp, 0);
758 	error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount);
759 	hammer2_trans_done(ip->pmp);
760 
761 	return (error);
762 }
763 
764 /*
765  * Perform read operations on a file or symlink given an UNLOCKED
766  * inode and uio.
767  *
768  * The passed ip is not locked.
769  */
770 static
771 int
772 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
773 {
774 	hammer2_off_t size;
775 	struct buf *bp;
776 	int error;
777 
778 	error = 0;
779 
780 	/*
781 	 * UIO read loop.
782 	 *
783 	 * WARNING! Assumes that the kernel interlocks size changes at the
784 	 *	    vnode level.
785 	 */
786 	hammer2_mtx_sh(&ip->lock);
787 	hammer2_mtx_sh(&ip->truncate_lock);
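	/* the shared truncate_lock interlocks against concurrent truncations */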
788 	size = ip->meta.size;
789 	hammer2_mtx_unlock(&ip->lock);
790 
791 	while (uio->uio_resid > 0 && uio->uio_offset < size) {
792 		hammer2_key_t lbase;
793 		hammer2_key_t leof;
794 		int lblksize;
795 		int loff;
796 		int n;
797 
798 		lblksize = hammer2_calc_logical(ip, uio->uio_offset,
799 						&lbase, &leof);
800 
801 #if 1
802 		error = cluster_read(ip->vp, leof, lbase, lblksize,
803 				     uio->uio_resid, seqcount * BKVASIZE,
804 				     &bp);
805 #else
806 		if (uio->uio_segflg == UIO_NOCOPY) {
807 			bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
808 			if (bp->b_flags & B_CACHE) {
809 				int i;
810 				int j = 0;
811 				if (bp->b_xio.xio_npages != 16)
812 					kprintf("NPAGES BAD\n");
813 				for (i = 0; i < bp->b_xio.xio_npages; ++i) {
814 					vm_page_t m;
815 					m = bp->b_xio.xio_pages[i];
816 					if (m == NULL || m->valid == 0) {
817 						kprintf("bp %016jx %016jx pg %d inv",
818 							lbase, leof, i);
819 						if (m)
820 							kprintf("m->object %p/%p", m->object, ip->vp->v_object);
821 						kprintf("\n");
822 						j = 1;
823 					}
824 				}
825 				if (j)
826 					kprintf("b_flags %08x, b_error %d\n", bp->b_flags, bp->b_error);
827 			}
828 			bqrelse(bp);
829 		}
830 		error = bread(ip->vp, lbase, lblksize, &bp);
831 #endif
832 		if (error)
833 			break;
834 		loff = (int)(uio->uio_offset - lbase);
835 		n = lblksize - loff;
836 		if (n > uio->uio_resid)
837 			n = uio->uio_resid;
838 		if (n > size - uio->uio_offset)
839 			n = (int)(size - uio->uio_offset);
840 		bp->b_flags |= B_AGE;
841 		uiomovebp(bp, (char *)bp->b_data + loff, n, uio);
842 		bqrelse(bp);
843 	}
844 	hammer2_mtx_unlock(&ip->truncate_lock);
845 
846 	return (error);
847 }
848 
849 /*
850  * Write to the file represented by the inode via the logical buffer cache.
851  * The inode may represent a regular file or a symlink.
852  *
853  * The inode must not be locked.
854  */
855 static
856 int
857 hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
858 		   int ioflag, int seqcount)
859 {
860 	hammer2_key_t old_eof;
861 	hammer2_key_t new_eof;
862 	struct buf *bp;
863 	int kflags;
864 	int error;
865 	int modified;
866 
867 	/*
868 	 * Setup if append
869 	 *
870 	 * WARNING! Assumes that the kernel interlocks size changes at the
871 	 *	    vnode level.
872 	 */
873 	hammer2_mtx_ex(&ip->lock);
874 	hammer2_mtx_sh(&ip->truncate_lock);
875 	if (ioflag & IO_APPEND)
876 		uio->uio_offset = ip->meta.size;
877 	old_eof = ip->meta.size;
878 
879 	/*
880 	 * Extend the file if necessary.  If the write fails at some point
881 	 * we will truncate it back down to cover as much as we were able
882 	 * to write.
883 	 *
884 	 * Doing this now makes it easier to calculate buffer sizes in
885 	 * the loop.
886 	 */
887 	kflags = 0;
888 	error = 0;
889 	modified = 0;
890 
891 	if (uio->uio_offset + uio->uio_resid > old_eof) {
892 		new_eof = uio->uio_offset + uio->uio_resid;
893 		modified = 1;
894 		hammer2_extend_file(ip, new_eof);
895 		kflags |= NOTE_EXTEND;
896 	} else {
897 		new_eof = old_eof;
898 	}
899 	hammer2_mtx_unlock(&ip->lock);
900 
901 	/*
902 	 * UIO write loop
903 	 */
904 	while (uio->uio_resid > 0) {
905 		hammer2_key_t lbase;
906 		int trivial;
907 		int endofblk;
908 		int lblksize;
909 		int loff;
910 		int n;
911 
912 		/*
913 		 * Don't allow the buffer build to blow out the buffer
914 		 * cache.
915 		 */
916 		if ((ioflag & IO_RECURSE) == 0)
917 			bwillwrite(HAMMER2_PBUFSIZE);
918 
919 		/*
920 		 * This nominally tells us how much we can cluster and
921 		 * what the logical buffer size needs to be.  Currently
922 		 * we don't try to cluster the write and just handle one
923 		 * block at a time.
924 		 */
925 		lblksize = hammer2_calc_logical(ip, uio->uio_offset,
926 						&lbase, NULL);
927 		loff = (int)(uio->uio_offset - lbase);
928 
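		/* logical buffers are never larger than 64KB */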
929 		KKASSERT(lblksize <= 65536);
930 
931 		/*
932 		 * Calculate bytes to copy this transfer and whether the
933 		 * copy completely covers the buffer or not.
934 		 */
935 		trivial = 0;
936 		n = lblksize - loff;
937 		if (n > uio->uio_resid) {
938 			n = uio->uio_resid;
939 			if (loff == lbase && uio->uio_offset + n == new_eof)
940 				trivial = 1;
941 			endofblk = 0;
942 		} else {
943 			if (loff == 0)
944 				trivial = 1;
945 			endofblk = 1;
946 		}
947 		if (lbase >= new_eof)
948 			trivial = 1;
949 
950 		/*
951 		 * Get the buffer
952 		 */
953 		if (uio->uio_segflg == UIO_NOCOPY) {
954 			/*
955 			 * Issuing a write with the same data backing the
956 			 * buffer.  Instantiate the buffer to collect the
957 			 * backing vm pages, then read-in any missing bits.
958 			 *
959 			 * This case is used by vop_stdputpages().
960 			 */
961 			bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
962 			if ((bp->b_flags & B_CACHE) == 0) {
963 				bqrelse(bp);
964 				error = bread(ip->vp, lbase, lblksize, &bp);
965 			}
966 		} else if (trivial) {
967 			/*
968 			 * Even though we are entirely overwriting the buffer
969 			 * we may still have to zero it out to avoid a
970 			 * mmap/write visibility issue.
971 			 */
972 			bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
973 			if ((bp->b_flags & B_CACHE) == 0)
974 				vfs_bio_clrbuf(bp);
975 		} else {
976 			/*
977 			 * Partial overwrite, read in any missing bits then
978 			 * replace the portion being written.
979 			 *
980 			 * (The strategy code will detect zero-fill physical
981 			 * blocks for this case).
982 			 */
983 			error = bread(ip->vp, lbase, lblksize, &bp);
984 			if (error == 0)
985 				bheavy(bp);
986 		}
987 
988 		if (error) {
989 			brelse(bp);
990 			break;
991 		}
992 
993 		/*
994 		 * Ok, copy the data in
995 		 */
996 		error = uiomovebp(bp, bp->b_data + loff, n, uio);
997 		kflags |= NOTE_WRITE;
998 		modified = 1;
999 		if (error) {
1000 			brelse(bp);
1001 			break;
1002 		}
1003 
1004 		/*
1005 		 * WARNING: Pageout daemon will issue UIO_NOCOPY writes
1006 		 *	    with IO_SYNC or IO_ASYNC set.  These writes
1007 		 *	    must be handled as the pageout daemon expects.
1008 		 *
1009 		 * NOTE!    H2 relies on cluster_write() here because it
1010 		 *	    cannot preallocate disk blocks at the logical
1011 		 *	    level due to not knowing what the compression
1012 		 *	    size will be at this time.
1013 		 *
1014 		 *	    We must use cluster_write() here and we depend
1015 		 *	    on the write-behind feature to flush buffers
1016 		 *	    appropriately.  If we let the buffer daemons do
1017 		 *	    it the block allocations will be all over the
1018 		 *	    map.
1019 		 */
1020 		if (ioflag & IO_SYNC) {
1021 			bwrite(bp);
1022 		} else if ((ioflag & IO_DIRECT) && endofblk) {
1023 			bawrite(bp);
1024 		} else if (ioflag & IO_ASYNC) {
1025 			bawrite(bp);
1026 		} else if (ip->vp->v_mount->mnt_flag & MNT_NOCLUSTERW) {
1027 			bdwrite(bp);
1028 		} else {
1029 #if 1
1030 			bp->b_flags |= B_CLUSTEROK;
1031 			cluster_write(bp, new_eof, lblksize, seqcount);
1032 #else
1033 			bdwrite(bp);
1034 #endif
1035 		}
1036 	}
1037 
1038 	/*
1039 	 * Cleanup.  If we extended the file EOF but failed to write through,
1040 	 * the entire write is a failure and we have to back up.
1041 	 */
1042 	if (error && new_eof != old_eof) {
1043 		hammer2_mtx_unlock(&ip->truncate_lock);
1044 		hammer2_mtx_ex(&ip->lock);
1045 		hammer2_mtx_ex(&ip->truncate_lock);
1046 		hammer2_truncate_file(ip, old_eof);
1047 		if (ip->flags & HAMMER2_INODE_MODIFIED)
1048 			hammer2_inode_chain_sync(ip);
1049 		hammer2_mtx_unlock(&ip->lock);
1050 	} else if (modified) {
1051 		hammer2_mtx_ex(&ip->lock);
1052 		hammer2_inode_modify(ip);
1053 		hammer2_update_time(&ip->meta.mtime);
1054 		if (ip->flags & HAMMER2_INODE_MODIFIED)
1055 			hammer2_inode_chain_sync(ip);
1056 		hammer2_mtx_unlock(&ip->lock);
1057 		hammer2_knote(ip->vp, kflags);
1058 	}
1059 	hammer2_trans_assert_strategy(ip->pmp);
1060 	hammer2_mtx_unlock(&ip->truncate_lock);
1061 
1062 	return error;
1063 }
1064 
1065 /*
1066  * Truncate the size of a file.  The inode must not be locked.
1067  *
1068  * We must unconditionally set HAMMER2_INODE_RESIZED to properly
1069  * ensure that any on-media data beyond the new file EOF has been destroyed.
1070  *
1071  * WARNING: nvtruncbuf() can only be safely called without the inode lock
1072  *	    held due to the way our write thread works.  If the truncation
1073  *	    occurs in the middle of a buffer, nvtruncbuf() is responsible
1074  *	    for dirtying that buffer and zeroing out trailing bytes.
1075  *
1076  * WARNING! Assumes that the kernel interlocks size changes at the
1077  *	    vnode level.
1078  *
1079  * WARNING! Caller assumes responsibility for removing dead blocks
1080  *	    if INODE_RESIZED is set.
1081  */
1082 static
1083 void
1084 hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
1085 {
1086 	hammer2_key_t lbase;
1087 	int nblksize;
1088 
1089 	LOCKSTART;
1090 	hammer2_mtx_unlock(&ip->lock);
1091 	if (ip->vp) {
1092 		nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
1093 		nvtruncbuf(ip->vp, nsize,
1094 			   nblksize, (int)nsize & (nblksize - 1),
1095 			   0);
1096 	}
1097 	hammer2_mtx_ex(&ip->lock);
1098 	KKASSERT((ip->flags & HAMMER2_INODE_RESIZED) == 0);
1099 	ip->osize = ip->meta.size;
1100 	ip->meta.size = nsize;
1101 	atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
1102 	hammer2_inode_modify(ip);
1103 	LOCKSTOP;
1104 }
1105 
1106 /*
1107  * Extend the size of a file.  The inode must not be locked.
1108  *
1109  * Even though the file size is changing, we do not have to set the
1110  * INODE_RESIZED bit unless the file size crosses the EMBEDDED_BYTES
1111  * boundary.  When this occurs a hammer2_inode_chain_sync() is required
1112  * to prepare the inode cluster's indirect block table, otherwise
1113  * async execution of the strategy code will implode on us.
1114  *
1115  * WARNING! Assumes that the kernel interlocks size changes at the
1116  *	    vnode level.
1117  *
1118  * WARNING! Caller assumes responsibility for transitioning out
1119  *	    of the inode DIRECTDATA mode if INODE_RESIZED is set.
1120  */
1121 static
1122 void
1123 hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
1124 {
1125 	hammer2_key_t lbase;
1126 	hammer2_key_t osize;
1127 	int oblksize;
1128 	int nblksize;
1129 
1130 	LOCKSTART;
1131 
1132 	KKASSERT((ip->flags & HAMMER2_INODE_RESIZED) == 0);
1133 	hammer2_inode_modify(ip);
1134 	osize = ip->meta.size;
1135 	ip->osize = osize;
1136 	ip->meta.size = nsize;
1137 
1138 	if (osize <= HAMMER2_EMBEDDED_BYTES && nsize > HAMMER2_EMBEDDED_BYTES) {
1139 		atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
1140 		hammer2_inode_chain_sync(ip);
1141 	}
1142 
1143 	hammer2_mtx_unlock(&ip->lock);
1144 	if (ip->vp) {
1145 		oblksize = hammer2_calc_logical(ip, osize, &lbase, NULL);
1146 		nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
1147 		nvextendbuf(ip->vp,
1148 			    osize, nsize,
1149 			    oblksize, nblksize,
1150 			    -1, -1, 0);
1151 	}
1152 	hammer2_mtx_ex(&ip->lock);
1153 
1154 	LOCKSTOP;
1155 }
1156 
1157 static
1158 int
1159 hammer2_vop_nresolve(struct vop_nresolve_args *ap)
1160 {
1161 	hammer2_xop_nresolve_t *xop;
1162 	hammer2_inode_t *ip;
1163 	hammer2_inode_t *dip;
1164 	struct namecache *ncp;
1165 	struct vnode *vp;
1166 	int error;
1167 
1168 	LOCKSTART;
1169 	dip = VTOI(ap->a_dvp);
1170 	xop = hammer2_xop_alloc(dip, 0);
1171 
1172 	ncp = ap->a_nch->ncp;
1173 	hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);
1174 
1175 	/*
1176 	 * Note: In DragonFly the kernel handles '.' and '..'.
1177 	 */
1178 	hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1179 	hammer2_xop_start(&xop->head, hammer2_xop_nresolve);
1180 
1181 	error = hammer2_xop_collect(&xop->head, 0);
1182 	if (error) {
1183 		ip = NULL;
1184 	} else {
1185 		ip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster, -1);
1186 	}
1187 	hammer2_inode_unlock(dip);
1188 
1189 	/*
1190 	 * Acquire the related vnode
1191 	 *
1192 	 * NOTE: For error processing, only ENOENT resolves the namecache
1193 	 *	 entry to NULL, otherwise we just return the error and
1194 	 *	 leave the namecache unresolved.
1195 	 *
1196 	 * NOTE: multiple hammer2_inode structures can be aliased to the
1197 	 *	 same chain element, for example for hardlinks.  This
1198 	 *	 use case does not 'reattach' inode associations that
1199 	 *	 might already exist, but always allocates a new one.
1200 	 *
1201 	 * WARNING: inode structure is locked exclusively via inode_get
1202 	 *	    but chain was locked shared.  inode_unlock()
1203 	 *	    will handle it properly.
1204 	 */
1205 	if (ip) {
1206 		vp = hammer2_igetv(ip, &error);
1207 		if (error == 0) {
1208 			vn_unlock(vp);
1209 			cache_setvp(ap->a_nch, vp);
1210 		} else if (error == ENOENT) {
1211 			cache_setvp(ap->a_nch, NULL);
1212 		}
1213 		hammer2_inode_unlock(ip);
1214 
1215 		/*
1216 		 * The vp should not be released until after we've disposed
1217 		 * of our locks, because it might cause vop_inactive() to
1218 		 * be called.
1219 		 */
1220 		if (vp)
1221 			vrele(vp);
1222 	} else {
1223 		error = ENOENT;
1224 		cache_setvp(ap->a_nch, NULL);
1225 	}
1226 	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1227 	KASSERT(error || ap->a_nch->ncp->nc_vp != NULL,
1228 		("resolve error %d/%p ap %p\n",
1229 		 error, ap->a_nch->ncp->nc_vp, ap));
1230 	LOCKSTOP;
1231 
1232 	return error;
1233 }
1234 
1235 static
1236 int
1237 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
1238 {
1239 	hammer2_inode_t *dip;
1240 	hammer2_tid_t inum;
1241 	int error;
1242 
1243 	LOCKSTART;
1244 	dip = VTOI(ap->a_dvp);
1245 	inum = dip->meta.iparent;
1246 	*ap->a_vpp = NULL;
1247 
1248 	if (inum) {
1249 		error = hammer2_vfs_vget(ap->a_dvp->v_mount, NULL,
1250 					 inum, ap->a_vpp);
1251 	} else {
1252 		error = ENOENT;
1253 	}
1254 	LOCKSTOP;
1255 	return error;
1256 }
1257 
1258 static
1259 int
1260 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
1261 {
1262 	hammer2_inode_t *dip;
1263 	hammer2_inode_t *nip;
1264 	struct namecache *ncp;
1265 	const uint8_t *name;
1266 	size_t name_len;
1267 	hammer2_tid_t inum;
1268 	int error;
1269 
1270 	LOCKSTART;
1271 	dip = VTOI(ap->a_dvp);
1272 	if (dip->pmp->ronly) {
1273 		LOCKSTOP;
1274 		return (EROFS);
1275 	}
1276 
1277 	ncp = ap->a_nch->ncp;
1278 	name = ncp->nc_name;
1279 	name_len = ncp->nc_nlen;
1280 
1281 	hammer2_pfs_memory_wait(dip->pmp);
1282 	hammer2_trans_init(dip->pmp, 0);
1283 
1284 	inum = hammer2_trans_newinum(dip->pmp);
1285 
1286 	/*
1287 	 * Create the actual inode as a hidden file in the iroot, then
1288 	 * create the directory entry as a hardlink to it.  The creation
1289 	 * of the actual inode sets its nlinks to 1 which is the value
1290 	 * we desire.
1291 	 */
1292 	nip = hammer2_inode_create(dip->pmp->iroot, dip, ap->a_vap, ap->a_cred,
1293 				   NULL, 0, inum,
1294 				   inum, 0, 0,
1295 				   0, &error);
1296 	if (error == 0) {
1297 		hammer2_inode_create(dip, dip, NULL, NULL,
1298 				     name, name_len, 0,
1299 				     nip->meta.inum,
1300 				     HAMMER2_OBJTYPE_HARDLINK, nip->meta.type,
1301 				     0, &error);
1302 	}
1303 
1304 	if (error) {
1305 		KKASSERT(nip == NULL);
1306 		*ap->a_vpp = NULL;
1307 	} else {
1308 		*ap->a_vpp = hammer2_igetv(nip, &error);
1309 		hammer2_inode_unlock(nip);
1310 	}
1311 
1312 	/*
1313 	 * Update dip's mtime
1314 	 */
1315 	if (error == 0) {
1316 		uint64_t mtime;
1317 
1318 		hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1319 		hammer2_update_time(&mtime);
1320 		hammer2_inode_modify(dip);
1321 		dip->meta.mtime = mtime;
1322 		hammer2_inode_unlock(dip);
1323 	}
1324 
1325 	hammer2_trans_done(dip->pmp);
1326 
1327 	if (error == 0) {
1328 		cache_setunresolved(ap->a_nch);
1329 		cache_setvp(ap->a_nch, *ap->a_vpp);
1330 	}
1331 	LOCKSTOP;
1332 	return error;
1333 }
1334 
1335 static
1336 int
1337 hammer2_vop_open(struct vop_open_args *ap)
1338 {
1339 	return vop_stdopen(ap);
1340 }
1341 
1342 /*
1343  * hammer2_vop_advlock { vp, id, op, fl, flags }
1344  */
1345 static
1346 int
1347 hammer2_vop_advlock(struct vop_advlock_args *ap)
1348 {
1349 	hammer2_inode_t *ip = VTOI(ap->a_vp);
1350 	hammer2_off_t size;
1351 
1352 	size = ip->meta.size;
1353 	return (lf_advlock(ap, &ip->advlock, size));
1354 }
1355 
1356 static
1357 int
1358 hammer2_vop_close(struct vop_close_args *ap)
1359 {
1360 	return vop_stdclose(ap);
1361 }
1362 
1363 /*
1364  * hammer2_vop_nlink { nch, dvp, vp, cred }
1365  *
1366  * Create a hardlink from (vp) to {dvp, nch}.
1367  */
1368 static
1369 int
1370 hammer2_vop_nlink(struct vop_nlink_args *ap)
1371 {
1372 	hammer2_inode_t *tdip;	/* target directory to create link in */
1373 	hammer2_inode_t *ip;	/* inode we are hardlinking to */
1374 	struct namecache *ncp;
1375 	const uint8_t *name;
1376 	size_t name_len;
1377 	int error;
1378 
1379 	LOCKSTART;
1380 	tdip = VTOI(ap->a_dvp);
1381 	if (tdip->pmp->ronly) {
1382 		LOCKSTOP;
1383 		return (EROFS);
1384 	}
1385 
1386 	ncp = ap->a_nch->ncp;
1387 	name = ncp->nc_name;
1388 	name_len = ncp->nc_nlen;
1389 
1390 	/*
1391 	 * ip represents the file being hardlinked.  The file could be a
1392 	 * normal file or a hardlink target if it has already been hardlinked.
1393 	 * (with the new semantics, it will almost always be a hardlink
1394 	 * target).
1395 	 *
1396 	 * Bump nlinks and potentially also create or move the hardlink
1397 	 * target in the parent directory common to (ip) and (tdip).  The
1398 	 * consolidation code can modify ip->cluster.  The returned cluster
1399 	 * is locked.
1400 	 */
1401 	ip = VTOI(ap->a_vp);
1402 	hammer2_pfs_memory_wait(ip->pmp);
1403 	hammer2_trans_init(ip->pmp, 0);
1404 
1405 	/*
1406 	 * Target should be an indexed inode or there's no way we will ever
1407 	 * be able to find it!
1408 	 */
1409 	KKASSERT((ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE) == 0);
1410 
1411 	error = 0;
1412 
1413 	/*
1414 	 * Can return NULL and error == EXDEV if the common parent
1415 	 * crosses a directory with the xlink flag set.
1416 	 */
1417 	hammer2_inode_lock(tdip, 0);
1418 	hammer2_inode_lock(ip, 0);
1419 
1420 	/*
1421 	 * Create the hardlink target and bump nlinks.
1422 	 */
1423 	if (error == 0) {
1424 		hammer2_inode_create(tdip, tdip, NULL, NULL,
1425 				     name, name_len, 0,
1426 				     ip->meta.inum,
1427 				     HAMMER2_OBJTYPE_HARDLINK, ip->meta.type,
1428 				     0, &error);
1429 		hammer2_inode_modify(ip);
1430 		++ip->meta.nlinks;
1431 	}
1432 	if (error == 0) {
1433 		/*
1434 		 * Update dip's mtime
1435 		 */
1436 		uint64_t mtime;
1437 
1438 		hammer2_update_time(&mtime);
1439 		hammer2_inode_modify(tdip);
1440 		tdip->meta.mtime = mtime;
1441 
1442 		cache_setunresolved(ap->a_nch);
1443 		cache_setvp(ap->a_nch, ap->a_vp);
1444 	}
1445 	hammer2_inode_unlock(ip);
1446 	hammer2_inode_unlock(tdip);
1447 
1448 	hammer2_trans_done(ip->pmp);
1449 
1450 	LOCKSTOP;
1451 	return error;
1452 }
1453 
1454 /*
1455  * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap }
1456  *
1457  * The operating system has already ensured that the directory entry
1458  * does not exist and done all appropriate namespace locking.
1459  */
1460 static
1461 int
1462 hammer2_vop_ncreate(struct vop_ncreate_args *ap)
1463 {
1464 	hammer2_inode_t *dip;
1465 	hammer2_inode_t *nip;
1466 	struct namecache *ncp;
1467 	const uint8_t *name;
1468 	size_t name_len;
1469 	hammer2_tid_t inum;
1470 	int error;
1471 
1472 	LOCKSTART;
1473 	dip = VTOI(ap->a_dvp);
1474 	if (dip->pmp->ronly) {
1475 		LOCKSTOP;
1476 		return (EROFS);
1477 	}
1478 
1479 	ncp = ap->a_nch->ncp;
1480 	name = ncp->nc_name;
1481 	name_len = ncp->nc_nlen;
1482 	hammer2_pfs_memory_wait(dip->pmp);
1483 	hammer2_trans_init(dip->pmp, 0);
1484 
1485 	inum = hammer2_trans_newinum(dip->pmp);
1486 
1487 	/*
1488 	 * Create the actual inode as a hidden file in the iroot, then
1489 	 * create the directory entry as a hardlink to it.  The creation
1490 	 * of the actual inode sets its nlinks to 1 which is the value
1491 	 * we desire.
1492 	 */
1493 	nip = hammer2_inode_create(dip->pmp->iroot, dip, ap->a_vap, ap->a_cred,
1494 				   NULL, 0, inum,
1495 				   inum, 0, 0,
1496 				   0, &error);
1497 
1498 	if (error == 0) {
1499 		hammer2_inode_create(dip, dip, NULL, NULL,
1500 				     name, name_len, 0,
1501 				     nip->meta.inum,
1502 				     HAMMER2_OBJTYPE_HARDLINK, nip->meta.type,
1503 				     0, &error);
1504 	}
1505 	if (error) {
1506 		KKASSERT(nip == NULL);
1507 		*ap->a_vpp = NULL;
1508 	} else {
1509 		*ap->a_vpp = hammer2_igetv(nip, &error);
1510 		hammer2_inode_unlock(nip);
1511 	}
1512 
1513 	/*
1514 	 * Update dip's mtime
1515 	 */
1516 	if (error == 0) {
1517 		uint64_t mtime;
1518 
1519 		hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1520 		hammer2_update_time(&mtime);
1521 		hammer2_inode_modify(dip);
1522 		dip->meta.mtime = mtime;
1523 		hammer2_inode_unlock(dip);
1524 	}
1525 
1526 	hammer2_trans_done(dip->pmp);
1527 
1528 	if (error == 0) {
1529 		cache_setunresolved(ap->a_nch);
1530 		cache_setvp(ap->a_nch, *ap->a_vpp);
1531 	}
1532 	LOCKSTOP;
1533 	return error;
1534 }
1535 
1536 /*
1537  * Make a device node (typically a fifo)
1538  */
1539 static
1540 int
1541 hammer2_vop_nmknod(struct vop_nmknod_args *ap)
1542 {
1543 	hammer2_inode_t *dip;
1544 	hammer2_inode_t *nip;
1545 	struct namecache *ncp;
1546 	const uint8_t *name;
1547 	size_t name_len;
1548 	hammer2_tid_t inum;
1549 	int error;
1550 
1551 	LOCKSTART;
1552 	dip = VTOI(ap->a_dvp);
1553 	if (dip->pmp->ronly) {
1554 		LOCKSTOP;
1555 		return (EROFS);
1556 	}
1557 
1558 	ncp = ap->a_nch->ncp;
1559 	name = ncp->nc_name;
1560 	name_len = ncp->nc_nlen;
1561 	hammer2_pfs_memory_wait(dip->pmp);
1562 	hammer2_trans_init(dip->pmp, 0);
1563 
1564 	/*
1565 	 * Create the actual device inode as a hidden file in the iroot,
1566 	 * then create the directory entry as a hardlink to it, the same
1567 	 * as for a regular file.  The creation of the actual inode sets
1568 	 * its nlinks to 1 which is the value we desire.
1569 	 */
1570 	inum = hammer2_trans_newinum(dip->pmp);
1571 	nip = hammer2_inode_create(dip->pmp->iroot, dip, ap->a_vap, ap->a_cred,
1572 				   NULL, 0, inum,
1573 				   inum, 0, 0,
1574 				   0, &error);
1575 	if (error == 0) {
1576 		hammer2_inode_create(dip, dip, NULL, NULL,
1577 				     name, name_len, 0,
1578 				     nip->meta.inum,
1579 				     HAMMER2_OBJTYPE_HARDLINK, nip->meta.type,
1580 				     0, &error);
1581 	}
1582 
1583 
1584 	if (error) {
1585 		KKASSERT(nip == NULL);
1586 		*ap->a_vpp = NULL;
1587 	} else {
1588 		*ap->a_vpp = hammer2_igetv(nip, &error);
1589 		hammer2_inode_unlock(nip);
1590 	}
1591 
1592 	/*
1593 	 * Update dip's mtime
1594 	 */
1595 	if (error == 0) {
1596 		uint64_t mtime;
1597 
1598 		hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1599 		hammer2_update_time(&mtime);
1600 		hammer2_inode_modify(dip);
1601 		dip->meta.mtime = mtime;
1602 		hammer2_inode_unlock(dip);
1603 	}
1604 
1605 	hammer2_trans_done(dip->pmp);
1606 
1607 	if (error == 0) {
1608 		cache_setunresolved(ap->a_nch);
1609 		cache_setvp(ap->a_nch, *ap->a_vpp);
1610 	}
1611 	LOCKSTOP;
1612 	return error;
1613 }
1614 
1615 /*
1616  * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1617  */
1618 static
1619 int
1620 hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
1621 {
1622 	hammer2_inode_t *dip;
1623 	hammer2_inode_t *nip;
1624 	struct namecache *ncp;
1625 	const uint8_t *name;
1626 	size_t name_len;
1627 	hammer2_tid_t inum;
1628 	int error;
1629 
1630 	dip = VTOI(ap->a_dvp);
1631 	if (dip->pmp->ronly)
1632 		return (EROFS);
1633 
1634 	ncp = ap->a_nch->ncp;
1635 	name = ncp->nc_name;
1636 	name_len = ncp->nc_nlen;
1637 	hammer2_pfs_memory_wait(dip->pmp);
1638 	hammer2_trans_init(dip->pmp, 0);
1639 
1640 	ap->a_vap->va_type = VLNK;	/* enforce type */
1641 
1642 	/*
1643 	 * Create the actual softlink inode as a hidden file in the iroot,
1644 	 * then create the directory entry as a hardlink to it, the same
1645 	 * as for a regular file.  The creation of the actual inode sets
1646 	 * its nlinks to 1 which is the value we desire.
1647 	 */
1648 	inum = hammer2_trans_newinum(dip->pmp);
1649 
1650 	nip = hammer2_inode_create(dip->pmp->iroot, dip, ap->a_vap, ap->a_cred,
1651 				   NULL, 0, inum,
1652 				   inum, 0, 0,
1653 				   0, &error);
1654 	if (error == 0) {
1655 		hammer2_inode_create(dip, dip, NULL, NULL,
1656 				     name, name_len, 0,
1657 				     nip->meta.inum,
1658 				     HAMMER2_OBJTYPE_HARDLINK, nip->meta.type,
1659 				     0, &error);
1660 	}
1661 
1662 
1663 	if (error) {
1664 		KKASSERT(nip == NULL);
1665 		*ap->a_vpp = NULL;
1666 		hammer2_trans_done(dip->pmp);
1667 		return error;
1668 	}
1669 	*ap->a_vpp = hammer2_igetv(nip, &error);
1670 
1671 	/*
1672 	 * Build the softlink (~like file data) and finalize the namecache.
1673 	 */
1674 	if (error == 0) {
1675 		size_t bytes;
1676 		struct uio auio;
1677 		struct iovec aiov;
1678 
1679 		bytes = strlen(ap->a_target);
1680 
1681 		hammer2_inode_unlock(nip);
1682 		bzero(&auio, sizeof(auio));
1683 		bzero(&aiov, sizeof(aiov));
1684 		auio.uio_iov = &aiov;
1685 		auio.uio_segflg = UIO_SYSSPACE;
1686 		auio.uio_rw = UIO_WRITE;
1687 		auio.uio_resid = bytes;
1688 		auio.uio_iovcnt = 1;
1689 		auio.uio_td = curthread;
1690 		aiov.iov_base = ap->a_target;
1691 		aiov.iov_len = bytes;
1692 		error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
1693 		/* XXX handle error */
1694 		error = 0;
1695 	} else {
1696 		hammer2_inode_unlock(nip);
1697 	}
1698 
1699 	/*
1700 	 * Update dip's mtime
1701 	 */
1702 	if (error == 0) {
1703 		uint64_t mtime;
1704 
1705 		hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1706 		hammer2_update_time(&mtime);
1707 		hammer2_inode_modify(dip);
1708 		dip->meta.mtime = mtime;
1709 		hammer2_inode_unlock(dip);
1710 	}
1711 
1712 	hammer2_trans_done(dip->pmp);
1713 
1714 	/*
1715 	 * Finalize namecache
1716 	 */
1717 	if (error == 0) {
1718 		cache_setunresolved(ap->a_nch);
1719 		cache_setvp(ap->a_nch, *ap->a_vpp);
1720 		/* hammer2_knote(ap->a_dvp, NOTE_WRITE); */
1721 	}
1722 	return error;
1723 }
1724 
1725 /*
1726  * hammer2_vop_nremove { nch, dvp, cred }
1727  */
1728 static
1729 int
1730 hammer2_vop_nremove(struct vop_nremove_args *ap)
1731 {
1732 	hammer2_xop_unlink_t *xop;
1733 	hammer2_inode_t *dip;
1734 	hammer2_inode_t *ip;
1735 	struct namecache *ncp;
1736 	int error;
1737 	int isopen;
1738 
1739 	LOCKSTART;
1740 	dip = VTOI(ap->a_dvp);
1741 	if (dip->pmp->ronly) {
1742 		LOCKSTOP;
1743 		return(EROFS);
1744 	}
1745 
1746 	ncp = ap->a_nch->ncp;
1747 
1748 	hammer2_pfs_memory_wait(dip->pmp);
1749 	hammer2_trans_init(dip->pmp, 0);
1750 	hammer2_inode_lock(dip, 0);
1751 
1752 	/*
1753 	 * The unlink XOP unlinks the path from the directory and
1754 	 * locates and returns the cluster associated with the real inode.
1755 	 * We have to handle nlinks here on the frontend.
1756 	 */
1757 	xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
1758 	hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);
1759 	isopen = cache_isopen(ap->a_nch);
1760 	xop->isdir = 0;
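	/*
	 * If the file is still open the deletion is not made permanent
	 * here; final disposition occurs when the last reference goes away.
	 */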
1761 	xop->dopermanent = isopen ?  0 : HAMMER2_DELETE_PERMANENT;
1762 	hammer2_xop_start(&xop->head, hammer2_xop_unlink);
1763 
1764 	/*
1765 	 * Collect the real inode and adjust nlinks, destroy the real
1766 	 * inode if nlinks transitions to 0 and it was the real inode
1767 	 * (else it has already been removed).
1768 	 */
1769 	error = hammer2_xop_collect(&xop->head, 0);
1770 	hammer2_inode_unlock(dip);
1771 
1772 	if (error == 0) {
1773 		ip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster, -1);
1774 		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1775 		if (ip) {
1776 			hammer2_inode_unlink_finisher(ip, isopen);
1777 			hammer2_inode_unlock(ip);
1778 		}
1779 	} else {
1780 		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1781 	}
1782 
1783 	/*
1784 	 * Update dip's mtime
1785 	 */
1786 	if (error == 0) {
1787 		uint64_t mtime;
1788 
1789 		hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1790 		hammer2_update_time(&mtime);
1791 		hammer2_inode_modify(dip);
1792 		dip->meta.mtime = mtime;
1793 		hammer2_inode_unlock(dip);
1794 	}
1795 
1796 	hammer2_inode_run_sideq(dip->pmp);
1797 	hammer2_trans_done(dip->pmp);
1798 	if (error == 0)
1799 		cache_unlink(ap->a_nch);
1800 	LOCKSTOP;
1801 	return (error);
1802 }
1803 
1804 /*
1805  * hammer2_vop_nrmdir { nch, dvp, cred }
1806  */
1807 static
1808 int
1809 hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
1810 {
1811 	hammer2_xop_unlink_t *xop;
1812 	hammer2_inode_t *dip;
1813 	hammer2_inode_t *ip;
1814 	struct namecache *ncp;
1815 	int isopen;
1816 	int error;
1817 
1818 	LOCKSTART;
1819 	dip = VTOI(ap->a_dvp);
1820 	if (dip->pmp->ronly) {
1821 		LOCKSTOP;
1822 		return(EROFS);
1823 	}
1824 
1825 	hammer2_pfs_memory_wait(dip->pmp);
1826 	hammer2_trans_init(dip->pmp, 0);
1827 	hammer2_inode_lock(dip, 0);
1828 
1829 	xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
1830 
1831 	ncp = ap->a_nch->ncp;
1832 	hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);
1833 	isopen = cache_isopen(ap->a_nch);
1834 	xop->isdir = 1;
1835 	xop->dopermanent = isopen ?  0 : HAMMER2_DELETE_PERMANENT;
1836 	hammer2_xop_start(&xop->head, hammer2_xop_unlink);
1837 
1838 	/*
1839 	 * Collect the real inode and adjust nlinks, destroy the real
1840 	 * inode if nlinks transitions to 0 and it was the real inode
1841 	 * (else it has already been removed).
1842 	 */
1843 	error = hammer2_xop_collect(&xop->head, 0);
1844 	hammer2_inode_unlock(dip);
1845 
1846 	if (error == 0) {
1847 		ip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster, -1);
1848 		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1849 		if (ip) {
1850 			hammer2_inode_unlink_finisher(ip, isopen);
1851 			hammer2_inode_unlock(ip);
1852 		}
1853 	} else {
1854 		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1855 	}
1856 
1857 	/*
1858 	 * Update dip's mtime
1859 	 */
1860 	if (error == 0) {
1861 		uint64_t mtime;
1862 
1863 		hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1864 		hammer2_update_time(&mtime);
1865 		hammer2_inode_modify(dip);
1866 		dip->meta.mtime = mtime;
1867 		hammer2_inode_unlock(dip);
1868 	}
1869 
1870 	hammer2_inode_run_sideq(dip->pmp);
1871 	hammer2_trans_done(dip->pmp);
1872 	if (error == 0)
1873 		cache_unlink(ap->a_nch);
1874 	LOCKSTOP;
1875 	return (error);
1876 }
1877 
1878 /*
1879  * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1880  */
1881 static
1882 int
1883 hammer2_vop_nrename(struct vop_nrename_args *ap)
1884 {
1885 	struct namecache *fncp;
1886 	struct namecache *tncp;
1887 	hammer2_inode_t *fdip;
1888 	hammer2_inode_t *tdip;
1889 	hammer2_inode_t *ip;
1890 	const uint8_t *fname;
1891 	size_t fname_len;
1892 	const uint8_t *tname;
1893 	size_t tname_len;
1894 	int error;
1895 	int tnch_error;
1896 	int update_tdip;
1897 	int update_fdip;
1898 	hammer2_key_t tlhc;
1899 
1900 	if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
1901 		return(EXDEV);
1902 	if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
1903 		return(EXDEV);
1904 
1905 	fdip = VTOI(ap->a_fdvp);	/* source directory */
1906 	tdip = VTOI(ap->a_tdvp);	/* target directory */
1907 
1908 	if (fdip->pmp->ronly)
1909 		return(EROFS);
1910 
1911 	LOCKSTART;
1912 	fncp = ap->a_fnch->ncp;		/* entry name in source */
1913 	fname = fncp->nc_name;
1914 	fname_len = fncp->nc_nlen;
1915 
1916 	tncp = ap->a_tnch->ncp;		/* entry name in target */
1917 	tname = tncp->nc_name;
1918 	tname_len = tncp->nc_nlen;
1919 
1920 	hammer2_pfs_memory_wait(tdip->pmp);
1921 	hammer2_trans_init(tdip->pmp, 0);
1922 
1923 	update_tdip = 0;
1924 	update_fdip = 0;
1925 
1926 	/*
1927 	 * ip is the inode being renamed.  If this is a hardlink then
1928 	 * ip represents the actual file and not the hardlink marker.
1929 	 */
1930 	ip = VTOI(fncp->nc_vp);
1931 
1932 	KKASSERT((ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE) == 0);
1933 
1934 	/*
1935 	 * Can return NULL and error == EXDEV if the common parent
1936 	 * crosses a directory with the xlink flag set.
1937 	 */
1938 	error = 0;
1939 	hammer2_inode_lock(fdip, 0);
1940 	hammer2_inode_lock(tdip, 0);
1941 	hammer2_inode_ref(ip);		/* extra ref */
1942 
1943 	hammer2_inode_lock(ip, 0);
1944 
1945 	/*
1946 	 * Delete the target namespace.
1947 	 */
1948 	{
1949 		hammer2_xop_unlink_t *xop2;
1950 		hammer2_inode_t *tip;
1951 		int isopen;
1952 
1953 		/*
1954 		 * The unlink XOP unlinks the path from the directory and
1955 		 * locates and returns the cluster associated with the real
1956 		 * inode.  We have to handle nlinks here on the frontend.
1957 		 */
1958 		xop2 = hammer2_xop_alloc(tdip, HAMMER2_XOP_MODIFYING);
1959 		hammer2_xop_setname(&xop2->head, tname, tname_len);
1960 		isopen = cache_isopen(ap->a_tnch);
1961 		xop2->isdir = -1;
1962 		xop2->dopermanent = isopen ?  0 : HAMMER2_DELETE_PERMANENT;
1963 		hammer2_xop_start(&xop2->head, hammer2_xop_unlink);
1964 
1965 		/*
1966 		 * Collect the real inode and adjust nlinks, destroy the real
1967 		 * inode if nlinks transitions to 0 and it was the real inode
1968 		 * (else it has already been removed).
1969 		 */
1970 		tnch_error = hammer2_xop_collect(&xop2->head, 0);
1971 		/* hammer2_inode_unlock(tdip); */
1972 
1973 		if (tnch_error == 0) {
1974 			tip = hammer2_inode_get(tdip->pmp, NULL,
1975 						&xop2->head.cluster, -1);
1976 			hammer2_xop_retire(&xop2->head, HAMMER2_XOPMASK_VOP);
1977 			if (tip) {
1978 				hammer2_inode_unlink_finisher(tip, isopen);
1979 				hammer2_inode_unlock(tip);
1980 			}
1981 		} else {
1982 			hammer2_xop_retire(&xop2->head, HAMMER2_XOPMASK_VOP);
1983 		}
1984 		/* hammer2_inode_lock(tdip, 0); */
1985 
1986 		if (tnch_error && tnch_error != ENOENT) {
1987 			error = tnch_error;
1988 			goto done2;
1989 		}
1990 		update_tdip = 1;
1991 	}
1992 
1993 	/*
1994 	 * Resolve the collision space for (tdip, tname, tname_len)
1995 	 *
1996 	 * tdip must be held exclusively locked to prevent races.
1997 	 */
1998 	{
1999 		hammer2_xop_scanlhc_t *sxop;
2000 		hammer2_tid_t lhcbase;
2001 
2002 		tlhc = hammer2_dirhash(tname, tname_len);
2003 		lhcbase = tlhc;
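		/*
		 * Linearly probe upward from the name hash until an unused
		 * key is found.  The probe must stay within the low-mask
		 * collision space (checked below, else ENOSPC).
		 */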
2004 		sxop = hammer2_xop_alloc(tdip, HAMMER2_XOP_MODIFYING);
2005 		sxop->lhc = tlhc;
2006 		hammer2_xop_start(&sxop->head, hammer2_xop_scanlhc);
2007 		while ((error = hammer2_xop_collect(&sxop->head, 0)) == 0) {
2008 			if (tlhc != sxop->head.cluster.focus->bref.key)
2009 				break;
2010 			++tlhc;
2011 		}
2012 		hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);
2013 
2014 		if (error) {
2015 			if (error != ENOENT)
2016 				goto done2;
2017 			++tlhc;
2018 			error = 0;
2019 		}
2020 		if ((lhcbase ^ tlhc) & ~HAMMER2_DIRHASH_LOMASK) {
2021 			error = ENOSPC;
2022 			goto done2;
2023 		}
2024 	}
2025 
2026 	/*
2027 	 * Everything is set up; do the rename.
2028 	 *
2029 	 * We have to synchronize ip->meta to the underlying operation.
2030 	 *
2031 	 * NOTE: To avoid deadlocks we cannot lock (ip) while we are
2032 	 *	 unlinking elements from their directories.  Locking
2033 	 *	 the nlinks field does not lock the whole inode.
2034 	 */
2035 	/* hammer2_inode_lock(ip, 0); */
2036 	if (error == 0) {
2037 		hammer2_xop_nrename_t *xop4;
2038 
2039 		xop4 = hammer2_xop_alloc(fdip, HAMMER2_XOP_MODIFYING);
2040 		xop4->lhc = tlhc;
2041 		xop4->ip_key = ip->meta.name_key;
2042 		hammer2_xop_setip2(&xop4->head, ip);
2043 		hammer2_xop_setip3(&xop4->head, tdip);
2044 		hammer2_xop_setname(&xop4->head, fname, fname_len);
2045 		hammer2_xop_setname2(&xop4->head, tname, tname_len);
2046 		hammer2_xop_start(&xop4->head, hammer2_xop_nrename);
2047 
2048 		error = hammer2_xop_collect(&xop4->head, 0);
2049 		hammer2_xop_retire(&xop4->head, HAMMER2_XOPMASK_VOP);
2050 
2051 		if (error == ENOENT)
2052 			error = 0;
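		/*
		 * If the inode's name_key is directly visible in the
		 * directory, synchronize the frontend copy of the meta-data
		 * with the new name (per the ip->meta note above).
		 */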
2053 		if (error == 0 &&
2054 		    (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) {
2055 			hammer2_inode_modify(ip);
2056 			ip->meta.name_len = tname_len;
2057 			ip->meta.name_key = tlhc;
2058 
2059 		}
2060 		update_tdip = 1;
2061 		update_fdip = 1;
2062 	}
2063 
2064 done2:
2065 	/*
2066 	 * Update directory mtimes to reflect that something changed.
2067 	 */
2068 	if (update_fdip || update_tdip) {
2069 		uint64_t mtime;
2070 
2071 		hammer2_update_time(&mtime);
2072 		if (update_fdip) {
2073 			hammer2_inode_modify(fdip);
2074 			fdip->meta.mtime = mtime;
2075 		}
2076 		if (update_tdip) {
2077 			hammer2_inode_modify(tdip);
2078 			tdip->meta.mtime = mtime;
2079 		}
2080 	}
2081 	hammer2_inode_unlock(ip);
2082 	hammer2_inode_unlock(tdip);
2083 	hammer2_inode_unlock(fdip);
2084 	hammer2_inode_drop(ip);
2085 	hammer2_inode_run_sideq(fdip->pmp);
2086 
2087 	hammer2_trans_done(tdip->pmp);
2088 
2089 	/*
2090 	 * Issue the namecache update after unlocking all the internal
2091 	 * hammer2 structures; otherwise we might deadlock.
2092 	 */
2093 	if (tnch_error == 0) {
2094 		cache_unlink(ap->a_tnch);
2095 		cache_setunresolved(ap->a_tnch);
2096 	}
2097 	if (error == 0)
2098 		cache_rename(ap->a_fnch, ap->a_tnch);
2099 
2100 	LOCKSTOP;
2101 	return (error);
2102 }
2103 
2104 /*
2105  * hammer2_vop_ioctl { vp, command, data, fflag, cred }
2106  */
2107 static
2108 int
2109 hammer2_vop_ioctl(struct vop_ioctl_args *ap)
2110 {
2111 	hammer2_inode_t *ip;
2112 	int error;
2113 
2114 	LOCKSTART;
2115 	ip = VTOI(ap->a_vp);
2116 
2117 	error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
2118 			      ap->a_fflag, ap->a_cred);
2119 	LOCKSTOP;
2120 	return (error);
2121 }
2122 
2123 static
2124 int
2125 hammer2_vop_mountctl(struct vop_mountctl_args *ap)
2126 {
2127 	struct mount *mp;
2128 	hammer2_pfs_t *pmp;
2129 	int rc;
2130 
2131 	LOCKSTART;
2132 	switch (ap->a_op) {
2133 	case MOUNTCTL_SET_EXPORT:
2134 		mp = ap->a_head.a_ops->head.vv_mount;
2135 		pmp = MPTOPMP(mp);
2136 
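		/*
		 * Validate the size of the export_args buffer before
		 * handing it to vfs_export().
		 */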
2137 		if (ap->a_ctllen != sizeof(struct export_args))
2138 			rc = EINVAL;
2139 		else
2140 			rc = vfs_export(mp, &pmp->export,
2141 					(const struct export_args *)ap->a_ctl);
2142 		break;
2143 	default:
2144 		rc = vop_stdmountctl(ap);
2145 		break;
2146 	}
2147 	LOCKSTOP;
2148 	return (rc);
2149 }
2150 
2151 /*
2152  * KQFILTER
2153  */
2154 static void filt_hammer2detach(struct knote *kn);
2155 static int filt_hammer2read(struct knote *kn, long hint);
2156 static int filt_hammer2write(struct knote *kn, long hint);
2157 static int filt_hammer2vnode(struct knote *kn, long hint);
2158 
2159 static struct filterops hammer2read_filtops =
2160 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
2161 	  NULL, filt_hammer2detach, filt_hammer2read };
2162 static struct filterops hammer2write_filtops =
2163 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
2164 	  NULL, filt_hammer2detach, filt_hammer2write };
2165 static struct filterops hammer2vnode_filtops =
2166 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
2167 	  NULL, filt_hammer2detach, filt_hammer2vnode };
2168 
2169 static
2170 int
2171 hammer2_vop_kqfilter(struct vop_kqfilter_args *ap)
2172 {
2173 	struct vnode *vp = ap->a_vp;
2174 	struct knote *kn = ap->a_kn;
2175 
2176 	switch (kn->kn_filter) {
2177 	case EVFILT_READ:
2178 		kn->kn_fop = &hammer2read_filtops;
2179 		break;
2180 	case EVFILT_WRITE:
2181 		kn->kn_fop = &hammer2write_filtops;
2182 		break;
2183 	case EVFILT_VNODE:
2184 		kn->kn_fop = &hammer2vnode_filtops;
2185 		break;
2186 	default:
2187 		return (EOPNOTSUPP);
2188 	}
2189 
2190 	kn->kn_hook = (caddr_t)vp;
2191 
2192 	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
2193 
2194 	return(0);
2195 }
2196 
2197 static void
2198 filt_hammer2detach(struct knote *kn)
2199 {
2200 	struct vnode *vp = (void *)kn->kn_hook;
2201 
2202 	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
2203 }
2204 
2205 static int
2206 filt_hammer2read(struct knote *kn, long hint)
2207 {
2208 	struct vnode *vp = (void *)kn->kn_hook;
2209 	hammer2_inode_t *ip = VTOI(vp);
2210 	off_t off;
2211 
2212 	if (hint == NOTE_REVOKE) {
2213 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2214 		return(1);
2215 	}
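	/*
	 * kn_data is the number of bytes readable between the current
	 * file offset and EOF, clamped to fit the intptr_t field.  The
	 * old API expects a boolean-style ready indication regardless
	 * of the byte count.
	 */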
2216 	off = ip->meta.size - kn->kn_fp->f_offset;
2217 	kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
2218 	if (kn->kn_sfflags & NOTE_OLDAPI)
2219 		return(1);
2220 	return (kn->kn_data != 0);
2221 }
2222 
2224 static int
2225 filt_hammer2write(struct knote *kn, long hint)
2226 {
2227 	if (hint == NOTE_REVOKE)
2228 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2229 	kn->kn_data = 0;
2230 	return (1);
2231 }
2232 
2233 static int
2234 filt_hammer2vnode(struct knote *kn, long hint)
2235 {
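	/* Latch any event bits the watcher asked for via kn_sfflags. */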
2236 	if (kn->kn_sfflags & hint)
2237 		kn->kn_fflags |= hint;
2238 	if (hint == NOTE_REVOKE) {
2239 		kn->kn_flags |= (EV_EOF | EV_NODATA);
2240 		return (1);
2241 	}
2242 	return (kn->kn_fflags != 0);
2243 }
2244 
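/*
 * For reference, a minimal userland sketch of how the filters above get
 * exercised (standard kqueue(2) usage; the file path is hypothetical and
 * this snippet is not part of the kernel source proper):
 *
 *	#include <sys/types.h>
 *	#include <sys/event.h>
 *	#include <fcntl.h>
 *
 *	int kq = kqueue();
 *	int fd = open("/mnt/data/somefile", O_RDONLY);
 *	struct kevent ev;
 *
 *	EV_SET(&ev, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR,
 *	       NOTE_WRITE | NOTE_DELETE, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);	- register the knote
 *	kevent(kq, NULL, 0, &ev, 1, NULL);	- block until an event
 */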
2245 /*
2246  * FIFO VOPS
2247  */
2248 static
2249 int
2250 hammer2_vop_markatime(struct vop_markatime_args *ap)
2251 {
2252 	hammer2_inode_t *ip;
2253 	struct vnode *vp;
2254 
2255 	vp = ap->a_vp;
2256 	ip = VTOI(vp);
2257 
2258 	if (ip->pmp->ronly)
2259 		return(EROFS);
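	/*
	 * hammer2 does not appear to maintain access times, so a
	 * writable mount has nothing further to update here.
	 */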
2260 	return(0);
2261 }
2262 
2263 static
2264 int
2265 hammer2_vop_fifokqfilter(struct vop_kqfilter_args *ap)
2266 {
2267 	int error;
2268 
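	/*
	 * Give the fifofs filter first crack; fall back to the regular
	 * hammer2 vnode kqfilter only if fifofs rejects the filter type.
	 */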
2269 	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2270 	if (error)
2271 		error = hammer2_vop_kqfilter(ap);
2272 	return(error);
2273 }
2274 
2275 /*
2276  * VOPS vector
2277  */
2278 struct vop_ops hammer2_vnode_vops = {
2279 	.vop_default	= vop_defaultop,
2280 	.vop_fsync	= hammer2_vop_fsync,
2281 	.vop_getpages	= vop_stdgetpages,
2282 	.vop_putpages	= vop_stdputpages,
2283 	.vop_access	= hammer2_vop_access,
2284 	.vop_advlock	= hammer2_vop_advlock,
2285 	.vop_close	= hammer2_vop_close,
2286 	.vop_nlink	= hammer2_vop_nlink,
2287 	.vop_ncreate	= hammer2_vop_ncreate,
2288 	.vop_nsymlink	= hammer2_vop_nsymlink,
2289 	.vop_nremove	= hammer2_vop_nremove,
2290 	.vop_nrmdir	= hammer2_vop_nrmdir,
2291 	.vop_nrename	= hammer2_vop_nrename,
2292 	.vop_getattr	= hammer2_vop_getattr,
2293 	.vop_setattr	= hammer2_vop_setattr,
2294 	.vop_readdir	= hammer2_vop_readdir,
2295 	.vop_readlink	= hammer2_vop_readlink,
2298 	.vop_read	= hammer2_vop_read,
2299 	.vop_write	= hammer2_vop_write,
2300 	.vop_open	= hammer2_vop_open,
2301 	.vop_inactive	= hammer2_vop_inactive,
2302 	.vop_reclaim 	= hammer2_vop_reclaim,
2303 	.vop_nresolve	= hammer2_vop_nresolve,
2304 	.vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
2305 	.vop_nmkdir 	= hammer2_vop_nmkdir,
2306 	.vop_nmknod 	= hammer2_vop_nmknod,
2307 	.vop_ioctl	= hammer2_vop_ioctl,
2308 	.vop_mountctl	= hammer2_vop_mountctl,
2309 	.vop_bmap	= hammer2_vop_bmap,
2310 	.vop_strategy	= hammer2_vop_strategy,
2311 	.vop_kqfilter	= hammer2_vop_kqfilter
2312 };
2313 
2314 struct vop_ops hammer2_spec_vops = {
2315 	.vop_default	= vop_defaultop,
2316 	.vop_fsync	= hammer2_vop_fsync,
2317 	.vop_read	= vop_stdnoread,
2318 	.vop_write	= vop_stdnowrite,
2319 	.vop_access	= hammer2_vop_access,
2320 	.vop_close	= hammer2_vop_close,
2321 	.vop_markatime	= hammer2_vop_markatime,
2322 	.vop_getattr	= hammer2_vop_getattr,
2323 	.vop_inactive	= hammer2_vop_inactive,
2324 	.vop_reclaim	= hammer2_vop_reclaim,
2325 	.vop_setattr	= hammer2_vop_setattr
2326 };
2327 
2328 struct vop_ops hammer2_fifo_vops = {
2329 	.vop_default	= fifo_vnoperate,
2330 	.vop_fsync	= hammer2_vop_fsync,
2331 #if 0
2332 	.vop_read	= hammer2_vop_fiforead,
2333 	.vop_write	= hammer2_vop_fifowrite,
2334 #endif
2335 	.vop_access	= hammer2_vop_access,
2336 #if 0
2337 	.vop_close	= hammer2_vop_fifoclose,
2338 #endif
2339 	.vop_markatime	= hammer2_vop_markatime,
2340 	.vop_getattr	= hammer2_vop_getattr,
2341 	.vop_inactive	= hammer2_vop_inactive,
2342 	.vop_reclaim	= hammer2_vop_reclaim,
2343 	.vop_setattr	= hammer2_vop_setattr,
2344 	.vop_kqfilter	= hammer2_vop_fifokqfilter
2345 };
2346 
2347