xref: /dragonfly/sys/vfs/dirfs/dirfs_vnops.c (revision 279dd846)
1 /*
2  * Copyright (c) 2013 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Antonio Huete Jimenez <tuxillo@quantumachine.net>
6  * by Matthew Dillon <dillon@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  */
36 
37 #include <stdio.h>
38 #include <errno.h>
39 #include <strings.h>
40 #include <unistd.h>
41 
42 #include <sys/vfsops.h>
43 #include <sys/vnode.h>
44 #include <sys/stat.h>
45 #include <sys/namecache.h>
46 #include <sys/queue.h>
47 #include <sys/systm.h>
48 #include <sys/dirent.h>
49 #include <sys/mount.h>
50 #include <sys/signalvar.h>
51 #include <sys/resource.h>
52 #include <sys/buf2.h>
53 #include <sys/kern_syscall.h>
54 #include <sys/ktr.h>
55 
56 #include "dirfs.h"
57 
58 /*
59  * Kernel tracing facilities
60  */
61 KTR_INFO_MASTER_EXTERN(dirfs);
62 
63 KTR_INFO(KTR_DIRFS, dirfs, unsupported, 0,
64     "DIRFS(func=%s)",
65     const char *func);
66 
67 KTR_INFO(KTR_DIRFS, dirfs, nresolve, 0,
68     "DIRFS(dnp=%p ncp_name=%s parent=%p pfd=%d error=%d)",
69     dirfs_node_t dnp, char *name, dirfs_node_t pdnp, int pfd, int error);
70 
71 KTR_INFO(KTR_DIRFS, dirfs, ncreate, 1,
72     "DIRFS(dnp=%p ncp_name=%s parent=%p pfd=%d error=%d)",
73     dirfs_node_t dnp, char *name, dirfs_node_t pdnp, int pfd, int error);
74 
75 KTR_INFO(KTR_DIRFS, dirfs, open, 2,
76     "DIRFS(dnp=%p dn_name=%s nfd=%d)",
77     dirfs_node_t dnp, char *name, int fd);
78 
79 KTR_INFO(KTR_DIRFS, dirfs, close, 3,
80     "DIRFS(dnp=%p fd=%d opencount=%d writecount=%d vfsync error=%d)",
81     dirfs_node_t dnp, int fd, int oc, int wc, int error);
82 
83 KTR_INFO(KTR_DIRFS, dirfs, readdir, 4,
84     "DIRFS(dnp=%p fd=%d startoff=%jd uio_offset=%jd)",
85     dirfs_node_t dnp, int fd, off_t startoff, off_t uoff);
86 
87 KTR_INFO(KTR_DIRFS, dirfs, access, 5,
88     "DIRFS(dnp=%p error=%d)",
89     dirfs_node_t dnp, int error);
90 
91 KTR_INFO(KTR_DIRFS, dirfs, getattr, 6,
92     "DIRFS(dnp=%p error=%d)",
93     dirfs_node_t dnp, int error);
94 
95 KTR_INFO(KTR_DIRFS, dirfs, setattr, 7,
96     "DIRFS(dnp=%p action=%s error=%d)",
97     dirfs_node_t dnp, const char *action, int error);
98 
99 KTR_INFO(KTR_DIRFS, dirfs, fsync, 8,
100     "DIRFS(dnp=%p error=%d)",
101     dirfs_node_t dnp, int error);
102 
103 KTR_INFO(KTR_DIRFS, dirfs, read, 9,
104     "DIRFS(dnp=%p size=%jd error=%d)",
105     dirfs_node_t dnp, size_t size, int error);
106 
107 KTR_INFO(KTR_DIRFS, dirfs, write, 10,
108     "DIRFS(dnp=%p size=%jd boff=%jd uio_resid=%jd error=%d)",
109     dirfs_node_t dnp, off_t boff, size_t resid, size_t size, int error);
110 
111 KTR_INFO(KTR_DIRFS, dirfs, strategy, 11,
112     "DIRFS(dnp=%p dnp_size=%jd iosize=%jd b_cmd=%d b_error=%d "
113     "b_resid=%d bio_off=%jd error=%d)",
114     dirfs_node_t dnp, size_t size, size_t iosize, int cmd, int berror,
115     int bresid, off_t biooff, int error);
116 
117 KTR_INFO(KTR_DIRFS, dirfs, nremove, 12,
118     "DIRFS(dnp=%p pdnp=%p error=%d)",
119     dirfs_node_t dnp, dirfs_node_t pdnp, int error);
120 
121 KTR_INFO(KTR_DIRFS, dirfs, nmkdir, 13,
122     "DIRFS(pdnp=%p dnp=%p nc_name=%p error=%d)",
123     dirfs_node_t dnp, dirfs_node_t pdnp, char *n, int error);
124 
125 KTR_INFO(KTR_DIRFS, dirfs, nrmdir, 13,
126     "DIRFS(pdnp=%p dnp=%p error=%d)",
127     dirfs_node_t dnp, dirfs_node_t pdnp, int error);
128 
129 KTR_INFO(KTR_DIRFS, dirfs, nsymlink, 14,
130     "DIRFS(dnp=%p target=%s symlink=%s error=%d)",
131     dirfs_node_t dnp, char *tgt, char *lnk, int error);
132 
133 /* Needed prototypes */
134 int dirfs_access(struct vop_access_args *);
135 int dirfs_getattr(struct vop_getattr_args *);
136 int dirfs_setattr(struct vop_setattr_args *);
137 int dirfs_reclaim(struct vop_reclaim_args *);
138 
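/*
 * Deliver the accumulated kqueue event flags, if any, to listeners on
 * the vnode.
 */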
139 static __inline
140 void
141 dirfs_knote(struct vnode *vp, int flags)
142 {
143 	if (flags)
144 		KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
145 }
146 
147 static int
148 dirfs_nresolve(struct vop_nresolve_args *ap)
149 {
150 	dirfs_node_t pdnp, dnp, d1, d2;
151 	dirfs_mount_t dmp;
152 	struct namecache *ncp;
153 	struct nchandle *nch;
154 	struct vnode *dvp;
155 	struct vnode *vp;
156 	struct mount *mp;
157 	int error;
158 
159 	dbg(3, "called\n");
160 
161 	error = 0;
162 	nch = ap->a_nch;
163 	ncp = nch->ncp;
164 	mp = nch->mount;
165 	dvp = ap->a_dvp;
166 	vp = NULL;
167 	dnp = d1 = d2 = NULL;
168 	pdnp = VP_TO_NODE(dvp);
169 	dmp = VFS_TO_DIRFS(mp);
170 
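	/*
	 * First look for the node on the list of nodes whose host file
	 * descriptor is being kept open (the passive fd list), matching
	 * on parent and name.
	 */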
171 	dirfs_node_lock(pdnp);
172 	TAILQ_FOREACH_MUTABLE(d1, &dmp->dm_fdlist, dn_fdentry, d2) {
173 		if (d1->dn_parent == pdnp &&
174 		    (strcmp(d1->dn_name, ncp->nc_name) == 0)) {
175 			dnp = d1;
176 			dirfs_node_ref(dnp);
177 			passive_fd_list_hits++;
178 			break;
179 		}
180 	}
181 	dirfs_node_unlock(pdnp);
182 
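	/*
	 * Reuse a node found on the list; otherwise fall back to a full
	 * lookup, which may also report that the name does not exist
	 * (ENOENT).
	 */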
183 	if (dnp) {
184 		dirfs_alloc_vp(mp, &vp, LK_CANRECURSE, dnp);
185 		dirfs_node_drop(dmp, dnp);
186 	} else {
187 		passive_fd_list_miss++;
188 		error = dirfs_alloc_file(dmp, &dnp, pdnp, ncp, &vp, NULL, 0);
189 	}
190 
191 	if (vp) {
192 		if (error == ENOENT) {
193 			cache_setvp(nch, NULL);
194 		} else {
195 			vn_unlock(vp);
196 			cache_setvp(nch, vp);
197 			vrele(vp);
198 		}
199 	}
200 
201 	KTR_LOG(dirfs_nresolve, dnp, ncp->nc_name, pdnp, pdnp->dn_fd, error);
202 
203 	return error;
204 }
205 
206 static int
207 dirfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
208 {
209 	dbg(3, "called\n");
210 
211 	KTR_LOG(dirfs_unsupported, __func__);
212 
213 	return EOPNOTSUPP;
214 }
215 
216 static int
217 dirfs_ncreate(struct vop_ncreate_args *ap)
218 {
219 	dirfs_node_t pdnp;
220 	dirfs_node_t dnp;
221 	dirfs_mount_t dmp;
222 	struct namecache *ncp;
223 	struct vnode *dvp;
224 	struct vnode **vpp;
225 	struct vattr *vap;
226 	int perms = 0;
227 	int error;
228 
229 	dbg(3, "called\n");
230 
231 	error = 0;
232 	dnp = NULL;
233 	dvp = ap->a_dvp;
234 	pdnp = VP_TO_NODE(dvp);
235 	dmp = VFS_TO_DIRFS(dvp->v_mount);
236 	vap = ap->a_vap;
237 	ncp = ap->a_nch->ncp;
238 	vpp = ap->a_vpp;
239 
240 	dirfs_node_getperms(pdnp, &perms);
241 	if ((perms & DIRFS_NODE_WR) == 0)
242 		error = EPERM;
243 	else
244 		error = dirfs_alloc_file(dmp, &dnp, pdnp, ncp, vpp, vap,
245 		    (O_CREAT | O_RDWR));
246 
247 	if (error == 0) {
248 		cache_setunresolved(ap->a_nch);
249 		cache_setvp(ap->a_nch, *vpp);
250 		dirfs_knote(dvp, NOTE_WRITE);
251 	}
252 
253 	KTR_LOG(dirfs_ncreate, dnp, ncp->nc_name, pdnp, pdnp->dn_fd, error);
254 
255 	return error;
256 }
257 
258 static int
259 dirfs_nmknod(struct vop_nmknod_args *v)
260 {
261 	dbg(3, "called\n");
262 
263 	return EOPNOTSUPP;
264 }
265 
266 static int
267 dirfs_open(struct vop_open_args *ap)
268 {
269 	dirfs_node_t dnp;
270 	dirfs_mount_t dmp;
271 	struct vnode *vp;
272 	int error;
273 
274 	dbg(3, "called\n");
275 
276 	vp = ap->a_vp;
277 	dnp = VP_TO_NODE(vp);
278 	dmp = VFS_TO_DIRFS(vp->v_mount);
279 	error = 0;
280 
281 	/*
282 	 * The root inode has already been allocated and opened in
283 	 * VFS_ROOT(), so there is no reason to attempt to open it again.
284 	 */
285 	if (dmp->dm_root != dnp && dnp->dn_fd == DIRFS_NOFD) {
286 		error = dirfs_open_helper(dmp, dnp, DIRFS_NOFD, NULL);
287 		if (error)
288 			return error;
289 	}
290 
291 	KTR_LOG(dirfs_open, dnp, dnp->dn_name, dnp->dn_fd);
292 
293 	return vop_stdopen(ap);
294 }
295 
296 static int
297 dirfs_close(struct vop_close_args *ap)
298 {
299 	struct vnode *vp;
300 	dirfs_node_t dnp;
301 	int error;
302 
303 	dbg(3, "called\n");
304 
305 	error = 0;
306 	vp = ap->a_vp;
307 	dnp = VP_TO_NODE(vp);
308 
309 	if (vp->v_type == VREG) {
310 		error = vfsync(vp, 0, 1, NULL, NULL);
311 		if (error)
312 			dbg(9, "vfsync error=%d\n", error);
313 	}
314 	vop_stdclose(ap);
315 
316 	KTR_LOG(dirfs_close, dnp, dnp->dn_fd, vp->v_opencount,
317 	    vp->v_writecount, error);
318 
319 	return 0;
320 }
321 
322 int
323 dirfs_access(struct vop_access_args *ap)
324 {
325 	struct vnode *vp = ap->a_vp;
326 	int error;
327 	dirfs_node_t dnp;
328 
329 	dbg(3, "called\n");
330 
331 	dnp = VP_TO_NODE(vp);
332 
333 	switch (vp->v_type) {
334 	case VDIR:
335 		/* FALLTHROUGH */
336 	case VLNK:
337 		/* FALLTHROUGH */
338 	case VREG:
339 		if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
340 			error = EROFS;
341 			goto out;
342 		}
343 		break;
344 	case VBLK:
345 		/* FALLTHROUGH */
346 	case VCHR:
347 		/* FALLTHROUGH */
348 	case VSOCK:
349 		/* FALLTHROUGH */
350 	case VFIFO:
351 		break;
352 
353 	default:
354 		error = EINVAL;
355 		goto out;
356 	}
357 
358 	error = vop_helper_access(ap, dnp->dn_uid,
359 	    dnp->dn_gid, dnp->dn_mode, 0);
360 
361 out:
362 	KTR_LOG(dirfs_access, dnp, error);
363 
364 	return error;
365 }
366 
367 int
368 dirfs_getattr(struct vop_getattr_args *ap)
369 {
370 	dirfs_mount_t dmp;
371 	dirfs_node_t dnp;
372 	dirfs_node_t pathnp;
373 	struct vnode *vp;
374 	struct vattr *vap;
375 	char *tmp;
376 	char *pathfree;
377 	int error;
378 
379 	dbg(3, "called\n");
380 
381 	vp = ap->a_vp;
382 	vap = ap->a_vap;
383 	dnp = VP_TO_NODE(vp);
384 	dmp = VFS_TO_DIRFS(vp->v_mount);
385 
386 	KKASSERT(dnp);	/* The vnode must always have a node attached */
387 
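	/*
	 * Non-root nodes are stat'ed relative to the nearest ancestor
	 * that holds an open file descriptor; the root is stat'ed via
	 * the absolute mount path.
	 */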
388 	if (!dirfs_node_isroot(dnp)) {
389 		pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree);
390 
391 		KKASSERT(pathnp->dn_fd != DIRFS_NOFD);
392 
393 		error = dirfs_node_stat(pathnp->dn_fd, tmp, dnp);
394 		dirfs_dropfd(dmp, pathnp, pathfree);
395 	} else {
396 		error = dirfs_node_stat(DIRFS_NOFD, dmp->dm_path, dnp);
397 	}
398 
399 	if (error == 0) {
400 		dirfs_node_lock(dnp);
401 		vap->va_nlink = dnp->dn_links;
402 		vap->va_type = dnp->dn_type;
403 		vap->va_mode = dnp->dn_mode;
404 		vap->va_uid = dnp->dn_uid;
405 		vap->va_gid = dnp->dn_gid;
406 		vap->va_fileid = dnp->dn_ino;
407 		vap->va_size = dnp->dn_size;
408 		vap->va_blocksize = dnp->dn_blocksize;
409 		vap->va_atime.tv_sec = dnp->dn_atime;
410 		vap->va_atime.tv_nsec = dnp->dn_atimensec;
411 		vap->va_mtime.tv_sec = dnp->dn_mtime;
412 		vap->va_mtime.tv_nsec = dnp->dn_mtimensec;
413 		vap->va_ctime.tv_sec = dnp->dn_ctime;
414 		vap->va_ctime.tv_nsec = dnp->dn_ctimensec;
415 		vap->va_bytes = dnp->dn_size;
416 		vap->va_gen = dnp->dn_gen;
417 		vap->va_flags = dnp->dn_flags;
418 		vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
419 		dirfs_node_unlock(dnp);
420 	}
421 
422 	KTR_LOG(dirfs_getattr, dnp, error);
423 
424 	return error;
425 }
426 
427 int
428 dirfs_setattr(struct vop_setattr_args *ap)
429 {
430 	dirfs_mount_t dmp;
431 	dirfs_node_t dnp;
432 	struct vnode *vp;
433 	struct vattr *vap;
434 	struct ucred *cred;
435 	int error;
436 	int kflags = 0;
437 #ifdef KTR
438 	const char *msg[6] = {
439 		"invalid",
440 		"chflags",
441 		"chsize",
442 		"chown",
443 		"chmod",
444 		"chtimes"
445 	};
446 #endif
447 	int msgno;
448 
449 	dbg(3, "called\n");
450 
451 	error = msgno = 0;
452 	vp = ap->a_vp;
453 	vap = ap->a_vap;
454 	cred = ap->a_cred;
455 	dnp = VP_TO_NODE(vp);
456 	dmp = VFS_TO_DIRFS(vp->v_mount);
457 
458 	/*
459 	 * Check for unsettable attributes.
460 	 */
461 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
462 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
463 	    (vap->va_blocksize != VNOVAL) || (vap->va_rmajor != VNOVAL) ||
464 	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
465 		msgno = 0;
466 		error = EINVAL;
467 		goto out;
468 	}
469 
470 	/*
471 	 * Change file flags
472 	 */
473 	if (error == 0 && (vap->va_flags != VNOVAL)) {
474 		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
475 			error = EROFS;
476 		} else {
477 			error = dirfs_node_chflags(dnp, vap->va_flags, cred);
478 			kflags |= NOTE_ATTRIB;
479 		}
480 		msgno = 1;
481 		goto out;
482 	}
483 
484 	/*
485 	 * Extend or truncate a file
486 	 */
487 	if (error == 0 && (vap->va_size != VNOVAL)) {
488 		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
489 			error = EROFS;
490 		} else {
491 			if (vap->va_size > dnp->dn_size)
492 				kflags |= NOTE_WRITE | NOTE_EXTEND;
493 			else
494 				kflags |= NOTE_WRITE;
495 			error = dirfs_node_chsize(dnp, vap->va_size);
496 		}
497 		dbg(9, "dnp size=%jd vap size=%jd\n", dnp->dn_size, vap->va_size);
498 		msgno = 2;
499 		goto out;
500 	}
501 
502 	/*
503 	 * Change file owner or group
504 	 */
505 	if (error == 0 && (vap->va_uid != (uid_t)VNOVAL ||
506 		vap->va_gid != (gid_t)VNOVAL)) {
507 		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
508 			error = EROFS;
509 		} else {
510 			mode_t cur_mode = dnp->dn_mode;
511 			uid_t cur_uid = dnp->dn_uid;
512 			gid_t cur_gid = dnp->dn_gid;
513 
514 			error = vop_helper_chown(ap->a_vp, vap->va_uid,
515 						 vap->va_gid, ap->a_cred,
516 						 &cur_uid, &cur_gid, &cur_mode);
517 			if (error == 0 &&
518 			    (cur_mode != dnp->dn_mode ||
519 			     cur_uid != dnp->dn_uid ||
520 			     cur_gid != dnp->dn_gid)) {
521 				error = dirfs_node_chown(dmp, dnp, cur_uid,
522 							 cur_gid, cur_mode);
523 				kflags |= NOTE_ATTRIB;
524 			}
525 		}
526 		msgno = 3;
527 		goto out;
528 	}
529 
530 	/*
531 	 * Change file mode
532 	 */
533 	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) {
534 		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
535 			error = EROFS;
536 		} else {
537 			mode_t cur_mode = dnp->dn_mode;
538 			uid_t cur_uid = dnp->dn_uid;
539 			gid_t cur_gid = dnp->dn_gid;
540 
541 			error = vop_helper_chmod(ap->a_vp, vap->va_mode,
542 						 ap->a_cred,
543 						 cur_uid, cur_gid, &cur_mode);
544 			if (error == 0 && cur_mode != dnp->dn_mode) {
545 				error = dirfs_node_chmod(dmp, dnp, cur_mode);
546 				kflags |= NOTE_ATTRIB;
547 			}
548 		}
549 		msgno = 4;
550 		goto out;
551 	}
552 
553 	/*
554 	 * Change file times
555 	 */
556 	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
557 		vap->va_atime.tv_nsec != VNOVAL) ||
558 		(vap->va_mtime.tv_sec != VNOVAL &&
559 		vap->va_mtime.tv_nsec != VNOVAL) )) {
560 		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
561 			error = EROFS;
562 		} else {
563 			error = dirfs_node_chtimes(dnp);
564 			kflags |= NOTE_ATTRIB;
565 		}
566 		msgno = 5;
567 		goto out;
568 
569 	}
570 out:
571 	KTR_LOG(dirfs_setattr, dnp, msg[msgno], error);
572 
573 	dirfs_knote(vp, kflags);
574 
575 	return error;
576 }
577 
578 static int
579 dirfs_fsync(struct vop_fsync_args *ap)
580 {
581 	dirfs_node_t dnp = VP_TO_NODE(ap->a_vp);
582 	int error = 0;
583 
584 	dbg(3, "called\n");
585 
586 	vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
587 
588 	if (dnp->dn_fd != DIRFS_NOFD) {
589 		if (fsync(dnp->dn_fd) == -1)
590 			error = errno;
591 	}
592 
593 	KTR_LOG(dirfs_fsync, dnp, error);
594 
595 	return error;
596 }
597 
598 static int
599 dirfs_read(struct vop_read_args *ap)
600 {
601 	struct buf *bp;
602 	struct vnode *vp = ap->a_vp;
603 	struct uio *uio = ap->a_uio;
604 	dirfs_node_t dnp;
605 	off_t base_offset;
606 	size_t offset;
607 	size_t len;
608 	int error;
609 
610 	dbg(3, "called\n");
611 
612 	error = 0;
613 	if (uio->uio_resid == 0) {
614 		dbg(9, "zero len uio->uio_resid\n");
615 		return error;
616 	}
617 
618 	dnp = VP_TO_NODE(vp);
619 
620 	if (uio->uio_offset < 0)
621 		return (EINVAL);
622 	if (vp->v_type != VREG)
623 		return (EINVAL);
624 
625 	while (uio->uio_resid > 0 && uio->uio_offset < dnp->dn_size) {
626 		/*
627 		 * Use buffer cache I/O (via dirfs_strategy)
628 		 */
629 		offset = (size_t)uio->uio_offset & BMASK;
630 		base_offset = (off_t)uio->uio_offset - offset;
631 		bp = getcacheblk(vp, base_offset, BSIZE, 0);
632 		if (bp == NULL) {
633 			lwkt_gettoken(&vp->v_mount->mnt_token);
634 			error = bread(vp, base_offset, BSIZE, &bp);
635 			if (error) {
636 				brelse(bp);
637 				lwkt_reltoken(&vp->v_mount->mnt_token);
638 				dbg(9, "dirfs_read bread error %d\n", error);
639 				break;
640 			}
641 			lwkt_reltoken(&vp->v_mount->mnt_token);
642 		}
643 
644 		/*
645 		 * Figure out how many bytes we can actually copy this loop.
646 		 */
647 		len = BSIZE - offset;
648 		if (len > uio->uio_resid)
649 			len = uio->uio_resid;
650 		if (len > dnp->dn_size - uio->uio_offset)
651 			len = (size_t)(dnp->dn_size - uio->uio_offset);
652 
653 		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
654 		bqrelse(bp);
655 		if (error) {
656 			dbg(9, "dirfs_read uiomove error %d\n", error);
657 			break;
658 		}
659 	}
660 
661 	KTR_LOG(dirfs_read, dnp, dnp->dn_size, error);
662 
663 	return(error);
664 }
665 
666 static int
667 dirfs_write (struct vop_write_args *ap)
668 {
669 	dirfs_node_t dnp;
670 	dirfs_mount_t dmp;
671 	struct buf *bp;
672 	struct vnode *vp = ap->a_vp;
673 	struct uio *uio = ap->a_uio;
674 	struct thread *td = uio->uio_td;
675 	int error;
676 	int kflags = 0;
677 	off_t osize;
678 	off_t nsize;
679 	off_t base_offset;
680 	size_t offset;
681 	size_t len;
682 	struct rlimit limit;
683 
684 	dbg(3, "called\n");
685 
686 	error = 0;
687 	if (uio->uio_resid == 0) {
688 		dbg(9, "zero-length uio->uio_resid\n");
689 		return error;
690 	}
691 
692 	dnp = VP_TO_NODE(vp);
693 	dmp = VFS_TO_DIRFS(vp->v_mount);
694 
695 	if (vp->v_type != VREG)
696 		return (EINVAL);
697 
698 	if (vp->v_type == VREG && td != NULL) {
699 		error = kern_getrlimit(RLIMIT_FSIZE, &limit);
700 		if (error != 0) {
701 			dbg(9, "rlimit failure\n");
702 			return error;
703 		}
704 		if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) {
705 			dbg(9, "file too big\n");
706 			ksignal(td->td_proc, SIGXFSZ);
707 			return (EFBIG);
708 		}
709 	}
710 
711 	if (ap->a_ioflag & IO_APPEND)
712 		uio->uio_offset = dnp->dn_size;
713 
714 	/*
715 	 * Buffer cache operations may be deferred; make sure the file
716 	 * is correctly sized right now.
717 	 */
718 	osize = dnp->dn_size;
719 	nsize = uio->uio_offset + uio->uio_resid;
720 	if (nsize > osize && uio->uio_resid) {
721 		KKASSERT(dnp->dn_fd >= 0);
722 		dnp->dn_size = nsize;
723 		ftruncate(dnp->dn_fd, nsize);
724 		nvextendbuf(vp, osize, nsize,
725 			    BSIZE, BSIZE, -1, -1, 0);
726 		kflags |= NOTE_EXTEND;
727 	} /* else nsize = osize; NOT USED */
728 
729 	while (uio->uio_resid > 0) {
730 		/*
731 		 * Use buffer cache I/O (via dirfs_strategy)
732 		 */
733 		offset = (size_t)uio->uio_offset & BMASK;
734 		base_offset = (off_t)uio->uio_offset - offset;
735 		len = BSIZE - offset;
736 
737 		if (len > uio->uio_resid)
738 			len = uio->uio_resid;
739 
740 		error = bread(vp, base_offset, BSIZE, &bp);
741 		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
742 		if (error) {
743 			brelse(bp);
744 			dbg(9, "WRITE uiomove failed\n");
745 			break;
746 		}
747 		kflags |= NOTE_WRITE;
748 
749 		dbg(9, "WRITE dn_size=%jd uio_offset=%jd uio_resid=%jd base_offset=%jd\n",
750 		    dnp->dn_size, uio->uio_offset, uio->uio_resid, base_offset);
751 
752 		if (ap->a_ioflag & IO_SYNC)
753 			bwrite(bp);
754 		else
755 			bdwrite(bp);
756 	}
757 
758 	KTR_LOG(dirfs_write, dnp, base_offset, uio->uio_resid,
759 	    dnp->dn_size, error);
760 
761 	if (kflags)
762 		dirfs_knote(vp, kflags);
763 	return error;
764 }
765 
766 static int
767 dirfs_advlock (struct vop_advlock_args *ap)
768 {
769 	struct vnode *vp = ap->a_vp;
770 	dirfs_node_t dnp = VP_TO_NODE(vp);
771 
772 	dbg(3, "called\n");
773 
774 	return (lf_advlock(ap, &dnp->dn_advlock, dnp->dn_size));
775 }
776 
777 static int
778 dirfs_strategy(struct vop_strategy_args *ap)
779 {
780 	dirfs_node_t dnp;
781 	dirfs_mount_t dmp;
782 	struct bio *bio = ap->a_bio;
783 	struct buf *bp = bio->bio_buf;
784 	struct vnode *vp = ap->a_vp;
785 	int error;
786 	size_t iosize;
787 	char *tmp;
788 	char *pathfree;
789 
790 	dbg(3, "called\n");
791 
792 	dnp = VP_TO_NODE(vp);
793 	dmp = VFS_TO_DIRFS(vp->v_mount);
794 
795 	error = 0;
796 
797 	if (vp->v_type != VREG)  {
798 		dbg(9, "not VREG\n");
799 		bp->b_resid = bp->b_bcount;
800 		bp->b_flags |= B_ERROR | B_INVAL;
801 		bp->b_error = EINVAL;
802 		biodone(bio);
803 		return(0);
804 	}
805 
806 	if (dnp->dn_fd == DIRFS_NOFD) {
807 		print_backtrace(-1);
808 		panic("Meh, no fd to write to. dnp=%p\n", dnp);
809 	}
810 
811 	if (bio->bio_offset + bp->b_bcount > dnp->dn_size)
812 		iosize = dnp->dn_size - bio->bio_offset;
813 	else
814 		iosize = bp->b_bcount;
815 	KKASSERT((ssize_t)iosize >= 0);
816 
817 	switch (bp->b_cmd) {
818 	case BUF_CMD_WRITE:
819 		error = pwrite(dnp->dn_fd, bp->b_data, iosize, bio->bio_offset);
820 		break;
821 	case BUF_CMD_READ:
822 		error = pread(dnp->dn_fd, bp->b_data, iosize, bio->bio_offset);
823 		break;
824 	default:
825 		bp->b_error = error = EINVAL;
826 		bp->b_flags |= B_ERROR;
827 		break;
828 	}
829 
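	/*
	 * For reads and writes 'error' holds the pread()/pwrite() return
	 * value: the number of bytes transferred, or -1 on failure.
	 * Zero-fill any part of the buffer that was not transferred.
	 */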
830 	if (error >= 0 && error < bp->b_bcount)
831 		bzero(bp->b_data + error, bp->b_bcount - error);
832 
833 	if (error < 0 && errno != EINTR) {
834 		dbg(9, "error=%d dnp=%p dnp->dn_fd=%d "
835 		    "bio->bio_offset=%ld bcount=%d resid=%d iosize=%zd\n",
836 		    errno, dnp, dnp->dn_fd, bio->bio_offset, bp->b_bcount,
837 		    bp->b_resid, iosize);
838 		bp->b_error = errno;
839 		bp->b_resid = bp->b_bcount;
840 		bp->b_flags |= B_ERROR;
841 	} else {
842 		tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
843 		dirfs_node_stat(DIRFS_NOFD, tmp, dnp);
844 		dirfs_dropfd(dmp, NULL, pathfree);
845 	}
846 
847 	KTR_LOG(dirfs_strategy, dnp, dnp->dn_size, iosize, bp->b_cmd,
848 	    bp->b_error, bp->b_resid, bio->bio_offset, error);
849 
850 	biodone(bio);
851 
852 	return 0;
853 }
854 
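/*
 * dirfs does not sit on a real block device; map logical offsets 1:1
 * and report no read-ahead/read-behind, so the buffer cache hands the
 * request straight to dirfs_strategy().
 */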
855 static int
856 dirfs_bmap(struct vop_bmap_args *ap)
857 {
858 	dbg(3, "called\n");
859 
860 	if (ap->a_doffsetp != NULL)
861 		*ap->a_doffsetp = ap->a_loffset;
862 	if (ap->a_runp != NULL)
863 		*ap->a_runp = 0;
864 	if (ap->a_runb != NULL)
865 		*ap->a_runb = 0;
866 
867 	return 0;
868 }
869 
870 static int
871 dirfs_nremove(struct vop_nremove_args *ap)
872 {
873 	dirfs_node_t dnp, pdnp;
874 	dirfs_node_t pathnp;
875 	dirfs_mount_t dmp;
876 	struct vnode *dvp;
877 	struct nchandle *nch;
878 	struct namecache *ncp;
879 	struct mount *mp;
880 	struct vnode *vp;
881 	int error;
882 	char *tmp;
883 	char *pathfree;
884 
885 	dbg(3, "called\n");
886 
887 	error = 0;
888 	tmp = NULL;
889 	vp = NULL;
890 	dvp = ap->a_dvp;
891 	nch = ap->a_nch;
892 	ncp = nch->ncp;
893 
894 	mp = dvp->v_mount;
895 	dmp = VFS_TO_DIRFS(mp);
896 
897 	lwkt_gettoken(&mp->mnt_token);
898 	cache_vget(nch, ap->a_cred, LK_SHARED, &vp);
899 	vn_unlock(vp);
900 
901 	pdnp = VP_TO_NODE(dvp);
902 	dnp = VP_TO_NODE(vp);
903 
904 	if (vp->v_type == VDIR) {
905 		error = EISDIR;
906 	} else {
907 		pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree);
908 		dirfs_node_lock(pdnp);
909 		error = unlinkat(pathnp->dn_fd, tmp, 0);
910 		if (error == 0) {
911 			cache_unlink(nch);
912 			dirfs_knote(vp, NOTE_DELETE);
913 			dirfs_node_setpassive(dmp, dnp, 0);
914 			if (dnp->dn_parent) {
915 				dirfs_node_drop(dmp, dnp->dn_parent);
916 				dnp->dn_parent = NULL;
917 			}
918 		} else {
919 			error = errno;
920 		}
921 		dirfs_node_unlock(pdnp);
922 		dirfs_dropfd(dmp, pathnp, pathfree);
923 	}
924 	dirfs_knote(dvp, NOTE_WRITE);
925 	vrele(vp);
926 	lwkt_reltoken(&mp->mnt_token);
927 
928 	KTR_LOG(dirfs_nremove, dnp, pdnp, error);
929 
930 	return error;
931 }
932 
933 static int
934 dirfs_nlink(struct vop_nlink_args *ap)
935 {
936 	dbg(3, "called\n");
937 
938 	KTR_LOG(dirfs_unsupported, __func__);
939 
940 	return EOPNOTSUPP;
941 }
942 
943 static int
944 dirfs_nrename(struct vop_nrename_args *ap)
945 {
946 	dirfs_node_t dnp, fdnp, tdnp;
947 	dirfs_mount_t dmp;
948 	struct namecache *fncp, *tncp;
949 	struct vnode *fdvp, *tdvp, *vp;
950 	struct mount *mp;
951 	char *fpath, *fpathfree;
952 	char *tpath, *tpathfree;
953 	int error;
954 
955 	dbg(3, "called\n");
956 
957 	error = 0;
958 	fdvp = ap->a_fdvp;
959 	tdvp = ap->a_tdvp;
960 	fncp = ap->a_fnch->ncp;
961 	tncp = ap->a_tnch->ncp;
962 	mp = fdvp->v_mount;
963 	dmp = VFS_TO_DIRFS(mp);
964 	fdnp = VP_TO_NODE(fdvp);
965 	tdnp = VP_TO_NODE(tdvp);
966 
967 	dbg(9, "fdnp=%p tdnp=%p from=%s to=%s\n", fdnp, tdnp, fncp->nc_name,
968 	    tncp->nc_name);
969 
970 	if (fdvp->v_mount != tdvp->v_mount)
971 		return(EXDEV);
972 	if (fdvp->v_mount != fncp->nc_vp->v_mount)
973 		return(EXDEV);
974 	if (fdvp->v_mount->mnt_flag & MNT_RDONLY)
975 		return (EROFS);
976 
977 	tpath = dirfs_node_absolute_path_plus(dmp, tdnp,
978 					      tncp->nc_name, &tpathfree);
979 	fpath = dirfs_node_absolute_path_plus(dmp, fdnp,
980 					      fncp->nc_name, &fpathfree);
981 	error = rename(fpath, tpath);
982 	if (error < 0)
983 		error = errno;
984 	if (error == 0) {
985 		vp = fncp->nc_vp;	/* file being renamed */
986 		dnp = VP_TO_NODE(vp);
987 		dirfs_node_setname(dnp, tncp->nc_name, tncp->nc_nlen);
988 		dirfs_knote(fdvp, NOTE_RENAME);
989 		/*
990 		 * We have to mark the target file that was replaced by
991 		 * the rename as having been unlinked.
992 		 */
993 		vp = tncp->nc_vp;
994 		if (vp) {
995 			dbg(9, "RENAME2\n");
996 			dnp = VP_TO_NODE(vp);
997 			cache_unlink(ap->a_tnch);
998 			dirfs_knote(vp, NOTE_DELETE);
999 			dirfs_node_setpassive(dmp, dnp, 0);
1000 			if (dnp->dn_parent) {
1001 				dirfs_node_drop(dmp, dnp->dn_parent);
1002 				dnp->dn_parent = NULL;
1003 			}
1004 
1005 			/*
1006 			 * nlinks on directories can be a bit weird.  Zero
1007 			 * it out.
1008 			 */
1009 			dnp->dn_links = 0;
1010 			cache_inval_vp(vp, CINV_DESTROY);
1011 		}
1012 		cache_rename(ap->a_fnch, ap->a_tnch);
1013 		dirfs_knote(fdvp, NOTE_WRITE);
1014 		dirfs_knote(tdvp, NOTE_WRITE);
1015 
1016 	}
1017 	dirfs_dropfd(dmp, NULL, fpathfree);
1018 	dirfs_dropfd(dmp, NULL, tpathfree);
1019 
1020 	return error;
1021 }
1022 
1023 static int
1024 dirfs_nmkdir(struct vop_nmkdir_args *ap)
1025 {
1026 	dirfs_mount_t dmp;
1027 	dirfs_node_t dnp, pdnp, dnp1;
1028 	struct namecache *ncp;
1029 	struct vattr *vap;
1030 	struct vnode *dvp;
1031 	struct vnode **vpp;
1032 	char *tmp, *pathfree;
1033 	char *path;
1034 	int pfd, error;
1035 	int extrapath;
1036 
1037 	dbg(3, "called\n");
1038 
1039 	extrapath = error = 0;
1040 	dvp = ap->a_dvp;
1041 	vpp = ap->a_vpp;
1042 	dmp = VFS_TO_DIRFS(dvp->v_mount);
1043 	pdnp = VP_TO_NODE(dvp);
1044 	ncp = ap->a_nch->ncp;
1045 	vap = ap->a_vap;
1046 	pathfree = tmp = path = NULL;
1047 	dnp = NULL;
1048 
1049 	dirfs_node_lock(pdnp);
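	/*
	 * If the parent directory already has an open fd, mkdirat() can
	 * take the new name directly; otherwise build a path relative
	 * to the nearest ancestor that does have one.
	 */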
1050 	if (pdnp->dn_fd != DIRFS_NOFD) {
1051 		pfd = pdnp->dn_fd;
1052 		path = ncp->nc_name;
1053 	} else {
1054 		dnp1 = dirfs_findfd(dmp, pdnp, &tmp, &pathfree);
1055 		pfd = dnp1->dn_fd;
1056 		/* XXX check there is room to copy the path */
1057 		path = kmalloc(MAXPATHLEN, M_DIRFS_MISC, M_ZERO | M_WAITOK);
1058 		ksnprintf(path, MAXPATHLEN, "%s/%s", tmp, ncp->nc_name);
1059 		extrapath = 1;
1060 		dirfs_dropfd(dmp, dnp1, pathfree);
1061 	}
1062 
1063 	error = mkdirat(pfd, path, vap->va_mode);
1064 	if (error) {
1065 		error = errno;
1066 	} else { /* Directory has been made */
1067 		error = dirfs_alloc_file(dmp, &dnp, pdnp, ncp, vpp,
1068 		    vap, O_DIRECTORY);
1069 		if (error)
1070 			error = errno;
1071 		cache_setunresolved(ap->a_nch);
1072 		cache_setvp(ap->a_nch, *vpp);
1073 		dirfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
1074 	}
1075 	dirfs_node_unlock(pdnp);
1076 
1077 	if (extrapath)
1078 		kfree(path, M_DIRFS_MISC);
1079 
1080 	KTR_LOG(dirfs_nmkdir, pdnp, dnp, ncp->nc_name, error);
1081 
1082 	return error;
1083 }
1084 
1085 static int
1086 dirfs_nrmdir(struct vop_nrmdir_args *ap)
1087 {
1088 	dirfs_node_t dnp, pdnp;
1089 	dirfs_mount_t dmp;
1090 	struct vnode *dvp;
1091 	struct nchandle *nch;
1092 	struct namecache *ncp;
1093 	struct mount *mp;
1094 	struct vnode *vp;
1095 	int error;
1096 	char *tmp;
1097 	char *pathfree;
1098 
1099 	dbg(3, "called\n");
1100 
1101 	error = 0;
1102 	tmp = NULL;
1103 	vp = NULL;
1104 	dvp = ap->a_dvp;
1105 	nch = ap->a_nch;
1106 	ncp = nch->ncp;
1107 
1108 	mp = dvp->v_mount;
1109 	dmp = VFS_TO_DIRFS(mp);
1110 
1111 	lwkt_gettoken(&mp->mnt_token);
1112 	cache_vget(nch, ap->a_cred, LK_SHARED, &vp);
1113 	vn_unlock(vp);
1114 
1115 	pdnp = VP_TO_NODE(dvp);
1116 	dnp = VP_TO_NODE(vp);
1117 
1118 	if (vp->v_type != VDIR) {
1119 		error = ENOTDIR;
1120 	} else {
1121 		tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
1122 		dirfs_node_lock(pdnp);
1123 		error = rmdir(tmp);
1124 		if (error == 0) {
1125 			cache_unlink(nch);
1126 			dirfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
1127 			dirfs_node_setpassive(dmp, dnp, 0);
1128 			if (dnp->dn_parent) {
1129 				dirfs_node_drop(dmp, dnp->dn_parent);
1130 				dnp->dn_parent = NULL;
1131 			}
1132 
1133 			/*
1134 			 * nlinks on directories can be a bit weird.  Zero
1135 			 * it out.
1136 			 */
1137 			dnp->dn_links = 0;
1138 			cache_inval_vp(vp, CINV_DESTROY);
1139 		} else {
1140 			error = errno;
1141 		}
1142 		dirfs_node_unlock(pdnp);
1143 		dirfs_dropfd(dmp, NULL, pathfree);
1144 	}
1145 	vrele(vp);
1146 	lwkt_reltoken(&mp->mnt_token);
1147 
1148 	KTR_LOG(dirfs_nrmdir, dnp, pdnp, error);
1149 
1150 	return error;
1151 }
1152 
1153 static int
1154 dirfs_nsymlink(struct vop_nsymlink_args *ap)
1155 {
1156 	dirfs_mount_t dmp;
1157 	dirfs_node_t dnp, pdnp;
1158 	struct mount *mp;
1159 	struct namecache *ncp;
1160 	struct vattr *vap;
1161 	struct vnode *dvp;
1162 	struct vnode **vpp;
1163 	char *tmp, *pathfree;
1164 	char *path;
1165 	int error;
1166 
1167 	dbg(3, "called\n");
1168 
1169 	error = 0;
1170 	dvp = ap->a_dvp;
1171 	vpp = ap->a_vpp;
1172 	mp = dvp->v_mount;
1173 	dmp = VFS_TO_DIRFS(dvp->v_mount);
1174 	pdnp = VP_TO_NODE(dvp);
1175 	ncp = ap->a_nch->ncp;
1176 	vap = ap->a_vap;
1177 	pathfree = tmp = path = NULL;
1178 	dnp = NULL;
1179 
1180 	lwkt_gettoken(&mp->mnt_token);
1181 	vap->va_type = VLNK;
1182 
1183 	/* Find out the whole path of our new symbolic link */
1184 	tmp = dirfs_node_absolute_path(dmp, pdnp, &pathfree);
1185 	/* XXX check there is room to copy the path */
1186 	path = kmalloc(MAXPATHLEN, M_DIRFS_MISC, M_ZERO | M_WAITOK);
1187 	ksnprintf(path, MAXPATHLEN, "%s/%s", tmp, ncp->nc_name);
1188 	dirfs_dropfd(dmp, NULL, pathfree);
1189 
1190 	error = symlink(ap->a_target, path);
1191 	if (error) {
1192 		error = errno;
1193 	} else { /* Symlink has been made */
1194 		error = dirfs_alloc_file(dmp, &dnp, pdnp, ncp, vpp,
1195 		    NULL, 0);
1196 		if (error)
1197 			error = errno;
1198 		cache_setunresolved(ap->a_nch);
1199 		cache_setvp(ap->a_nch, *vpp);
1200 		dirfs_knote(*vpp, NOTE_WRITE);
1201 	}
1202 	dbg(5, "path=%s a_target=%s\n", path, ap->a_target);
1203 
1204 	KTR_LOG(dirfs_nsymlink, dnp, ap->a_target, path, error);
1205 	kfree(path, M_DIRFS_MISC);
1206 	lwkt_reltoken(&mp->mnt_token);
1207 
1208 	return error;
1209 
1210 }
1211 
1212 static int
1213 dirfs_readdir(struct vop_readdir_args *ap)
1214 {
1215 
1216 	struct dirent *dp, *dpn;
1217 	off_t __unused **cookies = ap->a_cookies;
1218 	int *ncookies = ap->a_ncookies;
1219 	int bytes;
1220 	char *buf;
1221 	long base;
1222 	struct vnode *vp = ap->a_vp;
1223 	struct uio *uio;
1224 	dirfs_node_t dnp;
1225 	off_t startoff;
1226 	off_t cnt;
1227 	int error, r;
1228 	size_t bufsiz;
1229 	off_t curoff;
1230 
1231 	dbg(3, "called\n");
1232 
1233 	if (ncookies)
1234 		debug(1, "ncookies=%d\n", *ncookies);
1235 
1236 	dnp = VP_TO_NODE(vp);
1237 	uio = ap->a_uio;
1238 	startoff = uio->uio_offset;
1239 	cnt = 0;
1240 	error = 0;
1241 	base = 0;
1242 	bytes = 0;
1243 
1244 	if (vp->v_type != VDIR)
1245 		return ENOTDIR;
1246 	if (uio->uio_resid < 0)
1247 		return EINVAL;
1248 	if ((bufsiz = uio->uio_resid) > 4096)
1249 		bufsiz = 4096;
1250 	buf = kmalloc(bufsiz, M_DIRFS_MISC, M_WAITOK | M_ZERO);
1251 
1252 	/*
1253 	 * Generally speaking we have to be able to process ALL the
1254 	 * entries returned by getdirentries() in order for the seek
1255 	 * position to be correct.  For now just try to size the buffer
1256 	 * so that this happens; a smaller buffer always works.
1258 	 */
1259 	dirfs_node_lock(dnp);
1260 	lseek(dnp->dn_fd, startoff, SEEK_SET);
1261 	bytes = getdirentries(dnp->dn_fd, buf, bufsiz, &base);
1262 	dbg(9, "seek %016jx %016jx %016jx\n",
1263 		(intmax_t)startoff, (intmax_t)base,
1264 		(intmax_t)lseek(dnp->dn_fd, 0, SEEK_CUR));
1265 	if (bytes < 0) {
1266 		if (errno == EINVAL)
1267 			panic("EINVAL on readdir\n");
1268 		error = errno;
1269 		curoff = startoff;
1270 		goto out;
1271 	} else if (bytes == 0) {
1272 		*ap->a_eofflag = 1;
1273 		curoff = startoff;
1274 		goto out;
1275 	}
1276 
1277 	for (dp = (struct dirent *)buf; bytes > 0 && uio->uio_resid > 0;
1278 	    bytes -= _DIRENT_DIRSIZ(dp), dp = dpn) {
1279 		r = vop_write_dirent(&error, uio, dp->d_ino, dp->d_type,
1280 		    dp->d_namlen, dp->d_name);
1281 		if (error || r)
1282 			break;
1283 		dpn = _DIRENT_NEXT(dp);
1285 		cnt++;
1286 	}
1287 	curoff = lseek(dnp->dn_fd, 0, SEEK_CUR);
1288 
1289 out:
1290 	kfree(buf, M_DIRFS_MISC);
1291 	uio->uio_offset = curoff;
1292 	dirfs_node_unlock(dnp);
1293 
1294 	KTR_LOG(dirfs_readdir, dnp, dnp->dn_fd, startoff, uio->uio_offset);
1295 
1296 	return error;
1297 }
1298 
1299 static int
1300 dirfs_readlink(struct vop_readlink_args *ap)
1301 {
1302 	dirfs_node_t dnp, pathnp;
1303 	dirfs_mount_t dmp;
1304 	struct vnode *vp;
1305 	struct mount *mp;
1306 	struct uio *uio;
1307 	char *tmp, *pathfree, *buf;
1308 	ssize_t nlen;
1309 	int error;
1310 
1311 	dbg(3, "called\n");
1312 
1313 	vp = ap->a_vp;
1314 
1315 	KKASSERT(vp->v_type == VLNK);
1316 
1317 	error = 0;
1318 	tmp = pathfree = NULL;
1319 	uio = ap->a_uio;
1320 	mp = vp->v_mount;
1321 	dmp = VFS_TO_DIRFS(mp);
1322 	dnp = VP_TO_NODE(vp);
1323 
1324 	lwkt_gettoken(&mp->mnt_token);
1325 
1326 	pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree);
1327 
1328 	buf = kmalloc(uio->uio_resid + 1, M_DIRFS_MISC, M_WAITOK | M_ZERO);
1329 	nlen = readlinkat(pathnp->dn_fd, dnp->dn_name, buf, uio->uio_resid);
1330 	if (nlen == -1 ) {
1331 		error = errno;
1332 	} else {
1333 		buf[nlen] = '\0';
1334 		error = uiomove(buf, nlen + 1, uio);
1335 		if (error)
1336 			error = errno;
1337 	}
1338 	dirfs_dropfd(dmp, pathnp, pathfree);
1339 	kfree(buf, M_DIRFS_MISC);
1340 
1341 	lwkt_reltoken(&mp->mnt_token);
1342 
1343 	return error;
1344 }
1345 
1346 /*
1347  * Main tasks to be performed:
1348  * 1) When the inode is NULL, recycle the vnode.
1349  * 2) When the inode has 0 links:
1350  *	- Check if in the TAILQ, if so remove.
1351  *	- Destroy the inode.
1352  *	- Recycle the vnode.
1353  * 3) If none of the above, add the node to the TAILQ
1354  *    when it has a valid fd and there is room on the
1355  *    queue.
1356  *
1357  */
1358 static int
1359 dirfs_inactive(struct vop_inactive_args *ap)
1360 {
1361 	struct vnode *vp;
1362 	dirfs_mount_t dmp;
1363 	dirfs_node_t dnp;
1364 
1365 	dbg(3, "called\n");
1366 
1367 	vp = ap->a_vp;
1368 	dmp = VFS_TO_DIRFS(vp->v_mount);
1369 	dnp = VP_TO_NODE(vp);
1370 
1371 	/* Degenerate case */
1372 	if (dnp == NULL) {
1373 		dbg(5, "dnp was NULL\n");
1374 		vrecycle(vp);
1375 		return 0;
1376 	}
1377 
1378 	/*
1379 	 * Deal with the case where the inode has 0 links, i.e. it was unlinked.
1380 	 */
1381 	if (dnp->dn_links == 0) {
1382 		vrecycle(vp);
1383 		dbg(5, "recycled a vnode of an unlinked dnp\n");
1384 
1385 		goto out;
1386 	}
1387 
1388 	/*
1389 	 * Try to retain the fd in our fd cache.
1390 	 */
1391 	dirfs_node_setpassive(dmp, dnp, 1);
1392 out:
1393 
1394 	return 0;
1395 
1396 }
1397 
1398 int
1399 dirfs_reclaim(struct vop_reclaim_args *ap)
1400 {
1401 	struct vnode *vp;
1402 	dirfs_node_t dnp;
1403 	dirfs_mount_t dmp;
1404 
1405 	dbg(3, "called\n");
1406 
1407 	vp = ap->a_vp;
1408 	dnp = VP_TO_NODE(vp);
1409 	dmp = VFS_TO_DIRFS(vp->v_mount);
1410 
1411 	dirfs_free_vp(dmp, dnp);
1412 	/* dnp is now invalid, may have been destroyed */
1413 
1414 	return 0;
1415 }
1416 
1417 static int
1418 dirfs_mountctl(struct vop_mountctl_args *ap)
1419 {
1420 	dbg(3, "called\n");
1421 
1422 	KTR_LOG(dirfs_unsupported, __func__);
1423 
1424 	return EOPNOTSUPP;
1425 }
1426 
1427 static int
1428 dirfs_print(struct vop_print_args *v)
1429 {
1430 	dbg(3, "called\n");
1431 
1432 	KTR_LOG(dirfs_unsupported, __func__);
1433 
1434 	return EOPNOTSUPP;
1435 }
1436 
1437 static int __unused
1438 dirfs_pathconf(struct vop_pathconf_args *v)
1439 {
1440 	dbg(3, "called\n");
1441 
1442 	return EOPNOTSUPP;
1443 }
1444 
1445 /************************************************************************
1446  *                          KQFILTER OPS                                *
1447  ************************************************************************/
1448 
1449 static void filt_dirfsdetach(struct knote *kn);
1450 static int filt_dirfsread(struct knote *kn, long hint);
1451 static int filt_dirfswrite(struct knote *kn, long hint);
1452 static int filt_dirfsvnode(struct knote *kn, long hint);
1453 
1454 static struct filterops dirfsread_filtops =
1455 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
1456 	  NULL, filt_dirfsdetach, filt_dirfsread };
1457 static struct filterops dirfswrite_filtops =
1458 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
1459 	  NULL, filt_dirfsdetach, filt_dirfswrite };
1460 static struct filterops dirfsvnode_filtops =
1461 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
1462 	  NULL, filt_dirfsdetach, filt_dirfsvnode };
1463 
1464 static int
1465 dirfs_kqfilter (struct vop_kqfilter_args *ap)
1466 {
1467 	struct vnode *vp = ap->a_vp;
1468 	struct knote *kn = ap->a_kn;
1469 
1470 	switch (kn->kn_filter) {
1471 	case EVFILT_READ:
1472 		kn->kn_fop = &dirfsread_filtops;
1473 		break;
1474 	case EVFILT_WRITE:
1475 		kn->kn_fop = &dirfswrite_filtops;
1476 		break;
1477 	case EVFILT_VNODE:
1478 		kn->kn_fop = &dirfsvnode_filtops;
1479 		break;
1480 	default:
1481 		return (EOPNOTSUPP);
1482 	}
1483 
1484 	kn->kn_hook = (caddr_t)vp;
1485 
1486 	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1487 
1488 	return(0);
1489 }
1490 
1491 static void
1492 filt_dirfsdetach(struct knote *kn)
1493 {
1494 	struct vnode *vp = (void *)kn->kn_hook;
1495 
1496 	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1497 }
1498 
1499 static int
1500 filt_dirfsread(struct knote *kn, long hint)
1501 {
1502 	struct vnode *vp = (void *)kn->kn_hook;
1503 	dirfs_node_t dnp  = VP_TO_NODE(vp);
1504 	off_t off;
1505 
1506 	if (hint == NOTE_REVOKE) {
1507 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
1508 		return(1);
1509 	}
1510 
1511 	/*
1512 	 * Interlock against MP races when performing this function.
1513 	 */
1514 	dirfs_node_lock(dnp);
1515 	off = dnp->dn_size - kn->kn_fp->f_offset;
1516 	kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
1517 	if (kn->kn_sfflags & NOTE_OLDAPI) {
1518 		dirfs_node_unlock(dnp);
1519 		return(1);
1520 	}
1521 	if (kn->kn_data == 0) {
1522 		kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
1523 	}
1524 	dirfs_node_unlock(dnp);
1525 	return (kn->kn_data != 0);
1526 }
1527 
1528 static int
1529 filt_dirfswrite(struct knote *kn, long hint)
1530 {
1531 	if (hint == NOTE_REVOKE)
1532 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
1533 	kn->kn_data = 0;
1534 	return (1);
1535 }
1536 
1537 static int
1538 filt_dirfsvnode(struct knote *kn, long hint)
1539 {
1540 	if (kn->kn_sfflags & hint)
1541 		kn->kn_fflags |= hint;
1542 	if (hint == NOTE_REVOKE) {
1543 		kn->kn_flags |= (EV_EOF | EV_NODATA);
1544 		return (1);
1545 	}
1546 	return (kn->kn_fflags != 0);
1547 }
1548 
1549 
1550 /* --------------------------------------------------------------------- */
1551 
1552 struct vop_ops dirfs_vnode_vops = {
1553 	.vop_default =			vop_defaultop,
1554 	.vop_nwhiteout =		vop_compat_nwhiteout,
1555 	.vop_ncreate =			dirfs_ncreate,
1556 	.vop_nresolve =			dirfs_nresolve,
1557 	.vop_markatime =		vop_stdmarkatime,
1558 	.vop_nlookupdotdot =		dirfs_nlookupdotdot,
1559 	.vop_nmknod =			dirfs_nmknod,
1560 	.vop_open =			dirfs_open,
1561 	.vop_close =			dirfs_close,
1562 	.vop_access =			dirfs_access,
1563 	.vop_getattr =			dirfs_getattr,
1564 	.vop_setattr =			dirfs_setattr,
1565 	.vop_read =			dirfs_read,
1566 	.vop_write =			dirfs_write,
1567 	.vop_fsync =			dirfs_fsync,
1568 	.vop_mountctl =			dirfs_mountctl,
1569 	.vop_nremove =			dirfs_nremove,
1570 	.vop_nlink =			dirfs_nlink,
1571 	.vop_nrename =			dirfs_nrename,
1572 	.vop_nmkdir =			dirfs_nmkdir,
1573 	.vop_nrmdir =			dirfs_nrmdir,
1574 	.vop_nsymlink =			dirfs_nsymlink,
1575 	.vop_readdir =			dirfs_readdir,
1576 	.vop_readlink =			dirfs_readlink,
1577 	.vop_inactive =			dirfs_inactive,
1578 	.vop_reclaim =			dirfs_reclaim,
1579 	.vop_print =			dirfs_print,
1580 	.vop_pathconf =			vop_stdpathconf,
1581 	.vop_bmap =			dirfs_bmap,
1582 	.vop_strategy =			dirfs_strategy,
1583 	.vop_advlock =			dirfs_advlock,
1584 	.vop_kqfilter =			dirfs_kqfilter,
1585 	.vop_getpages =			vop_stdgetpages,
1586 	.vop_putpages =			vop_stdputpages
1587 };
1588