xref: /dragonfly/sys/kern/vfs_jops.c (revision 0e9b9130)
16ddb7618SMatthew Dillon /*
2f56dc967SMatthew Dillon  * Copyright (c) 2004-2006 The DragonFly Project.  All rights reserved.
36ddb7618SMatthew Dillon  *
46ddb7618SMatthew Dillon  * This code is derived from software contributed to The DragonFly Project
56ddb7618SMatthew Dillon  * by Matthew Dillon <dillon@backplane.com>
66ddb7618SMatthew Dillon  *
76ddb7618SMatthew Dillon  * Redistribution and use in source and binary forms, with or without
86ddb7618SMatthew Dillon  * modification, are permitted provided that the following conditions
96ddb7618SMatthew Dillon  * are met:
106ddb7618SMatthew Dillon  *
116ddb7618SMatthew Dillon  * 1. Redistributions of source code must retain the above copyright
126ddb7618SMatthew Dillon  *    notice, this list of conditions and the following disclaimer.
136ddb7618SMatthew Dillon  * 2. Redistributions in binary form must reproduce the above copyright
146ddb7618SMatthew Dillon  *    notice, this list of conditions and the following disclaimer in
156ddb7618SMatthew Dillon  *    the documentation and/or other materials provided with the
166ddb7618SMatthew Dillon  *    distribution.
176ddb7618SMatthew Dillon  * 3. Neither the name of The DragonFly Project nor the names of its
186ddb7618SMatthew Dillon  *    contributors may be used to endorse or promote products derived
196ddb7618SMatthew Dillon  *    from this software without specific, prior written permission.
206ddb7618SMatthew Dillon  *
216ddb7618SMatthew Dillon  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
226ddb7618SMatthew Dillon  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
236ddb7618SMatthew Dillon  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
246ddb7618SMatthew Dillon  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
256ddb7618SMatthew Dillon  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
266ddb7618SMatthew Dillon  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
276ddb7618SMatthew Dillon  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
286ddb7618SMatthew Dillon  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
296ddb7618SMatthew Dillon  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
306ddb7618SMatthew Dillon  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
316ddb7618SMatthew Dillon  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
326ddb7618SMatthew Dillon  * SUCH DAMAGE.
336ddb7618SMatthew Dillon  *
34*0e9b9130SMatthew Dillon  * $DragonFly: src/sys/kern/vfs_jops.c,v 1.34 2007/05/09 00:53:34 dillon Exp $
352281065eSMatthew Dillon  */
362281065eSMatthew Dillon /*
372281065eSMatthew Dillon  * Each mount point may have zero or more independently configured journals
382281065eSMatthew Dillon  * attached to it.  Each journal is represented by a memory FIFO and worker
392281065eSMatthew Dillon  * thread.  Journal events are streamed through the FIFO to the thread,
402281065eSMatthew Dillon  * batched up (typically on one-second intervals), and written out by the
412281065eSMatthew Dillon  * thread.
422281065eSMatthew Dillon  *
432281065eSMatthew Dillon  * Journal vnode ops are executed instead of mnt_vn_norm_ops when one or
442281065eSMatthew Dillon  * more journals have been installed on a mount point.  It becomes the
452281065eSMatthew Dillon  * responsibility of the journal op to call the underlying normal op as
462281065eSMatthew Dillon  * appropriate.
476ddb7618SMatthew Dillon  */
486ddb7618SMatthew Dillon #include <sys/param.h>
496ddb7618SMatthew Dillon #include <sys/systm.h>
506ddb7618SMatthew Dillon #include <sys/buf.h>
516ddb7618SMatthew Dillon #include <sys/conf.h>
526ddb7618SMatthew Dillon #include <sys/kernel.h>
5382eaef15SMatthew Dillon #include <sys/queue.h>
546ddb7618SMatthew Dillon #include <sys/lock.h>
556ddb7618SMatthew Dillon #include <sys/malloc.h>
566ddb7618SMatthew Dillon #include <sys/mount.h>
576ddb7618SMatthew Dillon #include <sys/unistd.h>
586ddb7618SMatthew Dillon #include <sys/vnode.h>
596ddb7618SMatthew Dillon #include <sys/poll.h>
602281065eSMatthew Dillon #include <sys/mountctl.h>
61b2f7ec6cSMatthew Dillon #include <sys/journal.h>
622281065eSMatthew Dillon #include <sys/file.h>
63b2f7ec6cSMatthew Dillon #include <sys/proc.h>
649578bde0SMatthew Dillon #include <sys/msfbuf.h>
65500b6a22SMatthew Dillon #include <sys/socket.h>
66500b6a22SMatthew Dillon #include <sys/socketvar.h>
676ddb7618SMatthew Dillon 
686ddb7618SMatthew Dillon #include <machine/limits.h>
696ddb7618SMatthew Dillon 
706ddb7618SMatthew Dillon #include <vm/vm.h>
716ddb7618SMatthew Dillon #include <vm/vm_object.h>
726ddb7618SMatthew Dillon #include <vm/vm_page.h>
736ddb7618SMatthew Dillon #include <vm/vm_pager.h>
746ddb7618SMatthew Dillon #include <vm/vnode_pager.h>
756ddb7618SMatthew Dillon 
762281065eSMatthew Dillon #include <sys/file2.h>
772281065eSMatthew Dillon #include <sys/thread2.h>
782281065eSMatthew Dillon 
792281065eSMatthew Dillon static int journal_attach(struct mount *mp);
802281065eSMatthew Dillon static void journal_detach(struct mount *mp);
812281065eSMatthew Dillon static int journal_install_vfs_journal(struct mount *mp, struct file *fp,
822281065eSMatthew Dillon 			    const struct mountctl_install_journal *info);
83500b6a22SMatthew Dillon static int journal_restart_vfs_journal(struct mount *mp, struct file *fp,
84500b6a22SMatthew Dillon 			    const struct mountctl_restart_journal *info);
852281065eSMatthew Dillon static int journal_remove_vfs_journal(struct mount *mp,
862281065eSMatthew Dillon 			    const struct mountctl_remove_journal *info);
87500b6a22SMatthew Dillon static int journal_restart(struct mount *mp, struct file *fp,
88500b6a22SMatthew Dillon 			    struct journal *jo, int flags);
89432b8263SMatthew Dillon static int journal_destroy(struct mount *mp, struct journal *jo, int flags);
902281065eSMatthew Dillon static int journal_resync_vfs_journal(struct mount *mp, const void *ctl);
9139b13188SMatthew Dillon static int journal_status_vfs_journal(struct mount *mp,
9239b13188SMatthew Dillon 		       const struct mountctl_status_journal *info,
9339b13188SMatthew Dillon 		       struct mountctl_journal_ret_status *rstat,
9439b13188SMatthew Dillon 		       int buflen, int *res);
9582eaef15SMatthew Dillon 
9626e603edSMatthew Dillon static void jrecord_undo_file(struct jrecord *jrec, struct vnode *vp,
9726e603edSMatthew Dillon 			     int jrflags, off_t off, off_t bytes);
9882eaef15SMatthew Dillon 
99558b8e00SMatthew Dillon static int journal_setattr(struct vop_setattr_args *ap);
100558b8e00SMatthew Dillon static int journal_write(struct vop_write_args *ap);
101558b8e00SMatthew Dillon static int journal_fsync(struct vop_fsync_args *ap);
102558b8e00SMatthew Dillon static int journal_putpages(struct vop_putpages_args *ap);
103558b8e00SMatthew Dillon static int journal_setacl(struct vop_setacl_args *ap);
104558b8e00SMatthew Dillon static int journal_setextattr(struct vop_setextattr_args *ap);
105558b8e00SMatthew Dillon static int journal_ncreate(struct vop_ncreate_args *ap);
106558b8e00SMatthew Dillon static int journal_nmknod(struct vop_nmknod_args *ap);
107558b8e00SMatthew Dillon static int journal_nlink(struct vop_nlink_args *ap);
108558b8e00SMatthew Dillon static int journal_nsymlink(struct vop_nsymlink_args *ap);
109558b8e00SMatthew Dillon static int journal_nwhiteout(struct vop_nwhiteout_args *ap);
110558b8e00SMatthew Dillon static int journal_nremove(struct vop_nremove_args *ap);
1112281065eSMatthew Dillon static int journal_nmkdir(struct vop_nmkdir_args *ap);
112558b8e00SMatthew Dillon static int journal_nrmdir(struct vop_nrmdir_args *ap);
113558b8e00SMatthew Dillon static int journal_nrename(struct vop_nrename_args *ap);
1142281065eSMatthew Dillon 
/*
 * Flags passed as the jrflags argument of jrecord_undo_file().
 *
 * The low bits each name one piece of file attribute state.
 */
#define JRUNDO_SIZE	0x00000001
#define JRUNDO_UID	0x00000002
#define JRUNDO_GID	0x00000004
#define JRUNDO_FSID	0x00000008
#define JRUNDO_MODES	0x00000010
#define JRUNDO_INUM	0x00000020
#define JRUNDO_ATIME	0x00000040
#define JRUNDO_MTIME	0x00000080
#define JRUNDO_CTIME	0x00000100
#define JRUNDO_GEN	0x00000200
#define JRUNDO_FLAGS	0x00000400
#define JRUNDO_UDEV	0x00000800
#define JRUNDO_NLINK	0x00001000

/*
 * Higher bits.
 */
#define JRUNDO_FILEDATA	0x00010000
#define JRUNDO_GETVP	0x00020000
#define JRUNDO_CONDLINK	0x00040000	/* write file data if link count 1 */

/*
 * Composite masks: every attribute bit, and every attribute bit plus
 * the file data.
 */
#define JRUNDO_VATTR	(JRUNDO_SIZE  | JRUNDO_UID   | JRUNDO_GID   | \
			 JRUNDO_FSID  | JRUNDO_MODES | JRUNDO_INUM  | \
			 JRUNDO_ATIME | JRUNDO_MTIME | JRUNDO_CTIME | \
			 JRUNDO_GEN   | JRUNDO_FLAGS | JRUNDO_UDEV  | \
			 JRUNDO_NLINK)
#define JRUNDO_ALL	(JRUNDO_VATTR | JRUNDO_FILEDATA)
13626e603edSMatthew Dillon 
13766a1ddf5SMatthew Dillon static struct vop_ops journal_vnode_vops = {
13866a1ddf5SMatthew Dillon     .vop_default =	vop_journal_operate_ap,
13966a1ddf5SMatthew Dillon     .vop_mountctl =	journal_mountctl,
14066a1ddf5SMatthew Dillon     .vop_setattr =	journal_setattr,
14166a1ddf5SMatthew Dillon     .vop_write =	journal_write,
14266a1ddf5SMatthew Dillon     .vop_fsync =	journal_fsync,
14366a1ddf5SMatthew Dillon     .vop_putpages =	journal_putpages,
14466a1ddf5SMatthew Dillon     .vop_setacl =	journal_setacl,
14566a1ddf5SMatthew Dillon     .vop_setextattr =	journal_setextattr,
14666a1ddf5SMatthew Dillon     .vop_ncreate =	journal_ncreate,
14766a1ddf5SMatthew Dillon     .vop_nmknod =	journal_nmknod,
14866a1ddf5SMatthew Dillon     .vop_nlink =	journal_nlink,
14966a1ddf5SMatthew Dillon     .vop_nsymlink =	journal_nsymlink,
15066a1ddf5SMatthew Dillon     .vop_nwhiteout =	journal_nwhiteout,
15166a1ddf5SMatthew Dillon     .vop_nremove =	journal_nremove,
15266a1ddf5SMatthew Dillon     .vop_nmkdir =	journal_nmkdir,
15366a1ddf5SMatthew Dillon     .vop_nrmdir =	journal_nrmdir,
15466a1ddf5SMatthew Dillon     .vop_nrename =	journal_nrename
1556ddb7618SMatthew Dillon };
1566ddb7618SMatthew Dillon 
1576ddb7618SMatthew Dillon int
1582281065eSMatthew Dillon journal_mountctl(struct vop_mountctl_args *ap)
1592281065eSMatthew Dillon {
1602281065eSMatthew Dillon     struct mount *mp;
1612281065eSMatthew Dillon     int error = 0;
1622281065eSMatthew Dillon 
16366a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
1642281065eSMatthew Dillon     KKASSERT(mp);
1652281065eSMatthew Dillon 
1662281065eSMatthew Dillon     if (mp->mnt_vn_journal_ops == NULL) {
1672281065eSMatthew Dillon 	switch(ap->a_op) {
1682281065eSMatthew Dillon 	case MOUNTCTL_INSTALL_VFS_JOURNAL:
1692281065eSMatthew Dillon 	    error = journal_attach(mp);
1702281065eSMatthew Dillon 	    if (error == 0 && ap->a_ctllen != sizeof(struct mountctl_install_journal))
1712281065eSMatthew Dillon 		error = EINVAL;
1722281065eSMatthew Dillon 	    if (error == 0 && ap->a_fp == NULL)
1732281065eSMatthew Dillon 		error = EBADF;
1742281065eSMatthew Dillon 	    if (error == 0)
1752281065eSMatthew Dillon 		error = journal_install_vfs_journal(mp, ap->a_fp, ap->a_ctl);
1762281065eSMatthew Dillon 	    if (TAILQ_EMPTY(&mp->mnt_jlist))
1772281065eSMatthew Dillon 		journal_detach(mp);
1782281065eSMatthew Dillon 	    break;
179500b6a22SMatthew Dillon 	case MOUNTCTL_RESTART_VFS_JOURNAL:
1802281065eSMatthew Dillon 	case MOUNTCTL_REMOVE_VFS_JOURNAL:
1812281065eSMatthew Dillon 	case MOUNTCTL_RESYNC_VFS_JOURNAL:
18239b13188SMatthew Dillon 	case MOUNTCTL_STATUS_VFS_JOURNAL:
18339b13188SMatthew Dillon 	    error = ENOENT;
1842281065eSMatthew Dillon 	    break;
1852281065eSMatthew Dillon 	default:
1862281065eSMatthew Dillon 	    error = EOPNOTSUPP;
1872281065eSMatthew Dillon 	    break;
1882281065eSMatthew Dillon 	}
1892281065eSMatthew Dillon     } else {
1902281065eSMatthew Dillon 	switch(ap->a_op) {
1912281065eSMatthew Dillon 	case MOUNTCTL_INSTALL_VFS_JOURNAL:
1922281065eSMatthew Dillon 	    if (ap->a_ctllen != sizeof(struct mountctl_install_journal))
1932281065eSMatthew Dillon 		error = EINVAL;
1942281065eSMatthew Dillon 	    if (error == 0 && ap->a_fp == NULL)
1952281065eSMatthew Dillon 		error = EBADF;
1962281065eSMatthew Dillon 	    if (error == 0)
1972281065eSMatthew Dillon 		error = journal_install_vfs_journal(mp, ap->a_fp, ap->a_ctl);
1982281065eSMatthew Dillon 	    break;
199500b6a22SMatthew Dillon 	case MOUNTCTL_RESTART_VFS_JOURNAL:
200500b6a22SMatthew Dillon 	    if (ap->a_ctllen != sizeof(struct mountctl_restart_journal))
201500b6a22SMatthew Dillon 		error = EINVAL;
202500b6a22SMatthew Dillon 	    if (error == 0 && ap->a_fp == NULL)
203500b6a22SMatthew Dillon 		error = EBADF;
204500b6a22SMatthew Dillon 	    if (error == 0)
205500b6a22SMatthew Dillon 		error = journal_restart_vfs_journal(mp, ap->a_fp, ap->a_ctl);
206500b6a22SMatthew Dillon 	    break;
2072281065eSMatthew Dillon 	case MOUNTCTL_REMOVE_VFS_JOURNAL:
2082281065eSMatthew Dillon 	    if (ap->a_ctllen != sizeof(struct mountctl_remove_journal))
2092281065eSMatthew Dillon 		error = EINVAL;
2102281065eSMatthew Dillon 	    if (error == 0)
2112281065eSMatthew Dillon 		error = journal_remove_vfs_journal(mp, ap->a_ctl);
2122281065eSMatthew Dillon 	    if (TAILQ_EMPTY(&mp->mnt_jlist))
2132281065eSMatthew Dillon 		journal_detach(mp);
2142281065eSMatthew Dillon 	    break;
2152281065eSMatthew Dillon 	case MOUNTCTL_RESYNC_VFS_JOURNAL:
2162281065eSMatthew Dillon 	    if (ap->a_ctllen != 0)
2172281065eSMatthew Dillon 		error = EINVAL;
2182281065eSMatthew Dillon 	    error = journal_resync_vfs_journal(mp, ap->a_ctl);
2192281065eSMatthew Dillon 	    break;
22039b13188SMatthew Dillon 	case MOUNTCTL_STATUS_VFS_JOURNAL:
22139b13188SMatthew Dillon 	    if (ap->a_ctllen != sizeof(struct mountctl_status_journal))
22239b13188SMatthew Dillon 		error = EINVAL;
22339b13188SMatthew Dillon 	    if (error == 0) {
22439b13188SMatthew Dillon 		error = journal_status_vfs_journal(mp, ap->a_ctl,
22539b13188SMatthew Dillon 					ap->a_buf, ap->a_buflen, ap->a_res);
22639b13188SMatthew Dillon 	    }
22739b13188SMatthew Dillon 	    break;
2282281065eSMatthew Dillon 	default:
2292281065eSMatthew Dillon 	    error = EOPNOTSUPP;
2302281065eSMatthew Dillon 	    break;
2312281065eSMatthew Dillon 	}
2322281065eSMatthew Dillon     }
2332281065eSMatthew Dillon     return (error);
2342281065eSMatthew Dillon }
2352281065eSMatthew Dillon 
2362281065eSMatthew Dillon /*
2372281065eSMatthew Dillon  * High level mount point setup.  When a
2382281065eSMatthew Dillon  */
2392281065eSMatthew Dillon static int
2406ddb7618SMatthew Dillon journal_attach(struct mount *mp)
2416ddb7618SMatthew Dillon {
242797e4fe9SMatthew Dillon     KKASSERT(mp->mnt_jbitmap == NULL);
24366a1ddf5SMatthew Dillon     vfs_add_vnodeops(mp, &journal_vnode_vops, &mp->mnt_vn_journal_ops);
244efda3bd0SMatthew Dillon     mp->mnt_jbitmap = kmalloc(JREC_STREAMID_JMAX/8, M_JOURNAL, M_WAITOK|M_ZERO);
245797e4fe9SMatthew Dillon     mp->mnt_streamid = JREC_STREAMID_JMIN;
2466ddb7618SMatthew Dillon     return(0);
2476ddb7618SMatthew Dillon }
2486ddb7618SMatthew Dillon 
2492281065eSMatthew Dillon static void
2506ddb7618SMatthew Dillon journal_detach(struct mount *mp)
2516ddb7618SMatthew Dillon {
252797e4fe9SMatthew Dillon     KKASSERT(mp->mnt_jbitmap != NULL);
2536ddb7618SMatthew Dillon     if (mp->mnt_vn_journal_ops)
25466a1ddf5SMatthew Dillon 	vfs_rm_vnodeops(mp, &journal_vnode_vops, &mp->mnt_vn_journal_ops);
255efda3bd0SMatthew Dillon     kfree(mp->mnt_jbitmap, M_JOURNAL);
256797e4fe9SMatthew Dillon     mp->mnt_jbitmap = NULL;
2576ddb7618SMatthew Dillon }
2586ddb7618SMatthew Dillon 
2592281065eSMatthew Dillon /*
26082eaef15SMatthew Dillon  * Install a journal on a mount point.  Each journal has an associated worker
26182eaef15SMatthew Dillon  * thread which is responsible for buffering and spooling the data to the
26282eaef15SMatthew Dillon  * target.  A mount point may have multiple journals attached to it.  An
26382eaef15SMatthew Dillon  * initial start record is generated when the journal is associated.
2642281065eSMatthew Dillon  */
2652281065eSMatthew Dillon static int
2662281065eSMatthew Dillon journal_install_vfs_journal(struct mount *mp, struct file *fp,
2672281065eSMatthew Dillon 			    const struct mountctl_install_journal *info)
2682281065eSMatthew Dillon {
2692281065eSMatthew Dillon     struct journal *jo;
27082eaef15SMatthew Dillon     struct jrecord jrec;
2712281065eSMatthew Dillon     int error = 0;
2722281065eSMatthew Dillon     int size;
2732281065eSMatthew Dillon 
274efda3bd0SMatthew Dillon     jo = kmalloc(sizeof(struct journal), M_JOURNAL, M_WAITOK|M_ZERO);
2752281065eSMatthew Dillon     bcopy(info->id, jo->id, sizeof(jo->id));
276432b8263SMatthew Dillon     jo->flags = info->flags & ~(MC_JOURNAL_WACTIVE | MC_JOURNAL_RACTIVE |
277432b8263SMatthew Dillon 				MC_JOURNAL_STOP_REQ);
2782281065eSMatthew Dillon 
2792281065eSMatthew Dillon     /*
2802281065eSMatthew Dillon      * Memory FIFO size, round to nearest power of 2
2812281065eSMatthew Dillon      */
28282eaef15SMatthew Dillon     if (info->membufsize) {
2832281065eSMatthew Dillon 	if (info->membufsize < 65536)
2842281065eSMatthew Dillon 	    size = 65536;
2852281065eSMatthew Dillon 	else if (info->membufsize > 128 * 1024 * 1024)
2862281065eSMatthew Dillon 	    size = 128 * 1024 * 1024;
2872281065eSMatthew Dillon 	else
2882281065eSMatthew Dillon 	    size = (int)info->membufsize;
2892281065eSMatthew Dillon     } else {
2902281065eSMatthew Dillon 	size = 1024 * 1024;
2912281065eSMatthew Dillon     }
2922281065eSMatthew Dillon     jo->fifo.size = 1;
2932281065eSMatthew Dillon     while (jo->fifo.size < size)
2942281065eSMatthew Dillon 	jo->fifo.size <<= 1;
2952281065eSMatthew Dillon 
2962281065eSMatthew Dillon     /*
2972281065eSMatthew Dillon      * Other parameters.  If not specified the starting transaction id
2982281065eSMatthew Dillon      * will be the current date.
2992281065eSMatthew Dillon      */
30082eaef15SMatthew Dillon     if (info->transid) {
3012281065eSMatthew Dillon 	jo->transid = info->transid;
3022281065eSMatthew Dillon     } else {
3032281065eSMatthew Dillon 	struct timespec ts;
3042281065eSMatthew Dillon 	getnanotime(&ts);
3052281065eSMatthew Dillon 	jo->transid = ((int64_t)ts.tv_sec << 30) | ts.tv_nsec;
3062281065eSMatthew Dillon     }
3072281065eSMatthew Dillon 
3082281065eSMatthew Dillon     jo->fp = fp;
3092281065eSMatthew Dillon 
3102281065eSMatthew Dillon     /*
3112281065eSMatthew Dillon      * Allocate the memory FIFO
3122281065eSMatthew Dillon      */
3132281065eSMatthew Dillon     jo->fifo.mask = jo->fifo.size - 1;
314efda3bd0SMatthew Dillon     jo->fifo.membase = kmalloc(jo->fifo.size, M_JFIFO, M_WAITOK|M_ZERO|M_NULLOK);
3152281065eSMatthew Dillon     if (jo->fifo.membase == NULL)
3162281065eSMatthew Dillon 	error = ENOMEM;
3172281065eSMatthew Dillon 
31882eaef15SMatthew Dillon     /*
3193119bac5SMatthew Dillon      * Create the worker threads and generate the association record.
32082eaef15SMatthew Dillon      */
3212281065eSMatthew Dillon     if (error) {
322efda3bd0SMatthew Dillon 	kfree(jo, M_JOURNAL);
3232281065eSMatthew Dillon     } else {
3242281065eSMatthew Dillon 	fhold(fp);
325500b6a22SMatthew Dillon 	journal_create_threads(jo);
32682eaef15SMatthew Dillon 	jrecord_init(jo, &jrec, JREC_STREAMID_DISCONT);
32782eaef15SMatthew Dillon 	jrecord_write(&jrec, JTYPE_ASSOCIATE, 0);
32882eaef15SMatthew Dillon 	jrecord_done(&jrec, 0);
3292281065eSMatthew Dillon 	TAILQ_INSERT_TAIL(&mp->mnt_jlist, jo, jentry);
3302281065eSMatthew Dillon     }
3312281065eSMatthew Dillon     return(error);
3322281065eSMatthew Dillon }
3332281065eSMatthew Dillon 
33482eaef15SMatthew Dillon /*
335500b6a22SMatthew Dillon  * Restart a journal with a new descriptor.   The existing reader and writer
336500b6a22SMatthew Dillon  * threads are terminated and a new descriptor is associated with the
337500b6a22SMatthew Dillon  * journal.  The FIFO rindex is reset to xindex and the threads are then
338500b6a22SMatthew Dillon  * restarted.
339500b6a22SMatthew Dillon  */
340500b6a22SMatthew Dillon static int
341500b6a22SMatthew Dillon journal_restart_vfs_journal(struct mount *mp, struct file *fp,
342500b6a22SMatthew Dillon 			   const struct mountctl_restart_journal *info)
343500b6a22SMatthew Dillon {
344500b6a22SMatthew Dillon     struct journal *jo;
345500b6a22SMatthew Dillon     int error;
346500b6a22SMatthew Dillon 
347500b6a22SMatthew Dillon     TAILQ_FOREACH(jo, &mp->mnt_jlist, jentry) {
348500b6a22SMatthew Dillon 	if (bcmp(jo->id, info->id, sizeof(jo->id)) == 0)
349500b6a22SMatthew Dillon 	    break;
350500b6a22SMatthew Dillon     }
351500b6a22SMatthew Dillon     if (jo)
352500b6a22SMatthew Dillon 	error = journal_restart(mp, fp, jo, info->flags);
353500b6a22SMatthew Dillon     else
354500b6a22SMatthew Dillon 	error = EINVAL;
355500b6a22SMatthew Dillon     return (error);
356500b6a22SMatthew Dillon }
357500b6a22SMatthew Dillon 
358500b6a22SMatthew Dillon static int
359500b6a22SMatthew Dillon journal_restart(struct mount *mp, struct file *fp,
360500b6a22SMatthew Dillon 		struct journal *jo, int flags)
361500b6a22SMatthew Dillon {
362500b6a22SMatthew Dillon     /*
363500b6a22SMatthew Dillon      * XXX lock the jo
364500b6a22SMatthew Dillon      */
365500b6a22SMatthew Dillon 
366500b6a22SMatthew Dillon #if 0
367500b6a22SMatthew Dillon     /*
368500b6a22SMatthew Dillon      * Record the fact that we are doing a restart in the journal.
369500b6a22SMatthew Dillon      * XXX it isn't safe to do this if the journal is being restarted
370500b6a22SMatthew Dillon      * because it was locked up and the writer thread has already exited.
371500b6a22SMatthew Dillon      */
372500b6a22SMatthew Dillon     jrecord_init(jo, &jrec, JREC_STREAMID_RESTART);
373500b6a22SMatthew Dillon     jrecord_write(&jrec, JTYPE_DISASSOCIATE, 0);
374500b6a22SMatthew Dillon     jrecord_done(&jrec, 0);
375500b6a22SMatthew Dillon #endif
376500b6a22SMatthew Dillon 
377500b6a22SMatthew Dillon     /*
378500b6a22SMatthew Dillon      * Stop the reader and writer threads and clean up the current
379500b6a22SMatthew Dillon      * descriptor.
380500b6a22SMatthew Dillon      */
3816ea70f76SSascha Wildner     kprintf("RESTART WITH FP %p KILLING %p\n", fp, jo->fp);
382500b6a22SMatthew Dillon     journal_destroy_threads(jo, flags);
383500b6a22SMatthew Dillon 
384500b6a22SMatthew Dillon     if (jo->fp)
3859f87144fSMatthew Dillon 	fdrop(jo->fp);
386500b6a22SMatthew Dillon 
387500b6a22SMatthew Dillon     /*
388500b6a22SMatthew Dillon      * Associate the new descriptor, reset the FIFO index, and recreate
389500b6a22SMatthew Dillon      * the threads.
390500b6a22SMatthew Dillon      */
391500b6a22SMatthew Dillon     fhold(fp);
392500b6a22SMatthew Dillon     jo->fp = fp;
393500b6a22SMatthew Dillon     jo->fifo.rindex = jo->fifo.xindex;
394500b6a22SMatthew Dillon     journal_create_threads(jo);
395500b6a22SMatthew Dillon 
396500b6a22SMatthew Dillon     return(0);
397500b6a22SMatthew Dillon }
398500b6a22SMatthew Dillon 
399500b6a22SMatthew Dillon /*
40082eaef15SMatthew Dillon  * Disassociate a journal from a mount point and terminate its worker thread.
40182eaef15SMatthew Dillon  * A final termination record is written out before the file pointer is
40282eaef15SMatthew Dillon  * dropped.
40382eaef15SMatthew Dillon  */
4042281065eSMatthew Dillon static int
40582eaef15SMatthew Dillon journal_remove_vfs_journal(struct mount *mp,
40682eaef15SMatthew Dillon 			   const struct mountctl_remove_journal *info)
4072281065eSMatthew Dillon {
4082281065eSMatthew Dillon     struct journal *jo;
4092281065eSMatthew Dillon     int error;
4102281065eSMatthew Dillon 
4112281065eSMatthew Dillon     TAILQ_FOREACH(jo, &mp->mnt_jlist, jentry) {
4122281065eSMatthew Dillon 	if (bcmp(jo->id, info->id, sizeof(jo->id)) == 0)
4132281065eSMatthew Dillon 	    break;
4142281065eSMatthew Dillon     }
415432b8263SMatthew Dillon     if (jo)
416432b8263SMatthew Dillon 	error = journal_destroy(mp, jo, info->flags);
417432b8263SMatthew Dillon     else
418432b8263SMatthew Dillon 	error = EINVAL;
419432b8263SMatthew Dillon     return (error);
420432b8263SMatthew Dillon }
421432b8263SMatthew Dillon 
422432b8263SMatthew Dillon /*
423432b8263SMatthew Dillon  * Remove all journals associated with a mount point.  Usually called
424432b8263SMatthew Dillon  * by the umount code.
425432b8263SMatthew Dillon  */
426432b8263SMatthew Dillon void
427432b8263SMatthew Dillon journal_remove_all_journals(struct mount *mp, int flags)
428432b8263SMatthew Dillon {
429432b8263SMatthew Dillon     struct journal *jo;
430432b8263SMatthew Dillon 
431432b8263SMatthew Dillon     while ((jo = TAILQ_FIRST(&mp->mnt_jlist)) != NULL) {
432432b8263SMatthew Dillon 	journal_destroy(mp, jo, flags);
433432b8263SMatthew Dillon     }
434432b8263SMatthew Dillon }
435432b8263SMatthew Dillon 
436432b8263SMatthew Dillon static int
437432b8263SMatthew Dillon journal_destroy(struct mount *mp, struct journal *jo, int flags)
438432b8263SMatthew Dillon {
439432b8263SMatthew Dillon     struct jrecord jrec;
440432b8263SMatthew Dillon 
4412281065eSMatthew Dillon     TAILQ_REMOVE(&mp->mnt_jlist, jo, jentry);
44282eaef15SMatthew Dillon 
44382eaef15SMatthew Dillon     jrecord_init(jo, &jrec, JREC_STREAMID_DISCONT);
44482eaef15SMatthew Dillon     jrecord_write(&jrec, JTYPE_DISASSOCIATE, 0);
44582eaef15SMatthew Dillon     jrecord_done(&jrec, 0);
44682eaef15SMatthew Dillon 
447500b6a22SMatthew Dillon     journal_destroy_threads(jo, flags);
448500b6a22SMatthew Dillon 
4492281065eSMatthew Dillon     if (jo->fp)
4509f87144fSMatthew Dillon 	fdrop(jo->fp);
4512281065eSMatthew Dillon     if (jo->fifo.membase)
452efda3bd0SMatthew Dillon 	kfree(jo->fifo.membase, M_JFIFO);
453efda3bd0SMatthew Dillon     kfree(jo, M_JOURNAL);
454797e4fe9SMatthew Dillon 
455432b8263SMatthew Dillon     return(0);
4562281065eSMatthew Dillon }
4572281065eSMatthew Dillon 
/*
 * Resync request handler.  Currently a stub: both arguments are ignored
 * and EINVAL is always returned.
 */
static int
journal_resync_vfs_journal(struct mount *mp, const void *ctl)
{
    return (EINVAL);
}
4632281065eSMatthew Dillon 
46439b13188SMatthew Dillon static int
46539b13188SMatthew Dillon journal_status_vfs_journal(struct mount *mp,
46639b13188SMatthew Dillon 		       const struct mountctl_status_journal *info,
46739b13188SMatthew Dillon 		       struct mountctl_journal_ret_status *rstat,
46839b13188SMatthew Dillon 		       int buflen, int *res)
46939b13188SMatthew Dillon {
47039b13188SMatthew Dillon     struct journal *jo;
47139b13188SMatthew Dillon     int error = 0;
47239b13188SMatthew Dillon     int index;
47339b13188SMatthew Dillon 
47439b13188SMatthew Dillon     index = 0;
47539b13188SMatthew Dillon     *res = 0;
47639b13188SMatthew Dillon     TAILQ_FOREACH(jo, &mp->mnt_jlist, jentry) {
47739b13188SMatthew Dillon 	if (info->index == MC_JOURNAL_INDEX_ID) {
47839b13188SMatthew Dillon 	    if (bcmp(jo->id, info->id, sizeof(jo->id)) != 0)
47939b13188SMatthew Dillon 		continue;
48039b13188SMatthew Dillon 	} else if (info->index >= 0) {
48139b13188SMatthew Dillon 	    if (info->index < index)
48239b13188SMatthew Dillon 		continue;
48339b13188SMatthew Dillon 	} else if (info->index != MC_JOURNAL_INDEX_ALL) {
48439b13188SMatthew Dillon 	    continue;
48539b13188SMatthew Dillon 	}
48639b13188SMatthew Dillon 	if (buflen < sizeof(*rstat)) {
48739b13188SMatthew Dillon 	    if (*res)
48839b13188SMatthew Dillon 		rstat[-1].flags |= MC_JOURNAL_STATUS_MORETOCOME;
48939b13188SMatthew Dillon 	    else
49039b13188SMatthew Dillon 		error = EINVAL;
49139b13188SMatthew Dillon 	    break;
49239b13188SMatthew Dillon 	}
49339b13188SMatthew Dillon 	bzero(rstat, sizeof(*rstat));
49439b13188SMatthew Dillon 	rstat->recsize = sizeof(*rstat);
49539b13188SMatthew Dillon 	bcopy(jo->id, rstat->id, sizeof(jo->id));
49639b13188SMatthew Dillon 	rstat->index = index;
49739b13188SMatthew Dillon 	rstat->membufsize = jo->fifo.size;
4983119bac5SMatthew Dillon 	rstat->membufused = jo->fifo.windex - jo->fifo.xindex;
4993119bac5SMatthew Dillon 	rstat->membufunacked = jo->fifo.rindex - jo->fifo.xindex;
50039b13188SMatthew Dillon 	rstat->bytessent = jo->total_acked;
5013119bac5SMatthew Dillon 	rstat->fifostalls = jo->fifostalls;
50239b13188SMatthew Dillon 	++rstat;
50339b13188SMatthew Dillon 	++index;
50439b13188SMatthew Dillon 	*res += sizeof(*rstat);
50539b13188SMatthew Dillon 	buflen -= sizeof(*rstat);
50639b13188SMatthew Dillon     }
50739b13188SMatthew Dillon     return(error);
50839b13188SMatthew Dillon }
509432b8263SMatthew Dillon 
51082eaef15SMatthew Dillon /************************************************************************
51126e603edSMatthew Dillon  *			PARALLEL TRANSACTION SUPPORT ROUTINES		*
51226e603edSMatthew Dillon  ************************************************************************
51326e603edSMatthew Dillon  *
51426e603edSMatthew Dillon  * JRECLIST_*() - routines which create and iterate over jrecord structures,
51526e603edSMatthew Dillon  *		  because a mount point may have multiple attached journals.
51626e603edSMatthew Dillon  */
51726e603edSMatthew Dillon 
51826e603edSMatthew Dillon /*
51926e603edSMatthew Dillon  * Initialize the passed jrecord_list and create a jrecord for each
52026e603edSMatthew Dillon  * journal we need to write to.  Unnecessary mallocs are avoided by
52126e603edSMatthew Dillon  * using the passed jrecord structure as the first jrecord in the list.
52226e603edSMatthew Dillon  * A starting transaction is pushed for each jrecord.
52326e603edSMatthew Dillon  *
52426e603edSMatthew Dillon  * Returns non-zero if any of the journals require undo records.
52526e603edSMatthew Dillon  */
52626e603edSMatthew Dillon static
52726e603edSMatthew Dillon int
52826e603edSMatthew Dillon jreclist_init(struct mount *mp, struct jrecord_list *jreclist,
52926e603edSMatthew Dillon 	      struct jrecord *jreccache, int16_t rectype)
53026e603edSMatthew Dillon {
53126e603edSMatthew Dillon     struct journal *jo;
53226e603edSMatthew Dillon     struct jrecord *jrec;
533797e4fe9SMatthew Dillon     int wantrev;
534797e4fe9SMatthew Dillon     int count;
535797e4fe9SMatthew Dillon     int16_t streamid;
53626e603edSMatthew Dillon 
537797e4fe9SMatthew Dillon     TAILQ_INIT(&jreclist->list);
538797e4fe9SMatthew Dillon 
539797e4fe9SMatthew Dillon     /*
540797e4fe9SMatthew Dillon      * Select the stream ID to use for the transaction.  We must select
541797e4fe9SMatthew Dillon      * a stream ID that is not currently in use by some other parallel
542797e4fe9SMatthew Dillon      * transaction.
543797e4fe9SMatthew Dillon      *
544797e4fe9SMatthew Dillon      * Don't bother calculating the next streamid when reassigning
545797e4fe9SMatthew Dillon      * mnt_streamid, since parallel transactions are fairly rare.  This
546797e4fe9SMatthew Dillon      * also allows someone observing the raw records to clearly see
547797e4fe9SMatthew Dillon      * when parallel transactions occur.
548797e4fe9SMatthew Dillon      */
549797e4fe9SMatthew Dillon     streamid = mp->mnt_streamid;
550797e4fe9SMatthew Dillon     count = 0;
551797e4fe9SMatthew Dillon     while (mp->mnt_jbitmap[streamid >> 3] & (1 << (streamid & 7))) {
552797e4fe9SMatthew Dillon 	if (++streamid == JREC_STREAMID_JMAX)
553797e4fe9SMatthew Dillon 		streamid = JREC_STREAMID_JMIN;
554797e4fe9SMatthew Dillon 	if (++count == JREC_STREAMID_JMAX - JREC_STREAMID_JMIN) {
5556ea70f76SSascha Wildner 		kprintf("jreclist_init: all streamid's in use! sleeping\n");
556797e4fe9SMatthew Dillon 		tsleep(jreclist, 0, "jsidfl", hz * 10);
557797e4fe9SMatthew Dillon 		count = 0;
558797e4fe9SMatthew Dillon 	}
559797e4fe9SMatthew Dillon     }
560797e4fe9SMatthew Dillon     mp->mnt_jbitmap[streamid >> 3] |= 1 << (streamid & 7);
561797e4fe9SMatthew Dillon     mp->mnt_streamid = streamid;
562797e4fe9SMatthew Dillon     jreclist->streamid = streamid;
563797e4fe9SMatthew Dillon 
564797e4fe9SMatthew Dillon     /*
565797e4fe9SMatthew Dillon      * Now initialize a stream on each journal.
566797e4fe9SMatthew Dillon      */
567797e4fe9SMatthew Dillon     count = 0;
568797e4fe9SMatthew Dillon     wantrev = 0;
56926e603edSMatthew Dillon     TAILQ_FOREACH(jo, &mp->mnt_jlist, jentry) {
57026e603edSMatthew Dillon 	if (count == 0)
57126e603edSMatthew Dillon 	    jrec = jreccache;
57226e603edSMatthew Dillon 	else
573efda3bd0SMatthew Dillon 	    jrec = kmalloc(sizeof(*jrec), M_JOURNAL, M_WAITOK);
574797e4fe9SMatthew Dillon 	jrecord_init(jo, jrec, streamid);
57526e603edSMatthew Dillon 	jrec->user_save = jrecord_push(jrec, rectype);
576797e4fe9SMatthew Dillon 	TAILQ_INSERT_TAIL(&jreclist->list, jrec, user_entry);
57726e603edSMatthew Dillon 	if (jo->flags & MC_JOURNAL_WANT_REVERSABLE)
57826e603edSMatthew Dillon 	    wantrev = 1;
57926e603edSMatthew Dillon 	++count;
58026e603edSMatthew Dillon     }
58126e603edSMatthew Dillon     return(wantrev);
58226e603edSMatthew Dillon }
58326e603edSMatthew Dillon 
58426e603edSMatthew Dillon /*
58526e603edSMatthew Dillon  * Terminate the journaled transactions started by jreclist_init().  If
58626e603edSMatthew Dillon  * an error occured, the transaction records will be aborted.
58726e603edSMatthew Dillon  */
58826e603edSMatthew Dillon static
58926e603edSMatthew Dillon void
590797e4fe9SMatthew Dillon jreclist_done(struct mount *mp, struct jrecord_list *jreclist, int error)
59126e603edSMatthew Dillon {
59226e603edSMatthew Dillon     struct jrecord *jrec;
59326e603edSMatthew Dillon     int count;
59426e603edSMatthew Dillon 
595797e4fe9SMatthew Dillon     /*
596797e4fe9SMatthew Dillon      * Cleanup the jrecord state on each journal.
597797e4fe9SMatthew Dillon      */
598797e4fe9SMatthew Dillon     TAILQ_FOREACH(jrec, &jreclist->list, user_entry) {
59926e603edSMatthew Dillon 	jrecord_pop(jrec, jrec->user_save);
60026e603edSMatthew Dillon 	jrecord_done(jrec, error);
60126e603edSMatthew Dillon     }
602797e4fe9SMatthew Dillon 
603797e4fe9SMatthew Dillon     /*
604797e4fe9SMatthew Dillon      * Free allocated jrec's (the first is always supplied)
605797e4fe9SMatthew Dillon      */
60626e603edSMatthew Dillon     count = 0;
607797e4fe9SMatthew Dillon     while ((jrec = TAILQ_FIRST(&jreclist->list)) != NULL) {
608797e4fe9SMatthew Dillon 	TAILQ_REMOVE(&jreclist->list, jrec, user_entry);
60926e603edSMatthew Dillon 	if (count)
610efda3bd0SMatthew Dillon 	    kfree(jrec, M_JOURNAL);
61126e603edSMatthew Dillon 	++count;
61226e603edSMatthew Dillon     }
613797e4fe9SMatthew Dillon 
614797e4fe9SMatthew Dillon     /*
615797e4fe9SMatthew Dillon      * Clear the streamid so it can be reused.
616797e4fe9SMatthew Dillon      */
617797e4fe9SMatthew Dillon     mp->mnt_jbitmap[jreclist->streamid >> 3] &= ~(1 << (jreclist->streamid & 7));
61826e603edSMatthew Dillon }
61926e603edSMatthew Dillon 
62026e603edSMatthew Dillon /*
62126e603edSMatthew Dillon  * This procedure writes out UNDO records for available reversable
62226e603edSMatthew Dillon  * journals.
62326e603edSMatthew Dillon  *
62426e603edSMatthew Dillon  * XXX could use improvement.  There is no need to re-read the file
62526e603edSMatthew Dillon  * for each journal.
62626e603edSMatthew Dillon  */
62726e603edSMatthew Dillon static
62826e603edSMatthew Dillon void
62926e603edSMatthew Dillon jreclist_undo_file(struct jrecord_list *jreclist, struct vnode *vp,
63026e603edSMatthew Dillon 		   int jrflags, off_t off, off_t bytes)
63126e603edSMatthew Dillon {
63226e603edSMatthew Dillon     struct jrecord *jrec;
63326e603edSMatthew Dillon     int error;
63426e603edSMatthew Dillon 
63526e603edSMatthew Dillon     error = 0;
63626e603edSMatthew Dillon     if (jrflags & JRUNDO_GETVP)
63787de5057SMatthew Dillon 	error = vget(vp, LK_SHARED);
63826e603edSMatthew Dillon     if (error == 0) {
639797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist->list, user_entry) {
64026e603edSMatthew Dillon 	    if (jrec->jo->flags & MC_JOURNAL_WANT_REVERSABLE) {
64126e603edSMatthew Dillon 		jrecord_undo_file(jrec, vp, jrflags, off, bytes);
64226e603edSMatthew Dillon 	    }
64326e603edSMatthew Dillon 	}
64426e603edSMatthew Dillon     }
64526e603edSMatthew Dillon     if (error == 0 && jrflags & JRUNDO_GETVP)
64626e603edSMatthew Dillon 	vput(vp);
64726e603edSMatthew Dillon }
64826e603edSMatthew Dillon 
64926e603edSMatthew Dillon /************************************************************************
65026e603edSMatthew Dillon  *			LOW LEVEL UNDO SUPPORT ROUTINE			*
65126e603edSMatthew Dillon  ************************************************************************
65226e603edSMatthew Dillon  *
65326e603edSMatthew Dillon  * This function is used to support UNDO records.  It will generate an
65426e603edSMatthew Dillon  * appropriate record with the requested portion of the file data.  Note
65526e603edSMatthew Dillon  * that file data is only recorded if JRUNDO_FILEDATA is passed.  If bytes
65626e603edSMatthew Dillon  * is -1, it will be set to the size of the file.
65726e603edSMatthew Dillon  */
65826e603edSMatthew Dillon static void
65926e603edSMatthew Dillon jrecord_undo_file(struct jrecord *jrec, struct vnode *vp, int jrflags,
66026e603edSMatthew Dillon 		  off_t off, off_t bytes)
66126e603edSMatthew Dillon {
66226e603edSMatthew Dillon     struct vattr attr;
66326e603edSMatthew Dillon     void *save1; /* warning, save pointers do not always remain valid */
66426e603edSMatthew Dillon     void *save2;
66526e603edSMatthew Dillon     int error;
66626e603edSMatthew Dillon 
66726e603edSMatthew Dillon     /*
66826e603edSMatthew Dillon      * Setup.  Start the UNDO record, obtain a shared lock on the vnode,
66926e603edSMatthew Dillon      * and retrieve attribute info.
67026e603edSMatthew Dillon      */
67126e603edSMatthew Dillon     save1 = jrecord_push(jrec, JTYPE_UNDO);
67287de5057SMatthew Dillon     error = VOP_GETATTR(vp, &attr);
67326e603edSMatthew Dillon     if (error)
67426e603edSMatthew Dillon 	goto done;
67526e603edSMatthew Dillon 
67626e603edSMatthew Dillon     /*
67726e603edSMatthew Dillon      * Generate UNDO records as requested.
67826e603edSMatthew Dillon      */
67926e603edSMatthew Dillon     if (jrflags & JRUNDO_VATTR) {
68026e603edSMatthew Dillon 	save2 = jrecord_push(jrec, JTYPE_VATTR);
68126e603edSMatthew Dillon 	jrecord_leaf(jrec, JLEAF_VTYPE, &attr.va_type, sizeof(attr.va_type));
682aa159335SMatthew Dillon 	if ((jrflags & JRUNDO_NLINK) && attr.va_nlink != VNOVAL)
683aa159335SMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_NLINK, &attr.va_nlink, sizeof(attr.va_nlink));
68426e603edSMatthew Dillon 	if ((jrflags & JRUNDO_SIZE) && attr.va_size != VNOVAL)
68526e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_SIZE, &attr.va_size, sizeof(attr.va_size));
68626e603edSMatthew Dillon 	if ((jrflags & JRUNDO_UID) && attr.va_uid != VNOVAL)
68726e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_UID, &attr.va_uid, sizeof(attr.va_uid));
68826e603edSMatthew Dillon 	if ((jrflags & JRUNDO_GID) && attr.va_gid != VNOVAL)
68926e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_GID, &attr.va_gid, sizeof(attr.va_gid));
69026e603edSMatthew Dillon 	if ((jrflags & JRUNDO_FSID) && attr.va_fsid != VNOVAL)
69126e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_FSID, &attr.va_fsid, sizeof(attr.va_fsid));
69226e603edSMatthew Dillon 	if ((jrflags & JRUNDO_MODES) && attr.va_mode != (mode_t)VNOVAL)
69326e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_MODES, &attr.va_mode, sizeof(attr.va_mode));
69426e603edSMatthew Dillon 	if ((jrflags & JRUNDO_INUM) && attr.va_fileid != VNOVAL)
69526e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_INUM, &attr.va_fileid, sizeof(attr.va_fileid));
69626e603edSMatthew Dillon 	if ((jrflags & JRUNDO_ATIME) && attr.va_atime.tv_sec != VNOVAL)
69726e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_ATIME, &attr.va_atime, sizeof(attr.va_atime));
69826e603edSMatthew Dillon 	if ((jrflags & JRUNDO_MTIME) && attr.va_mtime.tv_sec != VNOVAL)
69926e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_MTIME, &attr.va_mtime, sizeof(attr.va_mtime));
70026e603edSMatthew Dillon 	if ((jrflags & JRUNDO_CTIME) && attr.va_ctime.tv_sec != VNOVAL)
70126e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_CTIME, &attr.va_ctime, sizeof(attr.va_ctime));
70226e603edSMatthew Dillon 	if ((jrflags & JRUNDO_GEN) && attr.va_gen != VNOVAL)
70326e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_GEN, &attr.va_gen, sizeof(attr.va_gen));
70426e603edSMatthew Dillon 	if ((jrflags & JRUNDO_FLAGS) && attr.va_flags != VNOVAL)
70526e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_FLAGS, &attr.va_flags, sizeof(attr.va_flags));
706*0e9b9130SMatthew Dillon 	if ((jrflags & JRUNDO_UDEV) && attr.va_rmajor != VNOVAL) {
707*0e9b9130SMatthew Dillon 	    udev_t rdev = makeudev(attr.va_rmajor, attr.va_rminor);
708*0e9b9130SMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_UDEV, &rdev, sizeof(rdev));
709*0e9b9130SMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_UMAJOR, &attr.va_rmajor, sizeof(attr.va_rmajor));
710*0e9b9130SMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_UMINOR, &attr.va_rminor, sizeof(attr.va_rminor));
711*0e9b9130SMatthew Dillon 	}
71226e603edSMatthew Dillon 	jrecord_pop(jrec, save2);
71326e603edSMatthew Dillon     }
71426e603edSMatthew Dillon 
71526e603edSMatthew Dillon     /*
71626e603edSMatthew Dillon      * Output the file data being overwritten by reading the file and
71726e603edSMatthew Dillon      * writing it out to the journal prior to the write operation.  We
71826e603edSMatthew Dillon      * do not need to write out data past the current file EOF.
71926e603edSMatthew Dillon      *
72026e603edSMatthew Dillon      * XXX support JRUNDO_CONDLINK - do not write out file data for files
72126e603edSMatthew Dillon      * with a link count > 1.  The undo code needs to locate the inode and
72226e603edSMatthew Dillon      * regenerate the hardlink.
72326e603edSMatthew Dillon      */
72425bae9ceSMatthew Dillon     if ((jrflags & JRUNDO_FILEDATA) && attr.va_type == VREG) {
72526e603edSMatthew Dillon 	if (attr.va_size != VNOVAL) {
72626e603edSMatthew Dillon 	    if (bytes == -1)
72726e603edSMatthew Dillon 		bytes = attr.va_size - off;
72826e603edSMatthew Dillon 	    if (off + bytes > attr.va_size)
72926e603edSMatthew Dillon 		bytes = attr.va_size - off;
73026e603edSMatthew Dillon 	    if (bytes > 0)
73126e603edSMatthew Dillon 		jrecord_file_data(jrec, vp, off, bytes);
73226e603edSMatthew Dillon 	} else {
73326e603edSMatthew Dillon 	    error = EINVAL;
73426e603edSMatthew Dillon 	}
73526e603edSMatthew Dillon     }
73625bae9ceSMatthew Dillon     if ((jrflags & JRUNDO_FILEDATA) && attr.va_type == VLNK) {
73725bae9ceSMatthew Dillon 	struct iovec aiov;
73825bae9ceSMatthew Dillon 	struct uio auio;
73925bae9ceSMatthew Dillon 	char *buf;
74025bae9ceSMatthew Dillon 
741efda3bd0SMatthew Dillon 	buf = kmalloc(PATH_MAX, M_JOURNAL, M_WAITOK);
74225bae9ceSMatthew Dillon 	aiov.iov_base = buf;
74325bae9ceSMatthew Dillon 	aiov.iov_len = PATH_MAX;
74425bae9ceSMatthew Dillon 	auio.uio_iov = &aiov;
74525bae9ceSMatthew Dillon 	auio.uio_iovcnt = 1;
74625bae9ceSMatthew Dillon 	auio.uio_offset = 0;
74725bae9ceSMatthew Dillon 	auio.uio_rw = UIO_READ;
74825bae9ceSMatthew Dillon 	auio.uio_segflg = UIO_SYSSPACE;
74925bae9ceSMatthew Dillon 	auio.uio_td = curthread;
75025bae9ceSMatthew Dillon 	auio.uio_resid = PATH_MAX;
75125bae9ceSMatthew Dillon 	error = VOP_READLINK(vp, &auio, proc0.p_ucred);
75225bae9ceSMatthew Dillon 	if (error == 0) {
75325bae9ceSMatthew Dillon 		jrecord_leaf(jrec, JLEAF_SYMLINKDATA, buf,
75425bae9ceSMatthew Dillon 				PATH_MAX - auio.uio_resid);
75525bae9ceSMatthew Dillon 	}
756efda3bd0SMatthew Dillon 	kfree(buf, M_JOURNAL);
75725bae9ceSMatthew Dillon     }
75826e603edSMatthew Dillon done:
75926e603edSMatthew Dillon     if (error)
76026e603edSMatthew Dillon 	jrecord_leaf(jrec, JLEAF_ERROR, &error, sizeof(error));
76126e603edSMatthew Dillon     jrecord_pop(jrec, save1);
76226e603edSMatthew Dillon }
76326e603edSMatthew Dillon 
7642281065eSMatthew Dillon /************************************************************************
7652281065eSMatthew Dillon  *			JOURNAL VNOPS					*
766558b8e00SMatthew Dillon  ************************************************************************
767558b8e00SMatthew Dillon  *
768558b8e00SMatthew Dillon  * These are function shims replacing the normal filesystem ops.  We become
769558b8e00SMatthew Dillon  * responsible for calling the underlying filesystem ops.  We have the choice
770558b8e00SMatthew Dillon  * of executing the underlying op first and then generating the journal entry,
771558b8e00SMatthew Dillon  * or starting the journal entry, executing the underlying op, and then
772558b8e00SMatthew Dillon  * either completing or aborting it.
773558b8e00SMatthew Dillon  *
774558b8e00SMatthew Dillon  * The journal is supposed to be a high-level entity, which generally means
775558b8e00SMatthew Dillon  * identifying files by name rather than by inode.  Supplying both allows
776558b8e00SMatthew Dillon  * the journal to be used both for inode-number-compatible 'mirrors' and
777558b8e00SMatthew Dillon  * for simple filesystem replication.
778558b8e00SMatthew Dillon  *
779558b8e00SMatthew Dillon  * Writes are particularly difficult to deal with because a single write may
780558b8e00SMatthew Dillon  * represent a hundred megabyte buffer or more, and both writes and truncations
781558b8e00SMatthew Dillon  * require the 'old' data to be written out as well as the new data if the
782558b8e00SMatthew Dillon  * log is reversable.  Other issues:
783558b8e00SMatthew Dillon  *
784558b8e00SMatthew Dillon  * - How to deal with operations on unlinked files (no path available),
785558b8e00SMatthew Dillon  *   but which may still be filesystem visible due to hard links.
786558b8e00SMatthew Dillon  *
787558b8e00SMatthew Dillon  * - How to deal with modifications made via a memory map.
788558b8e00SMatthew Dillon  *
789558b8e00SMatthew Dillon  * - Future cache coherency support will require cache coherency API calls
790558b8e00SMatthew Dillon  *   both prior to and after the call to the underlying VFS.
791558b8e00SMatthew Dillon  *
792558b8e00SMatthew Dillon  * ALSO NOTE: We do not have to shim compatibility VOPs like MKDIR which have
793558b8e00SMatthew Dillon  * new VFS equivalents (NMKDIR).
794558b8e00SMatthew Dillon  */
795558b8e00SMatthew Dillon 
796b2f7ec6cSMatthew Dillon /*
797b2f7ec6cSMatthew Dillon  * Journal vop_settattr { a_vp, a_vap, a_cred, a_td }
798b2f7ec6cSMatthew Dillon  */
799558b8e00SMatthew Dillon static
800558b8e00SMatthew Dillon int
801558b8e00SMatthew Dillon journal_setattr(struct vop_setattr_args *ap)
802558b8e00SMatthew Dillon {
80326e603edSMatthew Dillon     struct jrecord_list jreclist;
80426e603edSMatthew Dillon     struct jrecord jreccache;
80526e603edSMatthew Dillon     struct jrecord *jrec;
806558b8e00SMatthew Dillon     struct mount *mp;
807aa159335SMatthew Dillon     void *save;
808558b8e00SMatthew Dillon     int error;
809558b8e00SMatthew Dillon 
81066a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
81126e603edSMatthew Dillon     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_SETATTR)) {
81226e603edSMatthew Dillon 	jreclist_undo_file(&jreclist, ap->a_vp, JRUNDO_VATTR, 0, 0);
81326e603edSMatthew Dillon     }
81426e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
815558b8e00SMatthew Dillon     if (error == 0) {
816797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
81787de5057SMatthew Dillon 	    jrecord_write_cred(jrec, curthread, ap->a_cred);
81826e603edSMatthew Dillon 	    jrecord_write_vnode_ref(jrec, ap->a_vp);
819aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
82026e603edSMatthew Dillon 	    jrecord_write_vattr(jrec, ap->a_vap);
821aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
822558b8e00SMatthew Dillon 	}
823558b8e00SMatthew Dillon     }
824797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
825558b8e00SMatthew Dillon     return (error);
826558b8e00SMatthew Dillon }
827558b8e00SMatthew Dillon 
828b2f7ec6cSMatthew Dillon /*
829b2f7ec6cSMatthew Dillon  * Journal vop_write { a_vp, a_uio, a_ioflag, a_cred }
830b2f7ec6cSMatthew Dillon  */
831558b8e00SMatthew Dillon static
832558b8e00SMatthew Dillon int
833558b8e00SMatthew Dillon journal_write(struct vop_write_args *ap)
834558b8e00SMatthew Dillon {
83526e603edSMatthew Dillon     struct jrecord_list jreclist;
83626e603edSMatthew Dillon     struct jrecord jreccache;
83726e603edSMatthew Dillon     struct jrecord *jrec;
838558b8e00SMatthew Dillon     struct mount *mp;
8399578bde0SMatthew Dillon     struct uio uio_copy;
8409578bde0SMatthew Dillon     struct iovec uio_one_iovec;
841aa159335SMatthew Dillon     void *save;
842558b8e00SMatthew Dillon     int error;
843558b8e00SMatthew Dillon 
8449578bde0SMatthew Dillon     /*
8459578bde0SMatthew Dillon      * This is really nasty.  UIO's don't retain sufficient information to
8469578bde0SMatthew Dillon      * be reusable once they've gone through the VOP chain.  The iovecs get
8479578bde0SMatthew Dillon      * cleared, so we have to copy the UIO.
8489578bde0SMatthew Dillon      *
8499578bde0SMatthew Dillon      * XXX fix the UIO code to not destroy iov's during a scan so we can
8509578bde0SMatthew Dillon      *     reuse the uio over and over again.
851d0887c34SMatthew Dillon      *
852d0887c34SMatthew Dillon      * XXX UNDO code needs to journal the old data prior to the write.
8539578bde0SMatthew Dillon      */
8549578bde0SMatthew Dillon     uio_copy = *ap->a_uio;
8559578bde0SMatthew Dillon     if (uio_copy.uio_iovcnt == 1) {
8569578bde0SMatthew Dillon 	uio_one_iovec = ap->a_uio->uio_iov[0];
8579578bde0SMatthew Dillon 	uio_copy.uio_iov = &uio_one_iovec;
8589578bde0SMatthew Dillon     } else {
85977652cadSMatthew Dillon 	uio_copy.uio_iov = kmalloc(uio_copy.uio_iovcnt * sizeof(struct iovec),
8609578bde0SMatthew Dillon 				    M_JOURNAL, M_WAITOK);
8619578bde0SMatthew Dillon 	bcopy(ap->a_uio->uio_iov, uio_copy.uio_iov,
8629578bde0SMatthew Dillon 		uio_copy.uio_iovcnt * sizeof(struct iovec));
8639578bde0SMatthew Dillon     }
8649578bde0SMatthew Dillon 
86526e603edSMatthew Dillon     /*
86626e603edSMatthew Dillon      * Write out undo data.  Note that uio_offset is incorrect if
86726e603edSMatthew Dillon      * IO_APPEND is set, but fortunately we have no undo file data to
86826e603edSMatthew Dillon      * write out in that case.
86926e603edSMatthew Dillon      */
87066a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
87126e603edSMatthew Dillon     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_WRITE)) {
87226e603edSMatthew Dillon 	if (ap->a_ioflag & IO_APPEND) {
87326e603edSMatthew Dillon 	    jreclist_undo_file(&jreclist, ap->a_vp, JRUNDO_SIZE|JRUNDO_MTIME, 0, 0);
87426e603edSMatthew Dillon 	} else {
87526e603edSMatthew Dillon 	    jreclist_undo_file(&jreclist, ap->a_vp,
87626e603edSMatthew Dillon 			       JRUNDO_FILEDATA|JRUNDO_SIZE|JRUNDO_MTIME,
87726e603edSMatthew Dillon 			       uio_copy.uio_offset, uio_copy.uio_resid);
87826e603edSMatthew Dillon 	}
87926e603edSMatthew Dillon     }
880558b8e00SMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
881d0887c34SMatthew Dillon 
882d0887c34SMatthew Dillon     /*
883d0887c34SMatthew Dillon      * XXX bad hack to figure out the offset for O_APPEND writes (note:
884d0887c34SMatthew Dillon      * uio field state after the VFS operation).
885d0887c34SMatthew Dillon      */
886d0887c34SMatthew Dillon     uio_copy.uio_offset = ap->a_uio->uio_offset -
887d0887c34SMatthew Dillon 			  (uio_copy.uio_resid - ap->a_uio->uio_resid);
888d0887c34SMatthew Dillon 
88926e603edSMatthew Dillon     /*
89026e603edSMatthew Dillon      * Output the write data to the journal.
89126e603edSMatthew Dillon      */
892558b8e00SMatthew Dillon     if (error == 0) {
893797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
89426e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
89526e603edSMatthew Dillon 	    jrecord_write_vnode_ref(jrec, ap->a_vp);
896aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
89726e603edSMatthew Dillon 	    jrecord_write_uio(jrec, JLEAF_FILEDATA, &uio_copy);
898aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
899558b8e00SMatthew Dillon 	}
900558b8e00SMatthew Dillon     }
901797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
9029578bde0SMatthew Dillon 
9039578bde0SMatthew Dillon     if (uio_copy.uio_iov != &uio_one_iovec)
904efda3bd0SMatthew Dillon 	kfree(uio_copy.uio_iov, M_JOURNAL);
905558b8e00SMatthew Dillon     return (error);
906558b8e00SMatthew Dillon }
907558b8e00SMatthew Dillon 
908b2f7ec6cSMatthew Dillon /*
909b2f7ec6cSMatthew Dillon  * Journal vop_fsync { a_vp, a_waitfor, a_td }
910b2f7ec6cSMatthew Dillon  */
911558b8e00SMatthew Dillon static
912558b8e00SMatthew Dillon int
913558b8e00SMatthew Dillon journal_fsync(struct vop_fsync_args *ap)
914558b8e00SMatthew Dillon {
91526e603edSMatthew Dillon #if 0
916558b8e00SMatthew Dillon     struct mount *mp;
917558b8e00SMatthew Dillon     struct journal *jo;
91826e603edSMatthew Dillon #endif
919558b8e00SMatthew Dillon     int error;
920558b8e00SMatthew Dillon 
921558b8e00SMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
92226e603edSMatthew Dillon #if 0
92366a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
924558b8e00SMatthew Dillon     if (error == 0) {
925558b8e00SMatthew Dillon 	TAILQ_FOREACH(jo, &mp->mnt_jlist, jentry) {
926558b8e00SMatthew Dillon 	    /* XXX synchronize pending journal records */
927558b8e00SMatthew Dillon 	}
928558b8e00SMatthew Dillon     }
92926e603edSMatthew Dillon #endif
930558b8e00SMatthew Dillon     return (error);
931558b8e00SMatthew Dillon }
932558b8e00SMatthew Dillon 
933b2f7ec6cSMatthew Dillon /*
934b2f7ec6cSMatthew Dillon  * Journal vop_putpages { a_vp, a_m, a_count, a_sync, a_rtvals, a_offset }
935143c4f15SMatthew Dillon  *
936143c4f15SMatthew Dillon  * note: a_count is in bytes.
937b2f7ec6cSMatthew Dillon  */
938558b8e00SMatthew Dillon static
939558b8e00SMatthew Dillon int
940558b8e00SMatthew Dillon journal_putpages(struct vop_putpages_args *ap)
941558b8e00SMatthew Dillon {
94226e603edSMatthew Dillon     struct jrecord_list jreclist;
94326e603edSMatthew Dillon     struct jrecord jreccache;
94426e603edSMatthew Dillon     struct jrecord *jrec;
945558b8e00SMatthew Dillon     struct mount *mp;
946aa159335SMatthew Dillon     void *save;
947558b8e00SMatthew Dillon     int error;
948558b8e00SMatthew Dillon 
94966a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
95026e603edSMatthew Dillon     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_PUTPAGES) &&
95126e603edSMatthew Dillon 	ap->a_count > 0
95226e603edSMatthew Dillon     ) {
95326e603edSMatthew Dillon 	jreclist_undo_file(&jreclist, ap->a_vp,
95426e603edSMatthew Dillon 			   JRUNDO_FILEDATA|JRUNDO_SIZE|JRUNDO_MTIME,
95526e603edSMatthew Dillon 			   ap->a_offset, btoc(ap->a_count));
95626e603edSMatthew Dillon     }
95726e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
958143c4f15SMatthew Dillon     if (error == 0 && ap->a_count > 0) {
959797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
96026e603edSMatthew Dillon 	    jrecord_write_vnode_ref(jrec, ap->a_vp);
961aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
96226e603edSMatthew Dillon 	    jrecord_write_pagelist(jrec, JLEAF_FILEDATA, ap->a_m, ap->a_rtvals,
96326e603edSMatthew Dillon 				   btoc(ap->a_count), ap->a_offset);
964aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
965558b8e00SMatthew Dillon 	}
966558b8e00SMatthew Dillon     }
967797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
968558b8e00SMatthew Dillon     return (error);
969558b8e00SMatthew Dillon }
970558b8e00SMatthew Dillon 
971b2f7ec6cSMatthew Dillon /*
972b2f7ec6cSMatthew Dillon  * Journal vop_setacl { a_vp, a_type, a_aclp, a_cred, a_td }
973b2f7ec6cSMatthew Dillon  */
974558b8e00SMatthew Dillon static
975558b8e00SMatthew Dillon int
976558b8e00SMatthew Dillon journal_setacl(struct vop_setacl_args *ap)
977558b8e00SMatthew Dillon {
97826e603edSMatthew Dillon     struct jrecord_list jreclist;
97926e603edSMatthew Dillon     struct jrecord jreccache;
98026e603edSMatthew Dillon     struct jrecord *jrec;
981558b8e00SMatthew Dillon     struct mount *mp;
982558b8e00SMatthew Dillon     int error;
983558b8e00SMatthew Dillon 
98466a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
98526e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_SETACL);
98626e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
987558b8e00SMatthew Dillon     if (error == 0) {
988797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
98926e603edSMatthew Dillon #if 0
99026e603edSMatthew Dillon 	    if ((jo->flags & MC_JOURNAL_WANT_REVERSABLE))
99126e603edSMatthew Dillon 		jrecord_undo_file(jrec, ap->a_vp, JRUNDO_XXX, 0, 0);
99226e603edSMatthew Dillon #endif
99387de5057SMatthew Dillon 	    jrecord_write_cred(jrec, curthread, ap->a_cred);
99426e603edSMatthew Dillon 	    jrecord_write_vnode_ref(jrec, ap->a_vp);
995aa159335SMatthew Dillon #if 0
996aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
997b2f7ec6cSMatthew Dillon 	    /* XXX type, aclp */
998aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
999aa159335SMatthew Dillon #endif
1000558b8e00SMatthew Dillon 	}
1001558b8e00SMatthew Dillon     }
1002797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1003558b8e00SMatthew Dillon     return (error);
1004558b8e00SMatthew Dillon }
1005558b8e00SMatthew Dillon 
1006b2f7ec6cSMatthew Dillon /*
1007b2f7ec6cSMatthew Dillon  * Journal vop_setextattr { a_vp, a_name, a_uio, a_cred, a_td }
1008b2f7ec6cSMatthew Dillon  */
1009558b8e00SMatthew Dillon static
1010558b8e00SMatthew Dillon int
1011558b8e00SMatthew Dillon journal_setextattr(struct vop_setextattr_args *ap)
1012558b8e00SMatthew Dillon {
101326e603edSMatthew Dillon     struct jrecord_list jreclist;
101426e603edSMatthew Dillon     struct jrecord jreccache;
101526e603edSMatthew Dillon     struct jrecord *jrec;
1016558b8e00SMatthew Dillon     struct mount *mp;
1017aa159335SMatthew Dillon     void *save;
1018558b8e00SMatthew Dillon     int error;
1019558b8e00SMatthew Dillon 
102066a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
102126e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_SETEXTATTR);
102226e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1023558b8e00SMatthew Dillon     if (error == 0) {
1024797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
102526e603edSMatthew Dillon #if 0
102626e603edSMatthew Dillon 	    if ((jo->flags & MC_JOURNAL_WANT_REVERSABLE))
102726e603edSMatthew Dillon 		jrecord_undo_file(jrec, ap->a_vp, JRUNDO_XXX, 0, 0);
102826e603edSMatthew Dillon #endif
102987de5057SMatthew Dillon 	    jrecord_write_cred(jrec, curthread, ap->a_cred);
103026e603edSMatthew Dillon 	    jrecord_write_vnode_ref(jrec, ap->a_vp);
103126e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_ATTRNAME, ap->a_name, strlen(ap->a_name));
1032aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
103326e603edSMatthew Dillon 	    jrecord_write_uio(jrec, JLEAF_FILEDATA, ap->a_uio);
1034aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
1035558b8e00SMatthew Dillon 	}
1036558b8e00SMatthew Dillon     }
1037797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1038558b8e00SMatthew Dillon     return (error);
1039558b8e00SMatthew Dillon }
1040558b8e00SMatthew Dillon 
1041b2f7ec6cSMatthew Dillon /*
104228623bf9SMatthew Dillon  * Journal vop_ncreate { a_nch, a_vpp, a_cred, a_vap }
1043b2f7ec6cSMatthew Dillon  */
1044558b8e00SMatthew Dillon static
1045558b8e00SMatthew Dillon int
1046558b8e00SMatthew Dillon journal_ncreate(struct vop_ncreate_args *ap)
1047558b8e00SMatthew Dillon {
104826e603edSMatthew Dillon     struct jrecord_list jreclist;
104926e603edSMatthew Dillon     struct jrecord jreccache;
105026e603edSMatthew Dillon     struct jrecord *jrec;
1051558b8e00SMatthew Dillon     struct mount *mp;
1052aa159335SMatthew Dillon     void *save;
1053558b8e00SMatthew Dillon     int error;
1054558b8e00SMatthew Dillon 
105566a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
105626e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_CREATE);
105726e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1058558b8e00SMatthew Dillon     if (error == 0) {
1059797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
106026e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
106128623bf9SMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
1062b2f7ec6cSMatthew Dillon 	    if (*ap->a_vpp)
106326e603edSMatthew Dillon 		jrecord_write_vnode_ref(jrec, *ap->a_vpp);
1064aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
106526e603edSMatthew Dillon 	    jrecord_write_vattr(jrec, ap->a_vap);
1066aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
1067558b8e00SMatthew Dillon 	}
1068558b8e00SMatthew Dillon     }
1069797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1070558b8e00SMatthew Dillon     return (error);
1071558b8e00SMatthew Dillon }
1072558b8e00SMatthew Dillon 
1073b2f7ec6cSMatthew Dillon /*
107428623bf9SMatthew Dillon  * Journal vop_nmknod { a_nch, a_vpp, a_cred, a_vap }
1075b2f7ec6cSMatthew Dillon  */
1076558b8e00SMatthew Dillon static
1077558b8e00SMatthew Dillon int
1078558b8e00SMatthew Dillon journal_nmknod(struct vop_nmknod_args *ap)
1079558b8e00SMatthew Dillon {
108026e603edSMatthew Dillon     struct jrecord_list jreclist;
108126e603edSMatthew Dillon     struct jrecord jreccache;
108226e603edSMatthew Dillon     struct jrecord *jrec;
1083558b8e00SMatthew Dillon     struct mount *mp;
1084aa159335SMatthew Dillon     void *save;
1085558b8e00SMatthew Dillon     int error;
1086558b8e00SMatthew Dillon 
108766a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
108826e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_MKNOD);
108926e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1090558b8e00SMatthew Dillon     if (error == 0) {
1091797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
109226e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
109328623bf9SMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
1094aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
109526e603edSMatthew Dillon 	    jrecord_write_vattr(jrec, ap->a_vap);
1096aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
1097b2f7ec6cSMatthew Dillon 	    if (*ap->a_vpp)
109826e603edSMatthew Dillon 		jrecord_write_vnode_ref(jrec, *ap->a_vpp);
1099558b8e00SMatthew Dillon 	}
1100558b8e00SMatthew Dillon     }
1101797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1102558b8e00SMatthew Dillon     return (error);
1103558b8e00SMatthew Dillon }
1104558b8e00SMatthew Dillon 
1105b2f7ec6cSMatthew Dillon /*
110628623bf9SMatthew Dillon  * Journal vop_nlink { a_nch, a_vp, a_cred }
1107b2f7ec6cSMatthew Dillon  */
1108558b8e00SMatthew Dillon static
1109558b8e00SMatthew Dillon int
1110558b8e00SMatthew Dillon journal_nlink(struct vop_nlink_args *ap)
1111558b8e00SMatthew Dillon {
111226e603edSMatthew Dillon     struct jrecord_list jreclist;
111326e603edSMatthew Dillon     struct jrecord jreccache;
111426e603edSMatthew Dillon     struct jrecord *jrec;
1115558b8e00SMatthew Dillon     struct mount *mp;
1116aa159335SMatthew Dillon     void *save;
1117558b8e00SMatthew Dillon     int error;
1118558b8e00SMatthew Dillon 
111966a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
112026e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_LINK);
112126e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1122558b8e00SMatthew Dillon     if (error == 0) {
1123797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
112426e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
112528623bf9SMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
1126b2f7ec6cSMatthew Dillon 	    /* XXX PATH to VP and inode number */
1127f4659a6cSMatthew Dillon 	    /* XXX this call may not record the correct path when
1128f4659a6cSMatthew Dillon 	     * multiple paths are available */
1129aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
113028623bf9SMatthew Dillon 	    jrecord_write_vnode_link(jrec, ap->a_vp, ap->a_nch->ncp);
1131aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
1132558b8e00SMatthew Dillon 	}
1133558b8e00SMatthew Dillon     }
1134797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1135558b8e00SMatthew Dillon     return (error);
1136558b8e00SMatthew Dillon }
1137558b8e00SMatthew Dillon 
1138b2f7ec6cSMatthew Dillon /*
113928623bf9SMatthew Dillon  * Journal vop_symlink { a_nch, a_vpp, a_cred, a_vap, a_target }
1140b2f7ec6cSMatthew Dillon  */
1141558b8e00SMatthew Dillon static
1142558b8e00SMatthew Dillon int
1143558b8e00SMatthew Dillon journal_nsymlink(struct vop_nsymlink_args *ap)
1144558b8e00SMatthew Dillon {
114526e603edSMatthew Dillon     struct jrecord_list jreclist;
114626e603edSMatthew Dillon     struct jrecord jreccache;
114726e603edSMatthew Dillon     struct jrecord *jrec;
1148558b8e00SMatthew Dillon     struct mount *mp;
1149aa159335SMatthew Dillon     void *save;
1150558b8e00SMatthew Dillon     int error;
1151558b8e00SMatthew Dillon 
115266a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
115326e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_SYMLINK);
115426e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1155558b8e00SMatthew Dillon     if (error == 0) {
1156797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
115726e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
115828623bf9SMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
1159aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
116026e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_SYMLINKDATA,
1161b2f7ec6cSMatthew Dillon 			ap->a_target, strlen(ap->a_target));
1162aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
1163b2f7ec6cSMatthew Dillon 	    if (*ap->a_vpp)
116426e603edSMatthew Dillon 		jrecord_write_vnode_ref(jrec, *ap->a_vpp);
1165558b8e00SMatthew Dillon 	}
1166558b8e00SMatthew Dillon     }
1167797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1168558b8e00SMatthew Dillon     return (error);
1169558b8e00SMatthew Dillon }
1170558b8e00SMatthew Dillon 
1171b2f7ec6cSMatthew Dillon /*
117228623bf9SMatthew Dillon  * Journal vop_nwhiteout { a_nch, a_cred, a_flags }
1173b2f7ec6cSMatthew Dillon  */
1174558b8e00SMatthew Dillon static
1175558b8e00SMatthew Dillon int
1176558b8e00SMatthew Dillon journal_nwhiteout(struct vop_nwhiteout_args *ap)
1177558b8e00SMatthew Dillon {
117826e603edSMatthew Dillon     struct jrecord_list jreclist;
117926e603edSMatthew Dillon     struct jrecord jreccache;
118026e603edSMatthew Dillon     struct jrecord *jrec;
1181558b8e00SMatthew Dillon     struct mount *mp;
1182558b8e00SMatthew Dillon     int error;
1183558b8e00SMatthew Dillon 
118466a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
118526e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_WHITEOUT);
118626e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1187558b8e00SMatthew Dillon     if (error == 0) {
1188797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
118926e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
119028623bf9SMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
1191558b8e00SMatthew Dillon 	}
1192558b8e00SMatthew Dillon     }
1193797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1194558b8e00SMatthew Dillon     return (error);
1195558b8e00SMatthew Dillon }
1196558b8e00SMatthew Dillon 
1197b2f7ec6cSMatthew Dillon /*
119828623bf9SMatthew Dillon  * Journal vop_nremove { a_nch, a_cred }
1199b2f7ec6cSMatthew Dillon  */
1200558b8e00SMatthew Dillon static
1201558b8e00SMatthew Dillon int
1202558b8e00SMatthew Dillon journal_nremove(struct vop_nremove_args *ap)
1203558b8e00SMatthew Dillon {
120426e603edSMatthew Dillon     struct jrecord_list jreclist;
120526e603edSMatthew Dillon     struct jrecord jreccache;
120626e603edSMatthew Dillon     struct jrecord *jrec;
1207558b8e00SMatthew Dillon     struct mount *mp;
1208558b8e00SMatthew Dillon     int error;
1209558b8e00SMatthew Dillon 
121066a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
121126e603edSMatthew Dillon     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_REMOVE) &&
121228623bf9SMatthew Dillon 	ap->a_nch->ncp->nc_vp
121326e603edSMatthew Dillon     ) {
121428623bf9SMatthew Dillon 	jreclist_undo_file(&jreclist, ap->a_nch->ncp->nc_vp,
121526e603edSMatthew Dillon 			   JRUNDO_ALL|JRUNDO_GETVP|JRUNDO_CONDLINK, 0, -1);
121626e603edSMatthew Dillon     }
121726e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1218558b8e00SMatthew Dillon     if (error == 0) {
1219797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
122026e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
122128623bf9SMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
1222558b8e00SMatthew Dillon 	}
1223558b8e00SMatthew Dillon     }
1224797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1225558b8e00SMatthew Dillon     return (error);
1226558b8e00SMatthew Dillon }
12272281065eSMatthew Dillon 
1228b2f7ec6cSMatthew Dillon /*
122928623bf9SMatthew Dillon  * Journal vop_nmkdir { a_nch, a_vpp, a_cred, a_vap }
1230b2f7ec6cSMatthew Dillon  */
12312281065eSMatthew Dillon static
12322281065eSMatthew Dillon int
12332281065eSMatthew Dillon journal_nmkdir(struct vop_nmkdir_args *ap)
12342281065eSMatthew Dillon {
123526e603edSMatthew Dillon     struct jrecord_list jreclist;
123626e603edSMatthew Dillon     struct jrecord jreccache;
123726e603edSMatthew Dillon     struct jrecord *jrec;
123882eaef15SMatthew Dillon     struct mount *mp;
12392281065eSMatthew Dillon     int error;
12402281065eSMatthew Dillon 
124166a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
124226e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_MKDIR);
124326e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
124482eaef15SMatthew Dillon     if (error == 0) {
1245797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
124682eaef15SMatthew Dillon #if 0
124782eaef15SMatthew Dillon 	    if (jo->flags & MC_JOURNAL_WANT_AUDIT) {
124826e603edSMatthew Dillon 		jrecord_write_audit(jrec);
124982eaef15SMatthew Dillon 	    }
125082eaef15SMatthew Dillon #endif
125128623bf9SMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
125226e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
125326e603edSMatthew Dillon 	    jrecord_write_vattr(jrec, ap->a_vap);
125428623bf9SMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
1255b2f7ec6cSMatthew Dillon 	    if (*ap->a_vpp)
125626e603edSMatthew Dillon 		jrecord_write_vnode_ref(jrec, *ap->a_vpp);
125782eaef15SMatthew Dillon 	}
125882eaef15SMatthew Dillon     }
1259797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
12602281065eSMatthew Dillon     return (error);
12612281065eSMatthew Dillon }
12622281065eSMatthew Dillon 
1263b2f7ec6cSMatthew Dillon /*
126428623bf9SMatthew Dillon  * Journal vop_nrmdir { a_nch, a_cred }
1265b2f7ec6cSMatthew Dillon  */
1266558b8e00SMatthew Dillon static
1267558b8e00SMatthew Dillon int
1268558b8e00SMatthew Dillon journal_nrmdir(struct vop_nrmdir_args *ap)
1269558b8e00SMatthew Dillon {
127026e603edSMatthew Dillon     struct jrecord_list jreclist;
127126e603edSMatthew Dillon     struct jrecord jreccache;
127226e603edSMatthew Dillon     struct jrecord *jrec;
1273558b8e00SMatthew Dillon     struct mount *mp;
1274558b8e00SMatthew Dillon     int error;
1275558b8e00SMatthew Dillon 
127666a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
127726e603edSMatthew Dillon     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_RMDIR)) {
127828623bf9SMatthew Dillon 	jreclist_undo_file(&jreclist, ap->a_nch->ncp->nc_vp,
127926e603edSMatthew Dillon 			   JRUNDO_VATTR|JRUNDO_GETVP, 0, 0);
128026e603edSMatthew Dillon     }
128126e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1282558b8e00SMatthew Dillon     if (error == 0) {
1283797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
128426e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
128528623bf9SMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_nch->ncp);
1286558b8e00SMatthew Dillon 	}
1287558b8e00SMatthew Dillon     }
1288797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1289558b8e00SMatthew Dillon     return (error);
1290558b8e00SMatthew Dillon }
1291558b8e00SMatthew Dillon 
1292b2f7ec6cSMatthew Dillon /*
129328623bf9SMatthew Dillon  * Journal vop_nrename { a_fnch, a_tnch, a_cred }
1294b2f7ec6cSMatthew Dillon  */
1295558b8e00SMatthew Dillon static
1296558b8e00SMatthew Dillon int
1297558b8e00SMatthew Dillon journal_nrename(struct vop_nrename_args *ap)
1298558b8e00SMatthew Dillon {
129926e603edSMatthew Dillon     struct jrecord_list jreclist;
130026e603edSMatthew Dillon     struct jrecord jreccache;
130126e603edSMatthew Dillon     struct jrecord *jrec;
1302558b8e00SMatthew Dillon     struct mount *mp;
1303558b8e00SMatthew Dillon     int error;
1304558b8e00SMatthew Dillon 
130566a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
130626e603edSMatthew Dillon     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_RENAME) &&
130728623bf9SMatthew Dillon 	ap->a_tnch->ncp->nc_vp
130826e603edSMatthew Dillon     ) {
130928623bf9SMatthew Dillon 	jreclist_undo_file(&jreclist, ap->a_tnch->ncp->nc_vp,
131026e603edSMatthew Dillon 			   JRUNDO_ALL|JRUNDO_GETVP|JRUNDO_CONDLINK, 0, -1);
131126e603edSMatthew Dillon     }
131226e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1313558b8e00SMatthew Dillon     if (error == 0) {
1314797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
131526e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
131628623bf9SMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_fnch->ncp);
131728623bf9SMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH2, ap->a_tnch->ncp);
1318558b8e00SMatthew Dillon 	}
1319558b8e00SMatthew Dillon     }
1320797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1321558b8e00SMatthew Dillon     return (error);
1322558b8e00SMatthew Dillon }
1323558b8e00SMatthew Dillon 
1324