xref: /dragonfly/sys/kern/vfs_jops.c (revision 77652cad)
16ddb7618SMatthew Dillon /*
2f56dc967SMatthew Dillon  * Copyright (c) 2004-2006 The DragonFly Project.  All rights reserved.
36ddb7618SMatthew Dillon  *
46ddb7618SMatthew Dillon  * This code is derived from software contributed to The DragonFly Project
56ddb7618SMatthew Dillon  * by Matthew Dillon <dillon@backplane.com>
66ddb7618SMatthew Dillon  *
76ddb7618SMatthew Dillon  * Redistribution and use in source and binary forms, with or without
86ddb7618SMatthew Dillon  * modification, are permitted provided that the following conditions
96ddb7618SMatthew Dillon  * are met:
106ddb7618SMatthew Dillon  *
116ddb7618SMatthew Dillon  * 1. Redistributions of source code must retain the above copyright
126ddb7618SMatthew Dillon  *    notice, this list of conditions and the following disclaimer.
136ddb7618SMatthew Dillon  * 2. Redistributions in binary form must reproduce the above copyright
146ddb7618SMatthew Dillon  *    notice, this list of conditions and the following disclaimer in
156ddb7618SMatthew Dillon  *    the documentation and/or other materials provided with the
166ddb7618SMatthew Dillon  *    distribution.
176ddb7618SMatthew Dillon  * 3. Neither the name of The DragonFly Project nor the names of its
186ddb7618SMatthew Dillon  *    contributors may be used to endorse or promote products derived
196ddb7618SMatthew Dillon  *    from this software without specific, prior written permission.
206ddb7618SMatthew Dillon  *
216ddb7618SMatthew Dillon  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
226ddb7618SMatthew Dillon  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
236ddb7618SMatthew Dillon  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
246ddb7618SMatthew Dillon  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
256ddb7618SMatthew Dillon  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
266ddb7618SMatthew Dillon  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
276ddb7618SMatthew Dillon  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
286ddb7618SMatthew Dillon  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
296ddb7618SMatthew Dillon  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
306ddb7618SMatthew Dillon  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
316ddb7618SMatthew Dillon  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
326ddb7618SMatthew Dillon  * SUCH DAMAGE.
336ddb7618SMatthew Dillon  *
34*77652cadSMatthew Dillon  * $DragonFly: src/sys/kern/vfs_jops.c,v 1.30 2006/09/05 03:48:12 dillon Exp $
352281065eSMatthew Dillon  */
/*
 * Each mount point may have zero or more independently configured journals
 * attached to it.  Each journal is represented by a memory FIFO and worker
 * thread.  Journal events are streamed through the FIFO to the thread,
 * batched up (typically on one-second intervals), and written out by the
 * thread.
 *
 * Journal vnode ops are executed instead of mnt_vn_norm_ops when one or
 * more journals have been installed on a mount point.  It becomes the
 * responsibility of the journal op to call the underlying normal op as
 * appropriate.
 */
486ddb7618SMatthew Dillon #include <sys/param.h>
496ddb7618SMatthew Dillon #include <sys/systm.h>
506ddb7618SMatthew Dillon #include <sys/buf.h>
516ddb7618SMatthew Dillon #include <sys/conf.h>
526ddb7618SMatthew Dillon #include <sys/kernel.h>
5382eaef15SMatthew Dillon #include <sys/queue.h>
546ddb7618SMatthew Dillon #include <sys/lock.h>
556ddb7618SMatthew Dillon #include <sys/malloc.h>
566ddb7618SMatthew Dillon #include <sys/mount.h>
576ddb7618SMatthew Dillon #include <sys/unistd.h>
586ddb7618SMatthew Dillon #include <sys/vnode.h>
596ddb7618SMatthew Dillon #include <sys/poll.h>
602281065eSMatthew Dillon #include <sys/mountctl.h>
61b2f7ec6cSMatthew Dillon #include <sys/journal.h>
622281065eSMatthew Dillon #include <sys/file.h>
63b2f7ec6cSMatthew Dillon #include <sys/proc.h>
649578bde0SMatthew Dillon #include <sys/msfbuf.h>
65500b6a22SMatthew Dillon #include <sys/socket.h>
66500b6a22SMatthew Dillon #include <sys/socketvar.h>
676ddb7618SMatthew Dillon 
686ddb7618SMatthew Dillon #include <machine/limits.h>
696ddb7618SMatthew Dillon 
706ddb7618SMatthew Dillon #include <vm/vm.h>
716ddb7618SMatthew Dillon #include <vm/vm_object.h>
726ddb7618SMatthew Dillon #include <vm/vm_page.h>
736ddb7618SMatthew Dillon #include <vm/vm_pager.h>
746ddb7618SMatthew Dillon #include <vm/vnode_pager.h>
756ddb7618SMatthew Dillon 
762281065eSMatthew Dillon #include <sys/file2.h>
772281065eSMatthew Dillon #include <sys/thread2.h>
782281065eSMatthew Dillon 
792281065eSMatthew Dillon static int journal_attach(struct mount *mp);
802281065eSMatthew Dillon static void journal_detach(struct mount *mp);
812281065eSMatthew Dillon static int journal_install_vfs_journal(struct mount *mp, struct file *fp,
822281065eSMatthew Dillon 			    const struct mountctl_install_journal *info);
83500b6a22SMatthew Dillon static int journal_restart_vfs_journal(struct mount *mp, struct file *fp,
84500b6a22SMatthew Dillon 			    const struct mountctl_restart_journal *info);
852281065eSMatthew Dillon static int journal_remove_vfs_journal(struct mount *mp,
862281065eSMatthew Dillon 			    const struct mountctl_remove_journal *info);
87500b6a22SMatthew Dillon static int journal_restart(struct mount *mp, struct file *fp,
88500b6a22SMatthew Dillon 			    struct journal *jo, int flags);
89432b8263SMatthew Dillon static int journal_destroy(struct mount *mp, struct journal *jo, int flags);
902281065eSMatthew Dillon static int journal_resync_vfs_journal(struct mount *mp, const void *ctl);
9139b13188SMatthew Dillon static int journal_status_vfs_journal(struct mount *mp,
9239b13188SMatthew Dillon 		       const struct mountctl_status_journal *info,
9339b13188SMatthew Dillon 		       struct mountctl_journal_ret_status *rstat,
9439b13188SMatthew Dillon 		       int buflen, int *res);
9582eaef15SMatthew Dillon 
9626e603edSMatthew Dillon static void jrecord_undo_file(struct jrecord *jrec, struct vnode *vp,
9726e603edSMatthew Dillon 			     int jrflags, off_t off, off_t bytes);
9882eaef15SMatthew Dillon 
99558b8e00SMatthew Dillon static int journal_setattr(struct vop_setattr_args *ap);
100558b8e00SMatthew Dillon static int journal_write(struct vop_write_args *ap);
101558b8e00SMatthew Dillon static int journal_fsync(struct vop_fsync_args *ap);
102558b8e00SMatthew Dillon static int journal_putpages(struct vop_putpages_args *ap);
103558b8e00SMatthew Dillon static int journal_setacl(struct vop_setacl_args *ap);
104558b8e00SMatthew Dillon static int journal_setextattr(struct vop_setextattr_args *ap);
105558b8e00SMatthew Dillon static int journal_ncreate(struct vop_ncreate_args *ap);
106558b8e00SMatthew Dillon static int journal_nmknod(struct vop_nmknod_args *ap);
107558b8e00SMatthew Dillon static int journal_nlink(struct vop_nlink_args *ap);
108558b8e00SMatthew Dillon static int journal_nsymlink(struct vop_nsymlink_args *ap);
109558b8e00SMatthew Dillon static int journal_nwhiteout(struct vop_nwhiteout_args *ap);
110558b8e00SMatthew Dillon static int journal_nremove(struct vop_nremove_args *ap);
1112281065eSMatthew Dillon static int journal_nmkdir(struct vop_nmkdir_args *ap);
112558b8e00SMatthew Dillon static int journal_nrmdir(struct vop_nrmdir_args *ap);
113558b8e00SMatthew Dillon static int journal_nrename(struct vop_nrename_args *ap);
1142281065eSMatthew Dillon 
/*
 * JRUNDO_* flag bits (presumably the jrflags argument of
 * jrecord_undo_file() -- confirm against its callers).
 *
 * The low bits each name one piece of per-file attribute state.
 */
#define JRUNDO_SIZE	0x00000001
#define JRUNDO_UID	0x00000002
#define JRUNDO_GID	0x00000004
#define JRUNDO_FSID	0x00000008
#define JRUNDO_MODES	0x00000010
#define JRUNDO_INUM	0x00000020
#define JRUNDO_ATIME	0x00000040
#define JRUNDO_MTIME	0x00000080
#define JRUNDO_CTIME	0x00000100
#define JRUNDO_GEN	0x00000200
#define JRUNDO_FLAGS	0x00000400
#define JRUNDO_UDEV	0x00000800
#define JRUNDO_NLINK	0x00001000

/*
 * Bits above the attribute range.
 */
#define JRUNDO_FILEDATA	0x00010000
#define JRUNDO_GETVP	0x00020000
#define JRUNDO_CONDLINK	0x00040000	/* write file data if link count 1 */

/*
 * Composite masks: every attribute bit, and every attribute bit plus the
 * file data bit.
 */
#define JRUNDO_VATTR	(JRUNDO_SIZE|JRUNDO_UID|JRUNDO_GID|JRUNDO_FSID|\
			 JRUNDO_MODES|JRUNDO_INUM|JRUNDO_ATIME|JRUNDO_MTIME|\
			 JRUNDO_CTIME|JRUNDO_GEN|JRUNDO_FLAGS|JRUNDO_UDEV|\
			 JRUNDO_NLINK)
#define JRUNDO_ALL	(JRUNDO_VATTR|JRUNDO_FILEDATA)
13626e603edSMatthew Dillon 
13766a1ddf5SMatthew Dillon static struct vop_ops journal_vnode_vops = {
13866a1ddf5SMatthew Dillon     .vop_default =	vop_journal_operate_ap,
13966a1ddf5SMatthew Dillon     .vop_mountctl =	journal_mountctl,
14066a1ddf5SMatthew Dillon     .vop_setattr =	journal_setattr,
14166a1ddf5SMatthew Dillon     .vop_write =	journal_write,
14266a1ddf5SMatthew Dillon     .vop_fsync =	journal_fsync,
14366a1ddf5SMatthew Dillon     .vop_putpages =	journal_putpages,
14466a1ddf5SMatthew Dillon     .vop_setacl =	journal_setacl,
14566a1ddf5SMatthew Dillon     .vop_setextattr =	journal_setextattr,
14666a1ddf5SMatthew Dillon     .vop_ncreate =	journal_ncreate,
14766a1ddf5SMatthew Dillon     .vop_nmknod =	journal_nmknod,
14866a1ddf5SMatthew Dillon     .vop_nlink =	journal_nlink,
14966a1ddf5SMatthew Dillon     .vop_nsymlink =	journal_nsymlink,
15066a1ddf5SMatthew Dillon     .vop_nwhiteout =	journal_nwhiteout,
15166a1ddf5SMatthew Dillon     .vop_nremove =	journal_nremove,
15266a1ddf5SMatthew Dillon     .vop_nmkdir =	journal_nmkdir,
15366a1ddf5SMatthew Dillon     .vop_nrmdir =	journal_nrmdir,
15466a1ddf5SMatthew Dillon     .vop_nrename =	journal_nrename
1556ddb7618SMatthew Dillon };
1566ddb7618SMatthew Dillon 
15782eaef15SMatthew Dillon static MALLOC_DEFINE(M_JOURNAL, "journal", "Journaling structures");
1582281065eSMatthew Dillon static MALLOC_DEFINE(M_JFIFO, "journal-fifo", "Journal FIFO");
1592281065eSMatthew Dillon 
1606ddb7618SMatthew Dillon int
1612281065eSMatthew Dillon journal_mountctl(struct vop_mountctl_args *ap)
1622281065eSMatthew Dillon {
1632281065eSMatthew Dillon     struct mount *mp;
1642281065eSMatthew Dillon     int error = 0;
1652281065eSMatthew Dillon 
16666a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
1672281065eSMatthew Dillon     KKASSERT(mp);
1682281065eSMatthew Dillon 
1692281065eSMatthew Dillon     if (mp->mnt_vn_journal_ops == NULL) {
1702281065eSMatthew Dillon 	switch(ap->a_op) {
1712281065eSMatthew Dillon 	case MOUNTCTL_INSTALL_VFS_JOURNAL:
1722281065eSMatthew Dillon 	    error = journal_attach(mp);
1732281065eSMatthew Dillon 	    if (error == 0 && ap->a_ctllen != sizeof(struct mountctl_install_journal))
1742281065eSMatthew Dillon 		error = EINVAL;
1752281065eSMatthew Dillon 	    if (error == 0 && ap->a_fp == NULL)
1762281065eSMatthew Dillon 		error = EBADF;
1772281065eSMatthew Dillon 	    if (error == 0)
1782281065eSMatthew Dillon 		error = journal_install_vfs_journal(mp, ap->a_fp, ap->a_ctl);
1792281065eSMatthew Dillon 	    if (TAILQ_EMPTY(&mp->mnt_jlist))
1802281065eSMatthew Dillon 		journal_detach(mp);
1812281065eSMatthew Dillon 	    break;
182500b6a22SMatthew Dillon 	case MOUNTCTL_RESTART_VFS_JOURNAL:
1832281065eSMatthew Dillon 	case MOUNTCTL_REMOVE_VFS_JOURNAL:
1842281065eSMatthew Dillon 	case MOUNTCTL_RESYNC_VFS_JOURNAL:
18539b13188SMatthew Dillon 	case MOUNTCTL_STATUS_VFS_JOURNAL:
18639b13188SMatthew Dillon 	    error = ENOENT;
1872281065eSMatthew Dillon 	    break;
1882281065eSMatthew Dillon 	default:
1892281065eSMatthew Dillon 	    error = EOPNOTSUPP;
1902281065eSMatthew Dillon 	    break;
1912281065eSMatthew Dillon 	}
1922281065eSMatthew Dillon     } else {
1932281065eSMatthew Dillon 	switch(ap->a_op) {
1942281065eSMatthew Dillon 	case MOUNTCTL_INSTALL_VFS_JOURNAL:
1952281065eSMatthew Dillon 	    if (ap->a_ctllen != sizeof(struct mountctl_install_journal))
1962281065eSMatthew Dillon 		error = EINVAL;
1972281065eSMatthew Dillon 	    if (error == 0 && ap->a_fp == NULL)
1982281065eSMatthew Dillon 		error = EBADF;
1992281065eSMatthew Dillon 	    if (error == 0)
2002281065eSMatthew Dillon 		error = journal_install_vfs_journal(mp, ap->a_fp, ap->a_ctl);
2012281065eSMatthew Dillon 	    break;
202500b6a22SMatthew Dillon 	case MOUNTCTL_RESTART_VFS_JOURNAL:
203500b6a22SMatthew Dillon 	    if (ap->a_ctllen != sizeof(struct mountctl_restart_journal))
204500b6a22SMatthew Dillon 		error = EINVAL;
205500b6a22SMatthew Dillon 	    if (error == 0 && ap->a_fp == NULL)
206500b6a22SMatthew Dillon 		error = EBADF;
207500b6a22SMatthew Dillon 	    if (error == 0)
208500b6a22SMatthew Dillon 		error = journal_restart_vfs_journal(mp, ap->a_fp, ap->a_ctl);
209500b6a22SMatthew Dillon 	    break;
2102281065eSMatthew Dillon 	case MOUNTCTL_REMOVE_VFS_JOURNAL:
2112281065eSMatthew Dillon 	    if (ap->a_ctllen != sizeof(struct mountctl_remove_journal))
2122281065eSMatthew Dillon 		error = EINVAL;
2132281065eSMatthew Dillon 	    if (error == 0)
2142281065eSMatthew Dillon 		error = journal_remove_vfs_journal(mp, ap->a_ctl);
2152281065eSMatthew Dillon 	    if (TAILQ_EMPTY(&mp->mnt_jlist))
2162281065eSMatthew Dillon 		journal_detach(mp);
2172281065eSMatthew Dillon 	    break;
2182281065eSMatthew Dillon 	case MOUNTCTL_RESYNC_VFS_JOURNAL:
2192281065eSMatthew Dillon 	    if (ap->a_ctllen != 0)
2202281065eSMatthew Dillon 		error = EINVAL;
2212281065eSMatthew Dillon 	    error = journal_resync_vfs_journal(mp, ap->a_ctl);
2222281065eSMatthew Dillon 	    break;
22339b13188SMatthew Dillon 	case MOUNTCTL_STATUS_VFS_JOURNAL:
22439b13188SMatthew Dillon 	    if (ap->a_ctllen != sizeof(struct mountctl_status_journal))
22539b13188SMatthew Dillon 		error = EINVAL;
22639b13188SMatthew Dillon 	    if (error == 0) {
22739b13188SMatthew Dillon 		error = journal_status_vfs_journal(mp, ap->a_ctl,
22839b13188SMatthew Dillon 					ap->a_buf, ap->a_buflen, ap->a_res);
22939b13188SMatthew Dillon 	    }
23039b13188SMatthew Dillon 	    break;
2312281065eSMatthew Dillon 	default:
2322281065eSMatthew Dillon 	    error = EOPNOTSUPP;
2332281065eSMatthew Dillon 	    break;
2342281065eSMatthew Dillon 	}
2352281065eSMatthew Dillon     }
2362281065eSMatthew Dillon     return (error);
2372281065eSMatthew Dillon }
2382281065eSMatthew Dillon 
2392281065eSMatthew Dillon /*
2402281065eSMatthew Dillon  * High level mount point setup.  When a
2412281065eSMatthew Dillon  */
2422281065eSMatthew Dillon static int
2436ddb7618SMatthew Dillon journal_attach(struct mount *mp)
2446ddb7618SMatthew Dillon {
245797e4fe9SMatthew Dillon     KKASSERT(mp->mnt_jbitmap == NULL);
24666a1ddf5SMatthew Dillon     vfs_add_vnodeops(mp, &journal_vnode_vops, &mp->mnt_vn_journal_ops);
247efda3bd0SMatthew Dillon     mp->mnt_jbitmap = kmalloc(JREC_STREAMID_JMAX/8, M_JOURNAL, M_WAITOK|M_ZERO);
248797e4fe9SMatthew Dillon     mp->mnt_streamid = JREC_STREAMID_JMIN;
2496ddb7618SMatthew Dillon     return(0);
2506ddb7618SMatthew Dillon }
2516ddb7618SMatthew Dillon 
2522281065eSMatthew Dillon static void
2536ddb7618SMatthew Dillon journal_detach(struct mount *mp)
2546ddb7618SMatthew Dillon {
255797e4fe9SMatthew Dillon     KKASSERT(mp->mnt_jbitmap != NULL);
2566ddb7618SMatthew Dillon     if (mp->mnt_vn_journal_ops)
25766a1ddf5SMatthew Dillon 	vfs_rm_vnodeops(mp, &journal_vnode_vops, &mp->mnt_vn_journal_ops);
258efda3bd0SMatthew Dillon     kfree(mp->mnt_jbitmap, M_JOURNAL);
259797e4fe9SMatthew Dillon     mp->mnt_jbitmap = NULL;
2606ddb7618SMatthew Dillon }
2616ddb7618SMatthew Dillon 
2622281065eSMatthew Dillon /*
26382eaef15SMatthew Dillon  * Install a journal on a mount point.  Each journal has an associated worker
26482eaef15SMatthew Dillon  * thread which is responsible for buffering and spooling the data to the
26582eaef15SMatthew Dillon  * target.  A mount point may have multiple journals attached to it.  An
26682eaef15SMatthew Dillon  * initial start record is generated when the journal is associated.
2672281065eSMatthew Dillon  */
2682281065eSMatthew Dillon static int
2692281065eSMatthew Dillon journal_install_vfs_journal(struct mount *mp, struct file *fp,
2702281065eSMatthew Dillon 			    const struct mountctl_install_journal *info)
2712281065eSMatthew Dillon {
2722281065eSMatthew Dillon     struct journal *jo;
27382eaef15SMatthew Dillon     struct jrecord jrec;
2742281065eSMatthew Dillon     int error = 0;
2752281065eSMatthew Dillon     int size;
2762281065eSMatthew Dillon 
277efda3bd0SMatthew Dillon     jo = kmalloc(sizeof(struct journal), M_JOURNAL, M_WAITOK|M_ZERO);
2782281065eSMatthew Dillon     bcopy(info->id, jo->id, sizeof(jo->id));
279432b8263SMatthew Dillon     jo->flags = info->flags & ~(MC_JOURNAL_WACTIVE | MC_JOURNAL_RACTIVE |
280432b8263SMatthew Dillon 				MC_JOURNAL_STOP_REQ);
2812281065eSMatthew Dillon 
2822281065eSMatthew Dillon     /*
2832281065eSMatthew Dillon      * Memory FIFO size, round to nearest power of 2
2842281065eSMatthew Dillon      */
28582eaef15SMatthew Dillon     if (info->membufsize) {
2862281065eSMatthew Dillon 	if (info->membufsize < 65536)
2872281065eSMatthew Dillon 	    size = 65536;
2882281065eSMatthew Dillon 	else if (info->membufsize > 128 * 1024 * 1024)
2892281065eSMatthew Dillon 	    size = 128 * 1024 * 1024;
2902281065eSMatthew Dillon 	else
2912281065eSMatthew Dillon 	    size = (int)info->membufsize;
2922281065eSMatthew Dillon     } else {
2932281065eSMatthew Dillon 	size = 1024 * 1024;
2942281065eSMatthew Dillon     }
2952281065eSMatthew Dillon     jo->fifo.size = 1;
2962281065eSMatthew Dillon     while (jo->fifo.size < size)
2972281065eSMatthew Dillon 	jo->fifo.size <<= 1;
2982281065eSMatthew Dillon 
2992281065eSMatthew Dillon     /*
3002281065eSMatthew Dillon      * Other parameters.  If not specified the starting transaction id
3012281065eSMatthew Dillon      * will be the current date.
3022281065eSMatthew Dillon      */
30382eaef15SMatthew Dillon     if (info->transid) {
3042281065eSMatthew Dillon 	jo->transid = info->transid;
3052281065eSMatthew Dillon     } else {
3062281065eSMatthew Dillon 	struct timespec ts;
3072281065eSMatthew Dillon 	getnanotime(&ts);
3082281065eSMatthew Dillon 	jo->transid = ((int64_t)ts.tv_sec << 30) | ts.tv_nsec;
3092281065eSMatthew Dillon     }
3102281065eSMatthew Dillon 
3112281065eSMatthew Dillon     jo->fp = fp;
3122281065eSMatthew Dillon 
3132281065eSMatthew Dillon     /*
3142281065eSMatthew Dillon      * Allocate the memory FIFO
3152281065eSMatthew Dillon      */
3162281065eSMatthew Dillon     jo->fifo.mask = jo->fifo.size - 1;
317efda3bd0SMatthew Dillon     jo->fifo.membase = kmalloc(jo->fifo.size, M_JFIFO, M_WAITOK|M_ZERO|M_NULLOK);
3182281065eSMatthew Dillon     if (jo->fifo.membase == NULL)
3192281065eSMatthew Dillon 	error = ENOMEM;
3202281065eSMatthew Dillon 
32182eaef15SMatthew Dillon     /*
3223119bac5SMatthew Dillon      * Create the worker threads and generate the association record.
32382eaef15SMatthew Dillon      */
3242281065eSMatthew Dillon     if (error) {
325efda3bd0SMatthew Dillon 	kfree(jo, M_JOURNAL);
3262281065eSMatthew Dillon     } else {
3272281065eSMatthew Dillon 	fhold(fp);
328500b6a22SMatthew Dillon 	journal_create_threads(jo);
32982eaef15SMatthew Dillon 	jrecord_init(jo, &jrec, JREC_STREAMID_DISCONT);
33082eaef15SMatthew Dillon 	jrecord_write(&jrec, JTYPE_ASSOCIATE, 0);
33182eaef15SMatthew Dillon 	jrecord_done(&jrec, 0);
3322281065eSMatthew Dillon 	TAILQ_INSERT_TAIL(&mp->mnt_jlist, jo, jentry);
3332281065eSMatthew Dillon     }
3342281065eSMatthew Dillon     return(error);
3352281065eSMatthew Dillon }
3362281065eSMatthew Dillon 
33782eaef15SMatthew Dillon /*
338500b6a22SMatthew Dillon  * Restart a journal with a new descriptor.   The existing reader and writer
339500b6a22SMatthew Dillon  * threads are terminated and a new descriptor is associated with the
340500b6a22SMatthew Dillon  * journal.  The FIFO rindex is reset to xindex and the threads are then
341500b6a22SMatthew Dillon  * restarted.
342500b6a22SMatthew Dillon  */
343500b6a22SMatthew Dillon static int
344500b6a22SMatthew Dillon journal_restart_vfs_journal(struct mount *mp, struct file *fp,
345500b6a22SMatthew Dillon 			   const struct mountctl_restart_journal *info)
346500b6a22SMatthew Dillon {
347500b6a22SMatthew Dillon     struct journal *jo;
348500b6a22SMatthew Dillon     int error;
349500b6a22SMatthew Dillon 
350500b6a22SMatthew Dillon     TAILQ_FOREACH(jo, &mp->mnt_jlist, jentry) {
351500b6a22SMatthew Dillon 	if (bcmp(jo->id, info->id, sizeof(jo->id)) == 0)
352500b6a22SMatthew Dillon 	    break;
353500b6a22SMatthew Dillon     }
354500b6a22SMatthew Dillon     if (jo)
355500b6a22SMatthew Dillon 	error = journal_restart(mp, fp, jo, info->flags);
356500b6a22SMatthew Dillon     else
357500b6a22SMatthew Dillon 	error = EINVAL;
358500b6a22SMatthew Dillon     return (error);
359500b6a22SMatthew Dillon }
360500b6a22SMatthew Dillon 
361500b6a22SMatthew Dillon static int
362500b6a22SMatthew Dillon journal_restart(struct mount *mp, struct file *fp,
363500b6a22SMatthew Dillon 		struct journal *jo, int flags)
364500b6a22SMatthew Dillon {
365500b6a22SMatthew Dillon     /*
366500b6a22SMatthew Dillon      * XXX lock the jo
367500b6a22SMatthew Dillon      */
368500b6a22SMatthew Dillon 
369500b6a22SMatthew Dillon #if 0
370500b6a22SMatthew Dillon     /*
371500b6a22SMatthew Dillon      * Record the fact that we are doing a restart in the journal.
372500b6a22SMatthew Dillon      * XXX it isn't safe to do this if the journal is being restarted
373500b6a22SMatthew Dillon      * because it was locked up and the writer thread has already exited.
374500b6a22SMatthew Dillon      */
375500b6a22SMatthew Dillon     jrecord_init(jo, &jrec, JREC_STREAMID_RESTART);
376500b6a22SMatthew Dillon     jrecord_write(&jrec, JTYPE_DISASSOCIATE, 0);
377500b6a22SMatthew Dillon     jrecord_done(&jrec, 0);
378500b6a22SMatthew Dillon #endif
379500b6a22SMatthew Dillon 
380500b6a22SMatthew Dillon     /*
381500b6a22SMatthew Dillon      * Stop the reader and writer threads and clean up the current
382500b6a22SMatthew Dillon      * descriptor.
383500b6a22SMatthew Dillon      */
384500b6a22SMatthew Dillon     printf("RESTART WITH FP %p KILLING %p\n", fp, jo->fp);
385500b6a22SMatthew Dillon     journal_destroy_threads(jo, flags);
386500b6a22SMatthew Dillon 
387500b6a22SMatthew Dillon     if (jo->fp)
3889f87144fSMatthew Dillon 	fdrop(jo->fp);
389500b6a22SMatthew Dillon 
390500b6a22SMatthew Dillon     /*
391500b6a22SMatthew Dillon      * Associate the new descriptor, reset the FIFO index, and recreate
392500b6a22SMatthew Dillon      * the threads.
393500b6a22SMatthew Dillon      */
394500b6a22SMatthew Dillon     fhold(fp);
395500b6a22SMatthew Dillon     jo->fp = fp;
396500b6a22SMatthew Dillon     jo->fifo.rindex = jo->fifo.xindex;
397500b6a22SMatthew Dillon     journal_create_threads(jo);
398500b6a22SMatthew Dillon 
399500b6a22SMatthew Dillon     return(0);
400500b6a22SMatthew Dillon }
401500b6a22SMatthew Dillon 
402500b6a22SMatthew Dillon /*
40382eaef15SMatthew Dillon  * Disassociate a journal from a mount point and terminate its worker thread.
40482eaef15SMatthew Dillon  * A final termination record is written out before the file pointer is
40582eaef15SMatthew Dillon  * dropped.
40682eaef15SMatthew Dillon  */
4072281065eSMatthew Dillon static int
40882eaef15SMatthew Dillon journal_remove_vfs_journal(struct mount *mp,
40982eaef15SMatthew Dillon 			   const struct mountctl_remove_journal *info)
4102281065eSMatthew Dillon {
4112281065eSMatthew Dillon     struct journal *jo;
4122281065eSMatthew Dillon     int error;
4132281065eSMatthew Dillon 
4142281065eSMatthew Dillon     TAILQ_FOREACH(jo, &mp->mnt_jlist, jentry) {
4152281065eSMatthew Dillon 	if (bcmp(jo->id, info->id, sizeof(jo->id)) == 0)
4162281065eSMatthew Dillon 	    break;
4172281065eSMatthew Dillon     }
418432b8263SMatthew Dillon     if (jo)
419432b8263SMatthew Dillon 	error = journal_destroy(mp, jo, info->flags);
420432b8263SMatthew Dillon     else
421432b8263SMatthew Dillon 	error = EINVAL;
422432b8263SMatthew Dillon     return (error);
423432b8263SMatthew Dillon }
424432b8263SMatthew Dillon 
425432b8263SMatthew Dillon /*
426432b8263SMatthew Dillon  * Remove all journals associated with a mount point.  Usually called
427432b8263SMatthew Dillon  * by the umount code.
428432b8263SMatthew Dillon  */
429432b8263SMatthew Dillon void
430432b8263SMatthew Dillon journal_remove_all_journals(struct mount *mp, int flags)
431432b8263SMatthew Dillon {
432432b8263SMatthew Dillon     struct journal *jo;
433432b8263SMatthew Dillon 
434432b8263SMatthew Dillon     while ((jo = TAILQ_FIRST(&mp->mnt_jlist)) != NULL) {
435432b8263SMatthew Dillon 	journal_destroy(mp, jo, flags);
436432b8263SMatthew Dillon     }
437432b8263SMatthew Dillon }
438432b8263SMatthew Dillon 
439432b8263SMatthew Dillon static int
440432b8263SMatthew Dillon journal_destroy(struct mount *mp, struct journal *jo, int flags)
441432b8263SMatthew Dillon {
442432b8263SMatthew Dillon     struct jrecord jrec;
443432b8263SMatthew Dillon 
4442281065eSMatthew Dillon     TAILQ_REMOVE(&mp->mnt_jlist, jo, jentry);
44582eaef15SMatthew Dillon 
44682eaef15SMatthew Dillon     jrecord_init(jo, &jrec, JREC_STREAMID_DISCONT);
44782eaef15SMatthew Dillon     jrecord_write(&jrec, JTYPE_DISASSOCIATE, 0);
44882eaef15SMatthew Dillon     jrecord_done(&jrec, 0);
44982eaef15SMatthew Dillon 
450500b6a22SMatthew Dillon     journal_destroy_threads(jo, flags);
451500b6a22SMatthew Dillon 
4522281065eSMatthew Dillon     if (jo->fp)
4539f87144fSMatthew Dillon 	fdrop(jo->fp);
4542281065eSMatthew Dillon     if (jo->fifo.membase)
455efda3bd0SMatthew Dillon 	kfree(jo->fifo.membase, M_JFIFO);
456efda3bd0SMatthew Dillon     kfree(jo, M_JOURNAL);
457797e4fe9SMatthew Dillon 
458432b8263SMatthew Dillon     return(0);
4592281065eSMatthew Dillon }
4602281065eSMatthew Dillon 
4612281065eSMatthew Dillon static int
4622281065eSMatthew Dillon journal_resync_vfs_journal(struct mount *mp, const void *ctl)
4632281065eSMatthew Dillon {
4642281065eSMatthew Dillon     return(EINVAL);
4652281065eSMatthew Dillon }
4662281065eSMatthew Dillon 
46739b13188SMatthew Dillon static int
46839b13188SMatthew Dillon journal_status_vfs_journal(struct mount *mp,
46939b13188SMatthew Dillon 		       const struct mountctl_status_journal *info,
47039b13188SMatthew Dillon 		       struct mountctl_journal_ret_status *rstat,
47139b13188SMatthew Dillon 		       int buflen, int *res)
47239b13188SMatthew Dillon {
47339b13188SMatthew Dillon     struct journal *jo;
47439b13188SMatthew Dillon     int error = 0;
47539b13188SMatthew Dillon     int index;
47639b13188SMatthew Dillon 
47739b13188SMatthew Dillon     index = 0;
47839b13188SMatthew Dillon     *res = 0;
47939b13188SMatthew Dillon     TAILQ_FOREACH(jo, &mp->mnt_jlist, jentry) {
48039b13188SMatthew Dillon 	if (info->index == MC_JOURNAL_INDEX_ID) {
48139b13188SMatthew Dillon 	    if (bcmp(jo->id, info->id, sizeof(jo->id)) != 0)
48239b13188SMatthew Dillon 		continue;
48339b13188SMatthew Dillon 	} else if (info->index >= 0) {
48439b13188SMatthew Dillon 	    if (info->index < index)
48539b13188SMatthew Dillon 		continue;
48639b13188SMatthew Dillon 	} else if (info->index != MC_JOURNAL_INDEX_ALL) {
48739b13188SMatthew Dillon 	    continue;
48839b13188SMatthew Dillon 	}
48939b13188SMatthew Dillon 	if (buflen < sizeof(*rstat)) {
49039b13188SMatthew Dillon 	    if (*res)
49139b13188SMatthew Dillon 		rstat[-1].flags |= MC_JOURNAL_STATUS_MORETOCOME;
49239b13188SMatthew Dillon 	    else
49339b13188SMatthew Dillon 		error = EINVAL;
49439b13188SMatthew Dillon 	    break;
49539b13188SMatthew Dillon 	}
49639b13188SMatthew Dillon 	bzero(rstat, sizeof(*rstat));
49739b13188SMatthew Dillon 	rstat->recsize = sizeof(*rstat);
49839b13188SMatthew Dillon 	bcopy(jo->id, rstat->id, sizeof(jo->id));
49939b13188SMatthew Dillon 	rstat->index = index;
50039b13188SMatthew Dillon 	rstat->membufsize = jo->fifo.size;
5013119bac5SMatthew Dillon 	rstat->membufused = jo->fifo.windex - jo->fifo.xindex;
5023119bac5SMatthew Dillon 	rstat->membufunacked = jo->fifo.rindex - jo->fifo.xindex;
50339b13188SMatthew Dillon 	rstat->bytessent = jo->total_acked;
5043119bac5SMatthew Dillon 	rstat->fifostalls = jo->fifostalls;
50539b13188SMatthew Dillon 	++rstat;
50639b13188SMatthew Dillon 	++index;
50739b13188SMatthew Dillon 	*res += sizeof(*rstat);
50839b13188SMatthew Dillon 	buflen -= sizeof(*rstat);
50939b13188SMatthew Dillon     }
51039b13188SMatthew Dillon     return(error);
51139b13188SMatthew Dillon }
512432b8263SMatthew Dillon 
51382eaef15SMatthew Dillon /************************************************************************
51426e603edSMatthew Dillon  *			PARALLEL TRANSACTION SUPPORT ROUTINES		*
51526e603edSMatthew Dillon  ************************************************************************
51626e603edSMatthew Dillon  *
51726e603edSMatthew Dillon  * JRECLIST_*() - routines which create and iterate over jrecord structures,
51826e603edSMatthew Dillon  *		  because a mount point may have multiple attached journals.
51926e603edSMatthew Dillon  */
52026e603edSMatthew Dillon 
52126e603edSMatthew Dillon /*
52226e603edSMatthew Dillon  * Initialize the passed jrecord_list and create a jrecord for each
52326e603edSMatthew Dillon  * journal we need to write to.  Unnecessary mallocs are avoided by
52426e603edSMatthew Dillon  * using the passed jrecord structure as the first jrecord in the list.
52526e603edSMatthew Dillon  * A starting transaction is pushed for each jrecord.
52626e603edSMatthew Dillon  *
52726e603edSMatthew Dillon  * Returns non-zero if any of the journals require undo records.
52826e603edSMatthew Dillon  */
52926e603edSMatthew Dillon static
53026e603edSMatthew Dillon int
53126e603edSMatthew Dillon jreclist_init(struct mount *mp, struct jrecord_list *jreclist,
53226e603edSMatthew Dillon 	      struct jrecord *jreccache, int16_t rectype)
53326e603edSMatthew Dillon {
53426e603edSMatthew Dillon     struct journal *jo;
53526e603edSMatthew Dillon     struct jrecord *jrec;
536797e4fe9SMatthew Dillon     int wantrev;
537797e4fe9SMatthew Dillon     int count;
538797e4fe9SMatthew Dillon     int16_t streamid;
53926e603edSMatthew Dillon 
540797e4fe9SMatthew Dillon     TAILQ_INIT(&jreclist->list);
541797e4fe9SMatthew Dillon 
542797e4fe9SMatthew Dillon     /*
543797e4fe9SMatthew Dillon      * Select the stream ID to use for the transaction.  We must select
544797e4fe9SMatthew Dillon      * a stream ID that is not currently in use by some other parallel
545797e4fe9SMatthew Dillon      * transaction.
546797e4fe9SMatthew Dillon      *
547797e4fe9SMatthew Dillon      * Don't bother calculating the next streamid when reassigning
548797e4fe9SMatthew Dillon      * mnt_streamid, since parallel transactions are fairly rare.  This
549797e4fe9SMatthew Dillon      * also allows someone observing the raw records to clearly see
550797e4fe9SMatthew Dillon      * when parallel transactions occur.
551797e4fe9SMatthew Dillon      */
552797e4fe9SMatthew Dillon     streamid = mp->mnt_streamid;
553797e4fe9SMatthew Dillon     count = 0;
554797e4fe9SMatthew Dillon     while (mp->mnt_jbitmap[streamid >> 3] & (1 << (streamid & 7))) {
555797e4fe9SMatthew Dillon 	if (++streamid == JREC_STREAMID_JMAX)
556797e4fe9SMatthew Dillon 		streamid = JREC_STREAMID_JMIN;
557797e4fe9SMatthew Dillon 	if (++count == JREC_STREAMID_JMAX - JREC_STREAMID_JMIN) {
558797e4fe9SMatthew Dillon 		printf("jreclist_init: all streamid's in use! sleeping\n");
559797e4fe9SMatthew Dillon 		tsleep(jreclist, 0, "jsidfl", hz * 10);
560797e4fe9SMatthew Dillon 		count = 0;
561797e4fe9SMatthew Dillon 	}
562797e4fe9SMatthew Dillon     }
563797e4fe9SMatthew Dillon     mp->mnt_jbitmap[streamid >> 3] |= 1 << (streamid & 7);
564797e4fe9SMatthew Dillon     mp->mnt_streamid = streamid;
565797e4fe9SMatthew Dillon     jreclist->streamid = streamid;
566797e4fe9SMatthew Dillon 
567797e4fe9SMatthew Dillon     /*
568797e4fe9SMatthew Dillon      * Now initialize a stream on each journal.
569797e4fe9SMatthew Dillon      */
570797e4fe9SMatthew Dillon     count = 0;
571797e4fe9SMatthew Dillon     wantrev = 0;
57226e603edSMatthew Dillon     TAILQ_FOREACH(jo, &mp->mnt_jlist, jentry) {
57326e603edSMatthew Dillon 	if (count == 0)
57426e603edSMatthew Dillon 	    jrec = jreccache;
57526e603edSMatthew Dillon 	else
576efda3bd0SMatthew Dillon 	    jrec = kmalloc(sizeof(*jrec), M_JOURNAL, M_WAITOK);
577797e4fe9SMatthew Dillon 	jrecord_init(jo, jrec, streamid);
57826e603edSMatthew Dillon 	jrec->user_save = jrecord_push(jrec, rectype);
579797e4fe9SMatthew Dillon 	TAILQ_INSERT_TAIL(&jreclist->list, jrec, user_entry);
58026e603edSMatthew Dillon 	if (jo->flags & MC_JOURNAL_WANT_REVERSABLE)
58126e603edSMatthew Dillon 	    wantrev = 1;
58226e603edSMatthew Dillon 	++count;
58326e603edSMatthew Dillon     }
58426e603edSMatthew Dillon     return(wantrev);
58526e603edSMatthew Dillon }
58626e603edSMatthew Dillon 
58726e603edSMatthew Dillon /*
58826e603edSMatthew Dillon  * Terminate the journaled transactions started by jreclist_init().  If
58926e603edSMatthew Dillon  * an error occured, the transaction records will be aborted.
59026e603edSMatthew Dillon  */
59126e603edSMatthew Dillon static
59226e603edSMatthew Dillon void
593797e4fe9SMatthew Dillon jreclist_done(struct mount *mp, struct jrecord_list *jreclist, int error)
59426e603edSMatthew Dillon {
59526e603edSMatthew Dillon     struct jrecord *jrec;
59626e603edSMatthew Dillon     int count;
59726e603edSMatthew Dillon 
598797e4fe9SMatthew Dillon     /*
599797e4fe9SMatthew Dillon      * Cleanup the jrecord state on each journal.
600797e4fe9SMatthew Dillon      */
601797e4fe9SMatthew Dillon     TAILQ_FOREACH(jrec, &jreclist->list, user_entry) {
60226e603edSMatthew Dillon 	jrecord_pop(jrec, jrec->user_save);
60326e603edSMatthew Dillon 	jrecord_done(jrec, error);
60426e603edSMatthew Dillon     }
605797e4fe9SMatthew Dillon 
606797e4fe9SMatthew Dillon     /*
607797e4fe9SMatthew Dillon      * Free allocated jrec's (the first is always supplied)
608797e4fe9SMatthew Dillon      */
60926e603edSMatthew Dillon     count = 0;
610797e4fe9SMatthew Dillon     while ((jrec = TAILQ_FIRST(&jreclist->list)) != NULL) {
611797e4fe9SMatthew Dillon 	TAILQ_REMOVE(&jreclist->list, jrec, user_entry);
61226e603edSMatthew Dillon 	if (count)
613efda3bd0SMatthew Dillon 	    kfree(jrec, M_JOURNAL);
61426e603edSMatthew Dillon 	++count;
61526e603edSMatthew Dillon     }
616797e4fe9SMatthew Dillon 
617797e4fe9SMatthew Dillon     /*
618797e4fe9SMatthew Dillon      * Clear the streamid so it can be reused.
619797e4fe9SMatthew Dillon      */
620797e4fe9SMatthew Dillon     mp->mnt_jbitmap[jreclist->streamid >> 3] &= ~(1 << (jreclist->streamid & 7));
62126e603edSMatthew Dillon }
62226e603edSMatthew Dillon 
62326e603edSMatthew Dillon /*
62426e603edSMatthew Dillon  * This procedure writes out UNDO records for available reversable
62526e603edSMatthew Dillon  * journals.
62626e603edSMatthew Dillon  *
62726e603edSMatthew Dillon  * XXX could use improvement.  There is no need to re-read the file
62826e603edSMatthew Dillon  * for each journal.
62926e603edSMatthew Dillon  */
63026e603edSMatthew Dillon static
63126e603edSMatthew Dillon void
63226e603edSMatthew Dillon jreclist_undo_file(struct jrecord_list *jreclist, struct vnode *vp,
63326e603edSMatthew Dillon 		   int jrflags, off_t off, off_t bytes)
63426e603edSMatthew Dillon {
63526e603edSMatthew Dillon     struct jrecord *jrec;
63626e603edSMatthew Dillon     int error;
63726e603edSMatthew Dillon 
63826e603edSMatthew Dillon     error = 0;
63926e603edSMatthew Dillon     if (jrflags & JRUNDO_GETVP)
64087de5057SMatthew Dillon 	error = vget(vp, LK_SHARED);
64126e603edSMatthew Dillon     if (error == 0) {
642797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist->list, user_entry) {
64326e603edSMatthew Dillon 	    if (jrec->jo->flags & MC_JOURNAL_WANT_REVERSABLE) {
64426e603edSMatthew Dillon 		jrecord_undo_file(jrec, vp, jrflags, off, bytes);
64526e603edSMatthew Dillon 	    }
64626e603edSMatthew Dillon 	}
64726e603edSMatthew Dillon     }
64826e603edSMatthew Dillon     if (error == 0 && jrflags & JRUNDO_GETVP)
64926e603edSMatthew Dillon 	vput(vp);
65026e603edSMatthew Dillon }
65126e603edSMatthew Dillon 
65226e603edSMatthew Dillon /************************************************************************
65326e603edSMatthew Dillon  *			LOW LEVEL UNDO SUPPORT ROUTINE			*
65426e603edSMatthew Dillon  ************************************************************************
65526e603edSMatthew Dillon  *
65626e603edSMatthew Dillon  * This function is used to support UNDO records.  It will generate an
65726e603edSMatthew Dillon  * appropriate record with the requested portion of the file data.  Note
65826e603edSMatthew Dillon  * that file data is only recorded if JRUNDO_FILEDATA is passed.  If bytes
65926e603edSMatthew Dillon  * is -1, it will be set to the size of the file.
66026e603edSMatthew Dillon  */
66126e603edSMatthew Dillon static void
66226e603edSMatthew Dillon jrecord_undo_file(struct jrecord *jrec, struct vnode *vp, int jrflags,
66326e603edSMatthew Dillon 		  off_t off, off_t bytes)
66426e603edSMatthew Dillon {
66526e603edSMatthew Dillon     struct vattr attr;
66626e603edSMatthew Dillon     void *save1; /* warning, save pointers do not always remain valid */
66726e603edSMatthew Dillon     void *save2;
66826e603edSMatthew Dillon     int error;
66926e603edSMatthew Dillon 
67026e603edSMatthew Dillon     /*
67126e603edSMatthew Dillon      * Setup.  Start the UNDO record, obtain a shared lock on the vnode,
67226e603edSMatthew Dillon      * and retrieve attribute info.
67326e603edSMatthew Dillon      */
67426e603edSMatthew Dillon     save1 = jrecord_push(jrec, JTYPE_UNDO);
67587de5057SMatthew Dillon     error = VOP_GETATTR(vp, &attr);
67626e603edSMatthew Dillon     if (error)
67726e603edSMatthew Dillon 	goto done;
67826e603edSMatthew Dillon 
67926e603edSMatthew Dillon     /*
68026e603edSMatthew Dillon      * Generate UNDO records as requested.
68126e603edSMatthew Dillon      */
68226e603edSMatthew Dillon     if (jrflags & JRUNDO_VATTR) {
68326e603edSMatthew Dillon 	save2 = jrecord_push(jrec, JTYPE_VATTR);
68426e603edSMatthew Dillon 	jrecord_leaf(jrec, JLEAF_VTYPE, &attr.va_type, sizeof(attr.va_type));
685aa159335SMatthew Dillon 	if ((jrflags & JRUNDO_NLINK) && attr.va_nlink != VNOVAL)
686aa159335SMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_NLINK, &attr.va_nlink, sizeof(attr.va_nlink));
68726e603edSMatthew Dillon 	if ((jrflags & JRUNDO_SIZE) && attr.va_size != VNOVAL)
68826e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_SIZE, &attr.va_size, sizeof(attr.va_size));
68926e603edSMatthew Dillon 	if ((jrflags & JRUNDO_UID) && attr.va_uid != VNOVAL)
69026e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_UID, &attr.va_uid, sizeof(attr.va_uid));
69126e603edSMatthew Dillon 	if ((jrflags & JRUNDO_GID) && attr.va_gid != VNOVAL)
69226e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_GID, &attr.va_gid, sizeof(attr.va_gid));
69326e603edSMatthew Dillon 	if ((jrflags & JRUNDO_FSID) && attr.va_fsid != VNOVAL)
69426e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_FSID, &attr.va_fsid, sizeof(attr.va_fsid));
69526e603edSMatthew Dillon 	if ((jrflags & JRUNDO_MODES) && attr.va_mode != (mode_t)VNOVAL)
69626e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_MODES, &attr.va_mode, sizeof(attr.va_mode));
69726e603edSMatthew Dillon 	if ((jrflags & JRUNDO_INUM) && attr.va_fileid != VNOVAL)
69826e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_INUM, &attr.va_fileid, sizeof(attr.va_fileid));
69926e603edSMatthew Dillon 	if ((jrflags & JRUNDO_ATIME) && attr.va_atime.tv_sec != VNOVAL)
70026e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_ATIME, &attr.va_atime, sizeof(attr.va_atime));
70126e603edSMatthew Dillon 	if ((jrflags & JRUNDO_MTIME) && attr.va_mtime.tv_sec != VNOVAL)
70226e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_MTIME, &attr.va_mtime, sizeof(attr.va_mtime));
70326e603edSMatthew Dillon 	if ((jrflags & JRUNDO_CTIME) && attr.va_ctime.tv_sec != VNOVAL)
70426e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_CTIME, &attr.va_ctime, sizeof(attr.va_ctime));
70526e603edSMatthew Dillon 	if ((jrflags & JRUNDO_GEN) && attr.va_gen != VNOVAL)
70626e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_GEN, &attr.va_gen, sizeof(attr.va_gen));
70726e603edSMatthew Dillon 	if ((jrflags & JRUNDO_FLAGS) && attr.va_flags != VNOVAL)
70826e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_FLAGS, &attr.va_flags, sizeof(attr.va_flags));
70926e603edSMatthew Dillon 	if ((jrflags & JRUNDO_UDEV) && attr.va_rdev != VNOVAL)
71026e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_UDEV, &attr.va_rdev, sizeof(attr.va_rdev));
71126e603edSMatthew Dillon 	jrecord_pop(jrec, save2);
71226e603edSMatthew Dillon     }
71326e603edSMatthew Dillon 
71426e603edSMatthew Dillon     /*
71526e603edSMatthew Dillon      * Output the file data being overwritten by reading the file and
71626e603edSMatthew Dillon      * writing it out to the journal prior to the write operation.  We
71726e603edSMatthew Dillon      * do not need to write out data past the current file EOF.
71826e603edSMatthew Dillon      *
71926e603edSMatthew Dillon      * XXX support JRUNDO_CONDLINK - do not write out file data for files
72026e603edSMatthew Dillon      * with a link count > 1.  The undo code needs to locate the inode and
72126e603edSMatthew Dillon      * regenerate the hardlink.
72226e603edSMatthew Dillon      */
72325bae9ceSMatthew Dillon     if ((jrflags & JRUNDO_FILEDATA) && attr.va_type == VREG) {
72426e603edSMatthew Dillon 	if (attr.va_size != VNOVAL) {
72526e603edSMatthew Dillon 	    if (bytes == -1)
72626e603edSMatthew Dillon 		bytes = attr.va_size - off;
72726e603edSMatthew Dillon 	    if (off + bytes > attr.va_size)
72826e603edSMatthew Dillon 		bytes = attr.va_size - off;
72926e603edSMatthew Dillon 	    if (bytes > 0)
73026e603edSMatthew Dillon 		jrecord_file_data(jrec, vp, off, bytes);
73126e603edSMatthew Dillon 	} else {
73226e603edSMatthew Dillon 	    error = EINVAL;
73326e603edSMatthew Dillon 	}
73426e603edSMatthew Dillon     }
73525bae9ceSMatthew Dillon     if ((jrflags & JRUNDO_FILEDATA) && attr.va_type == VLNK) {
73625bae9ceSMatthew Dillon 	struct iovec aiov;
73725bae9ceSMatthew Dillon 	struct uio auio;
73825bae9ceSMatthew Dillon 	char *buf;
73925bae9ceSMatthew Dillon 
740efda3bd0SMatthew Dillon 	buf = kmalloc(PATH_MAX, M_JOURNAL, M_WAITOK);
74125bae9ceSMatthew Dillon 	aiov.iov_base = buf;
74225bae9ceSMatthew Dillon 	aiov.iov_len = PATH_MAX;
74325bae9ceSMatthew Dillon 	auio.uio_iov = &aiov;
74425bae9ceSMatthew Dillon 	auio.uio_iovcnt = 1;
74525bae9ceSMatthew Dillon 	auio.uio_offset = 0;
74625bae9ceSMatthew Dillon 	auio.uio_rw = UIO_READ;
74725bae9ceSMatthew Dillon 	auio.uio_segflg = UIO_SYSSPACE;
74825bae9ceSMatthew Dillon 	auio.uio_td = curthread;
74925bae9ceSMatthew Dillon 	auio.uio_resid = PATH_MAX;
75025bae9ceSMatthew Dillon 	error = VOP_READLINK(vp, &auio, proc0.p_ucred);
75125bae9ceSMatthew Dillon 	if (error == 0) {
75225bae9ceSMatthew Dillon 		jrecord_leaf(jrec, JLEAF_SYMLINKDATA, buf,
75325bae9ceSMatthew Dillon 				PATH_MAX - auio.uio_resid);
75425bae9ceSMatthew Dillon 	}
755efda3bd0SMatthew Dillon 	kfree(buf, M_JOURNAL);
75625bae9ceSMatthew Dillon     }
75726e603edSMatthew Dillon done:
75826e603edSMatthew Dillon     if (error)
75926e603edSMatthew Dillon 	jrecord_leaf(jrec, JLEAF_ERROR, &error, sizeof(error));
76026e603edSMatthew Dillon     jrecord_pop(jrec, save1);
76126e603edSMatthew Dillon }
76226e603edSMatthew Dillon 
/************************************************************************
 *			JOURNAL VNOPS					*
 ************************************************************************
 *
 * These are function shims replacing the normal filesystem ops.  We become
 * responsible for calling the underlying filesystem ops.  We have the choice
 * of executing the underlying op first and then generating the journal entry,
 * or starting the journal entry, executing the underlying op, and then
 * either completing or aborting it.
 *
 * The journal is supposed to be a high-level entity, which generally means
 * identifying files by name rather than by inode.  Supplying both allows
 * the journal to be used both for inode-number-compatible 'mirrors' and
 * for simple filesystem replication.
 *
 * Writes are particularly difficult to deal with because a single write may
 * represent a hundred megabyte buffer or more, and both writes and truncations
 * require the 'old' data to be written out as well as the new data if the
 * log is reversible.  Other issues:
 *
 * - How to deal with operations on unlinked files (no path available),
 *   but which may still be filesystem visible due to hard links.
 *
 * - How to deal with modifications made via a memory map.
 *
 * - Future cache coherency support will require cache coherency API calls
 *   both prior to and after the call to the underlying VFS.
 *
 * ALSO NOTE: We do not have to shim compatibility VOPs like MKDIR which have
 * new VFS equivalents (NMKDIR).
 */
794558b8e00SMatthew Dillon 
795b2f7ec6cSMatthew Dillon /*
796b2f7ec6cSMatthew Dillon  * Journal vop_settattr { a_vp, a_vap, a_cred, a_td }
797b2f7ec6cSMatthew Dillon  */
798558b8e00SMatthew Dillon static
799558b8e00SMatthew Dillon int
800558b8e00SMatthew Dillon journal_setattr(struct vop_setattr_args *ap)
801558b8e00SMatthew Dillon {
80226e603edSMatthew Dillon     struct jrecord_list jreclist;
80326e603edSMatthew Dillon     struct jrecord jreccache;
80426e603edSMatthew Dillon     struct jrecord *jrec;
805558b8e00SMatthew Dillon     struct mount *mp;
806aa159335SMatthew Dillon     void *save;
807558b8e00SMatthew Dillon     int error;
808558b8e00SMatthew Dillon 
80966a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
81026e603edSMatthew Dillon     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_SETATTR)) {
81126e603edSMatthew Dillon 	jreclist_undo_file(&jreclist, ap->a_vp, JRUNDO_VATTR, 0, 0);
81226e603edSMatthew Dillon     }
81326e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
814558b8e00SMatthew Dillon     if (error == 0) {
815797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
81687de5057SMatthew Dillon 	    jrecord_write_cred(jrec, curthread, ap->a_cred);
81726e603edSMatthew Dillon 	    jrecord_write_vnode_ref(jrec, ap->a_vp);
818aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
81926e603edSMatthew Dillon 	    jrecord_write_vattr(jrec, ap->a_vap);
820aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
821558b8e00SMatthew Dillon 	}
822558b8e00SMatthew Dillon     }
823797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
824558b8e00SMatthew Dillon     return (error);
825558b8e00SMatthew Dillon }
826558b8e00SMatthew Dillon 
827b2f7ec6cSMatthew Dillon /*
828b2f7ec6cSMatthew Dillon  * Journal vop_write { a_vp, a_uio, a_ioflag, a_cred }
829b2f7ec6cSMatthew Dillon  */
830558b8e00SMatthew Dillon static
831558b8e00SMatthew Dillon int
832558b8e00SMatthew Dillon journal_write(struct vop_write_args *ap)
833558b8e00SMatthew Dillon {
83426e603edSMatthew Dillon     struct jrecord_list jreclist;
83526e603edSMatthew Dillon     struct jrecord jreccache;
83626e603edSMatthew Dillon     struct jrecord *jrec;
837558b8e00SMatthew Dillon     struct mount *mp;
8389578bde0SMatthew Dillon     struct uio uio_copy;
8399578bde0SMatthew Dillon     struct iovec uio_one_iovec;
840aa159335SMatthew Dillon     void *save;
841558b8e00SMatthew Dillon     int error;
842558b8e00SMatthew Dillon 
8439578bde0SMatthew Dillon     /*
8449578bde0SMatthew Dillon      * This is really nasty.  UIO's don't retain sufficient information to
8459578bde0SMatthew Dillon      * be reusable once they've gone through the VOP chain.  The iovecs get
8469578bde0SMatthew Dillon      * cleared, so we have to copy the UIO.
8479578bde0SMatthew Dillon      *
8489578bde0SMatthew Dillon      * XXX fix the UIO code to not destroy iov's during a scan so we can
8499578bde0SMatthew Dillon      *     reuse the uio over and over again.
850d0887c34SMatthew Dillon      *
851d0887c34SMatthew Dillon      * XXX UNDO code needs to journal the old data prior to the write.
8529578bde0SMatthew Dillon      */
8539578bde0SMatthew Dillon     uio_copy = *ap->a_uio;
8549578bde0SMatthew Dillon     if (uio_copy.uio_iovcnt == 1) {
8559578bde0SMatthew Dillon 	uio_one_iovec = ap->a_uio->uio_iov[0];
8569578bde0SMatthew Dillon 	uio_copy.uio_iov = &uio_one_iovec;
8579578bde0SMatthew Dillon     } else {
858*77652cadSMatthew Dillon 	uio_copy.uio_iov = kmalloc(uio_copy.uio_iovcnt * sizeof(struct iovec),
8599578bde0SMatthew Dillon 				    M_JOURNAL, M_WAITOK);
8609578bde0SMatthew Dillon 	bcopy(ap->a_uio->uio_iov, uio_copy.uio_iov,
8619578bde0SMatthew Dillon 		uio_copy.uio_iovcnt * sizeof(struct iovec));
8629578bde0SMatthew Dillon     }
8639578bde0SMatthew Dillon 
86426e603edSMatthew Dillon     /*
86526e603edSMatthew Dillon      * Write out undo data.  Note that uio_offset is incorrect if
86626e603edSMatthew Dillon      * IO_APPEND is set, but fortunately we have no undo file data to
86726e603edSMatthew Dillon      * write out in that case.
86826e603edSMatthew Dillon      */
86966a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
87026e603edSMatthew Dillon     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_WRITE)) {
87126e603edSMatthew Dillon 	if (ap->a_ioflag & IO_APPEND) {
87226e603edSMatthew Dillon 	    jreclist_undo_file(&jreclist, ap->a_vp, JRUNDO_SIZE|JRUNDO_MTIME, 0, 0);
87326e603edSMatthew Dillon 	} else {
87426e603edSMatthew Dillon 	    jreclist_undo_file(&jreclist, ap->a_vp,
87526e603edSMatthew Dillon 			       JRUNDO_FILEDATA|JRUNDO_SIZE|JRUNDO_MTIME,
87626e603edSMatthew Dillon 			       uio_copy.uio_offset, uio_copy.uio_resid);
87726e603edSMatthew Dillon 	}
87826e603edSMatthew Dillon     }
879558b8e00SMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
880d0887c34SMatthew Dillon 
881d0887c34SMatthew Dillon     /*
882d0887c34SMatthew Dillon      * XXX bad hack to figure out the offset for O_APPEND writes (note:
883d0887c34SMatthew Dillon      * uio field state after the VFS operation).
884d0887c34SMatthew Dillon      */
885d0887c34SMatthew Dillon     uio_copy.uio_offset = ap->a_uio->uio_offset -
886d0887c34SMatthew Dillon 			  (uio_copy.uio_resid - ap->a_uio->uio_resid);
887d0887c34SMatthew Dillon 
88826e603edSMatthew Dillon     /*
88926e603edSMatthew Dillon      * Output the write data to the journal.
89026e603edSMatthew Dillon      */
891558b8e00SMatthew Dillon     if (error == 0) {
892797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
89326e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
89426e603edSMatthew Dillon 	    jrecord_write_vnode_ref(jrec, ap->a_vp);
895aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
89626e603edSMatthew Dillon 	    jrecord_write_uio(jrec, JLEAF_FILEDATA, &uio_copy);
897aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
898558b8e00SMatthew Dillon 	}
899558b8e00SMatthew Dillon     }
900797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
9019578bde0SMatthew Dillon 
9029578bde0SMatthew Dillon     if (uio_copy.uio_iov != &uio_one_iovec)
903efda3bd0SMatthew Dillon 	kfree(uio_copy.uio_iov, M_JOURNAL);
904558b8e00SMatthew Dillon     return (error);
905558b8e00SMatthew Dillon }
906558b8e00SMatthew Dillon 
907b2f7ec6cSMatthew Dillon /*
908b2f7ec6cSMatthew Dillon  * Journal vop_fsync { a_vp, a_waitfor, a_td }
909b2f7ec6cSMatthew Dillon  */
910558b8e00SMatthew Dillon static
911558b8e00SMatthew Dillon int
912558b8e00SMatthew Dillon journal_fsync(struct vop_fsync_args *ap)
913558b8e00SMatthew Dillon {
91426e603edSMatthew Dillon #if 0
915558b8e00SMatthew Dillon     struct mount *mp;
916558b8e00SMatthew Dillon     struct journal *jo;
91726e603edSMatthew Dillon #endif
918558b8e00SMatthew Dillon     int error;
919558b8e00SMatthew Dillon 
920558b8e00SMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
92126e603edSMatthew Dillon #if 0
92266a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
923558b8e00SMatthew Dillon     if (error == 0) {
924558b8e00SMatthew Dillon 	TAILQ_FOREACH(jo, &mp->mnt_jlist, jentry) {
925558b8e00SMatthew Dillon 	    /* XXX synchronize pending journal records */
926558b8e00SMatthew Dillon 	}
927558b8e00SMatthew Dillon     }
92826e603edSMatthew Dillon #endif
929558b8e00SMatthew Dillon     return (error);
930558b8e00SMatthew Dillon }
931558b8e00SMatthew Dillon 
932b2f7ec6cSMatthew Dillon /*
933b2f7ec6cSMatthew Dillon  * Journal vop_putpages { a_vp, a_m, a_count, a_sync, a_rtvals, a_offset }
934143c4f15SMatthew Dillon  *
935143c4f15SMatthew Dillon  * note: a_count is in bytes.
936b2f7ec6cSMatthew Dillon  */
937558b8e00SMatthew Dillon static
938558b8e00SMatthew Dillon int
939558b8e00SMatthew Dillon journal_putpages(struct vop_putpages_args *ap)
940558b8e00SMatthew Dillon {
94126e603edSMatthew Dillon     struct jrecord_list jreclist;
94226e603edSMatthew Dillon     struct jrecord jreccache;
94326e603edSMatthew Dillon     struct jrecord *jrec;
944558b8e00SMatthew Dillon     struct mount *mp;
945aa159335SMatthew Dillon     void *save;
946558b8e00SMatthew Dillon     int error;
947558b8e00SMatthew Dillon 
94866a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
94926e603edSMatthew Dillon     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_PUTPAGES) &&
95026e603edSMatthew Dillon 	ap->a_count > 0
95126e603edSMatthew Dillon     ) {
95226e603edSMatthew Dillon 	jreclist_undo_file(&jreclist, ap->a_vp,
95326e603edSMatthew Dillon 			   JRUNDO_FILEDATA|JRUNDO_SIZE|JRUNDO_MTIME,
95426e603edSMatthew Dillon 			   ap->a_offset, btoc(ap->a_count));
95526e603edSMatthew Dillon     }
95626e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
957143c4f15SMatthew Dillon     if (error == 0 && ap->a_count > 0) {
958797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
95926e603edSMatthew Dillon 	    jrecord_write_vnode_ref(jrec, ap->a_vp);
960aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
96126e603edSMatthew Dillon 	    jrecord_write_pagelist(jrec, JLEAF_FILEDATA, ap->a_m, ap->a_rtvals,
96226e603edSMatthew Dillon 				   btoc(ap->a_count), ap->a_offset);
963aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
964558b8e00SMatthew Dillon 	}
965558b8e00SMatthew Dillon     }
966797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
967558b8e00SMatthew Dillon     return (error);
968558b8e00SMatthew Dillon }
969558b8e00SMatthew Dillon 
970b2f7ec6cSMatthew Dillon /*
971b2f7ec6cSMatthew Dillon  * Journal vop_setacl { a_vp, a_type, a_aclp, a_cred, a_td }
972b2f7ec6cSMatthew Dillon  */
973558b8e00SMatthew Dillon static
974558b8e00SMatthew Dillon int
975558b8e00SMatthew Dillon journal_setacl(struct vop_setacl_args *ap)
976558b8e00SMatthew Dillon {
97726e603edSMatthew Dillon     struct jrecord_list jreclist;
97826e603edSMatthew Dillon     struct jrecord jreccache;
97926e603edSMatthew Dillon     struct jrecord *jrec;
980558b8e00SMatthew Dillon     struct mount *mp;
981558b8e00SMatthew Dillon     int error;
982558b8e00SMatthew Dillon 
98366a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
98426e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_SETACL);
98526e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
986558b8e00SMatthew Dillon     if (error == 0) {
987797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
98826e603edSMatthew Dillon #if 0
98926e603edSMatthew Dillon 	    if ((jo->flags & MC_JOURNAL_WANT_REVERSABLE))
99026e603edSMatthew Dillon 		jrecord_undo_file(jrec, ap->a_vp, JRUNDO_XXX, 0, 0);
99126e603edSMatthew Dillon #endif
99287de5057SMatthew Dillon 	    jrecord_write_cred(jrec, curthread, ap->a_cred);
99326e603edSMatthew Dillon 	    jrecord_write_vnode_ref(jrec, ap->a_vp);
994aa159335SMatthew Dillon #if 0
995aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
996b2f7ec6cSMatthew Dillon 	    /* XXX type, aclp */
997aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
998aa159335SMatthew Dillon #endif
999558b8e00SMatthew Dillon 	}
1000558b8e00SMatthew Dillon     }
1001797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1002558b8e00SMatthew Dillon     return (error);
1003558b8e00SMatthew Dillon }
1004558b8e00SMatthew Dillon 
1005b2f7ec6cSMatthew Dillon /*
1006b2f7ec6cSMatthew Dillon  * Journal vop_setextattr { a_vp, a_name, a_uio, a_cred, a_td }
1007b2f7ec6cSMatthew Dillon  */
1008558b8e00SMatthew Dillon static
1009558b8e00SMatthew Dillon int
1010558b8e00SMatthew Dillon journal_setextattr(struct vop_setextattr_args *ap)
1011558b8e00SMatthew Dillon {
101226e603edSMatthew Dillon     struct jrecord_list jreclist;
101326e603edSMatthew Dillon     struct jrecord jreccache;
101426e603edSMatthew Dillon     struct jrecord *jrec;
1015558b8e00SMatthew Dillon     struct mount *mp;
1016aa159335SMatthew Dillon     void *save;
1017558b8e00SMatthew Dillon     int error;
1018558b8e00SMatthew Dillon 
101966a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
102026e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_SETEXTATTR);
102126e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1022558b8e00SMatthew Dillon     if (error == 0) {
1023797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
102426e603edSMatthew Dillon #if 0
102526e603edSMatthew Dillon 	    if ((jo->flags & MC_JOURNAL_WANT_REVERSABLE))
102626e603edSMatthew Dillon 		jrecord_undo_file(jrec, ap->a_vp, JRUNDO_XXX, 0, 0);
102726e603edSMatthew Dillon #endif
102887de5057SMatthew Dillon 	    jrecord_write_cred(jrec, curthread, ap->a_cred);
102926e603edSMatthew Dillon 	    jrecord_write_vnode_ref(jrec, ap->a_vp);
103026e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_ATTRNAME, ap->a_name, strlen(ap->a_name));
1031aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
103226e603edSMatthew Dillon 	    jrecord_write_uio(jrec, JLEAF_FILEDATA, ap->a_uio);
1033aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
1034558b8e00SMatthew Dillon 	}
1035558b8e00SMatthew Dillon     }
1036797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1037558b8e00SMatthew Dillon     return (error);
1038558b8e00SMatthew Dillon }
1039558b8e00SMatthew Dillon 
1040b2f7ec6cSMatthew Dillon /*
1041b2f7ec6cSMatthew Dillon  * Journal vop_ncreate { a_ncp, a_vpp, a_cred, a_vap }
1042b2f7ec6cSMatthew Dillon  */
1043558b8e00SMatthew Dillon static
1044558b8e00SMatthew Dillon int
1045558b8e00SMatthew Dillon journal_ncreate(struct vop_ncreate_args *ap)
1046558b8e00SMatthew Dillon {
104726e603edSMatthew Dillon     struct jrecord_list jreclist;
104826e603edSMatthew Dillon     struct jrecord jreccache;
104926e603edSMatthew Dillon     struct jrecord *jrec;
1050558b8e00SMatthew Dillon     struct mount *mp;
1051aa159335SMatthew Dillon     void *save;
1052558b8e00SMatthew Dillon     int error;
1053558b8e00SMatthew Dillon 
105466a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
105526e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_CREATE);
105626e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1057558b8e00SMatthew Dillon     if (error == 0) {
1058797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
105926e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
106026e603edSMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_ncp);
1061b2f7ec6cSMatthew Dillon 	    if (*ap->a_vpp)
106226e603edSMatthew Dillon 		jrecord_write_vnode_ref(jrec, *ap->a_vpp);
1063aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
106426e603edSMatthew Dillon 	    jrecord_write_vattr(jrec, ap->a_vap);
1065aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
1066558b8e00SMatthew Dillon 	}
1067558b8e00SMatthew Dillon     }
1068797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1069558b8e00SMatthew Dillon     return (error);
1070558b8e00SMatthew Dillon }
1071558b8e00SMatthew Dillon 
1072b2f7ec6cSMatthew Dillon /*
1073b2f7ec6cSMatthew Dillon  * Journal vop_nmknod { a_ncp, a_vpp, a_cred, a_vap }
1074b2f7ec6cSMatthew Dillon  */
1075558b8e00SMatthew Dillon static
1076558b8e00SMatthew Dillon int
1077558b8e00SMatthew Dillon journal_nmknod(struct vop_nmknod_args *ap)
1078558b8e00SMatthew Dillon {
107926e603edSMatthew Dillon     struct jrecord_list jreclist;
108026e603edSMatthew Dillon     struct jrecord jreccache;
108126e603edSMatthew Dillon     struct jrecord *jrec;
1082558b8e00SMatthew Dillon     struct mount *mp;
1083aa159335SMatthew Dillon     void *save;
1084558b8e00SMatthew Dillon     int error;
1085558b8e00SMatthew Dillon 
108666a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
108726e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_MKNOD);
108826e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1089558b8e00SMatthew Dillon     if (error == 0) {
1090797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
109126e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
109226e603edSMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_ncp);
1093aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
109426e603edSMatthew Dillon 	    jrecord_write_vattr(jrec, ap->a_vap);
1095aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
1096b2f7ec6cSMatthew Dillon 	    if (*ap->a_vpp)
109726e603edSMatthew Dillon 		jrecord_write_vnode_ref(jrec, *ap->a_vpp);
1098558b8e00SMatthew Dillon 	}
1099558b8e00SMatthew Dillon     }
1100797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1101558b8e00SMatthew Dillon     return (error);
1102558b8e00SMatthew Dillon }
1103558b8e00SMatthew Dillon 
1104b2f7ec6cSMatthew Dillon /*
1105b2f7ec6cSMatthew Dillon  * Journal vop_nlink { a_ncp, a_vp, a_cred }
1106b2f7ec6cSMatthew Dillon  */
1107558b8e00SMatthew Dillon static
1108558b8e00SMatthew Dillon int
1109558b8e00SMatthew Dillon journal_nlink(struct vop_nlink_args *ap)
1110558b8e00SMatthew Dillon {
111126e603edSMatthew Dillon     struct jrecord_list jreclist;
111226e603edSMatthew Dillon     struct jrecord jreccache;
111326e603edSMatthew Dillon     struct jrecord *jrec;
1114558b8e00SMatthew Dillon     struct mount *mp;
1115aa159335SMatthew Dillon     void *save;
1116558b8e00SMatthew Dillon     int error;
1117558b8e00SMatthew Dillon 
111866a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
111926e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_LINK);
112026e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1121558b8e00SMatthew Dillon     if (error == 0) {
1122797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
112326e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
112426e603edSMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_ncp);
1125b2f7ec6cSMatthew Dillon 	    /* XXX PATH to VP and inode number */
1126f4659a6cSMatthew Dillon 	    /* XXX this call may not record the correct path when
1127f4659a6cSMatthew Dillon 	     * multiple paths are available */
1128aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
112926e603edSMatthew Dillon 	    jrecord_write_vnode_link(jrec, ap->a_vp, ap->a_ncp);
1130aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
1131558b8e00SMatthew Dillon 	}
1132558b8e00SMatthew Dillon     }
1133797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1134558b8e00SMatthew Dillon     return (error);
1135558b8e00SMatthew Dillon }
1136558b8e00SMatthew Dillon 
1137b2f7ec6cSMatthew Dillon /*
1138b2f7ec6cSMatthew Dillon  * Journal vop_symlink { a_ncp, a_vpp, a_cred, a_vap, a_target }
1139b2f7ec6cSMatthew Dillon  */
1140558b8e00SMatthew Dillon static
1141558b8e00SMatthew Dillon int
1142558b8e00SMatthew Dillon journal_nsymlink(struct vop_nsymlink_args *ap)
1143558b8e00SMatthew Dillon {
114426e603edSMatthew Dillon     struct jrecord_list jreclist;
114526e603edSMatthew Dillon     struct jrecord jreccache;
114626e603edSMatthew Dillon     struct jrecord *jrec;
1147558b8e00SMatthew Dillon     struct mount *mp;
1148aa159335SMatthew Dillon     void *save;
1149558b8e00SMatthew Dillon     int error;
1150558b8e00SMatthew Dillon 
115166a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
115226e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_SYMLINK);
115326e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1154558b8e00SMatthew Dillon     if (error == 0) {
1155797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
115626e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
115726e603edSMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_ncp);
1158aa159335SMatthew Dillon 	    save = jrecord_push(jrec, JTYPE_REDO);
115926e603edSMatthew Dillon 	    jrecord_leaf(jrec, JLEAF_SYMLINKDATA,
1160b2f7ec6cSMatthew Dillon 			ap->a_target, strlen(ap->a_target));
1161aa159335SMatthew Dillon 	    jrecord_pop(jrec, save);
1162b2f7ec6cSMatthew Dillon 	    if (*ap->a_vpp)
116326e603edSMatthew Dillon 		jrecord_write_vnode_ref(jrec, *ap->a_vpp);
1164558b8e00SMatthew Dillon 	}
1165558b8e00SMatthew Dillon     }
1166797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1167558b8e00SMatthew Dillon     return (error);
1168558b8e00SMatthew Dillon }
1169558b8e00SMatthew Dillon 
1170b2f7ec6cSMatthew Dillon /*
1171b2f7ec6cSMatthew Dillon  * Journal vop_nwhiteout { a_ncp, a_cred, a_flags }
1172b2f7ec6cSMatthew Dillon  */
1173558b8e00SMatthew Dillon static
1174558b8e00SMatthew Dillon int
1175558b8e00SMatthew Dillon journal_nwhiteout(struct vop_nwhiteout_args *ap)
1176558b8e00SMatthew Dillon {
117726e603edSMatthew Dillon     struct jrecord_list jreclist;
117826e603edSMatthew Dillon     struct jrecord jreccache;
117926e603edSMatthew Dillon     struct jrecord *jrec;
1180558b8e00SMatthew Dillon     struct mount *mp;
1181558b8e00SMatthew Dillon     int error;
1182558b8e00SMatthew Dillon 
118366a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
118426e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_WHITEOUT);
118526e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1186558b8e00SMatthew Dillon     if (error == 0) {
1187797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
118826e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
118926e603edSMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_ncp);
1190558b8e00SMatthew Dillon 	}
1191558b8e00SMatthew Dillon     }
1192797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1193558b8e00SMatthew Dillon     return (error);
1194558b8e00SMatthew Dillon }
1195558b8e00SMatthew Dillon 
1196b2f7ec6cSMatthew Dillon /*
1197b2f7ec6cSMatthew Dillon  * Journal vop_nremove { a_ncp, a_cred }
1198b2f7ec6cSMatthew Dillon  */
1199558b8e00SMatthew Dillon static
1200558b8e00SMatthew Dillon int
1201558b8e00SMatthew Dillon journal_nremove(struct vop_nremove_args *ap)
1202558b8e00SMatthew Dillon {
120326e603edSMatthew Dillon     struct jrecord_list jreclist;
120426e603edSMatthew Dillon     struct jrecord jreccache;
120526e603edSMatthew Dillon     struct jrecord *jrec;
1206558b8e00SMatthew Dillon     struct mount *mp;
1207558b8e00SMatthew Dillon     int error;
1208558b8e00SMatthew Dillon 
120966a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
121026e603edSMatthew Dillon     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_REMOVE) &&
121126e603edSMatthew Dillon 	ap->a_ncp->nc_vp
121226e603edSMatthew Dillon     ) {
121326e603edSMatthew Dillon 	jreclist_undo_file(&jreclist, ap->a_ncp->nc_vp,
121426e603edSMatthew Dillon 			   JRUNDO_ALL|JRUNDO_GETVP|JRUNDO_CONDLINK, 0, -1);
121526e603edSMatthew Dillon     }
121626e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1217558b8e00SMatthew Dillon     if (error == 0) {
1218797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
121926e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
122026e603edSMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_ncp);
1221558b8e00SMatthew Dillon 	}
1222558b8e00SMatthew Dillon     }
1223797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1224558b8e00SMatthew Dillon     return (error);
1225558b8e00SMatthew Dillon }
12262281065eSMatthew Dillon 
1227b2f7ec6cSMatthew Dillon /*
1228b2f7ec6cSMatthew Dillon  * Journal vop_nmkdir { a_ncp, a_vpp, a_cred, a_vap }
1229b2f7ec6cSMatthew Dillon  */
12302281065eSMatthew Dillon static
12312281065eSMatthew Dillon int
12322281065eSMatthew Dillon journal_nmkdir(struct vop_nmkdir_args *ap)
12332281065eSMatthew Dillon {
123426e603edSMatthew Dillon     struct jrecord_list jreclist;
123526e603edSMatthew Dillon     struct jrecord jreccache;
123626e603edSMatthew Dillon     struct jrecord *jrec;
123782eaef15SMatthew Dillon     struct mount *mp;
12382281065eSMatthew Dillon     int error;
12392281065eSMatthew Dillon 
124066a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
124126e603edSMatthew Dillon     jreclist_init(mp, &jreclist, &jreccache, JTYPE_MKDIR);
124226e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
124382eaef15SMatthew Dillon     if (error == 0) {
1244797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
124582eaef15SMatthew Dillon #if 0
124682eaef15SMatthew Dillon 	    if (jo->flags & MC_JOURNAL_WANT_AUDIT) {
124726e603edSMatthew Dillon 		jrecord_write_audit(jrec);
124882eaef15SMatthew Dillon 	    }
124982eaef15SMatthew Dillon #endif
125026e603edSMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_ncp);
125126e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
125226e603edSMatthew Dillon 	    jrecord_write_vattr(jrec, ap->a_vap);
125326e603edSMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_ncp);
1254b2f7ec6cSMatthew Dillon 	    if (*ap->a_vpp)
125526e603edSMatthew Dillon 		jrecord_write_vnode_ref(jrec, *ap->a_vpp);
125682eaef15SMatthew Dillon 	}
125782eaef15SMatthew Dillon     }
1258797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
12592281065eSMatthew Dillon     return (error);
12602281065eSMatthew Dillon }
12612281065eSMatthew Dillon 
1262b2f7ec6cSMatthew Dillon /*
1263b2f7ec6cSMatthew Dillon  * Journal vop_nrmdir { a_ncp, a_cred }
1264b2f7ec6cSMatthew Dillon  */
1265558b8e00SMatthew Dillon static
1266558b8e00SMatthew Dillon int
1267558b8e00SMatthew Dillon journal_nrmdir(struct vop_nrmdir_args *ap)
1268558b8e00SMatthew Dillon {
126926e603edSMatthew Dillon     struct jrecord_list jreclist;
127026e603edSMatthew Dillon     struct jrecord jreccache;
127126e603edSMatthew Dillon     struct jrecord *jrec;
1272558b8e00SMatthew Dillon     struct mount *mp;
1273558b8e00SMatthew Dillon     int error;
1274558b8e00SMatthew Dillon 
127566a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
127626e603edSMatthew Dillon     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_RMDIR)) {
127726e603edSMatthew Dillon 	jreclist_undo_file(&jreclist, ap->a_ncp->nc_vp,
127826e603edSMatthew Dillon 			   JRUNDO_VATTR|JRUNDO_GETVP, 0, 0);
127926e603edSMatthew Dillon     }
128026e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1281558b8e00SMatthew Dillon     if (error == 0) {
1282797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
128326e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
128426e603edSMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_ncp);
1285558b8e00SMatthew Dillon 	}
1286558b8e00SMatthew Dillon     }
1287797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1288558b8e00SMatthew Dillon     return (error);
1289558b8e00SMatthew Dillon }
1290558b8e00SMatthew Dillon 
1291b2f7ec6cSMatthew Dillon /*
1292b2f7ec6cSMatthew Dillon  * Journal vop_nrename { a_fncp, a_tncp, a_cred }
1293b2f7ec6cSMatthew Dillon  */
1294558b8e00SMatthew Dillon static
1295558b8e00SMatthew Dillon int
1296558b8e00SMatthew Dillon journal_nrename(struct vop_nrename_args *ap)
1297558b8e00SMatthew Dillon {
129826e603edSMatthew Dillon     struct jrecord_list jreclist;
129926e603edSMatthew Dillon     struct jrecord jreccache;
130026e603edSMatthew Dillon     struct jrecord *jrec;
1301558b8e00SMatthew Dillon     struct mount *mp;
1302558b8e00SMatthew Dillon     int error;
1303558b8e00SMatthew Dillon 
130466a1ddf5SMatthew Dillon     mp = ap->a_head.a_ops->head.vv_mount;
130526e603edSMatthew Dillon     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_RENAME) &&
130626e603edSMatthew Dillon 	ap->a_tncp->nc_vp
130726e603edSMatthew Dillon     ) {
130826e603edSMatthew Dillon 	jreclist_undo_file(&jreclist, ap->a_tncp->nc_vp,
130926e603edSMatthew Dillon 			   JRUNDO_ALL|JRUNDO_GETVP|JRUNDO_CONDLINK, 0, -1);
131026e603edSMatthew Dillon     }
131126e603edSMatthew Dillon     error = vop_journal_operate_ap(&ap->a_head);
1312558b8e00SMatthew Dillon     if (error == 0) {
1313797e4fe9SMatthew Dillon 	TAILQ_FOREACH(jrec, &jreclist.list, user_entry) {
131426e603edSMatthew Dillon 	    jrecord_write_cred(jrec, NULL, ap->a_cred);
131526e603edSMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH1, ap->a_fncp);
131626e603edSMatthew Dillon 	    jrecord_write_path(jrec, JLEAF_PATH2, ap->a_tncp);
1317558b8e00SMatthew Dillon 	}
1318558b8e00SMatthew Dillon     }
1319797e4fe9SMatthew Dillon     jreclist_done(mp, &jreclist, error);
1320558b8e00SMatthew Dillon     return (error);
1321558b8e00SMatthew Dillon }
1322558b8e00SMatthew Dillon 
1323