xref: /dragonfly/sys/vfs/hammer/hammer_vnops.c (revision 70705abf)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.3 2007/11/19 00:53:40 dillon Exp $
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/namecache.h>
42 #include <sys/vnode.h>
43 #include <sys/lockf.h>
44 #include <sys/event.h>
45 #include <sys/stat.h>
46 #include "hammer.h"
47 
48 /*
 * HAMMER VNOPS
50  */
51 /*static int hammer_vop_vnoperate(struct vop_generic_args *);*/
52 static int hammer_vop_fsync(struct vop_fsync_args *);
53 static int hammer_vop_read(struct vop_read_args *);
54 static int hammer_vop_write(struct vop_write_args *);
55 static int hammer_vop_access(struct vop_access_args *);
56 static int hammer_vop_advlock(struct vop_advlock_args *);
57 static int hammer_vop_close(struct vop_close_args *);
58 static int hammer_vop_ncreate(struct vop_ncreate_args *);
59 static int hammer_vop_getattr(struct vop_getattr_args *);
60 static int hammer_vop_nresolve(struct vop_nresolve_args *);
61 static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *);
62 static int hammer_vop_nlink(struct vop_nlink_args *);
63 static int hammer_vop_nmkdir(struct vop_nmkdir_args *);
64 static int hammer_vop_nmknod(struct vop_nmknod_args *);
65 static int hammer_vop_open(struct vop_open_args *);
66 static int hammer_vop_pathconf(struct vop_pathconf_args *);
67 static int hammer_vop_print(struct vop_print_args *);
68 static int hammer_vop_readdir(struct vop_readdir_args *);
69 static int hammer_vop_readlink(struct vop_readlink_args *);
70 static int hammer_vop_nremove(struct vop_nremove_args *);
71 static int hammer_vop_nrename(struct vop_nrename_args *);
72 static int hammer_vop_nrmdir(struct vop_nrmdir_args *);
73 static int hammer_vop_setattr(struct vop_setattr_args *);
74 static int hammer_vop_strategy(struct vop_strategy_args *);
75 static int hammer_vop_nsymlink(struct vop_nsymlink_args *);
76 static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *);
77 
78 struct vop_ops hammer_vnode_vops = {
79 	.vop_default =		vop_defaultop,
80 	.vop_fsync =		hammer_vop_fsync,
81 	.vop_read =		hammer_vop_read,
82 	.vop_write =		hammer_vop_write,
83 	.vop_access =		hammer_vop_access,
84 	.vop_advlock =		hammer_vop_advlock,
85 	.vop_close =		hammer_vop_close,
86 	.vop_ncreate =		hammer_vop_ncreate,
87 	.vop_getattr =		hammer_vop_getattr,
88 	.vop_inactive =		hammer_vop_inactive,
89 	.vop_reclaim =		hammer_vop_reclaim,
90 	.vop_nresolve =		hammer_vop_nresolve,
91 	.vop_nlookupdotdot =	hammer_vop_nlookupdotdot,
92 	.vop_nlink =		hammer_vop_nlink,
93 	.vop_nmkdir =		hammer_vop_nmkdir,
94 	.vop_nmknod =		hammer_vop_nmknod,
95 	.vop_open =		hammer_vop_open,
96 	.vop_pathconf =		hammer_vop_pathconf,
97 	.vop_print =		hammer_vop_print,
98 	.vop_readdir =		hammer_vop_readdir,
99 	.vop_readlink =		hammer_vop_readlink,
100 	.vop_nremove =		hammer_vop_nremove,
101 	.vop_nrename =		hammer_vop_nrename,
102 	.vop_nrmdir =		hammer_vop_nrmdir,
103 	.vop_setattr =		hammer_vop_setattr,
104 	.vop_strategy =		hammer_vop_strategy,
105 	.vop_nsymlink =		hammer_vop_nsymlink,
106 	.vop_nwhiteout =	hammer_vop_nwhiteout
107 };
108 
109 static int hammer_dounlink(struct nchandle *nch, struct vnode *dvp,
110 			   struct ucred *cred, int flags);
111 static int hammer_vop_strategy_read(struct vop_strategy_args *ap);
112 static int hammer_vop_strategy_write(struct vop_strategy_args *ap);
113 
#if 0
/*
 * Generic vnode-operation entry point (currently compiled out).
 *
 * Hands the generic argument block to VOCALL() together with the HAMMER
 * vop_ops vector and returns whatever VOCALL() returns.  The parameter
 * must be named 'ap' because the body refers to it.
 */
static
int
hammer_vop_vnoperate(struct vop_generic_args *ap)
{
	return (VOCALL(&hammer_vnode_vops, ap));
}
#endif
122 
/*
 * hammer_vop_fsync { vp, waitfor }
 *
 * Placeholder: no work is performed and EOPNOTSUPP is always returned.
 */
static int
hammer_vop_fsync(struct vop_fsync_args *ap)
{
	return (EOPNOTSUPP);
}
132 
133 /*
134  * hammer_vop_read { vp, uio, ioflag, cred }
135  */
136 static
137 int
138 hammer_vop_read(struct vop_read_args *ap)
139 {
140 	struct hammer_transaction trans;
141 	struct hammer_inode *ip;
142 	off_t offset;
143 	struct buf *bp;
144 	struct uio *uio;
145 	int error;
146 	int n;
147 	int seqcount;
148 
149 	if (ap->a_vp->v_type != VREG)
150 		return (EINVAL);
151 	ip = VTOI(ap->a_vp);
152 	error = 0;
153 	seqcount = ap->a_ioflag >> 16;
154 
155 	hammer_start_transaction(&trans, ip->hmp);
156 
157 	/*
158 	 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
159 	 */
160 	uio = ap->a_uio;
161 	while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_rec.ino_size) {
162 		offset = uio->uio_offset & HAMMER_BUFMASK;
163 		error = cluster_read(ap->a_vp, ip->ino_rec.ino_size,
164 				     uio->uio_offset - offset, HAMMER_BUFSIZE,
165 				     MAXBSIZE, seqcount, &bp);
166 		if (error) {
167 			brelse(bp);
168 			break;
169 		}
170 		bp->b_flags |= B_CLUSTEROK;
171 		n = HAMMER_BUFSIZE - offset;
172 		if (n > uio->uio_resid)
173 			n = uio->uio_resid;
174 		if (n > ip->ino_rec.ino_size - uio->uio_offset)
175 			n = (int)(ip->ino_rec.ino_size - uio->uio_offset);
176 		error = uiomove((char *)bp->b_data + offset, n, uio);
177 		if (error) {
178 			bqrelse(bp);
179 			break;
180 		}
181 		ip->ino_rec.ino_atime = trans.tid;
182 		hammer_modify_inode(&trans, ip, HAMMER_INODE_ITIMES);
183 		bqrelse(bp);
184 	}
185 	hammer_commit_transaction(&trans);
186 	return (error);
187 }
188 
189 /*
190  * hammer_vop_write { vp, uio, ioflag, cred }
191  */
192 static
193 int
194 hammer_vop_write(struct vop_write_args *ap)
195 {
196 	struct hammer_transaction trans;
197 	struct hammer_inode *ip;
198 	struct uio *uio;
199 	off_t offset;
200 	struct buf *bp;
201 	int error;
202 	int n;
203 
204 	if (ap->a_vp->v_type != VREG)
205 		return (EINVAL);
206 	ip = VTOI(ap->a_vp);
207 	error = 0;
208 
209 	/*
210 	 * Create a transaction to cover the operations we perform.
211 	 */
212 	hammer_start_transaction(&trans, ip->hmp);
213 	uio = ap->a_uio;
214 
215 	/*
216 	 * Check append mode
217 	 */
218 	if (ap->a_ioflag & IO_APPEND)
219 		uio->uio_offset = ip->ino_rec.ino_size;
220 
221 	/*
222 	 * Check for illegal write offsets.  Valid range is 0...2^63-1
223 	 */
224 	if (uio->uio_offset < 0 || uio->uio_offset + uio->uio_resid <= 0)
225 		return (EFBIG);
226 
227 	/*
228 	 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
229 	 */
230 	while (uio->uio_resid > 0) {
231 		offset = uio->uio_offset & HAMMER_BUFMASK;
232 		if (offset == 0 && uio->uio_resid >= HAMMER_BUFSIZE) {
233 			bp = getblk(ap->a_vp, uio->uio_offset, HAMMER_BUFSIZE,
234 				    0, 0);
235 		} else if (offset == 0 && uio->uio_offset >= ip->ino_rec.ino_size) {
236 			bp = getblk(ap->a_vp, uio->uio_offset, HAMMER_BUFSIZE,
237 				    0, 0);
238 			vfs_bio_clrbuf(bp);
239 		} else {
240 			error = bread(ap->a_vp, uio->uio_offset - offset,
241 				      HAMMER_BUFSIZE, &bp);
242 			if (error) {
243 				brelse(bp);
244 				break;
245 			}
246 		}
247 		n = HAMMER_BUFSIZE - offset;
248 		if (n > uio->uio_resid)
249 			n = uio->uio_resid;
250 		error = uiomove((char *)bp->b_data + offset, n, uio);
251 		if (error) {
252 			brelse(bp);
253 			break;
254 		}
255 		bp->b_flags |= B_CLUSTEROK;
256 		if (ip->ino_rec.ino_size < uio->uio_offset) {
257 			ip->ino_rec.ino_size = uio->uio_offset;
258 			ip->ino_rec.ino_mtime = trans.tid;
259 			hammer_modify_inode(&trans, ip,
260 				HAMMER_INODE_RDIRTY | HAMMER_INODE_ITIMES);
261 		}
262 		if (ap->a_ioflag & IO_SYNC) {
263 			bwrite(bp);
264 		} else if (ap->a_ioflag & IO_DIRECT) {
265 			bawrite(bp);
266 		} else {
267 			bdwrite(bp);
268 		}
269 	}
270 	if (error)
271 		hammer_abort_transaction(&trans);
272 	else
273 		hammer_commit_transaction(&trans);
274 	return (error);
275 }
276 
277 /*
278  * hammer_vop_access { vp, mode, cred }
279  */
280 static
281 int
282 hammer_vop_access(struct vop_access_args *ap)
283 {
284 	struct hammer_inode *ip = VTOI(ap->a_vp);
285 	uid_t uid;
286 	gid_t gid;
287 	int error;
288 
289 	uid = hammer_to_unix_xid(&ip->ino_data.uid);
290 	gid = hammer_to_unix_xid(&ip->ino_data.gid);
291 
292 	error = vop_helper_access(ap, uid, gid, ip->ino_data.mode,
293 				  ip->ino_data.uflags);
294 	return (error);
295 }
296 
297 /*
298  * hammer_vop_advlock { vp, id, op, fl, flags }
299  */
300 static
301 int
302 hammer_vop_advlock(struct vop_advlock_args *ap)
303 {
304 	struct hammer_inode *ip = VTOI(ap->a_vp);
305 
306 	return (lf_advlock(ap, &ip->advlock, ip->ino_rec.ino_size));
307 }
308 
/*
 * hammer_vop_close { vp, fflag }
 *
 * Placeholder: no work is performed and EOPNOTSUPP is always returned.
 */
static int
hammer_vop_close(struct vop_close_args *ap)
{
	return (EOPNOTSUPP);
}
318 
319 /*
320  * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
321  *
322  * The operating system has already ensured that the directory entry
323  * does not exist and done all appropriate namespace locking.
324  */
325 static
326 int
327 hammer_vop_ncreate(struct vop_ncreate_args *ap)
328 {
329 	struct hammer_transaction trans;
330 	struct hammer_inode *dip;
331 	struct hammer_inode *nip;
332 	struct nchandle *nch;
333 	int error;
334 
335 	nch = ap->a_nch;
336 	dip = VTOI(ap->a_dvp);
337 
338 	/*
339 	 * Create a transaction to cover the operations we perform.
340 	 */
341 	hammer_start_transaction(&trans, dip->hmp);
342 
343 	/*
344 	 * Create a new filesystem object of the requested type.  The
345 	 * returned inode will be referenceds but not locked.
346 	 */
347 
348 	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
349 	if (error) {
350 		hammer_abort_transaction(&trans);
351 		*ap->a_vpp = NULL;
352 		return (error);
353 	}
354 
355 	/*
356 	 * Add the new filesystem object to the directory.  This will also
357 	 * bump the inode's link count.
358 	 */
359 	error = hammer_add_directory(&trans, dip, nch->ncp, nip);
360 
361 	/*
362 	 * Finish up.
363 	 */
364 	if (error) {
365 		hammer_rel_inode(nip);
366 		hammer_abort_transaction(&trans);
367 		*ap->a_vpp = NULL;
368 	} else {
369 		hammer_commit_transaction(&trans);
370 		error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
371 		hammer_rel_inode(nip);
372 	}
373 	return (error);
374 }
375 
376 /*
377  * hammer_vop_getattr { vp, vap }
378  */
379 static
380 int
381 hammer_vop_getattr(struct vop_getattr_args *ap)
382 {
383 	struct hammer_inode *ip = VTOI(ap->a_vp);
384 	struct vattr *vap = ap->a_vap;
385 
386 #if 0
387 	if (cache_check_fsmid_vp(ap->a_vp, &ip->fsmid) &&
388 	    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0 &&
389 	    ip->obj_asof == 0
390 	) {
391 		/* LAZYMOD XXX */
392 	}
393 	hammer_itimes(ap->a_vp);
394 #endif
395 
396 	vap->va_fsid = ip->hmp->fsid_udev;
397 	vap->va_fileid = ip->ino_rec.base.base.obj_id;
398 	vap->va_mode = ip->ino_data.mode;
399 	vap->va_nlink = ip->ino_rec.ino_nlinks;
400 	vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid);
401 	vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid);
402 	vap->va_rmajor = 0;
403 	vap->va_rminor = 0;
404 	vap->va_size = ip->ino_rec.ino_size;
405 	hammer_to_timespec(ip->ino_rec.ino_atime, &vap->va_atime);
406 	hammer_to_timespec(ip->ino_rec.ino_mtime, &vap->va_mtime);
407 	hammer_to_timespec(ip->ino_data.ctime, &vap->va_ctime);
408 	vap->va_flags = ip->ino_data.uflags;
409 	vap->va_gen = 1;	/* hammer inums are unique for all time */
410 	vap->va_blocksize = 32768; /* XXX - extract from root volume */
411 	vap->va_bytes = ip->ino_rec.ino_size;
412 	vap->va_type = hammer_get_vnode_type(ip->ino_rec.base.base.obj_type);
413 	vap->va_filerev = 0; 	/* XXX */
414 	/* mtime uniquely identifies any adjustments made to the file */
415 	vap->va_fsmid = ip->ino_rec.ino_mtime;
416 	vap->va_uid_uuid = ip->ino_data.uid;
417 	vap->va_gid_uuid = ip->ino_data.gid;
418 	vap->va_fsid_uuid = ip->hmp->fsid;
419 	vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
420 			  VA_FSID_UUID_VALID;
421 	return(0);
422 }
423 
424 /*
425  * hammer_vop_nresolve { nch, dvp, cred }
426  *
427  * Locate the requested directory entry.
428  */
429 static
430 int
431 hammer_vop_nresolve(struct vop_nresolve_args *ap)
432 {
433 	struct namecache *ncp;
434 	struct hammer_inode *dip;
435 	struct hammer_cursor cursor;
436 	union hammer_record_ondisk *rec;
437 	struct vnode *vp;
438 	int64_t namekey;
439 	int error;
440 
441 	/*
442 	 * Calculate the namekey and setup the key range for the scan.  This
443 	 * works kinda like a chained hash table where the lower 32 bits
444 	 * of the namekey synthesize the chain.
445 	 *
446 	 * The key range is inclusive of both key_beg and key_end.
447 	 */
448 	dip = VTOI(ap->a_dvp);
449 	ncp = ap->a_nch->ncp;
450 	namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
451 
452 	hammer_init_cursor_ip(&cursor, dip);
453         cursor.key_beg.obj_id = dip->obj_id;
454 	cursor.key_beg.key = namekey;
455         cursor.key_beg.create_tid = dip->obj_asof;
456         cursor.key_beg.delete_tid = 0;
457         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
458         cursor.key_beg.obj_type = 0;
459 
460 	cursor.key_end = cursor.key_beg;
461 	cursor.key_end.key |= 0xFFFFFFFFULL;
462 
463 	/*
464 	 * Scan all matching records (the chain), locate the one matching
465 	 * the requested path component.  info->last_error contains the
466 	 * error code on search termination and could be 0, ENOENT, or
467 	 * something else.
468 	 *
469 	 * The hammer_ip_*() functions merge in-memory records with on-disk
470 	 * records for the purposes of the search.
471 	 */
472 	rec = hammer_ip_first(&cursor, dip);
473 	while (rec) {
474 		if (hammer_ip_resolve_data(&cursor) != 0)  /* sets last_error */
475 			break;
476 		if (ncp->nc_nlen == rec->entry.base.data_len &&
477 		    bcmp(ncp->nc_name, (void *)cursor.data, ncp->nc_nlen) == 0) {
478 			break;
479 		}
480 		rec = hammer_ip_next(&cursor);
481 	}
482 	error = cursor.last_error;
483 	if (error == 0) {
484 		error = hammer_vfs_vget(dip->hmp->mp, rec->entry.obj_id, &vp);
485 		if (error == 0) {
486 			vn_unlock(vp);
487 			cache_setvp(ap->a_nch, vp);
488 			vrele(vp);
489 		}
490 	} else if (error == ENOENT) {
491 		cache_setvp(ap->a_nch, NULL);
492 	}
493 	hammer_done_cursor(&cursor);
494 	return (error);
495 }
496 
497 /*
498  * hammer_vop_nlookupdotdot { dvp, vpp, cred }
499  *
500  * Locate the parent directory of a directory vnode.
501  *
502  * dvp is referenced but not locked.  *vpp must be returned referenced and
503  * locked.  A parent_obj_id of 0 does not necessarily indicate that we are
504  * at the root, instead it could indicate that the directory we were in was
505  * removed.
506  */
507 static
508 int
509 hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
510 {
511 	struct hammer_inode *dip;
512 	u_int64_t parent_obj_id;
513 
514 	dip = VTOI(ap->a_dvp);
515 	if ((parent_obj_id = dip->ino_data.parent_obj_id) == 0) {
516 		*ap->a_vpp = NULL;
517 		return ENOENT;
518 	}
519 	return(hammer_vfs_vget(dip->hmp->mp, parent_obj_id, ap->a_vpp));
520 }
521 
522 /*
523  * hammer_vop_nlink { nch, dvp, vp, cred }
524  */
525 static
526 int
527 hammer_vop_nlink(struct vop_nlink_args *ap)
528 {
529 	struct hammer_transaction trans;
530 	struct hammer_inode *dip;
531 	struct hammer_inode *ip;
532 	struct nchandle *nch;
533 	int error;
534 
535 	nch = ap->a_nch;
536 	dip = VTOI(ap->a_dvp);
537 	ip = VTOI(ap->a_vp);
538 
539 	/*
540 	 * Create a transaction to cover the operations we perform.
541 	 */
542 	hammer_start_transaction(&trans, dip->hmp);
543 
544 	/*
545 	 * Add the filesystem object to the directory.  Note that neither
546 	 * dip nor ip are referenced or locked, but their vnodes are
547 	 * referenced.  This function will bump the inode's link count.
548 	 */
549 	error = hammer_add_directory(&trans, dip, nch->ncp, ip);
550 
551 	/*
552 	 * Finish up.
553 	 */
554 	if (error) {
555 		hammer_abort_transaction(&trans);
556 	} else {
557 		hammer_commit_transaction(&trans);
558 	}
559 	return (error);
560 }
561 
562 /*
563  * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap }
564  *
565  * The operating system has already ensured that the directory entry
566  * does not exist and done all appropriate namespace locking.
567  */
568 static
569 int
570 hammer_vop_nmkdir(struct vop_nmkdir_args *ap)
571 {
572 	struct hammer_transaction trans;
573 	struct hammer_inode *dip;
574 	struct hammer_inode *nip;
575 	struct nchandle *nch;
576 	int error;
577 
578 	nch = ap->a_nch;
579 	dip = VTOI(ap->a_dvp);
580 
581 	/*
582 	 * Create a transaction to cover the operations we perform.
583 	 */
584 	hammer_start_transaction(&trans, dip->hmp);
585 
586 	/*
587 	 * Create a new filesystem object of the requested type.  The
588 	 * returned inode will be referenced but not locked.
589 	 */
590 	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
591 	if (error) {
592 		hammer_abort_transaction(&trans);
593 		*ap->a_vpp = NULL;
594 		return (error);
595 	}
596 
597 	/*
598 	 * Add the new filesystem object to the directory.  This will also
599 	 * bump the inode's link count.
600 	 */
601 	error = hammer_add_directory(&trans, dip, nch->ncp, nip);
602 
603 	/*
604 	 * Finish up.
605 	 */
606 	if (error) {
607 		hammer_rel_inode(nip);
608 		hammer_abort_transaction(&trans);
609 		*ap->a_vpp = NULL;
610 	} else {
611 		hammer_commit_transaction(&trans);
612 		error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
613 		hammer_rel_inode(nip);
614 	}
615 	return (error);
616 }
617 
618 /*
619  * hammer_vop_nmknod { nch, dvp, vpp, cred, vap }
620  *
621  * The operating system has already ensured that the directory entry
622  * does not exist and done all appropriate namespace locking.
623  */
624 static
625 int
626 hammer_vop_nmknod(struct vop_nmknod_args *ap)
627 {
628 	struct hammer_transaction trans;
629 	struct hammer_inode *dip;
630 	struct hammer_inode *nip;
631 	struct nchandle *nch;
632 	int error;
633 
634 	nch = ap->a_nch;
635 	dip = VTOI(ap->a_dvp);
636 
637 	/*
638 	 * Create a transaction to cover the operations we perform.
639 	 */
640 	hammer_start_transaction(&trans, dip->hmp);
641 
642 	/*
643 	 * Create a new filesystem object of the requested type.  The
644 	 * returned inode will be referenced but not locked.
645 	 */
646 	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
647 	if (error) {
648 		hammer_abort_transaction(&trans);
649 		*ap->a_vpp = NULL;
650 		return (error);
651 	}
652 
653 	/*
654 	 * Add the new filesystem object to the directory.  This will also
655 	 * bump the inode's link count.
656 	 */
657 	error = hammer_add_directory(&trans, dip, nch->ncp, nip);
658 
659 	/*
660 	 * Finish up.
661 	 */
662 	if (error) {
663 		hammer_rel_inode(nip);
664 		hammer_abort_transaction(&trans);
665 		*ap->a_vpp = NULL;
666 	} else {
667 		hammer_commit_transaction(&trans);
668 		error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
669 		hammer_rel_inode(nip);
670 	}
671 	return (error);
672 }
673 
/*
 * hammer_vop_open { vp, mode, cred, fp }
 *
 * Placeholder: no work is performed and EOPNOTSUPP is always returned.
 */
static int
hammer_vop_open(struct vop_open_args *ap)
{
	return (EOPNOTSUPP);
}
683 
/*
 * hammer_vop_pathconf { vp, name, retval }
 *
 * Placeholder: no work is performed and EOPNOTSUPP is always returned.
 */
static int
hammer_vop_pathconf(struct vop_pathconf_args *ap)
{
	return (EOPNOTSUPP);
}
693 
/*
 * hammer_vop_print { vp }
 *
 * Placeholder: no work is performed and EOPNOTSUPP is always returned.
 */
static int
hammer_vop_print(struct vop_print_args *ap)
{
	return (EOPNOTSUPP);
}
703 
/*
 * hammer_vop_readdir { vp, uio, cred, *eofflag }
 *
 * Placeholder: no work is performed and EOPNOTSUPP is always returned.
 */
static int
hammer_vop_readdir(struct vop_readdir_args *ap)
{
	return (EOPNOTSUPP);
}
713 
/*
 * hammer_vop_readlink { vp, uio, cred }
 *
 * Placeholder: no work is performed and EOPNOTSUPP is always returned.
 */
static int
hammer_vop_readlink(struct vop_readlink_args *ap)
{
	return (EOPNOTSUPP);
}
723 
724 /*
725  * hammer_vop_nremove { nch, dvp, cred }
726  */
727 static
728 int
729 hammer_vop_nremove(struct vop_nremove_args *ap)
730 {
731 	return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, 0));
732 }
733 
734 /*
735  * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
736  */
737 static
738 int
739 hammer_vop_nrename(struct vop_nrename_args *ap)
740 {
741 	struct hammer_transaction trans;
742 	struct namecache *fncp;
743 	struct namecache *tncp;
744 	struct hammer_inode *fdip;
745 	struct hammer_inode *tdip;
746 	struct hammer_inode *ip;
747 	struct hammer_cursor cursor;
748 	union hammer_record_ondisk *rec;
749 	int64_t namekey;
750 	int error;
751 
752 	fdip = VTOI(ap->a_fdvp);
753 	tdip = VTOI(ap->a_tdvp);
754 	fncp = ap->a_fnch->ncp;
755 	tncp = ap->a_tnch->ncp;
756 	hammer_start_transaction(&trans, fdip->hmp);
757 
758 	/*
759 	 * Extract the hammer_inode from fncp and add link to the target
760 	 * directory.
761 	 */
762 	ip = VTOI(fncp->nc_vp);
763 	KKASSERT(ip != NULL);
764 
765 	error = hammer_add_directory(&trans, tdip, tncp, ip);
766 
767 	/*
768 	 * Locate the record in the originating directory and remove it.
769 	 *
770 	 * Calculate the namekey and setup the key range for the scan.  This
771 	 * works kinda like a chained hash table where the lower 32 bits
772 	 * of the namekey synthesize the chain.
773 	 *
774 	 * The key range is inclusive of both key_beg and key_end.
775 	 */
776 	namekey = hammer_directory_namekey(fncp->nc_name, fncp->nc_nlen);
777 
778 	hammer_init_cursor_ip(&cursor, fdip);
779         cursor.key_beg.obj_id = fdip->obj_id;
780 	cursor.key_beg.key = namekey;
781         cursor.key_beg.create_tid = fdip->obj_asof;
782         cursor.key_beg.delete_tid = 0;
783         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
784         cursor.key_beg.obj_type = 0;
785 
786 	cursor.key_end = cursor.key_beg;
787 	cursor.key_end.key |= 0xFFFFFFFFULL;
788 
789 	/*
790 	 * Scan all matching records (the chain), locate the one matching
791 	 * the requested path component.  info->last_error contains the
792 	 * error code on search termination and could be 0, ENOENT, or
793 	 * something else.
794 	 *
795 	 * The hammer_ip_*() functions merge in-memory records with on-disk
796 	 * records for the purposes of the search.
797 	 */
798 	rec = hammer_ip_first(&cursor, fdip);
799 	while (rec) {
800 		if (hammer_ip_resolve_data(&cursor) != 0)
801 			break;
802 		if (fncp->nc_nlen == rec->entry.base.data_len &&
803 		    bcmp(fncp->nc_name, cursor.data, fncp->nc_nlen) == 0) {
804 			break;
805 		}
806 		rec = hammer_ip_next(&cursor);
807 	}
808 	error = cursor.last_error;
809 
810 	/*
811 	 * If all is ok we have to get the inode so we can adjust nlinks.
812 	 */
813 	if (error)
814 		goto done;
815 	error = hammer_del_directory(&trans, &cursor, fdip, ip);
816 	if (error == 0) {
817 		cache_rename(ap->a_fnch, ap->a_tnch);
818 		cache_setvp(ap->a_tnch, ip->vp);
819 	}
820 done:
821 	if (error == 0) {
822 		hammer_commit_transaction(&trans);
823 	} else {
824 		hammer_abort_transaction(&trans);
825 	}
826         hammer_done_cursor(&cursor);
827 	return (error);
828 }
829 
830 /*
831  * hammer_vop_nrmdir { nch, dvp, cred }
832  */
833 static
834 int
835 hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
836 {
837 	/* XXX check that directory is empty */
838 
839 	return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, 0));
840 }
841 
842 /*
843  * hammer_vop_setattr { vp, vap, cred }
844  */
845 static
846 int
847 hammer_vop_setattr(struct vop_setattr_args *ap)
848 {
849 	struct hammer_transaction trans;
850 	struct vattr *vap;
851 	struct hammer_inode *ip;
852 	int modflags;
853 	int error;
854 	u_int32_t flags;
855 	uuid_t uuid;
856 
857 	vap = ap->a_vap;
858 	ip = ap->a_vp->v_data;
859 	modflags = 0;
860 
861 	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
862 		return(EROFS);
863 
864 	hammer_start_transaction(&trans, ip->hmp);
865 	error = 0;
866 
867 	if (vap->va_flags != VNOVAL) {
868 		flags = ip->ino_data.uflags;
869 		error = vop_helper_setattr_flags(&flags, vap->va_flags,
870 					 hammer_to_unix_xid(&ip->ino_data.uid),
871 					 ap->a_cred);
872 		if (error == 0) {
873 			if (ip->ino_data.uflags != flags) {
874 				ip->ino_data.uflags = flags;
875 				modflags |= HAMMER_INODE_DDIRTY;
876 			}
877 			if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
878 				error = 0;
879 				goto done;
880 			}
881 		}
882 		goto done;
883 	}
884 	if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
885 		error = EPERM;
886 		goto done;
887 	}
888 	if (vap->va_uid != (uid_t)VNOVAL) {
889 		hammer_guid_to_uuid(&uuid, vap->va_uid);
890 		if (bcmp(&uuid, &ip->ino_data.uid, sizeof(uuid)) == 0) {
891 			ip->ino_data.uid = uuid;
892 			modflags |= HAMMER_INODE_DDIRTY;
893 		}
894 	}
895 	if (vap->va_gid != (uid_t)VNOVAL) {
896 		hammer_guid_to_uuid(&uuid, vap->va_uid);
897 		if (bcmp(&uuid, &ip->ino_data.gid, sizeof(uuid)) == 0) {
898 			ip->ino_data.gid = uuid;
899 			modflags |= HAMMER_INODE_DDIRTY;
900 		}
901 	}
902 	if (vap->va_size != VNOVAL) {
903 		switch(ap->a_vp->v_type) {
904 		case VREG:
905 		case VDATABASE:
906 			error = hammer_delete_range(&trans, ip,
907 						    vap->va_size,
908 						    0x7FFFFFFFFFFFFFFFLL);
909 			break;
910 		default:
911 			error = EINVAL;
912 			goto done;
913 		}
914 	}
915 	if (vap->va_atime.tv_sec != VNOVAL) {
916 		ip->ino_rec.ino_atime =
917 			hammer_timespec_to_transid(&vap->va_atime);
918 		modflags |= HAMMER_INODE_ITIMES;
919 	}
920 	if (vap->va_mtime.tv_sec != VNOVAL) {
921 		ip->ino_rec.ino_mtime =
922 			hammer_timespec_to_transid(&vap->va_mtime);
923 		modflags |= HAMMER_INODE_ITIMES;
924 	}
925 	if (vap->va_mode != (mode_t)VNOVAL) {
926 		if (ip->ino_data.mode != vap->va_mode) {
927 			ip->ino_data.mode = vap->va_mode;
928 			modflags |= HAMMER_INODE_DDIRTY;
929 		}
930 	}
931 done:
932 	if (error) {
933 		hammer_abort_transaction(&trans);
934 	} else {
935 		if (modflags)
936 			hammer_modify_inode(&trans, ip, modflags);
937 		hammer_commit_transaction(&trans);
938 	}
939 	return (error);
940 }
941 
/*
 * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
 *
 * Placeholder: no work is performed and EOPNOTSUPP is always returned.
 */
static int
hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
{
	return (EOPNOTSUPP);
}
951 
952 /*
953  * hammer_vop_nwhiteout { nch, dvp, cred, flags }
954  */
955 static
956 int
957 hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap)
958 {
959 	return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, ap->a_flags));
960 }
961 
962 /*
963  * hammer_vop_strategy { vp, bio }
964  *
965  * Strategy call, used for regular file read & write only.  Note that the
966  * bp may represent a cluster.
967  *
968  * To simplify operation and allow better optimizations in the future,
969  * this code does not make any assumptions with regards to buffer alignment
970  * or size.
971  */
972 static
973 int
974 hammer_vop_strategy(struct vop_strategy_args *ap)
975 {
976 	struct buf *bp;
977 	int error;
978 
979 	bp = ap->a_bio->bio_buf;
980 
981 	switch(bp->b_cmd) {
982 	case BUF_CMD_READ:
983 		error = hammer_vop_strategy_read(ap);
984 		break;
985 	case BUF_CMD_WRITE:
986 		error = hammer_vop_strategy_write(ap);
987 		break;
988 	default:
989 		error = EINVAL;
990 		break;
991 	}
992 	bp->b_error = error;
993 	if (error)
994 		bp->b_flags |= B_ERROR;
995 	biodone(ap->a_bio);
996 	return (error);
997 }
998 
999 /*
1000  * Read from a regular file.  Iterate the related records and fill in the
1001  * BIO/BUF.  Gaps are zero-filled.
1002  *
1003  * The support code in hammer_object.c should be used to deal with mixed
1004  * in-memory and on-disk records.
1005  *
1006  * XXX atime update
1007  */
1008 static
1009 int
1010 hammer_vop_strategy_read(struct vop_strategy_args *ap)
1011 {
1012 	struct hammer_inode *ip = ap->a_vp->v_data;
1013 	struct hammer_cursor cursor;
1014 	hammer_record_ondisk_t rec;
1015 	hammer_base_elm_t base;
1016 	struct bio *bio;
1017 	struct buf *bp;
1018 	int64_t rec_offset;
1019 	int error;
1020 	int boff;
1021 	int roff;
1022 	int n;
1023 
1024 	bio = ap->a_bio;
1025 	bp = bio->bio_buf;
1026 
1027 	hammer_init_cursor_ip(&cursor, ip);
1028 
1029 	/*
1030 	 * Key range (begin and end inclusive) to scan.  Note that the key's
1031 	 * stored in the actual records represent the
1032 	 */
1033 	cursor.key_beg.obj_id = ip->obj_id;
1034 	cursor.key_beg.create_tid = ip->obj_asof;
1035 	cursor.key_beg.delete_tid = 0;
1036 	cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
1037 	cursor.key_beg.obj_type = 0;
1038 	cursor.key_beg.key = bio->bio_offset;
1039 
1040 	cursor.key_end = cursor.key_beg;
1041 	cursor.key_end.key = bio->bio_offset + bp->b_bufsize - 1;
1042 
1043 	rec = hammer_ip_first(&cursor, ip);
1044 	boff = 0;
1045 
1046 	while (rec) {
1047 		if (hammer_ip_resolve_data(&cursor) != 0)
1048 			break;
1049 		base = &rec->base.base;
1050 
1051 		rec_offset = base->key - rec->data.base.data_len;
1052 
1053 		/*
1054 		 * Zero-fill any gap
1055 		 */
1056 		n = (int)(rec_offset - (bio->bio_offset + boff));
1057 		if (n > 0) {
1058 			kprintf("zfill %d bytes\n", n);
1059 			bzero((char *)bp->b_data + boff, n);
1060 			boff += n;
1061 			n = 0;
1062 		}
1063 
1064 		/*
1065 		 * Calculate the data offset in the record and the number
1066 		 * of bytes we can copy.
1067 		 */
1068 		roff = -n;
1069 		n = rec->data.base.data_len - roff;
1070 		KKASSERT(n > 0);
1071 		if (n > bp->b_bufsize - boff)
1072 			n = bp->b_bufsize - boff;
1073 		bcopy((char *)cursor.data + roff, (char *)bp->b_data + boff, n);
1074 		boff += n;
1075 		if (boff == bp->b_bufsize)
1076 			break;
1077 		rec = hammer_ip_next(&cursor);
1078 	}
1079 	hammer_done_cursor(&cursor);
1080 
1081 	/*
1082 	 * There may have been a gap after the last record
1083 	 */
1084 	error = cursor.last_error;
1085 	if (error == ENOENT)
1086 		error = 0;
1087 	if (error == 0 && boff != bp->b_bufsize) {
1088 		bzero((char *)bp->b_data + boff, bp->b_bufsize - boff);
1089 		/* boff = bp->b_bufsize; */
1090 	}
1091 	bp->b_resid = 0;
1092 	return(error);
1093 }
1094 
1095 /*
1096  * Write to a regular file.  Iterate the related records and mark for
1097  * deletion.  If existing edge records (left and right side) overlap our
1098  * write they have to be marked deleted and new records created, usually
1099  * referencing a portion of the original data.  Then add a record to
1100  * represent the buffer.
1101  *
1102  * The support code in hammer_object.c should be used to deal with mixed
1103  * in-memory and on-disk records.
1104  */
1105 static
1106 int
1107 hammer_vop_strategy_write(struct vop_strategy_args *ap)
1108 {
1109 	struct hammer_transaction trans;
1110 	hammer_inode_t ip;
1111 	struct bio *bio;
1112 	struct buf *bp;
1113 	int error;
1114 
1115 	bio = ap->a_bio;
1116 	bp = bio->bio_buf;
1117 	ip = ap->a_vp->v_data;
1118 	hammer_start_transaction(&trans, ip->hmp);
1119 
1120 	/*
1121 	 * Delete any records overlapping our range.  This function will
1122 	 * properly
1123 	 */
1124 	error = hammer_delete_range(&trans, ip, bio->bio_offset,
1125 				    bio->bio_offset + bp->b_bufsize - 1);
1126 
1127 	/*
1128 	 * Add a single record to cover the write
1129 	 */
1130 	if (error == 0) {
1131 		error = hammer_add_data(&trans, ip, bio->bio_offset,
1132 					bp->b_data, bp->b_bufsize);
1133 	}
1134 
1135 	/*
1136 	 * If an error occured abort the transaction
1137 	 */
1138 	if (error) {
1139 		/* XXX undo deletion */
1140 		hammer_abort_transaction(&trans);
1141 		bp->b_resid = bp->b_bufsize;
1142 	} else {
1143 		hammer_commit_transaction(&trans);
1144 		bp->b_resid = 0;
1145 	}
1146 	return(error);
1147 }
1148 
1149 /*
1150  * dounlink - disconnect a directory entry
1151  *
1152  * XXX whiteout support not really in yet
1153  */
1154 static int
1155 hammer_dounlink(struct nchandle *nch, struct vnode *dvp, struct ucred *cred,
1156 		int flags)
1157 {
1158 	struct hammer_transaction trans;
1159 	struct namecache *ncp;
1160 	hammer_inode_t dip;
1161 	hammer_inode_t ip;
1162 	hammer_record_ondisk_t rec;
1163 	struct hammer_cursor cursor;
1164 	struct vnode *vp;
1165 	int64_t namekey;
1166 	int error;
1167 
1168 	/*
1169 	 * Calculate the namekey and setup the key range for the scan.  This
1170 	 * works kinda like a chained hash table where the lower 32 bits
1171 	 * of the namekey synthesize the chain.
1172 	 *
1173 	 * The key range is inclusive of both key_beg and key_end.
1174 	 */
1175 	dip = VTOI(dvp);
1176 	ncp = nch->ncp;
1177 	namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
1178 
1179 	hammer_init_cursor_ip(&cursor, dip);
1180         cursor.key_beg.obj_id = dip->obj_id;
1181 	cursor.key_beg.key = namekey;
1182         cursor.key_beg.create_tid = dip->obj_asof;
1183         cursor.key_beg.delete_tid = 0;
1184         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1185         cursor.key_beg.obj_type = 0;
1186 
1187 	cursor.key_end = cursor.key_beg;
1188 	cursor.key_end.key |= 0xFFFFFFFFULL;
1189 
1190 	hammer_start_transaction(&trans, dip->hmp);
1191 
1192 	/*
1193 	 * Scan all matching records (the chain), locate the one matching
1194 	 * the requested path component.  info->last_error contains the
1195 	 * error code on search termination and could be 0, ENOENT, or
1196 	 * something else.
1197 	 *
1198 	 * The hammer_ip_*() functions merge in-memory records with on-disk
1199 	 * records for the purposes of the search.
1200 	 */
1201 	rec = hammer_ip_first(&cursor, dip);
1202 	while (rec) {
1203 		if (hammer_ip_resolve_data(&cursor) != 0)
1204 			break;
1205 		if (ncp->nc_nlen == rec->entry.base.data_len &&
1206 		    bcmp(ncp->nc_name, cursor.data, ncp->nc_nlen) == 0) {
1207 			break;
1208 		}
1209 		rec = hammer_ip_next(&cursor);
1210 	}
1211 	error = cursor.last_error;
1212 
1213 	/*
1214 	 * If all is ok we have to get the inode so we can adjust nlinks.
1215 	 */
1216 	if (error == 0) {
1217 		ip = hammer_get_inode(dip->hmp, rec->entry.obj_id, &error);
1218 		if (error == 0)
1219 			error = hammer_del_directory(&trans, &cursor, dip, ip);
1220 		if (error == 0) {
1221 			cache_setunresolved(nch);
1222 			cache_setvp(nch, NULL);
1223 			/* XXX locking */
1224 			if (ip->vp)
1225 				cache_inval_vp(ip->vp, CINV_DESTROY);
1226 		}
1227 		hammer_rel_inode(ip);
1228 
1229 		error = hammer_vfs_vget(dip->hmp->mp, rec->entry.obj_id, &vp);
1230 		if (error == 0) {
1231 			vn_unlock(vp);
1232 			cache_setvp(nch, vp);
1233 			vrele(vp);
1234 			hammer_commit_transaction(&trans);
1235 		} else {
1236 			hammer_abort_transaction(&trans);
1237 		}
1238 	}
1239         hammer_done_cursor(&cursor);
1240 	return (error);
1241 }
1242 
1243