xref: /dragonfly/sys/vfs/hammer2/hammer2_inode.c (revision 51871435)
1 /*
2  * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 #include <sys/lock.h>
40 #include <sys/uuid.h>
41 
42 #include "hammer2.h"
43 
44 /*
45  * Adding a ref to an inode is only legal if the inode already has at least
46  * one ref.
47  */
48 void
49 hammer2_inode_ref(hammer2_inode_t *ip)
50 {
51 	hammer2_chain_ref(ip->hmp, &ip->chain);
52 }
53 
54 /*
55  * Drop an inode reference, freeing the inode when the last reference goes
56  * away.
57  */
58 void
59 hammer2_inode_drop(hammer2_inode_t *ip)
60 {
61 	hammer2_chain_drop(ip->hmp, &ip->chain);
62 }
63 
64 /*
65  * Get the vnode associated with the given inode, allocating the vnode if
66  * necessary.  The vnode will be returned exclusively locked.
67  *
68  * The caller must lock the inode (shared or exclusive).
69  *
70  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
71  * races.
72  */
73 struct vnode *
74 hammer2_igetv(hammer2_inode_t *ip, int *errorp)
75 {
76 	struct vnode *vp;
77 	hammer2_pfsmount_t *pmp;
78 	ccms_state_t ostate;
79 
80 	pmp = ip->pmp;
81 	KKASSERT(pmp != NULL);
82 	*errorp = 0;
83 
84 	for (;;) {
85 		/*
86 		 * Attempt to reuse an existing vnode assignment.  It is
87 		 * possible to race a reclaim so the vget() may fail.  The
88 		 * inode must be unlocked during the vget() to avoid a
89 		 * deadlock against a reclaim.
90 		 */
91 		vp = ip->vp;
92 		if (vp) {
93 			/*
94 			 * Inode must be unlocked during the vget() to avoid
95 			 * possible deadlocks, vnode is held to prevent
96 			 * destruction during the vget().  The vget() can
97 			 * still fail if we lost a reclaim race on the vnode.
98 			 */
99 			vhold_interlocked(vp);
100 			ccms_thread_unlock(&ip->chain.cst);
101 			if (vget(vp, LK_EXCLUSIVE)) {
102 				vdrop(vp);
103 				ccms_thread_lock(&ip->chain.cst,
104 						 CCMS_STATE_EXCLUSIVE);
105 				continue;
106 			}
107 			ccms_thread_lock(&ip->chain.cst, CCMS_STATE_EXCLUSIVE);
108 			vdrop(vp);
109 			/* vp still locked and ref from vget */
110 			if (ip->vp != vp) {
111 				kprintf("hammer2: igetv race %p/%p\n",
112 					ip->vp, vp);
113 				vput(vp);
114 				continue;
115 			}
116 			*errorp = 0;
117 			break;
118 		}
119 
120 		/*
121 		 * No vnode exists, allocate a new vnode.  Beware of
122 		 * allocation races.  This function will return an
123 		 * exclusively locked and referenced vnode.
124 		 */
125 		*errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
126 		if (*errorp) {
127 			kprintf("hammer2: igetv getnewvnode failed %d\n",
128 				*errorp);
129 			vp = NULL;
130 			break;
131 		}
132 
133 		/*
134 		 * Lock the inode and check for an allocation race.
135 		 */
136 		ostate = ccms_thread_lock_upgrade(&ip->chain.cst);
137 		if (ip->vp != NULL) {
138 			vp->v_type = VBAD;
139 			vx_put(vp);
140 			ccms_thread_lock_restore(&ip->chain.cst, ostate);
141 			continue;
142 		}
143 
144 		switch (ip->ip_data.type) {
145 		case HAMMER2_OBJTYPE_DIRECTORY:
146 			vp->v_type = VDIR;
147 			break;
148 		case HAMMER2_OBJTYPE_REGFILE:
149 			vp->v_type = VREG;
150 			vinitvmio(vp, ip->ip_data.size,
151 				  HAMMER2_LBUFSIZE,
152 				  (int)ip->ip_data.size & HAMMER2_LBUFMASK);
153 			break;
154 		case HAMMER2_OBJTYPE_SOFTLINK:
155 			/*
156 			 * XXX for now we are using the generic file_read
157 			 * and file_write code so we need a buffer cache
158 			 * association.
159 			 */
160 			vp->v_type = VLNK;
161 			vinitvmio(vp, ip->ip_data.size,
162 				  HAMMER2_LBUFSIZE,
163 				  (int)ip->ip_data.size & HAMMER2_LBUFMASK);
164 			break;
165 		/* XXX FIFO */
166 		default:
167 			panic("hammer2: unhandled objtype %d",
168 			      ip->ip_data.type);
169 			break;
170 		}
171 
172 		if (ip == pmp->iroot)
173 			vsetflags(vp, VROOT);
174 
175 		vp->v_data = ip;
176 		ip->vp = vp;
177 		hammer2_chain_ref(ip->hmp, &ip->chain);	/* vp association */
178 		ccms_thread_lock_restore(&ip->chain.cst, ostate);
179 		break;
180 	}
181 
182 	/*
183 	 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
184 	 */
185 	if (hammer2_debug & 0x0002) {
186 		kprintf("igetv vp %p refs %d aux %d\n",
187 			vp, vp->v_sysref.refcnt, vp->v_auxrefs);
188 	}
189 	return (vp);
190 }
191 
192 /*
193  * Create a new inode in the specified directory using the vattr to
194  * figure out the type of inode.
195  *
196  * If no error occurs the new inode with its chain locked is returned in
197  * *nipp, otherwise an error is returned and *nipp is set to NULL.
198  *
199  * If vap and/or cred are NULL the related fields are not set and the
200  * inode type defaults to a directory.  This is used when creating PFSs
201  * under the super-root, so the inode number is set to 1 in this case.
202  */
203 int
204 hammer2_inode_create(hammer2_inode_t *dip,
205 		     struct vattr *vap, struct ucred *cred,
206 		     const uint8_t *name, size_t name_len,
207 		     hammer2_inode_t **nipp)
208 {
209 	hammer2_mount_t *hmp = dip->hmp;
210 	hammer2_chain_t *chain;
211 	hammer2_chain_t *parent;
212 	hammer2_inode_t *nip;
213 	hammer2_key_t lhc;
214 	int error;
215 	uid_t xuid;
216 
217 	lhc = hammer2_dirhash(name, name_len);
218 
219 	/*
220 	 * Locate the inode or indirect block to create the new
221 	 * entry in.  At the same time check for key collisions
222 	 * and iterate until we don't get one.
223 	 */
224 retry:
225 	parent = &dip->chain;
226 	hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
227 
228 	error = 0;
229 	while (error == 0) {
230 		chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
231 		if (chain == NULL)
232 			break;
233 		if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
234 			error = ENOSPC;
235 		if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
236 			error = ENOSPC;
237 		hammer2_chain_unlock(hmp, chain);
238 		chain = NULL;
239 		++lhc;
240 	}
241 	if (error == 0) {
242 		chain = hammer2_chain_create(hmp, parent, NULL, lhc, 0,
243 					     HAMMER2_BREF_TYPE_INODE,
244 					     HAMMER2_INODE_BYTES,
245 					     &error);
246 	}
247 	hammer2_chain_unlock(hmp, parent);
248 
249 	/*
250 	 * Handle the error case
251 	 */
252 	if (error) {
253 		KKASSERT(chain == NULL);
254 		if (error == EAGAIN) {
255 			hammer2_chain_wait(hmp, parent);
256 			goto retry;
257 		}
258 		*nipp = NULL;
259 		return (error);
260 	}
261 
262 	/*
263 	 * Set up the new inode
264 	 */
265 	nip = chain->u.ip;
266 	*nipp = nip;
267 
268 	hammer2_voldata_lock(hmp);
269 	if (vap) {
270 		nip->ip_data.type = hammer2_get_obj_type(vap->va_type);
271 		nip->ip_data.inum = hmp->voldata.alloc_tid++;
272 		/* XXX modify/lock */
273 	} else {
274 		nip->ip_data.type = HAMMER2_OBJTYPE_DIRECTORY;
275 		nip->ip_data.inum = 1;
276 	}
277 	hammer2_voldata_unlock(hmp);
278 	nip->ip_data.version = HAMMER2_INODE_VERSION_ONE;
279 	hammer2_update_time(&nip->ip_data.ctime);
280 	nip->ip_data.mtime = nip->ip_data.ctime;
281 	if (vap)
282 		nip->ip_data.mode = vap->va_mode;
283 	nip->ip_data.nlinks = 1;
284 	if (vap) {
285 		if (dip) {
286 			xuid = hammer2_to_unix_xid(&dip->ip_data.uid);
287 			xuid = vop_helper_create_uid(dip->pmp->mp,
288 						     dip->ip_data.mode,
289 						     xuid,
290 						     cred,
291 						     &vap->va_mode);
292 		} else {
293 			xuid = 0;
294 		}
295 		if (vap->va_vaflags & VA_UID_UUID_VALID)
296 			nip->ip_data.uid = vap->va_uid_uuid;
297 		else if (vap->va_uid != (uid_t)VNOVAL)
298 			hammer2_guid_to_uuid(&nip->ip_data.uid, vap->va_uid);
299 		else
300 			hammer2_guid_to_uuid(&nip->ip_data.uid, xuid);
301 
302 		if (vap->va_vaflags & VA_GID_UUID_VALID)
303 			nip->ip_data.gid = vap->va_gid_uuid;
304 		else if (vap->va_gid != (gid_t)VNOVAL)
305 			hammer2_guid_to_uuid(&nip->ip_data.gid, vap->va_gid);
306 		else if (dip)
307 			nip->ip_data.gid = dip->ip_data.gid;
308 	}
309 
310 	/*
311 	 * Regular files and softlinks allow a small amount of data to be
312 	 * directly embedded in the inode.  This flag will be cleared if
313 	 * the size is extended past the embedded limit.
314 	 */
315 	if (nip->ip_data.type == HAMMER2_OBJTYPE_REGFILE ||
316 	    nip->ip_data.type == HAMMER2_OBJTYPE_SOFTLINK) {
317 		nip->ip_data.op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
318 	}
319 
320 	KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
321 	bcopy(name, nip->ip_data.filename, name_len);
322 	nip->ip_data.name_key = lhc;
323 	nip->ip_data.name_len = name_len;
324 
325 	return (0);
326 }
327 
328 /*
329  * Duplicate the specified existing inode in the specified target directory.
330  * If name is NULL the inode is duplicated as a hidden directory entry.
331  *
332  * Returns the new inode.  The old inode is left alone.
333  *
334  * XXX name needs to be NULL for now.
335  */
336 int
337 hammer2_inode_duplicate(hammer2_inode_t *dip, hammer2_inode_t *oip,
338 			hammer2_inode_t **nipp,
339 			const uint8_t *name, size_t name_len)
340 {
341 	hammer2_mount_t *hmp = dip->hmp;
342 	hammer2_inode_t *nip;
343 	hammer2_chain_t *parent;
344 	hammer2_chain_t *chain;
345 	hammer2_key_t lhc;
346 	int error;
347 
348 	if (name) {
349 		lhc = hammer2_dirhash(name, name_len);
350 	} else {
351 		lhc = oip->ip_data.inum;
352 		KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);
353 	}
354 
355 	/*
356 	 * Locate the inode or indirect block to create the new
357 	 * entry in.  At the same time check for key collisions
358 	 * and iterate until we don't get one.
359 	 */
360 	nip = NULL;
361 retry:
362 	parent = &dip->chain;
363 	hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
364 
365 	error = 0;
366 	while (error == 0) {
367 		chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
368 		if (chain == NULL)
369 			break;
370 		/* XXX bcmp name if not NULL */
371 		if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
372 			error = ENOSPC;
373 		if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0) /* shouldn't happen */
374 			error = ENOSPC;
375 		hammer2_chain_unlock(hmp, chain);
376 		chain = NULL;
377 		++lhc;
378 	}
379 
380 	/*
381 	 * Create entry in common parent directory.
382 	 */
383 	if (error == 0) {
384 		chain = hammer2_chain_create(hmp, parent, NULL, lhc, 0,
385 					     HAMMER2_BREF_TYPE_INODE, /* n/a */
386 					     HAMMER2_INODE_BYTES,     /* n/a */
387 					     &error);
388 	}
389 	hammer2_chain_unlock(hmp, parent);
390 
391 	/*
392 	 * Handle the error case
393 	 */
394 	if (error) {
395 		KKASSERT(chain == NULL);
396 		if (error == EAGAIN) {
397 			hammer2_chain_wait(hmp, parent);
398 			goto retry;
399 		}
400 		return (error);
401 	}
402 
403 	/*
404 	 * XXX This is currently a horrible hack.  Well, if we wanted to
405 	 *     duplicate a file, i.e. as in a snapshot, we definitely
406 	 *     would have to flush it first.
407 	 *
408 	 *     For hardlink target generation we can theoretically move any
409 	 *     active chain structures without flushing, but that gets really
410 	 *     iffy for code which follows chain->parent and ip->pip links.
411 	 *
412 	 * XXX only works with files.  Duplicating a directory hierarchy
413 	 *     requires a flush but doesn't deal with races post-flush.
414 	 *     Well, it would work I guess, but you might catch some files
415 	 *     mid-operation.
416 	 *
417 	 * We cannot leave oip with any in-memory chains because (for a
418 	 * hardlink), oip will become a OBJTYPE_HARDLINK which is just a
419 	 * pointer to the real hardlink's inum and can't have any sub-chains.
420 	 * XXX might be 0-ref chains left.
421 	 */
422 	hammer2_inode_lock_ex(oip);
423 	hammer2_chain_flush(hmp, &oip->chain, 0);
424 	hammer2_inode_unlock_ex(oip);
425 	/*KKASSERT(RB_EMPTY(&oip->chain.rbhead));*/
426 
427 	nip = chain->u.ip;
428 	hammer2_chain_modify(hmp, chain, 0);
429 	nip->ip_data = oip->ip_data;	/* sync media data after flush */
430 
431 	if (name) {
432 		/*
433 		 * Directory entries are inodes so if the name has changed
434 		 * we have to update the inode.
435 		 */
436 		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
437 		bcopy(name, nip->ip_data.filename, name_len);
438 		nip->ip_data.name_key = lhc;
439 		nip->ip_data.name_len = name_len;
440 	} else {
441 		/*
442 		 * Directory entries are inodes but this is a hidden hardlink
443 		 * target.  The name isn't used but to ease debugging give it
444 		 * a name after its inode number.
445 		 */
446 		ksnprintf(nip->ip_data.filename, sizeof(nip->ip_data.filename),
447 			  "0x%016jx", (intmax_t)nip->ip_data.inum);
448 		nip->ip_data.name_len = strlen(nip->ip_data.filename);
449 		nip->ip_data.name_key = lhc;
450 	}
451 	*nipp = nip;
452 
453 	return (0);
454 }
455 
456 
457 /*
458  * Connect inode (oip) to the specified directory using the specified name.
459  * (oip) must be locked.
460  *
461  * If (oip) is not currently connected we simply connect it up.
462  *
463  * If (oip) is already connected we create a OBJTYPE_HARDLINK entry which
464  * points to (oip)'s inode number.  (oip) is expected to be the terminus of
465  * the hardlink sitting as a hidden file in a common parent directory
466  * in this situation.
467  */
468 int
469 hammer2_inode_connect(hammer2_inode_t *dip, hammer2_inode_t *oip,
470 		      const uint8_t *name, size_t name_len)
471 {
472 	hammer2_mount_t *hmp = dip->hmp;
473 	hammer2_chain_t *chain;
474 	hammer2_chain_t *parent;
475 	hammer2_inode_t *nip;
476 	hammer2_key_t lhc;
477 	int error;
478 	int hlink;
479 
480 	/*
481 	 * (oip) is the terminus of the hardlink sitting in the common
482 	 * parent directory.  This means that if oip->pip != dip then
483 	 * the already locked oip is ABOVE dip.
484 	 *
485 	 * But if the common parent directory IS dip, then we would have
486 	 * a lock-order reversal and must rearrange the lock ordering.
487 	 * For now the caller deals with this for us by locking dip in
488 	 * that case (and our lock here winds up just being recursive)
489 	 */
490 retry:
491 	parent = &dip->chain;
492 	if (oip->pip == dip) {
493 		hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
494 		hammer2_chain_lock(hmp, &oip->chain, HAMMER2_RESOLVE_ALWAYS);
495 	} else {
496 		hammer2_chain_lock(hmp, &oip->chain, HAMMER2_RESOLVE_ALWAYS);
497 		hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
498 	}
499 
500 	lhc = hammer2_dirhash(name, name_len);
501 	hlink = (oip->chain.parent != NULL);
502 
503 	/*
504 	 * In fake mode flush oip so we can just snapshot it downbelow.
505 	 */
506 	if (hlink && hammer2_hardlink_enable < 0)
507 		hammer2_chain_flush(hmp, &oip->chain, 0);
508 
509 	/*
510 	 * Locate the inode or indirect block to create the new
511 	 * entry in.  At the same time check for key collisions
512 	 * and iterate until we don't get one.
513 	 */
514 	error = 0;
515 	while (error == 0) {
516 		chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
517 		if (chain == NULL)
518 			break;
519 		if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
520 			error = ENOSPC;
521 		hammer2_chain_unlock(hmp, chain);
522 		chain = NULL;
523 		++lhc;
524 	}
525 
526 	/*
527 	 * Passing a non-NULL chain to hammer2_chain_create() reconnects the
528 	 * existing chain instead of creating a new one.  The chain's bref
529 	 * will be properly updated.
530 	 */
531 	if (error == 0) {
532 		if (hlink) {
533 			chain = hammer2_chain_create(hmp, parent,
534 						     NULL, lhc, 0,
535 						     HAMMER2_BREF_TYPE_INODE,
536 						     HAMMER2_INODE_BYTES,
537 						     &error);
538 		} else {
539 			chain = hammer2_chain_create(hmp, parent,
540 						     &oip->chain, lhc, 0,
541 						     HAMMER2_BREF_TYPE_INODE,
542 						     HAMMER2_INODE_BYTES,
543 						     &error);
544 			if (chain)
545 				KKASSERT(chain == &oip->chain);
546 		}
547 	}
548 	hammer2_chain_unlock(hmp, parent);
549 
550 	/*
551 	 * Handle the error case
552 	 */
553 	if (error) {
554 		KKASSERT(chain == NULL);
555 		if (error == EAGAIN) {
556 			hammer2_chain_wait(hmp, parent);
557 			hammer2_chain_unlock(hmp, &oip->chain);
558 			goto retry;
559 		}
560 		hammer2_chain_unlock(hmp, &oip->chain);
561 		return (error);
562 	}
563 
564 	/*
565 	 * Directory entries are inodes so if the name has changed we have
566 	 * to update the inode.
567 	 *
568 	 * When creating an OBJTYPE_HARDLINK entry remember to unlock the
569 	 * chain, the caller will access the hardlink via the actual hardlink
570 	 * target file and not the hardlink pointer entry.
571 	 */
572 	if (hlink && hammer2_hardlink_enable >= 0) {
573 		/*
574 		 * Create the HARDLINK pointer.  oip represents the hardlink
575 		 * target in this situation.
576 		 */
577 		nip = chain->u.ip;
578 		hammer2_chain_modify(hmp, chain, 0);
579 		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
580 		bcopy(name, nip->ip_data.filename, name_len);
581 		nip->ip_data.name_key = lhc;
582 		nip->ip_data.name_len = name_len;
583 		nip->ip_data.target_type = oip->ip_data.type;
584 		nip->ip_data.type = HAMMER2_OBJTYPE_HARDLINK;
585 		nip->ip_data.inum = oip->ip_data.inum;
586 		nip->ip_data.nlinks = 1;
587 		kprintf("created hardlink %*.*s\n",
588 			(int)name_len, (int)name_len, name);
589 		hammer2_chain_unlock(hmp, chain);
590 	} else if (hlink && hammer2_hardlink_enable < 0) {
591 		/*
592 		 * Create a snapshot (hardlink fake mode for debugging).
593 		 */
594 		nip = chain->u.ip;
595 		nip->ip_data = oip->ip_data;
596 		hammer2_chain_modify(hmp, chain, 0);
597 		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
598 		bcopy(name, nip->ip_data.filename, name_len);
599 		nip->ip_data.name_key = lhc;
600 		nip->ip_data.name_len = name_len;
601 		kprintf("created fake hardlink %*.*s\n",
602 			(int)name_len, (int)name_len, name);
603 		hammer2_chain_unlock(hmp, chain);
604 	} else {
605 		/*
606 		 * Normally disconnected inode (e.g. during a rename) that
607 		 * was reconnected.  We must fixup the name stored in
608 		 * oip.
609 		 *
610 		 * We are using oip as chain, already locked by caller,
611 		 * do not unlock it.
612 		 */
613 		hammer2_chain_modify(hmp, chain, 0);
614 		if (oip->ip_data.name_len != name_len ||
615 		    bcmp(oip->ip_data.filename, name, name_len) != 0) {
616 			KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
617 			bcopy(name, oip->ip_data.filename, name_len);
618 			oip->ip_data.name_key = lhc;
619 			oip->ip_data.name_len = name_len;
620 		}
621 		oip->ip_data.nlinks = 1;
622 	}
623 	hammer2_chain_unlock(hmp, &oip->chain);
624 	return (0);
625 }
626 
627 /*
628  * Unlink the file from the specified directory inode.  The directory inode
629  * does not need to be locked.
630  *
631  * isdir determines whether a directory/non-directory check should be made.
632  * No check is made if isdir is set to -1.
633  *
634  * If retain_ip is non-NULL this function can fail with an EAGAIN if it
635  * catches the object in the middle of a flush.
636  */
637 int
638 hammer2_unlink_file(hammer2_inode_t *dip,
639 		    const uint8_t *name, size_t name_len,
640 		    int isdir, hammer2_inode_t *retain_ip)
641 {
642 	hammer2_mount_t *hmp;
643 	hammer2_chain_t *parent;
644 	hammer2_chain_t *chain;
645 	hammer2_chain_t *dparent;
646 	hammer2_chain_t *dchain;
647 	hammer2_key_t lhc;
648 	hammer2_inode_t *ip;
649 	hammer2_inode_t *oip;
650 	int error;
651 	uint8_t type;
652 
653 	error = 0;
654 	oip = NULL;
655 	hmp = dip->hmp;
656 	lhc = hammer2_dirhash(name, name_len);
657 
658 	/*
659 	 * Search for the filename in the directory
660 	 */
661 	parent = &dip->chain;
662 	hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
663 	chain = hammer2_chain_lookup(hmp, &parent,
664 				     lhc, lhc + HAMMER2_DIRHASH_LOMASK,
665 				     0);
666 	while (chain) {
667 		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
668 		    chain->u.ip &&
669 		    name_len == chain->data->ipdata.name_len &&
670 		    bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
671 			break;
672 		}
673 		chain = hammer2_chain_next(hmp, &parent, chain,
674 					   lhc, lhc + HAMMER2_DIRHASH_LOMASK,
675 					   0);
676 	}
677 
678 	/*
679 	 * Not found or wrong type (isdir < 0 disables the type check).
680 	 */
681 	if (chain == NULL) {
682 		hammer2_chain_unlock(hmp, parent);
683 		return ENOENT;
684 	}
685 	if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK)
686 		type = chain->data->ipdata.target_type;
687 
688 	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
689 		error = ENOTDIR;
690 		goto done;
691 	}
692 	if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
693 		error = EISDIR;
694 		goto done;
695 	}
696 
697 	/*
698 	 * Hardlink must be resolved.  We can't hold parent locked while we
699 	 * do this or we could deadlock.
700 	 */
701 	if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
702 		hammer2_chain_unlock(hmp, parent);
703 		parent = NULL;
704 		error = hammer2_hardlink_find(dip, &chain, &oip);
705 	}
706 
707 	/*
708 	 * If this is a directory the directory must be empty.  However, if
709 	 * isdir < 0 we are doing a rename and the directory does not have
710 	 * to be empty.
711 	 *
712 	 * NOTE: We check the full key range here which covers both visible
713 	 *	 and invisible entries.  Theoretically there should be no
714 	 *	 invisible (hardlink target) entries if there are no visible
715 	 *	 entries.
716 	 */
717 	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir >= 0) {
718 		dparent = chain;
719 		hammer2_chain_lock(hmp, dparent, HAMMER2_RESOLVE_ALWAYS);
720 		dchain = hammer2_chain_lookup(hmp, &dparent,
721 					      0, (hammer2_key_t)-1,
722 					      HAMMER2_LOOKUP_NODATA);
723 		if (dchain) {
724 			hammer2_chain_unlock(hmp, dchain);
725 			hammer2_chain_unlock(hmp, dparent);
726 			error = ENOTEMPTY;
727 			goto done;
728 		}
729 		hammer2_chain_unlock(hmp, dparent);
730 		dparent = NULL;
731 		/* dchain NULL */
732 	}
733 
734 	/*
735 	 * Ok, we can now unlink the chain.  We always decrement nlinks even
736 	 * if the entry can be deleted in case someone has the file open and
737 	 * does an fstat().
738 	 *
739 	 * The chain itself will no longer be in the on-media topology but
740 	 * can still be flushed to the media (e.g. if an open descriptor
741 	 * remains).  When the last vnode/ip ref goes away the chain will
742 	 * be marked unmodified, avoiding any further (now unnecesary) I/O.
743 	 */
744 	if (oip) {
745 		/*
746 		 * If this was a hardlink we first delete the hardlink
747 		 * pointer entry.  parent is NULL on entry due to the oip
748 		 * path.
749 		 */
750 		parent = oip->chain.parent;
751 		hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
752 		hammer2_chain_lock(hmp, &oip->chain, HAMMER2_RESOLVE_ALWAYS);
753 		if (oip == retain_ip && oip->chain.flushing) {
754 			hammer2_chain_unlock(hmp, &oip->chain);
755 			error = EAGAIN;
756 			goto done;
757 		}
758 		hammer2_chain_delete(hmp, parent, &oip->chain,
759 				     (retain_ip == oip));
760 		hammer2_chain_unlock(hmp, &oip->chain);
761 		hammer2_chain_unlock(hmp, parent);
762 		parent = NULL;
763 
764 		/*
765 		 * Then decrement nlinks on hardlink target.
766 		 */
767 		ip = chain->u.ip;
768 		if (ip->ip_data.nlinks == 1) {
769 			dparent = chain->parent;
770 			hammer2_chain_ref(hmp, chain);
771 			hammer2_chain_unlock(hmp, chain);
772 			hammer2_chain_lock(hmp, dparent,
773 					   HAMMER2_RESOLVE_ALWAYS);
774 			hammer2_chain_lock(hmp, chain,
775 					   HAMMER2_RESOLVE_ALWAYS);
776 			hammer2_chain_drop(hmp, chain);
777 			hammer2_chain_modify(hmp, chain, 0);
778 			--ip->ip_data.nlinks;
779 			hammer2_chain_delete(hmp, dparent, chain, 0);
780 			hammer2_chain_unlock(hmp, dparent);
781 		} else {
782 			hammer2_chain_modify(hmp, chain, 0);
783 			--ip->ip_data.nlinks;
784 		}
785 	} else {
786 		/*
787 		 * Otherwise this was not a hardlink and we can just
788 		 * remove the entry and decrement nlinks.
789 		 */
790 		ip = chain->u.ip;
791 		if (ip == retain_ip && chain->flushing) {
792 			error = EAGAIN;
793 			goto done;
794 		}
795 		hammer2_chain_modify(hmp, chain, 0);
796 		--ip->ip_data.nlinks;
797 		hammer2_chain_delete(hmp, parent, chain,
798 				     (retain_ip == ip));
799 	}
800 
801 	error = 0;
802 
803 done:
804 	if (chain)
805 		hammer2_chain_unlock(hmp, chain);
806 	if (parent)
807 		hammer2_chain_unlock(hmp, parent);
808 	if (oip)
809 		hammer2_chain_drop(oip->hmp, &oip->chain);
810 
811 	return error;
812 }
813 
814 /*
815  * Calculate the allocation size for the file fragment straddling EOF
816  */
817 int
818 hammer2_inode_calc_alloc(hammer2_key_t filesize)
819 {
820 	int frag = (int)filesize & HAMMER2_PBUFMASK;
821 	int radix;
822 
823 	if (frag == 0)
824 		return(0);
825 	for (radix = HAMMER2_MINALLOCRADIX; frag > (1 << radix); ++radix)
826 		;
827 	return (radix);
828 }
829 
830 void
831 hammer2_inode_lock_nlinks(hammer2_inode_t *ip)
832 {
833 	hammer2_chain_ref(ip->hmp, &ip->chain);
834 }
835 
836 void
837 hammer2_inode_unlock_nlinks(hammer2_inode_t *ip)
838 {
839 	hammer2_chain_drop(ip->hmp, &ip->chain);
840 }
841 
842 /*
843  * Consolidate for hard link creation.  This moves the specified terminal
844  * hardlink inode to a directory common to its current directory and tdip
845  * if necessary, replacing *ipp with the new inode chain element and
846  * modifying the original inode chain element to OBJTYPE_HARDLINK.
847  *
848  * If the original inode chain element was a prior incarnation of a hidden
849  * inode it can simply be deleted instead of converted.
850  *
851  * (*ipp)'s nlinks field is locked on entry and the new (*ipp)'s nlinks
852  * field will be locked on return (with the original's unlocked).
853  *
854  * The link count is bumped if requested.
855  */
856 int
857 hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip)
858 {
859 	hammer2_mount_t *hmp;
860 	hammer2_inode_t *oip = *ipp;
861 	hammer2_inode_t *nip = NULL;
862 	hammer2_inode_t *fdip;
863 	hammer2_inode_t *cdip;
864 	hammer2_chain_t *parent;
865 	int error;
866 
867 	hmp = tdip->hmp;
868 
869 	if (hammer2_hardlink_enable < 0)
870 		return (0);
871 	if (hammer2_hardlink_enable == 0)
872 		return (ENOTSUP);
873 
874 	fdip = oip->pip;
875 	cdip = hammer2_inode_common_parent(hmp, fdip, tdip);
876 
877 	/*
878 	 * Nothing to do (except bump the link count) if the hardlink has
879 	 * already been consolidated in the correct place.
880 	 */
881 	if (cdip == fdip &&
882 	    (oip->ip_data.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
883 		kprintf("hardlink already consolidated correctly\n");
884 		nip = oip;
885 		hammer2_inode_lock_ex(nip);
886 		hammer2_chain_modify(hmp, &nip->chain, 0);
887 		++nip->ip_data.nlinks;
888 		hammer2_inode_unlock_ex(nip);
889 		hammer2_inode_drop(cdip);
890 		return (0);
891 	}
892 
893 	/*
894 	 * Create a hidden inode directory entry in the parent, copying
895 	 * (*oip)'s state.  Then replace oip with OBJTYPE_HARDLINK.
896 	 *
897 	 * The duplication function will either flush or move any chains
898 	 * under oip to the new hardlink target inode, retiring all chains
899 	 * related to oip before returning.  XXX vp->ip races.
900 	 */
901 	error = hammer2_inode_duplicate(cdip, oip, &nip, NULL, 0);
902 	if (error == 0) {
903 		/*
904 		 * Bump nlinks on duplicated hidden inode.
905 		 */
906 		kprintf("hardlink consolidation success in parent dir %s\n",
907 			cdip->ip_data.filename);
908 		hammer2_inode_lock_nlinks(nip);
909 		hammer2_inode_unlock_nlinks(oip);
910 		hammer2_chain_modify(hmp, &nip->chain, 0);
911 		++nip->ip_data.nlinks;
912 		hammer2_inode_unlock_ex(nip);
913 
914 		if (oip->ip_data.name_key & HAMMER2_DIRHASH_VISIBLE) {
915 			/*
916 			 * Replace the old inode with an OBJTYPE_HARDLINK
917 			 * pointer.
918 			 */
919 			hammer2_inode_lock_ex(oip);
920 			hammer2_chain_modify(hmp, &oip->chain, 0);
921 			oip->ip_data.target_type = oip->ip_data.type;
922 			oip->ip_data.type = HAMMER2_OBJTYPE_HARDLINK;
923 			oip->ip_data.uflags = 0;
924 			oip->ip_data.rmajor = 0;
925 			oip->ip_data.rminor = 0;
926 			oip->ip_data.ctime = 0;
927 			oip->ip_data.mtime = 0;
928 			oip->ip_data.atime = 0;
929 			oip->ip_data.btime = 0;
930 			bzero(&oip->ip_data.uid, sizeof(oip->ip_data.uid));
931 			bzero(&oip->ip_data.gid, sizeof(oip->ip_data.gid));
932 			oip->ip_data.op_flags = HAMMER2_OPFLAG_DIRECTDATA;
933 			oip->ip_data.cap_flags = 0;
934 			oip->ip_data.mode = 0;
935 			oip->ip_data.size = 0;
936 			oip->ip_data.nlinks = 1;
937 			oip->ip_data.iparent = 0;	/* XXX */
938 			oip->ip_data.pfs_type = 0;
939 			oip->ip_data.pfs_inum = 0;
940 			bzero(&oip->ip_data.pfs_clid,
941 			      sizeof(oip->ip_data.pfs_clid));
942 			bzero(&oip->ip_data.pfs_fsid,
943 			      sizeof(oip->ip_data.pfs_fsid));
944 			oip->ip_data.data_quota = 0;
945 			oip->ip_data.data_count = 0;
946 			oip->ip_data.inode_quota = 0;
947 			oip->ip_data.inode_count = 0;
948 			oip->ip_data.attr_tid = 0;
949 			oip->ip_data.dirent_tid = 0;
950 			bzero(&oip->ip_data.u, sizeof(oip->ip_data.u));
951 			/* XXX transaction ids */
952 
953 			hammer2_inode_unlock_ex(oip);
954 		} else {
955 			/*
956 			 * The old inode was a hardlink target, which we
957 			 * have now moved.  We must delete it so the new
958 			 * hardlink target at a higher directory level
959 			 * becomes the only hardlink target for this inode.
960 			 */
961 			kprintf("DELETE INVISIBLE\n");
962 			parent = oip->chain.parent;
963 			hammer2_chain_lock(hmp, parent,
964 					   HAMMER2_RESOLVE_ALWAYS);
965 			hammer2_chain_lock(hmp, &oip->chain,
966 					   HAMMER2_RESOLVE_ALWAYS);
967 			hammer2_chain_delete(hmp, parent, &oip->chain, 0);
968 			hammer2_chain_unlock(hmp, &oip->chain);
969 			hammer2_chain_unlock(hmp, parent);
970 		}
971 		*ipp = nip;
972 	} else {
973 		KKASSERT(nip == NULL);
974 	}
975 	hammer2_inode_drop(cdip);
976 
977 	return (error);
978 }
979 
980 /*
981  * If (*ipp) is non-NULL it points to the forward OBJTYPE_HARDLINK inode while
982  * (*chainp) points to the resolved (hidden hardlink target) inode.  In this
983  * situation when nlinks is 1 we wish to deconsolidate the hardlink, moving
984  * it back to the directory that now represents the only remaining link.
985  */
986 int
987 hammer2_hardlink_deconsolidate(hammer2_inode_t *dip, hammer2_chain_t **chainp,
988 			       hammer2_inode_t **ipp)
989 {
990 	if (*ipp == NULL)
991 		return (0);
992 	/* XXX */
993 	return (0);
994 }
995 
996 /*
997  * When presented with a (*chainp) representing an inode of type
998  * OBJTYPE_HARDLINK this code will save the original inode (with a ref)
999  * in (*ipp), and then locate the hidden hardlink target in (dip) or
1000  * any parent directory above (dip).  The locked (*chainp) is replaced
1001  * with a new locked (*chainp) representing the hardlink target.
1002  */
1003 int
1004 hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
1005 		      hammer2_inode_t **ipp)
1006 {
1007 	hammer2_mount_t *hmp = dip->hmp;
1008 	hammer2_chain_t *chain = *chainp;
1009 	hammer2_chain_t *parent;
1010 	hammer2_inode_t *pip;
1011 	hammer2_key_t lhc;
1012 
1013 	*ipp = chain->u.ip;
1014 	hammer2_inode_ref(chain->u.ip);
1015 	lhc = chain->u.ip->ip_data.inum;
1016 
1017 	hammer2_inode_unlock_ex(chain->u.ip);
1018 	pip = chain->u.ip->pip;
1019 
1020 	chain = NULL;
1021 	while (pip) {
1022 		parent = &pip->chain;
1023 		KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE);
1024 
1025 		hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
1026 		chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
1027 		hammer2_chain_unlock(hmp, parent);
1028 		if (chain)
1029 			break;
1030 		pip = pip->pip;	/* XXX SMP RACE */
1031 	}
1032 	*chainp = chain;
1033 	if (chain) {
1034 		KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
1035 		/* already locked */
1036 		return (0);
1037 	} else {
1038 		return (EIO);
1039 	}
1040 }
1041 
1042 /*
1043  * Find the directory common to both fdip and tdip, hold and return
1044  * its inode.
1045  */
1046 hammer2_inode_t *
1047 hammer2_inode_common_parent(hammer2_mount_t *hmp,
1048 			    hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1049 {
1050 	hammer2_inode_t *scan1;
1051 	hammer2_inode_t *scan2;
1052 
1053 	/*
1054 	 * We used to have a depth field but it complicated matters too
1055 	 * much for directory renames.  So now its ugly.  Check for
1056 	 * simple cases before giving up and doing it the expensive way.
1057 	 *
1058 	 * XXX need a bottom-up topology stability lock
1059 	 */
1060 	if (fdip == tdip || fdip == tdip->pip) {
1061 		hammer2_inode_ref(fdip);
1062 		return(fdip);
1063 	}
1064 	if (fdip->pip == tdip) {
1065 		hammer2_inode_ref(tdip);
1066 		return(tdip);
1067 	}
1068 	for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
1069 		scan2 = tdip;
1070 		while (scan2->pmp == tdip->pmp) {
1071 			if (scan1 == scan2) {
1072 				hammer2_inode_ref(scan1);
1073 				return(scan1);
1074 			}
1075 			scan2 = scan2->pip;
1076 		}
1077 	}
1078 	panic("hammer2_inode_common_parent: no common parent %p %p\n",
1079 	      fdip, tdip);
1080 	/* NOT REACHED */
1081 	return(NULL);
1082 }
1083