xref: /dragonfly/sys/vfs/hammer2/hammer2_inode.c (revision 8accc937)
1 /*
2  * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 #include <sys/lock.h>
40 #include <sys/uuid.h>
41 
42 #include "hammer2.h"
43 
44 /*
45  * Adding a ref to an inode is only legal if the inode already has at least
46  * one ref.
47  */
48 void
49 hammer2_inode_ref(hammer2_inode_t *ip)
50 {
51 	hammer2_chain_ref(ip->hmp, &ip->chain);
52 }
53 
54 /*
55  * Drop an inode reference, freeing the inode when the last reference goes
56  * away.
57  */
58 void
59 hammer2_inode_drop(hammer2_inode_t *ip)
60 {
61 	hammer2_chain_drop(ip->hmp, &ip->chain);
62 }
63 
64 /*
65  * Get the vnode associated with the given inode, allocating the vnode if
66  * necessary.  The vnode will be returned exclusively locked.
67  *
68  * The caller must lock the inode (shared or exclusive).
69  *
70  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
71  * races.
72  */
73 struct vnode *
74 hammer2_igetv(hammer2_inode_t *ip, int *errorp)
75 {
76 	struct vnode *vp;
77 	hammer2_pfsmount_t *pmp;
78 	ccms_state_t ostate;
79 
80 	pmp = ip->pmp;
81 	KKASSERT(pmp != NULL);
82 	*errorp = 0;
83 
84 	for (;;) {
85 		/*
86 		 * Attempt to reuse an existing vnode assignment.  It is
87 		 * possible to race a reclaim so the vget() may fail.  The
88 		 * inode must be unlocked during the vget() to avoid a
89 		 * deadlock against a reclaim.
90 		 */
91 		vp = ip->vp;
92 		if (vp) {
93 			/*
94 			 * Inode must be unlocked during the vget() to avoid
95 			 * possible deadlocks, vnode is held to prevent
96 			 * destruction during the vget().  The vget() can
97 			 * still fail if we lost a reclaim race on the vnode.
98 			 */
99 			vhold_interlocked(vp);
100 			ccms_thread_unlock(&ip->chain.cst);
101 			if (vget(vp, LK_EXCLUSIVE)) {
102 				vdrop(vp);
103 				ccms_thread_lock(&ip->chain.cst,
104 						 CCMS_STATE_EXCLUSIVE);
105 				continue;
106 			}
107 			ccms_thread_lock(&ip->chain.cst, CCMS_STATE_EXCLUSIVE);
108 			vdrop(vp);
109 			/* vp still locked and ref from vget */
110 			*errorp = 0;
111 			break;
112 		}
113 
114 		/*
115 		 * No vnode exists, allocate a new vnode.  Beware of
116 		 * allocation races.  This function will return an
117 		 * exclusively locked and referenced vnode.
118 		 */
119 		*errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
120 		if (*errorp) {
121 			vp = NULL;
122 			break;
123 		}
124 
125 		/*
126 		 * Lock the inode and check for an allocation race.
127 		 */
128 		ostate = ccms_thread_lock_upgrade(&ip->chain.cst);
129 		if (ip->vp != NULL) {
130 			vp->v_type = VBAD;
131 			vx_put(vp);
132 			ccms_thread_lock_restore(&ip->chain.cst, ostate);
133 			continue;
134 		}
135 
136 		switch (ip->ip_data.type) {
137 		case HAMMER2_OBJTYPE_DIRECTORY:
138 			vp->v_type = VDIR;
139 			break;
140 		case HAMMER2_OBJTYPE_REGFILE:
141 			vp->v_type = VREG;
142 			vinitvmio(vp, ip->ip_data.size,
143 				  HAMMER2_LBUFSIZE,
144 				  (int)ip->ip_data.size & HAMMER2_LBUFMASK);
145 			break;
146 		case HAMMER2_OBJTYPE_SOFTLINK:
147 			/*
148 			 * XXX for now we are using the generic file_read
149 			 * and file_write code so we need a buffer cache
150 			 * association.
151 			 */
152 			vp->v_type = VLNK;
153 			vinitvmio(vp, ip->ip_data.size,
154 				  HAMMER2_LBUFSIZE,
155 				  (int)ip->ip_data.size & HAMMER2_LBUFMASK);
156 			break;
157 		/* XXX FIFO */
158 		default:
159 			panic("hammer2: unhandled objtype %d",
160 			      ip->ip_data.type);
161 			break;
162 		}
163 
164 		if (ip == pmp->iroot)
165 			vsetflags(vp, VROOT);
166 
167 		vp->v_data = ip;
168 		ip->vp = vp;
169 		hammer2_chain_ref(ip->hmp, &ip->chain);	/* vp association */
170 		ccms_thread_lock_restore(&ip->chain.cst, ostate);
171 		break;
172 	}
173 
174 	/*
175 	 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
176 	 */
177 	if (hammer2_debug & 0x0002) {
178 		kprintf("igetv vp %p refs %d aux %d\n",
179 			vp, vp->v_sysref.refcnt, vp->v_auxrefs);
180 	}
181 	return (vp);
182 }
183 
184 /*
185  * Create a new inode in the specified directory using the vattr to
186  * figure out the type of inode.
187  *
188  * If no error occurs the new inode with its chain locked is returned in
189  * *nipp, otherwise an error is returned and *nipp is set to NULL.
190  *
191  * If vap and/or cred are NULL the related fields are not set and the
192  * inode type defaults to a directory.  This is used when creating PFSs
193  * under the super-root, so the inode number is set to 1 in this case.
194  */
195 int
196 hammer2_inode_create(hammer2_inode_t *dip,
197 		     struct vattr *vap, struct ucred *cred,
198 		     const uint8_t *name, size_t name_len,
199 		     hammer2_inode_t **nipp)
200 {
201 	hammer2_mount_t *hmp = dip->hmp;
202 	hammer2_chain_t *chain;
203 	hammer2_chain_t *parent;
204 	hammer2_inode_t *nip;
205 	hammer2_key_t lhc;
206 	int error;
207 	uid_t xuid;
208 
209 	lhc = hammer2_dirhash(name, name_len);
210 
211 	/*
212 	 * Locate the inode or indirect block to create the new
213 	 * entry in.  At the same time check for key collisions
214 	 * and iterate until we don't get one.
215 	 */
216 	parent = &dip->chain;
217 	hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
218 
219 	error = 0;
220 	while (error == 0) {
221 		chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
222 		if (chain == NULL)
223 			break;
224 		if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
225 			error = ENOSPC;
226 		if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
227 			error = ENOSPC;
228 		hammer2_chain_unlock(hmp, chain);
229 		chain = NULL;
230 		++lhc;
231 	}
232 	if (error == 0) {
233 		chain = hammer2_chain_create(hmp, parent, NULL, lhc, 0,
234 					     HAMMER2_BREF_TYPE_INODE,
235 					     HAMMER2_INODE_BYTES);
236 		if (chain == NULL)
237 			error = EIO;
238 	}
239 	hammer2_chain_unlock(hmp, parent);
240 
241 	/*
242 	 * Handle the error case
243 	 */
244 	if (error) {
245 		KKASSERT(chain == NULL);
246 		*nipp = NULL;
247 		return (error);
248 	}
249 
250 	/*
251 	 * Set up the new inode
252 	 */
253 	nip = chain->u.ip;
254 	*nipp = nip;
255 
256 	hammer2_voldata_lock(hmp);
257 	if (vap) {
258 		nip->ip_data.type = hammer2_get_obj_type(vap->va_type);
259 		nip->ip_data.inum = hmp->voldata.alloc_tid++;
260 		/* XXX modify/lock */
261 	} else {
262 		nip->ip_data.type = HAMMER2_OBJTYPE_DIRECTORY;
263 		nip->ip_data.inum = 1;
264 	}
265 	hammer2_voldata_unlock(hmp);
266 	nip->ip_data.version = HAMMER2_INODE_VERSION_ONE;
267 	hammer2_update_time(&nip->ip_data.ctime);
268 	nip->ip_data.mtime = nip->ip_data.ctime;
269 	if (vap)
270 		nip->ip_data.mode = vap->va_mode;
271 	nip->ip_data.nlinks = 1;
272 	if (vap) {
273 		if (dip) {
274 			xuid = hammer2_to_unix_xid(&dip->ip_data.uid);
275 			xuid = vop_helper_create_uid(dip->pmp->mp,
276 						     dip->ip_data.mode,
277 						     xuid,
278 						     cred,
279 						     &vap->va_mode);
280 		} else {
281 			xuid = 0;
282 		}
283 		if (vap->va_vaflags & VA_UID_UUID_VALID)
284 			nip->ip_data.uid = vap->va_uid_uuid;
285 		else if (vap->va_uid != (uid_t)VNOVAL)
286 			hammer2_guid_to_uuid(&nip->ip_data.uid, vap->va_uid);
287 		else
288 			hammer2_guid_to_uuid(&nip->ip_data.uid, xuid);
289 
290 		if (vap->va_vaflags & VA_GID_UUID_VALID)
291 			nip->ip_data.gid = vap->va_gid_uuid;
292 		else if (vap->va_gid != (gid_t)VNOVAL)
293 			hammer2_guid_to_uuid(&nip->ip_data.gid, vap->va_gid);
294 		else if (dip)
295 			nip->ip_data.gid = dip->ip_data.gid;
296 	}
297 
298 	/*
299 	 * Regular files and softlinks allow a small amount of data to be
300 	 * directly embedded in the inode.  This flag will be cleared if
301 	 * the size is extended past the embedded limit.
302 	 */
303 	if (nip->ip_data.type == HAMMER2_OBJTYPE_REGFILE ||
304 	    nip->ip_data.type == HAMMER2_OBJTYPE_SOFTLINK) {
305 		nip->ip_data.op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
306 	}
307 
308 	KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
309 	bcopy(name, nip->ip_data.filename, name_len);
310 	nip->ip_data.name_key = lhc;
311 	nip->ip_data.name_len = name_len;
312 
313 	return (0);
314 }
315 
316 /*
317  * Duplicate the specified existing inode in the specified target directory.
318  * If name is NULL the inode is duplicated as a hidden directory entry.
319  *
320  * Returns the new inode.  The old inode is left alone.
321  *
322  * XXX name needs to be NULL for now.
323  */
324 int
325 hammer2_inode_duplicate(hammer2_inode_t *dip, hammer2_inode_t *oip,
326 			hammer2_inode_t **nipp,
327 			const uint8_t *name, size_t name_len)
328 {
329 	hammer2_mount_t *hmp = dip->hmp;
330 	hammer2_inode_t *nip;
331 	hammer2_chain_t *parent;
332 	hammer2_chain_t *chain;
333 	hammer2_key_t lhc;
334 	int error;
335 
336 	if (name) {
337 		lhc = hammer2_dirhash(name, name_len);
338 	} else {
339 		lhc = oip->ip_data.inum;
340 		KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);
341 	}
342 
343 	/*
344 	 * Locate the inode or indirect block to create the new
345 	 * entry in.  At the same time check for key collisions
346 	 * and iterate until we don't get one.
347 	 */
348 	nip = NULL;
349 	parent = &dip->chain;
350 	hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
351 
352 	error = 0;
353 	while (error == 0) {
354 		chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
355 		if (chain == NULL)
356 			break;
357 		/* XXX bcmp name if not NULL */
358 		if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
359 			error = ENOSPC;
360 		if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0) /* shouldn't happen */
361 			error = ENOSPC;
362 		hammer2_chain_unlock(hmp, chain);
363 		chain = NULL;
364 		++lhc;
365 	}
366 
367 	/*
368 	 * Create entry in common parent directory.
369 	 */
370 	if (error == 0) {
371 		chain = hammer2_chain_create(hmp, parent, NULL, lhc, 0,
372 					     HAMMER2_BREF_TYPE_INODE /* n/a */,
373 					     HAMMER2_INODE_BYTES);   /* n/a */
374 		if (chain == NULL)
375 			error = EIO;
376 	}
377 	hammer2_chain_unlock(hmp, parent);
378 
379 	/*
380 	 * Handle the error case
381 	 */
382 	if (error) {
383 		KKASSERT(chain == NULL);
384 		return (error);
385 	}
386 
387 	/*
388 	 * XXX This is currently a horrible hack.  Well, if we wanted to
389 	 *     duplicate a file, i.e. as in a snapshot, we definitely
390 	 *     would have to flush it first.
391 	 *
392 	 *     For hardlink target generation we can theoretically move any
393 	 *     active chain structures without flushing, but that gets really
394 	 *     iffy for code which follows chain->parent and ip->pip links.
395 	 *
396 	 * XXX only works with files.  Duplicating a directory hierarchy
397 	 *     requires a flush but doesn't deal with races post-flush.
398 	 *     Well, it would work I guess, but you might catch some files
399 	 *     mid-operation.
400 	 *
401 	 * We cannot leave oip with any in-memory chains because (for a
402 	 * hardlink), oip will become a OBJTYPE_HARDLINK which is just a
403 	 * pointer to the real hardlink's inum and can't have any sub-chains.
404 	 * XXX might be 0-ref chains left.
405 	 */
406 	hammer2_inode_lock_ex(oip);
407 	hammer2_chain_flush(hmp, &oip->chain, 0);
408 	hammer2_inode_unlock_ex(oip);
409 	/*KKASSERT(RB_EMPTY(&oip->chain.rbhead));*/
410 
411 	nip = chain->u.ip;
412 	hammer2_chain_modify(hmp, chain, 0);
413 	nip->ip_data = oip->ip_data;	/* sync media data after flush */
414 
415 	if (name) {
416 		/*
417 		 * Directory entries are inodes so if the name has changed
418 		 * we have to update the inode.
419 		 */
420 		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
421 		bcopy(name, nip->ip_data.filename, name_len);
422 		nip->ip_data.name_key = lhc;
423 		nip->ip_data.name_len = name_len;
424 	} else {
425 		/*
426 		 * Directory entries are inodes but this is a hidden hardlink
427 		 * target.  The name isn't used but to ease debugging give it
428 		 * a name after its inode number.
429 		 */
430 		ksnprintf(nip->ip_data.filename, sizeof(nip->ip_data.filename),
431 			  "0x%016jx", (intmax_t)nip->ip_data.inum);
432 		nip->ip_data.name_len = strlen(nip->ip_data.filename);
433 		nip->ip_data.name_key = lhc;
434 	}
435 	*nipp = nip;
436 
437 	return (0);
438 }
439 
440 
441 /*
442  * Connect inode (oip) to the specified directory using the specified name.
443  * (oip) must be locked.
444  *
445  * If (oip) is not currently connected we simply connect it up.
446  *
447  * If (oip) is already connected we create a OBJTYPE_HARDLINK entry which
448  * points to (oip)'s inode number.  (oip) is expected to be the terminus of
449  * the hardlink sitting as a hidden file in a common parent directory
450  * in this situation (thus the lock order is correct).
451  */
452 int
453 hammer2_inode_connect(hammer2_inode_t *dip, hammer2_inode_t *oip,
454 		      const uint8_t *name, size_t name_len)
455 {
456 	hammer2_mount_t *hmp = dip->hmp;
457 	hammer2_chain_t *chain;
458 	hammer2_chain_t *parent;
459 	hammer2_inode_t *nip;
460 	hammer2_key_t lhc;
461 	int error;
462 	int hlink;
463 
464 	lhc = hammer2_dirhash(name, name_len);
465 	hlink = (oip->chain.parent != NULL);
466 
467 	/*
468 	 * In fake mode flush oip so we can just snapshot it downbelow.
469 	 */
470 	if (hlink && hammer2_hardlink_enable < 0)
471 		hammer2_chain_flush(hmp, &oip->chain, 0);
472 
473 	/*
474 	 * Locate the inode or indirect block to create the new
475 	 * entry in.  At the same time check for key collisions
476 	 * and iterate until we don't get one.
477 	 */
478 	parent = &dip->chain;
479 	hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
480 
481 	error = 0;
482 	while (error == 0) {
483 		chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
484 		if (chain == NULL)
485 			break;
486 		if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
487 			error = ENOSPC;
488 		hammer2_chain_unlock(hmp, chain);
489 		chain = NULL;
490 		++lhc;
491 	}
492 
493 	/*
494 	 * Passing a non-NULL chain to hammer2_chain_create() reconnects the
495 	 * existing chain instead of creating a new one.  The chain's bref
496 	 * will be properly updated.
497 	 */
498 	if (error == 0) {
499 		if (hlink) {
500 			chain = hammer2_chain_create(hmp, parent,
501 						     NULL, lhc, 0,
502 						     HAMMER2_BREF_TYPE_INODE,
503 						     HAMMER2_INODE_BYTES);
504 		} else {
505 			chain = hammer2_chain_create(hmp, parent,
506 						     &oip->chain, lhc, 0,
507 						     HAMMER2_BREF_TYPE_INODE,
508 						     HAMMER2_INODE_BYTES);
509 			if (chain)
510 				KKASSERT(chain == &oip->chain);
511 		}
512 		if (chain == NULL)
513 			error = EIO;
514 	}
515 	hammer2_chain_unlock(hmp, parent);
516 
517 	/*
518 	 * Handle the error case
519 	 */
520 	if (error) {
521 		KKASSERT(chain == NULL);
522 		return (error);
523 	}
524 
525 	/*
526 	 * Directory entries are inodes so if the name has changed we have
527 	 * to update the inode.
528 	 *
529 	 * When creating an OBJTYPE_HARDLINK entry remember to unlock the
530 	 * chain, the caller will access the hardlink via the actual hardlink
531 	 * target file and not the hardlink pointer entry.
532 	 */
533 	if (hlink && hammer2_hardlink_enable >= 0) {
534 		/*
535 		 * Create the HARDLINK pointer.  oip represents the hardlink
536 		 * target in this situation.
537 		 */
538 		nip = chain->u.ip;
539 		hammer2_chain_modify(hmp, chain, 0);
540 		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
541 		bcopy(name, nip->ip_data.filename, name_len);
542 		nip->ip_data.name_key = lhc;
543 		nip->ip_data.name_len = name_len;
544 		nip->ip_data.target_type = oip->ip_data.type;
545 		nip->ip_data.type = HAMMER2_OBJTYPE_HARDLINK;
546 		nip->ip_data.inum = oip->ip_data.inum;
547 		nip->ip_data.nlinks = 1;
548 		kprintf("created hardlink %*.*s\n",
549 			(int)name_len, (int)name_len, name);
550 		hammer2_chain_unlock(hmp, chain);
551 	} else if (hlink && hammer2_hardlink_enable < 0) {
552 		/*
553 		 * Create a snapshot (hardlink fake mode for debugging).
554 		 */
555 		nip = chain->u.ip;
556 		nip->ip_data = oip->ip_data;
557 		hammer2_chain_modify(hmp, chain, 0);
558 		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
559 		bcopy(name, nip->ip_data.filename, name_len);
560 		nip->ip_data.name_key = lhc;
561 		nip->ip_data.name_len = name_len;
562 		kprintf("created fake hardlink %*.*s\n",
563 			(int)name_len, (int)name_len, name);
564 		hammer2_chain_unlock(hmp, chain);
565 	} else {
566 		/*
567 		 * Normally disconnected inode (e.g. during a rename) that
568 		 * was reconnected.  We must fixup the name stored in
569 		 * oip.
570 		 *
571 		 * We are using oip as chain, already locked by caller,
572 		 * do not unlock it.
573 		 */
574 		hammer2_chain_modify(hmp, chain, 0);
575 		if (oip->ip_data.name_len != name_len ||
576 		    bcmp(oip->ip_data.filename, name, name_len) != 0) {
577 			KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
578 			bcopy(name, oip->ip_data.filename, name_len);
579 			oip->ip_data.name_key = lhc;
580 			oip->ip_data.name_len = name_len;
581 		}
582 		oip->ip_data.nlinks = 1;
583 	}
584 
585 	return (0);
586 }
587 
588 /*
589  * Unlink the file from the specified directory inode.  The directory inode
590  * does not need to be locked.
591  *
592  * isdir determines whether a directory/non-directory check should be made.
593  * No check is made if isdir is set to -1.
594  */
595 int
596 hammer2_unlink_file(hammer2_inode_t *dip,
597 		    const uint8_t *name, size_t name_len,
598 		    int isdir, hammer2_inode_t *retain_ip)
599 {
600 	hammer2_mount_t *hmp;
601 	hammer2_chain_t *parent;
602 	hammer2_chain_t *chain;
603 	hammer2_chain_t *dparent;
604 	hammer2_chain_t *dchain;
605 	hammer2_key_t lhc;
606 	hammer2_inode_t *ip;
607 	hammer2_inode_t *oip;
608 	int error;
609 	uint8_t type;
610 
611 	error = 0;
612 	oip = NULL;
613 	hmp = dip->hmp;
614 	lhc = hammer2_dirhash(name, name_len);
615 
616 	/*
617 	 * Search for the filename in the directory
618 	 */
619 	parent = &dip->chain;
620 	hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
621 	chain = hammer2_chain_lookup(hmp, &parent,
622 				     lhc, lhc + HAMMER2_DIRHASH_LOMASK,
623 				     0);
624 	while (chain) {
625 		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
626 		    chain->u.ip &&
627 		    name_len == chain->data->ipdata.name_len &&
628 		    bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
629 			break;
630 		}
631 		chain = hammer2_chain_next(hmp, &parent, chain,
632 					   lhc, lhc + HAMMER2_DIRHASH_LOMASK,
633 					   0);
634 	}
635 
636 	/*
637 	 * Not found or wrong type (isdir < 0 disables the type check).
638 	 */
639 	if (chain == NULL) {
640 		hammer2_chain_unlock(hmp, parent);
641 		return ENOENT;
642 	}
643 	if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK)
644 		type = chain->data->ipdata.target_type;
645 
646 	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
647 		error = ENOTDIR;
648 		goto done;
649 	}
650 	if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
651 		error = EISDIR;
652 		goto done;
653 	}
654 
655 	/*
656 	 * Hardlink must be resolved.  We can't hold parent locked while we
657 	 * do this or we could deadlock.
658 	 */
659 	if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
660 		hammer2_chain_unlock(hmp, parent);
661 		parent = NULL;
662 		error = hammer2_hardlink_find(dip, &chain, &oip);
663 	}
664 
665 	/*
666 	 * If this is a directory the directory must be empty.  However, if
667 	 * isdir < 0 we are doing a rename and the directory does not have
668 	 * to be empty.
669 	 *
670 	 * NOTE: We check the full key range here which covers both visible
671 	 *	 and invisible entries.  Theoretically there should be no
672 	 *	 invisible (hardlink target) entries if there are no visible
673 	 *	 entries.
674 	 */
675 	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir >= 0) {
676 		dparent = chain;
677 		hammer2_chain_lock(hmp, dparent, HAMMER2_RESOLVE_ALWAYS);
678 		dchain = hammer2_chain_lookup(hmp, &dparent,
679 					      0, (hammer2_key_t)-1,
680 					      HAMMER2_LOOKUP_NODATA);
681 		if (dchain) {
682 			hammer2_chain_unlock(hmp, dchain);
683 			hammer2_chain_unlock(hmp, dparent);
684 			error = ENOTEMPTY;
685 			goto done;
686 		}
687 		hammer2_chain_unlock(hmp, dparent);
688 		dparent = NULL;
689 		/* dchain NULL */
690 	}
691 
692 	/*
693 	 * Ok, we can now unlink the chain.  We always decrement nlinks even
694 	 * if the entry can be deleted in case someone has the file open and
695 	 * does an fstat().
696 	 *
697 	 * The chain itself will no longer be in the on-media topology but
698 	 * can still be flushed to the media (e.g. if an open descriptor
699 	 * remains).  When the last vnode/ip ref goes away the chain will
700 	 * be marked unmodified, avoiding any further (now unnecesary) I/O.
701 	 */
702 	if (oip) {
703 		/*
704 		 * If this was a hardlink we first delete the hardlink
705 		 * pointer entry.
706 		 */
707 		parent = oip->chain.parent;
708 		hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
709 		hammer2_chain_lock(hmp, &oip->chain, HAMMER2_RESOLVE_ALWAYS);
710 		hammer2_chain_delete(hmp, parent, &oip->chain,
711 				    (retain_ip == oip));
712 		hammer2_chain_unlock(hmp, &oip->chain);
713 		hammer2_chain_unlock(hmp, parent);
714 		parent = NULL;
715 
716 		/*
717 		 * Then decrement nlinks on hardlink target.
718 		 */
719 		ip = chain->u.ip;
720 		if (ip->ip_data.nlinks == 1) {
721 			dparent = chain->parent;
722 			hammer2_chain_ref(hmp, chain);
723 			hammer2_chain_unlock(hmp, chain);
724 			hammer2_chain_lock(hmp, dparent,
725 					   HAMMER2_RESOLVE_ALWAYS);
726 			hammer2_chain_lock(hmp, chain, HAMMER2_RESOLVE_ALWAYS);
727 			hammer2_chain_drop(hmp, chain);
728 			hammer2_chain_modify(hmp, chain, 0);
729 			--ip->ip_data.nlinks;
730 			hammer2_chain_delete(hmp, dparent, chain, 0);
731 			hammer2_chain_unlock(hmp, dparent);
732 		} else {
733 			hammer2_chain_modify(hmp, chain, 0);
734 			--ip->ip_data.nlinks;
735 		}
736 	} else {
737 		/*
738 		 * Otherwise this was not a hardlink and we can just
739 		 * remove the entry and decrement nlinks.
740 		 */
741 		ip = chain->u.ip;
742 		hammer2_chain_modify(hmp, chain, 0);
743 		--ip->ip_data.nlinks;
744 		hammer2_chain_delete(hmp, parent, chain,
745 				     (retain_ip == ip));
746 	}
747 
748 	error = 0;
749 
750 done:
751 	if (chain)
752 		hammer2_chain_unlock(hmp, chain);
753 	if (parent)
754 		hammer2_chain_unlock(hmp, parent);
755 	if (oip)
756 		hammer2_chain_drop(oip->hmp, &oip->chain);
757 
758 	return error;
759 }
760 
761 /*
762  * Calculate the allocation size for the file fragment straddling EOF
763  */
764 int
765 hammer2_inode_calc_alloc(hammer2_key_t filesize)
766 {
767 	int frag = (int)filesize & HAMMER2_PBUFMASK;
768 	int radix;
769 
770 	if (frag == 0)
771 		return(0);
772 	for (radix = HAMMER2_MINALLOCRADIX; frag > (1 << radix); ++radix)
773 		;
774 	return (radix);
775 }
776 
777 void
778 hammer2_inode_lock_nlinks(hammer2_inode_t *ip)
779 {
780 	hammer2_chain_ref(ip->hmp, &ip->chain);
781 }
782 
783 void
784 hammer2_inode_unlock_nlinks(hammer2_inode_t *ip)
785 {
786 	hammer2_chain_drop(ip->hmp, &ip->chain);
787 }
788 
789 /*
790  * Consolidate for hard link creation.  This moves the specified terminal
791  * hardlink inode to a directory common to its current directory and tdip
792  * if necessary, replacing *ipp with the new inode chain element and
793  * modifying the original inode chain element to OBJTYPE_HARDLINK.
794  *
795  * If the original inode chain element was a prior incarnation of a hidden
796  * inode it can simply be deleted instead of converted.
797  *
798  * (*ipp)'s nlinks field is locked on entry and the new (*ipp)'s nlinks
799  * field will be locked on return (with the original's unlocked).
800  *
801  * The link count is bumped if requested.
802  */
803 int
804 hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip)
805 {
806 	hammer2_mount_t *hmp;
807 	hammer2_inode_t *oip = *ipp;
808 	hammer2_inode_t *nip = NULL;
809 	hammer2_inode_t *fdip;
810 	hammer2_chain_t *parent;
811 	int error;
812 
813 	hmp = tdip->hmp;
814 
815 	if (hammer2_hardlink_enable < 0)
816 		return (0);
817 	if (hammer2_hardlink_enable == 0)
818 		return (ENOTSUP);
819 
820 	/*
821 	 * Find the common parent directory
822 	 */
823 	fdip = oip->pip;
824 	while (fdip->depth > tdip->depth) {
825 		fdip = fdip->pip;
826 		KKASSERT(fdip != NULL);
827 	}
828 	while (tdip->depth > fdip->depth) {
829 		tdip = tdip->pip;
830 		KKASSERT(tdip != NULL);
831 	}
832 	while (fdip != tdip) {
833 		fdip = fdip->pip;
834 		tdip = tdip->pip;
835 		KKASSERT(fdip != NULL);
836 		KKASSERT(tdip != NULL);
837 	}
838 
839 	/*
840 	 * Nothing to do (except bump the link count) if the hardlink has
841 	 * already been consolidated in the correct place.
842 	 */
843 	if (oip->pip == fdip &&
844 	    (oip->ip_data.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
845 		kprintf("hardlink already consolidated correctly\n");
846 		nip = oip;
847 		hammer2_inode_lock_ex(nip);
848 		hammer2_chain_modify(hmp, &nip->chain, 0);
849 		++nip->ip_data.nlinks;
850 		hammer2_inode_unlock_ex(nip);
851 		return (0);
852 	}
853 
854 	/*
855 	 * Create a hidden inode directory entry in the parent, copying
856 	 * (*oip)'s state.  Then replace oip with OBJTYPE_HARDLINK.
857 	 *
858 	 * The duplication function will either flush or move any chains
859 	 * under oip to the new hardlink target inode, retiring all chains
860 	 * related to oip before returning.  XXX vp->ip races.
861 	 */
862 	error = hammer2_inode_duplicate(fdip, oip, &nip, NULL, 0);
863 	if (error == 0) {
864 		/*
865 		 * Bump nlinks on duplicated hidden inode.
866 		 */
867 		kprintf("hardlink consolidation success in parent dir %s\n",
868 			fdip->ip_data.filename);
869 		hammer2_inode_lock_nlinks(nip);
870 		hammer2_inode_unlock_nlinks(oip);
871 		hammer2_chain_modify(hmp, &nip->chain, 0);
872 		++nip->ip_data.nlinks;
873 		hammer2_inode_unlock_ex(nip);
874 
875 		if (oip->ip_data.name_key & HAMMER2_DIRHASH_VISIBLE) {
876 			/*
877 			 * Replace the old inode with an OBJTYPE_HARDLINK
878 			 * pointer.
879 			 */
880 			hammer2_inode_lock_ex(oip);
881 			hammer2_chain_modify(hmp, &oip->chain, 0);
882 			oip->ip_data.target_type = oip->ip_data.type;
883 			oip->ip_data.type = HAMMER2_OBJTYPE_HARDLINK;
884 			oip->ip_data.uflags = 0;
885 			oip->ip_data.rmajor = 0;
886 			oip->ip_data.rminor = 0;
887 			oip->ip_data.ctime = 0;
888 			oip->ip_data.mtime = 0;
889 			oip->ip_data.atime = 0;
890 			oip->ip_data.btime = 0;
891 			bzero(&oip->ip_data.uid, sizeof(oip->ip_data.uid));
892 			bzero(&oip->ip_data.gid, sizeof(oip->ip_data.gid));
893 			oip->ip_data.op_flags = HAMMER2_OPFLAG_DIRECTDATA;
894 			oip->ip_data.cap_flags = 0;
895 			oip->ip_data.mode = 0;
896 			oip->ip_data.size = 0;
897 			oip->ip_data.nlinks = 1;
898 			oip->ip_data.iparent = 0;	/* XXX */
899 			oip->ip_data.pfs_type = 0;
900 			oip->ip_data.pfs_inum = 0;
901 			bzero(&oip->ip_data.pfs_clid,
902 			      sizeof(oip->ip_data.pfs_clid));
903 			bzero(&oip->ip_data.pfs_fsid,
904 			      sizeof(oip->ip_data.pfs_fsid));
905 			oip->ip_data.data_quota = 0;
906 			oip->ip_data.data_count = 0;
907 			oip->ip_data.inode_quota = 0;
908 			oip->ip_data.inode_count = 0;
909 			oip->ip_data.attr_tid = 0;
910 			oip->ip_data.dirent_tid = 0;
911 			bzero(&oip->ip_data.u, sizeof(oip->ip_data.u));
912 			/* XXX transaction ids */
913 
914 			hammer2_inode_unlock_ex(oip);
915 		} else {
916 			/*
917 			 * The old inode was a hardlink target, which we
918 			 * have now moved.  We must delete it so the new
919 			 * hardlink target at a higher directory level
920 			 * becomes the only hardlink target for this inode.
921 			 */
922 			kprintf("DELETE INVISIBLE\n");
923 			parent = oip->chain.parent;
924 			hammer2_chain_lock(hmp, parent,
925 					   HAMMER2_RESOLVE_ALWAYS);
926 			hammer2_chain_lock(hmp, &oip->chain,
927 					   HAMMER2_RESOLVE_ALWAYS);
928 			hammer2_chain_delete(hmp, parent, &oip->chain, 0);
929 			hammer2_chain_unlock(hmp, &oip->chain);
930 			hammer2_chain_unlock(hmp, parent);
931 		}
932 		*ipp = nip;
933 	} else {
934 		KKASSERT(nip == NULL);
935 	}
936 
937 	return (error);
938 }
939 
940 /*
941  * If (*ipp) is non-NULL it points to the forward OBJTYPE_HARDLINK inode while
942  * (*chainp) points to the resolved (hidden hardlink target) inode.  In this
943  * situation when nlinks is 1 we wish to deconsolidate the hardlink, moving
944  * it back to the directory that now represents the only remaining link.
945  */
946 int
947 hammer2_hardlink_deconsolidate(hammer2_inode_t *dip, hammer2_chain_t **chainp,
948 			       hammer2_inode_t **ipp)
949 {
950 	if (*ipp == NULL)
951 		return (0);
952 	/* XXX */
953 	return (0);
954 }
955 
956 /*
957  * When presented with a (*chainp) representing an inode of type
958  * OBJTYPE_HARDLINK this code will save the original inode (with a ref)
959  * in (*ipp), and then locate the hidden hardlink target in (dip) or
960  * any parent directory above (dip).  The locked (*chainp) is replaced
961  * with a new locked (*chainp) representing the hardlink target.
962  */
963 int
964 hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
965 		      hammer2_inode_t **ipp)
966 {
967 	hammer2_mount_t *hmp = dip->hmp;
968 	hammer2_chain_t *chain = *chainp;
969 	hammer2_chain_t *parent;
970 	hammer2_inode_t *pip;
971 	hammer2_key_t lhc;
972 
973 	*ipp = chain->u.ip;
974 	hammer2_inode_ref(chain->u.ip);
975 	lhc = chain->u.ip->ip_data.inum;
976 
977 	hammer2_inode_unlock_ex(chain->u.ip);
978 	pip = chain->u.ip->pip;
979 
980 	chain = NULL;
981 	while (pip) {
982 		parent = &pip->chain;
983 		KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE);
984 
985 		hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
986 		chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
987 		hammer2_chain_unlock(hmp, parent);
988 		if (chain)
989 			break;
990 		pip = pip->pip;
991 	}
992 	*chainp = chain;
993 	if (chain) {
994 		KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
995 		/* already locked */
996 		return (0);
997 	} else {
998 		return (EIO);
999 	}
1000 }
1001