/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

3566325755SMatthew Dillon #include "hammer.h"
3666325755SMatthew Dillon 
375a64efa1SMatthew Dillon static u_int32_t ocp_allocbit(hammer_objid_cache_t ocp, u_int32_t n);
380729c8c8SMatthew Dillon 
390729c8c8SMatthew Dillon 
40b84de5afSMatthew Dillon /*
41b84de5afSMatthew Dillon  * Start a standard transaction.
42b84de5afSMatthew Dillon  */
4366325755SMatthew Dillon void
448cd0a023SMatthew Dillon hammer_start_transaction(struct hammer_transaction *trans,
458cd0a023SMatthew Dillon 			 struct hammer_mount *hmp)
4666325755SMatthew Dillon {
47ddfdf542SMatthew Dillon 	struct timeval tv;
48a89aec1bSMatthew Dillon 	int error;
4966325755SMatthew Dillon 
50b84de5afSMatthew Dillon 	trans->type = HAMMER_TRANS_STD;
5166325755SMatthew Dillon 	trans->hmp = hmp;
52a89aec1bSMatthew Dillon 	trans->rootvol = hammer_get_root_volume(hmp, &error);
53a89aec1bSMatthew Dillon 	KKASSERT(error == 0);
54b84de5afSMatthew Dillon 	trans->tid = 0;
552f85fa4dSMatthew Dillon 	trans->sync_lock_refs = 0;
5621fde338SMatthew Dillon 	trans->flags = 0;
57ddfdf542SMatthew Dillon 
58ddfdf542SMatthew Dillon 	getmicrotime(&tv);
59dd94f1b1SMatthew Dillon 	trans->time = (unsigned long)tv.tv_sec * 1000000ULL + tv.tv_usec;
60dd94f1b1SMatthew Dillon 	trans->time32 = (u_int32_t)tv.tv_sec;
6166325755SMatthew Dillon }
6266325755SMatthew Dillon 
63b84de5afSMatthew Dillon /*
64b84de5afSMatthew Dillon  * Start a simple read-only transaction.  This will not stall.
65b84de5afSMatthew Dillon  */
6666325755SMatthew Dillon void
6736f82b23SMatthew Dillon hammer_simple_transaction(struct hammer_transaction *trans,
6836f82b23SMatthew Dillon 			  struct hammer_mount *hmp)
6936f82b23SMatthew Dillon {
70ddfdf542SMatthew Dillon 	struct timeval tv;
7136f82b23SMatthew Dillon 	int error;
7236f82b23SMatthew Dillon 
73b84de5afSMatthew Dillon 	trans->type = HAMMER_TRANS_RO;
7436f82b23SMatthew Dillon 	trans->hmp = hmp;
7536f82b23SMatthew Dillon 	trans->rootvol = hammer_get_root_volume(hmp, &error);
7636f82b23SMatthew Dillon 	KKASSERT(error == 0);
77b84de5afSMatthew Dillon 	trans->tid = 0;
782f85fa4dSMatthew Dillon 	trans->sync_lock_refs = 0;
7921fde338SMatthew Dillon 	trans->flags = 0;
80ddfdf542SMatthew Dillon 
81ddfdf542SMatthew Dillon 	getmicrotime(&tv);
82dd94f1b1SMatthew Dillon 	trans->time = (unsigned long)tv.tv_sec * 1000000ULL + tv.tv_usec;
83dd94f1b1SMatthew Dillon 	trans->time32 = (u_int32_t)tv.tv_sec;
8436f82b23SMatthew Dillon }
8536f82b23SMatthew Dillon 
86b84de5afSMatthew Dillon /*
87b84de5afSMatthew Dillon  * Start a transaction using a particular TID.  Used by the sync code.
88b84de5afSMatthew Dillon  * This does not stall.
892f85fa4dSMatthew Dillon  *
902f85fa4dSMatthew Dillon  * This routine may only be called from the flusher thread.  We predispose
912f85fa4dSMatthew Dillon  * sync_lock_refs, implying serialization against the synchronization stage
922f85fa4dSMatthew Dillon  * (which the flusher is responsible for).
93b84de5afSMatthew Dillon  */
9436f82b23SMatthew Dillon void
95b84de5afSMatthew Dillon hammer_start_transaction_fls(struct hammer_transaction *trans,
96b84de5afSMatthew Dillon 			     struct hammer_mount *hmp)
97d113fda1SMatthew Dillon {
98ddfdf542SMatthew Dillon 	struct timeval tv;
99d113fda1SMatthew Dillon 	int error;
100d113fda1SMatthew Dillon 
1019f5097dcSMatthew Dillon 	bzero(trans, sizeof(*trans));
1029f5097dcSMatthew Dillon 
103b84de5afSMatthew Dillon 	trans->type = HAMMER_TRANS_FLS;
104d113fda1SMatthew Dillon 	trans->hmp = hmp;
105d113fda1SMatthew Dillon 	trans->rootvol = hammer_get_root_volume(hmp, &error);
106d113fda1SMatthew Dillon 	KKASSERT(error == 0);
1075de0c0e5SMatthew Dillon 	trans->tid = hammer_alloc_tid(hmp, 1);
1082f85fa4dSMatthew Dillon 	trans->sync_lock_refs = 1;
10921fde338SMatthew Dillon 	trans->flags = 0;
110ddfdf542SMatthew Dillon 
111ddfdf542SMatthew Dillon 	getmicrotime(&tv);
112dd94f1b1SMatthew Dillon 	trans->time = (unsigned long)tv.tv_sec * 1000000ULL + tv.tv_usec;
113dd94f1b1SMatthew Dillon 	trans->time32 = (u_int32_t)tv.tv_sec;
114d113fda1SMatthew Dillon }
115d113fda1SMatthew Dillon 
116d113fda1SMatthew Dillon void
117b84de5afSMatthew Dillon hammer_done_transaction(struct hammer_transaction *trans)
11866325755SMatthew Dillon {
119*f31f6d84SSascha Wildner 	int expected_lock_refs __debugvar;
1202f85fa4dSMatthew Dillon 
121a89aec1bSMatthew Dillon 	hammer_rel_volume(trans->rootvol, 0);
122b84de5afSMatthew Dillon 	trans->rootvol = NULL;
1232f85fa4dSMatthew Dillon 	expected_lock_refs = (trans->type == HAMMER_TRANS_FLS) ? 1 : 0;
1242f85fa4dSMatthew Dillon 	KKASSERT(trans->sync_lock_refs == expected_lock_refs);
1252f85fa4dSMatthew Dillon 	trans->sync_lock_refs = 0;
12682010f9fSMatthew Dillon 	if (trans->type != HAMMER_TRANS_FLS) {
12721fde338SMatthew Dillon 		if (trans->flags & HAMMER_TRANSF_NEWINODE)
128e98f1b96SMatthew Dillon 			hammer_inode_waitreclaims(trans);
129e98f1b96SMatthew Dillon 		/*
13082010f9fSMatthew Dillon 		else if (trans->flags & HAMMER_TRANSF_DIDIO)
131e98f1b96SMatthew Dillon 			hammer_inode_waitreclaims(trans);
132e98f1b96SMatthew Dillon 		*/
13382010f9fSMatthew Dillon 	}
13466325755SMatthew Dillon }
13566325755SMatthew Dillon 
136d113fda1SMatthew Dillon /*
1375de0c0e5SMatthew Dillon  * Allocate (count) TIDs.  If running in multi-master mode the returned
1385de0c0e5SMatthew Dillon  * base will be aligned to a 16-count plus the master id (0-15).
1395de0c0e5SMatthew Dillon  * Multi-master mode allows non-conflicting to run and new objects to be
1405de0c0e5SMatthew Dillon  * created on multiple masters in parallel.  The transaction id identifies
1415de0c0e5SMatthew Dillon  * the original master.  The object_id is also subject to this rule in
1425de0c0e5SMatthew Dillon  * order to allow objects to be created on multiple masters in parallel.
1435de0c0e5SMatthew Dillon  *
1445de0c0e5SMatthew Dillon  * Directories may pre-allocate a large number of object ids (100,000).
1455de0c0e5SMatthew Dillon  *
1465de0c0e5SMatthew Dillon  * NOTE: There is no longer a requirement that successive transaction
1475de0c0e5SMatthew Dillon  *	 ids be 2 apart for separator generation.
14883f2a3aaSMatthew Dillon  *
14983f2a3aaSMatthew Dillon  * NOTE: When called by pseudo-backends such as ioctls the allocated
15083f2a3aaSMatthew Dillon  *	 TID will be larger then the current flush TID, if a flush is running,
15183f2a3aaSMatthew Dillon  *	 so any mirroring will pick the records up on a later flush.
152d113fda1SMatthew Dillon  */
15383f2a3aaSMatthew Dillon hammer_tid_t
1545de0c0e5SMatthew Dillon hammer_alloc_tid(hammer_mount_t hmp, int count)
155a89aec1bSMatthew Dillon {
156a89aec1bSMatthew Dillon 	hammer_tid_t tid;
157a89aec1bSMatthew Dillon 
158732a1697SMatthew Dillon 	if (hmp->master_id < 0) {
159c82af904SMatthew Dillon 		tid = hmp->next_tid + 1;
160c82af904SMatthew Dillon 		hmp->next_tid = tid + count;
161c82af904SMatthew Dillon 	} else {
162c82af904SMatthew Dillon 		tid = (hmp->next_tid + HAMMER_MAX_MASTERS) &
163c82af904SMatthew Dillon 		      ~(hammer_tid_t)(HAMMER_MAX_MASTERS - 1);
164c82af904SMatthew Dillon 		hmp->next_tid = tid + count * HAMMER_MAX_MASTERS;
165732a1697SMatthew Dillon 		tid |= hmp->master_id;
166c82af904SMatthew Dillon 	}
167c82af904SMatthew Dillon 	if (tid >= 0xFFFFFFFFFF000000ULL)
168a89aec1bSMatthew Dillon 		panic("hammer_start_transaction: Ran out of TIDs!");
1695de0c0e5SMatthew Dillon 	if (hammer_debug_tid)
170973c11b9SMatthew Dillon 		kprintf("alloc_tid %016llx\n", (long long)tid);
171a89aec1bSMatthew Dillon 	return(tid);
172a89aec1bSMatthew Dillon }
173a89aec1bSMatthew Dillon 
1740729c8c8SMatthew Dillon /*
1755a64efa1SMatthew Dillon  * Allocate an object id.
1765a64efa1SMatthew Dillon  *
1775a64efa1SMatthew Dillon  * We use the upper OBJID_CACHE_BITS bits of the namekey to try to match
1785a64efa1SMatthew Dillon  * the low bits of the objid we allocate.
1790729c8c8SMatthew Dillon  */
1800729c8c8SMatthew Dillon hammer_tid_t
1815a64efa1SMatthew Dillon hammer_alloc_objid(hammer_mount_t hmp, hammer_inode_t dip, int64_t namekey)
1820729c8c8SMatthew Dillon {
1830729c8c8SMatthew Dillon 	hammer_objid_cache_t ocp;
1840729c8c8SMatthew Dillon 	hammer_tid_t tid;
1855a64efa1SMatthew Dillon 	u_int32_t n;
1860729c8c8SMatthew Dillon 
1870729c8c8SMatthew Dillon 	while ((ocp = dip->objid_cache) == NULL) {
1885de0c0e5SMatthew Dillon 		if (hmp->objid_cache_count < OBJID_CACHE_SIZE) {
189bac808feSMatthew Dillon 			ocp = kmalloc(sizeof(*ocp), hmp->m_misc,
190bac808feSMatthew Dillon 				      M_WAITOK|M_ZERO);
1915a64efa1SMatthew Dillon 			ocp->base_tid = hammer_alloc_tid(hmp,
1925a64efa1SMatthew Dillon 							OBJID_CACHE_BULK * 2);
1935a64efa1SMatthew Dillon 			ocp->base_tid += OBJID_CACHE_BULK_MASK64;
1945a64efa1SMatthew Dillon 			ocp->base_tid &= ~OBJID_CACHE_BULK_MASK64;
1950729c8c8SMatthew Dillon 			/* may have blocked, recheck */
1960729c8c8SMatthew Dillon 			if (dip->objid_cache == NULL) {
197d1ce1558SMatthew Dillon 				TAILQ_INSERT_TAIL(&hmp->objid_cache_list,
198d1ce1558SMatthew Dillon 						  ocp, entry);
199d1ce1558SMatthew Dillon 				++hmp->objid_cache_count;
2000729c8c8SMatthew Dillon 				dip->objid_cache = ocp;
2010729c8c8SMatthew Dillon 				ocp->dip = dip;
202d1ce1558SMatthew Dillon 			} else {
203d1ce1558SMatthew Dillon 				kfree(ocp, hmp->m_misc);
2040729c8c8SMatthew Dillon 			}
2050729c8c8SMatthew Dillon 		} else {
2065a64efa1SMatthew Dillon 			/*
2075a64efa1SMatthew Dillon 			 * Steal one from another directory?
2085a64efa1SMatthew Dillon 			 *
2095a64efa1SMatthew Dillon 			 * Throw away ocp's that are more then half full, they
2105a64efa1SMatthew Dillon 			 * aren't worth stealing.
2115a64efa1SMatthew Dillon 			 */
2125de0c0e5SMatthew Dillon 			ocp = TAILQ_FIRST(&hmp->objid_cache_list);
2130729c8c8SMatthew Dillon 			if (ocp->dip)
2140729c8c8SMatthew Dillon 				ocp->dip->objid_cache = NULL;
2155a64efa1SMatthew Dillon 			if (ocp->count >= OBJID_CACHE_BULK / 2) {
216d1ce1558SMatthew Dillon 				TAILQ_REMOVE(&hmp->objid_cache_list,
217d1ce1558SMatthew Dillon 					     ocp, entry);
2185a64efa1SMatthew Dillon 				--hmp->objid_cache_count;
2195a64efa1SMatthew Dillon 				kfree(ocp, hmp->m_misc);
2205a64efa1SMatthew Dillon 			} else {
2210729c8c8SMatthew Dillon 				dip->objid_cache = ocp;
2220729c8c8SMatthew Dillon 				ocp->dip = dip;
2230729c8c8SMatthew Dillon 			}
2240729c8c8SMatthew Dillon 		}
2255a64efa1SMatthew Dillon 	}
2265de0c0e5SMatthew Dillon 	TAILQ_REMOVE(&hmp->objid_cache_list, ocp, entry);
2275de0c0e5SMatthew Dillon 
2285de0c0e5SMatthew Dillon 	/*
229d1ce1558SMatthew Dillon 	 * Allocate inode numbers uniformly.
2305a64efa1SMatthew Dillon 	 */
231d1ce1558SMatthew Dillon 
2325a64efa1SMatthew Dillon 	n = (namekey >> (63 - OBJID_CACHE_BULK_BITS)) & OBJID_CACHE_BULK_MASK;
2335a64efa1SMatthew Dillon 	n = ocp_allocbit(ocp, n);
2345a64efa1SMatthew Dillon 	tid = ocp->base_tid + n;
2355a64efa1SMatthew Dillon 
2365a64efa1SMatthew Dillon #if 0
2375a64efa1SMatthew Dillon 	/*
2385de0c0e5SMatthew Dillon 	 * The TID is incremented by 1 or by 16 depending what mode the
2395de0c0e5SMatthew Dillon 	 * mount is operating in.
2405de0c0e5SMatthew Dillon 	 */
241732a1697SMatthew Dillon 	ocp->next_tid += (hmp->master_id < 0) ? 1 : HAMMER_MAX_MASTERS;
2425a64efa1SMatthew Dillon #endif
243d1ce1558SMatthew Dillon 	if (ocp->count >= OBJID_CACHE_BULK * 3 / 4) {
2440729c8c8SMatthew Dillon 		dip->objid_cache = NULL;
2455de0c0e5SMatthew Dillon 		--hmp->objid_cache_count;
2460729c8c8SMatthew Dillon 		ocp->dip = NULL;
247bac808feSMatthew Dillon 		kfree(ocp, hmp->m_misc);
2480729c8c8SMatthew Dillon 	} else {
2495de0c0e5SMatthew Dillon 		TAILQ_INSERT_TAIL(&hmp->objid_cache_list, ocp, entry);
2500729c8c8SMatthew Dillon 	}
2510729c8c8SMatthew Dillon 	return(tid);
2520729c8c8SMatthew Dillon }
2530729c8c8SMatthew Dillon 
2545a64efa1SMatthew Dillon /*
2555a64efa1SMatthew Dillon  * Allocate a bit starting with bit n.  Wrap if necessary.
2565a64efa1SMatthew Dillon  *
2575a64efa1SMatthew Dillon  * This routine is only ever called if a bit is available somewhere
2585a64efa1SMatthew Dillon  * in the bitmap.
2595a64efa1SMatthew Dillon  */
2605a64efa1SMatthew Dillon static u_int32_t
2615a64efa1SMatthew Dillon ocp_allocbit(hammer_objid_cache_t ocp, u_int32_t n)
2625a64efa1SMatthew Dillon {
2635a64efa1SMatthew Dillon 	u_int32_t n0;
2645a64efa1SMatthew Dillon 
2655a64efa1SMatthew Dillon 	n0 = (n >> 5) & 31;
2665a64efa1SMatthew Dillon 	n &= 31;
2675a64efa1SMatthew Dillon 
2685a64efa1SMatthew Dillon 	while (ocp->bm1[n0] & (1 << n)) {
2695a64efa1SMatthew Dillon 		if (ocp->bm0 & (1 << n0)) {
2705a64efa1SMatthew Dillon 			n0 = (n0 + 1) & 31;
2715a64efa1SMatthew Dillon 			n = 0;
2725a64efa1SMatthew Dillon 		} else if (++n == 32) {
2735a64efa1SMatthew Dillon 			n0 = (n0 + 1) & 31;
2745a64efa1SMatthew Dillon 			n = 0;
2755a64efa1SMatthew Dillon 		}
2765a64efa1SMatthew Dillon 	}
2775a64efa1SMatthew Dillon 	++ocp->count;
2785a64efa1SMatthew Dillon 	ocp->bm1[n0] |= 1 << n;
2795a64efa1SMatthew Dillon 	if (ocp->bm1[n0] == 0xFFFFFFFFU)
2805a64efa1SMatthew Dillon 		ocp->bm0 |= 1 << n0;
2815a64efa1SMatthew Dillon 	return((n0 << 5) + n);
2825a64efa1SMatthew Dillon }
2835a64efa1SMatthew Dillon 
2840729c8c8SMatthew Dillon void
2850729c8c8SMatthew Dillon hammer_clear_objid(hammer_inode_t dip)
2860729c8c8SMatthew Dillon {
2870729c8c8SMatthew Dillon 	hammer_objid_cache_t ocp;
2880729c8c8SMatthew Dillon 
2890729c8c8SMatthew Dillon 	if ((ocp = dip->objid_cache) != NULL) {
2900729c8c8SMatthew Dillon 		dip->objid_cache = NULL;
2910729c8c8SMatthew Dillon 		ocp->dip = NULL;
2920729c8c8SMatthew Dillon 		TAILQ_REMOVE(&dip->hmp->objid_cache_list, ocp, entry);
2930729c8c8SMatthew Dillon 		TAILQ_INSERT_HEAD(&dip->hmp->objid_cache_list, ocp, entry);
2940729c8c8SMatthew Dillon 	}
2950729c8c8SMatthew Dillon }
2960729c8c8SMatthew Dillon 
2970729c8c8SMatthew Dillon void
2980729c8c8SMatthew Dillon hammer_destroy_objid_cache(hammer_mount_t hmp)
2990729c8c8SMatthew Dillon {
3000729c8c8SMatthew Dillon 	hammer_objid_cache_t ocp;
3010729c8c8SMatthew Dillon 
3020729c8c8SMatthew Dillon 	while ((ocp = TAILQ_FIRST(&hmp->objid_cache_list)) != NULL) {
3030729c8c8SMatthew Dillon 		TAILQ_REMOVE(&hmp->objid_cache_list, ocp, entry);
304f437a2abSMatthew Dillon 		if (ocp->dip)
305f437a2abSMatthew Dillon 			ocp->dip->objid_cache = NULL;
306bac808feSMatthew Dillon 		kfree(ocp, hmp->m_misc);
307d1ce1558SMatthew Dillon 		--hmp->objid_cache_count;
3080729c8c8SMatthew Dillon 	}
309d1ce1558SMatthew Dillon 	KKASSERT(hmp->objid_cache_count == 0);
3100729c8c8SMatthew Dillon }
3110729c8c8SMatthew Dillon 
312