15c23d7f1SMatthew Dillon /* 20dea3156SMatthew Dillon * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. 35c23d7f1SMatthew Dillon * 45c23d7f1SMatthew Dillon * This code is derived from software contributed to The DragonFly Project 55c23d7f1SMatthew Dillon * by Matthew Dillon <dillon@dragonflybsd.org> 65c23d7f1SMatthew Dillon * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 75c23d7f1SMatthew Dillon * 85c23d7f1SMatthew Dillon * Redistribution and use in source and binary forms, with or without 95c23d7f1SMatthew Dillon * modification, are permitted provided that the following conditions 105c23d7f1SMatthew Dillon * are met: 115c23d7f1SMatthew Dillon * 125c23d7f1SMatthew Dillon * 1. Redistributions of source code must retain the above copyright 135c23d7f1SMatthew Dillon * notice, this list of conditions and the following disclaimer. 145c23d7f1SMatthew Dillon * 2. Redistributions in binary form must reproduce the above copyright 155c23d7f1SMatthew Dillon * notice, this list of conditions and the following disclaimer in 165c23d7f1SMatthew Dillon * the documentation and/or other materials provided with the 175c23d7f1SMatthew Dillon * distribution. 185c23d7f1SMatthew Dillon * 3. Neither the name of The DragonFly Project nor the names of its 195c23d7f1SMatthew Dillon * contributors may be used to endorse or promote products derived 205c23d7f1SMatthew Dillon * from this software without specific, prior written permission. 215c23d7f1SMatthew Dillon * 225c23d7f1SMatthew Dillon * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 235c23d7f1SMatthew Dillon * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 245c23d7f1SMatthew Dillon * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 255c23d7f1SMatthew Dillon * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 265c23d7f1SMatthew Dillon * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 275c23d7f1SMatthew Dillon * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 285c23d7f1SMatthew Dillon * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 295c23d7f1SMatthew Dillon * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 305c23d7f1SMatthew Dillon * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 315c23d7f1SMatthew Dillon * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 325c23d7f1SMatthew Dillon * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 335c23d7f1SMatthew Dillon * SUCH DAMAGE. 345c23d7f1SMatthew Dillon */ 355c23d7f1SMatthew Dillon #include <sys/param.h> 365c23d7f1SMatthew Dillon #include <sys/systm.h> 375c23d7f1SMatthew Dillon #include <sys/kernel.h> 385c23d7f1SMatthew Dillon #include <sys/fcntl.h> 395c23d7f1SMatthew Dillon #include <sys/buf.h> 405c23d7f1SMatthew Dillon #include <sys/proc.h> 415c23d7f1SMatthew Dillon #include <sys/namei.h> 425c23d7f1SMatthew Dillon #include <sys/mount.h> 435c23d7f1SMatthew Dillon #include <sys/vnode.h> 445c23d7f1SMatthew Dillon #include <sys/mountctl.h> 455c23d7f1SMatthew Dillon 465c23d7f1SMatthew Dillon #include "hammer2.h" 475c23d7f1SMatthew Dillon 4891abd410SMatthew Dillon struct hammer2_fiterate { 4991abd410SMatthew Dillon hammer2_off_t bpref; 5091abd410SMatthew Dillon hammer2_off_t bnext; 5191abd410SMatthew Dillon int loops; 5291abd410SMatthew Dillon }; 5391abd410SMatthew Dillon 5491abd410SMatthew Dillon typedef struct hammer2_fiterate hammer2_fiterate_t; 5591abd410SMatthew Dillon 561a7cfe5aSMatthew Dillon static int hammer2_freemap_try_alloc(hammer2_trans_t *trans, 571a7cfe5aSMatthew Dillon hammer2_chain_t **parentp, hammer2_blockref_t *bref, 5891abd410SMatthew Dillon int radix, hammer2_fiterate_t *iter); 59a5913bdfSMatthew Dillon static void hammer2_freemap_init(hammer2_trans_t *trans, hammer2_mount_t *hmp, 60a5913bdfSMatthew Dillon hammer2_key_t key, hammer2_chain_t *chain); 61a5913bdfSMatthew Dillon static int hammer2_bmap_alloc(hammer2_trans_t *trans, hammer2_mount_t *hmp, 6291abd410SMatthew Dillon hammer2_bmap_data_t *bmap, uint16_t class, 6393f3933aSMatthew Dillon int n, int radix, hammer2_key_t *basep); 641a7cfe5aSMatthew Dillon static int hammer2_freemap_iterate(hammer2_trans_t *trans, 651a7cfe5aSMatthew Dillon hammer2_chain_t **parentp, hammer2_chain_t **chainp, 6691abd410SMatthew Dillon hammer2_fiterate_t *iter); 671a7cfe5aSMatthew Dillon 68a98aa0b0SMatthew Dillon static __inline 69a98aa0b0SMatthew Dillon int 70a98aa0b0SMatthew Dillon hammer2_freemapradix(int radix) 71a98aa0b0SMatthew Dillon { 72a98aa0b0SMatthew Dillon return(radix); 73a98aa0b0SMatthew Dillon } 74a98aa0b0SMatthew Dillon 751a7cfe5aSMatthew Dillon /* 761a7cfe5aSMatthew Dillon * Calculate the device offset for the specified FREEMAP_NODE or FREEMAP_LEAF 771a7cfe5aSMatthew Dillon * bref. Return a combined media offset and physical size radix. Freemap 781a7cfe5aSMatthew Dillon * chains use fixed storage offsets in the 4MB reserved area at the 791a7cfe5aSMatthew Dillon * beginning of each 2GB zone 801a7cfe5aSMatthew Dillon * 811a7cfe5aSMatthew Dillon * Rotate between four possibilities. Theoretically this means we have three 821a7cfe5aSMatthew Dillon * good freemaps in case of a crash which we can use as a base for the fixup 831a7cfe5aSMatthew Dillon * scan at mount-time. 841a7cfe5aSMatthew Dillon */ 851a7cfe5aSMatthew Dillon #define H2FMBASE(key, radix) ((key) & ~(((hammer2_off_t)1 << (radix)) - 1)) 861a7cfe5aSMatthew Dillon #define H2FMSHIFT(radix) ((hammer2_off_t)1 << (radix)) 871a7cfe5aSMatthew Dillon 881a7cfe5aSMatthew Dillon static 891a7cfe5aSMatthew Dillon int 901a7cfe5aSMatthew Dillon hammer2_freemap_reserve(hammer2_mount_t *hmp, hammer2_blockref_t *bref, 911a7cfe5aSMatthew Dillon int radix) 921a7cfe5aSMatthew Dillon { 931a7cfe5aSMatthew Dillon hammer2_off_t off; 941a7cfe5aSMatthew Dillon size_t bytes; 951a7cfe5aSMatthew Dillon 961a7cfe5aSMatthew Dillon /* 971a7cfe5aSMatthew Dillon * Physical allocation size -> radix. Typically either 256 for 981a7cfe5aSMatthew Dillon * a level 0 freemap leaf or 65536 for a level N freemap node. 991a7cfe5aSMatthew Dillon * 1001a7cfe5aSMatthew Dillon * NOTE: A 256 byte bitmap represents 256 x 8 x 1024 = 2MB of storage. 1011a7cfe5aSMatthew Dillon * Do not use hammer2_allocsize() here as it has a min cap. 1021a7cfe5aSMatthew Dillon */ 1031a7cfe5aSMatthew Dillon bytes = 1 << radix; 1041a7cfe5aSMatthew Dillon 1051a7cfe5aSMatthew Dillon /* 1061a7cfe5aSMatthew Dillon * Adjust by HAMMER2_ZONE_FREEMAP_{A,B,C,D} using the existing 107a98aa0b0SMatthew Dillon * offset as a basis. Start in zone A if previously unallocated. 1081a7cfe5aSMatthew Dillon */ 1091a7cfe5aSMatthew Dillon if ((bref->data_off & ~HAMMER2_OFF_MASK_RADIX) == 0) { 1101a7cfe5aSMatthew Dillon off = HAMMER2_ZONE_FREEMAP_A; 1111a7cfe5aSMatthew Dillon } else { 1121a7cfe5aSMatthew Dillon off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX & 1131a7cfe5aSMatthew Dillon (((hammer2_off_t)1 << HAMMER2_FREEMAP_LEVEL1_RADIX) - 1); 1141a7cfe5aSMatthew Dillon off = off / HAMMER2_PBUFSIZE; 1151a7cfe5aSMatthew Dillon KKASSERT(off >= HAMMER2_ZONE_FREEMAP_A); 11693f3933aSMatthew Dillon KKASSERT(off < HAMMER2_ZONE_FREEMAP_D + 4); 1171a7cfe5aSMatthew Dillon 1181a7cfe5aSMatthew Dillon if (off >= HAMMER2_ZONE_FREEMAP_D) 1191a7cfe5aSMatthew Dillon off = HAMMER2_ZONE_FREEMAP_A; 1201a7cfe5aSMatthew Dillon else if (off >= HAMMER2_ZONE_FREEMAP_C) 1211a7cfe5aSMatthew Dillon off = HAMMER2_ZONE_FREEMAP_D; 1221a7cfe5aSMatthew Dillon else if (off >= HAMMER2_ZONE_FREEMAP_B) 1231a7cfe5aSMatthew Dillon off = HAMMER2_ZONE_FREEMAP_C; 1241a7cfe5aSMatthew Dillon else 1251a7cfe5aSMatthew Dillon off = HAMMER2_ZONE_FREEMAP_B; 1261a7cfe5aSMatthew Dillon } 1271a7cfe5aSMatthew Dillon off = off * HAMMER2_PBUFSIZE; 1281a7cfe5aSMatthew Dillon 1291a7cfe5aSMatthew Dillon /* 1301a7cfe5aSMatthew Dillon * Calculate the block offset of the reserved block. This will 1311a7cfe5aSMatthew Dillon * point into the 4MB reserved area at the base of the appropriate 1321a7cfe5aSMatthew Dillon * 2GB zone, once added to the FREEMAP_x selection above. 1331a7cfe5aSMatthew Dillon */ 1341a7cfe5aSMatthew Dillon switch(bref->keybits) { 1351a7cfe5aSMatthew Dillon /* case HAMMER2_FREEMAP_LEVEL5_RADIX: not applicable */ 1361a7cfe5aSMatthew Dillon case HAMMER2_FREEMAP_LEVEL4_RADIX: /* 2EB */ 1371a7cfe5aSMatthew Dillon KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE); 1381a7cfe5aSMatthew Dillon KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE); 1391a7cfe5aSMatthew Dillon off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL4_RADIX) + 1401a7cfe5aSMatthew Dillon HAMMER2_ZONEFM_LEVEL4 * HAMMER2_PBUFSIZE; 1411a7cfe5aSMatthew Dillon break; 1421a7cfe5aSMatthew Dillon case HAMMER2_FREEMAP_LEVEL3_RADIX: /* 2PB */ 1431a7cfe5aSMatthew Dillon KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE); 1441a7cfe5aSMatthew Dillon KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE); 1451a7cfe5aSMatthew Dillon off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL3_RADIX) + 1461a7cfe5aSMatthew Dillon HAMMER2_ZONEFM_LEVEL3 * HAMMER2_PBUFSIZE; 1471a7cfe5aSMatthew Dillon break; 1481a7cfe5aSMatthew Dillon case HAMMER2_FREEMAP_LEVEL2_RADIX: /* 2TB */ 1491a7cfe5aSMatthew Dillon KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE); 1501a7cfe5aSMatthew Dillon KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE); 1511a7cfe5aSMatthew Dillon off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL2_RADIX) + 1521a7cfe5aSMatthew Dillon HAMMER2_ZONEFM_LEVEL2 * HAMMER2_PBUFSIZE; 1531a7cfe5aSMatthew Dillon break; 1541a7cfe5aSMatthew Dillon case HAMMER2_FREEMAP_LEVEL1_RADIX: /* 2GB */ 15593f3933aSMatthew Dillon KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_LEAF); 1561a7cfe5aSMatthew Dillon KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE); 1571a7cfe5aSMatthew Dillon off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL1_RADIX) + 1581a7cfe5aSMatthew Dillon HAMMER2_ZONEFM_LEVEL1 * HAMMER2_PBUFSIZE; 1591a7cfe5aSMatthew Dillon break; 1601a7cfe5aSMatthew Dillon default: 1611a7cfe5aSMatthew Dillon panic("freemap: bad radix(2) %p %d\n", bref, bref->keybits); 1621a7cfe5aSMatthew Dillon /* NOT REACHED */ 1631a7cfe5aSMatthew Dillon break; 1641a7cfe5aSMatthew Dillon } 1651a7cfe5aSMatthew Dillon bref->data_off = off | radix; 1661a7cfe5aSMatthew Dillon return (0); 1671a7cfe5aSMatthew Dillon } 1681a7cfe5aSMatthew Dillon 1695c23d7f1SMatthew Dillon /* 1701a7cfe5aSMatthew Dillon * Normal freemap allocator 1711a7cfe5aSMatthew Dillon * 1721a7cfe5aSMatthew Dillon * Use available hints to allocate space using the freemap. Create missing 1731a7cfe5aSMatthew Dillon * freemap infrastructure on-the-fly as needed (including marking initial 1741a7cfe5aSMatthew Dillon * allocations using the iterator as allocated, instantiating new 2GB zones, 1751a7cfe5aSMatthew Dillon * and dealing with the end-of-media edge case). 1761a7cfe5aSMatthew Dillon * 1771a7cfe5aSMatthew Dillon * ip and bpref are only used as a heuristic to determine locality of 1781a7cfe5aSMatthew Dillon * reference. bref->key may also be used heuristically. 179a4dc31e0SMatthew Dillon * 180a4dc31e0SMatthew Dillon * WARNING! When called from a flush we have to use the 'live' sync_tid 181a4dc31e0SMatthew Dillon * and not the flush sync_tid. The live sync_tid is the flush 182a4dc31e0SMatthew Dillon * sync_tid + 1. That is, freemap allocations which occur during 183a4dc31e0SMatthew Dillon * a flush are not part of the flush. Crash-recovery will restore 184a4dc31e0SMatthew Dillon * any lost allocations. 1851a7cfe5aSMatthew Dillon */ 1861a7cfe5aSMatthew Dillon int 187a5913bdfSMatthew Dillon hammer2_freemap_alloc(hammer2_trans_t *trans, hammer2_mount_t *hmp, 1881a7cfe5aSMatthew Dillon hammer2_blockref_t *bref, size_t bytes) 1891a7cfe5aSMatthew Dillon { 1901a7cfe5aSMatthew Dillon hammer2_chain_t *parent; 1911a7cfe5aSMatthew Dillon int radix; 1921a7cfe5aSMatthew Dillon int error; 19391abd410SMatthew Dillon unsigned int hindex; 19491abd410SMatthew Dillon hammer2_fiterate_t iter; 1951a7cfe5aSMatthew Dillon 1961a7cfe5aSMatthew Dillon /* 1971a7cfe5aSMatthew Dillon * Validate the allocation size. It must be a power of 2. 19893f3933aSMatthew Dillon * 19993f3933aSMatthew Dillon * For now require that the caller be aware of the minimum 20093f3933aSMatthew Dillon * allocation (1K). 2011a7cfe5aSMatthew Dillon */ 2021a7cfe5aSMatthew Dillon radix = hammer2_getradix(bytes); 2031a7cfe5aSMatthew Dillon KKASSERT((size_t)1 << radix == bytes); 2041a7cfe5aSMatthew Dillon 2051a7cfe5aSMatthew Dillon /* 20693f3933aSMatthew Dillon * Freemap blocks themselves are simply assigned from the reserve 20793f3933aSMatthew Dillon * area, not allocated from the freemap. 2081a7cfe5aSMatthew Dillon */ 2091a7cfe5aSMatthew Dillon if (bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 2101a7cfe5aSMatthew Dillon bref->type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 2111a7cfe5aSMatthew Dillon return(hammer2_freemap_reserve(hmp, bref, radix)); 2121a7cfe5aSMatthew Dillon } 2131a7cfe5aSMatthew Dillon 21491abd410SMatthew Dillon if (bref->data_off & ~HAMMER2_OFF_MASK_RADIX) 21591abd410SMatthew Dillon hammer2_freemap_free(trans, hmp, bref, 0); 21691abd410SMatthew Dillon 21793f3933aSMatthew Dillon /* 218a4dc31e0SMatthew Dillon * Setting ISALLOCATING ensures correct operation even when the 219a4dc31e0SMatthew Dillon * flusher itself is making allocations. 22093f3933aSMatthew Dillon */ 22193f3933aSMatthew Dillon KKASSERT(bytes >= HAMMER2_MIN_ALLOC && bytes <= HAMMER2_MAX_ALLOC); 222a4dc31e0SMatthew Dillon KKASSERT((trans->flags & HAMMER2_TRANS_ISALLOCATING) == 0); 223a7720be7SMatthew Dillon atomic_set_int(&trans->flags, HAMMER2_TRANS_ISALLOCATING); 224a4dc31e0SMatthew Dillon if (trans->flags & HAMMER2_TRANS_ISFLUSH) 225a4dc31e0SMatthew Dillon ++trans->sync_tid; 2261a7cfe5aSMatthew Dillon 22701eabad4SMatthew Dillon /* 228a98aa0b0SMatthew Dillon * Calculate the starting point for our allocation search. 229a98aa0b0SMatthew Dillon * 230a98aa0b0SMatthew Dillon * Each freemap leaf is dedicated to a specific freemap_radix. 231a98aa0b0SMatthew Dillon * The freemap_radix can be more fine-grained than the device buffer 232a98aa0b0SMatthew Dillon * radix which results in inodes being grouped together in their 233a98aa0b0SMatthew Dillon * own segment, terminal-data (16K or less) and initial indirect 234a98aa0b0SMatthew Dillon * block being grouped together, and then full-indirect and full-data 235a98aa0b0SMatthew Dillon * blocks (64K) being grouped together. 236a98aa0b0SMatthew Dillon * 237a98aa0b0SMatthew Dillon * The single most important aspect of this is the inode grouping 238a98aa0b0SMatthew Dillon * because that is what allows 'find' and 'ls' and other filesystem 239a98aa0b0SMatthew Dillon * topology operations to run fast. 2401a7cfe5aSMatthew Dillon */ 241a98aa0b0SMatthew Dillon #if 0 2421a7cfe5aSMatthew Dillon if (bref->data_off & ~HAMMER2_OFF_MASK_RADIX) 2431a7cfe5aSMatthew Dillon bpref = bref->data_off & ~HAMMER2_OFF_MASK_RADIX; 2441a7cfe5aSMatthew Dillon else if (trans->tmp_bpref) 2451a7cfe5aSMatthew Dillon bpref = trans->tmp_bpref; 2461a7cfe5aSMatthew Dillon else if (trans->tmp_ip) 2471a7cfe5aSMatthew Dillon bpref = trans->tmp_ip->chain->bref.data_off; 2481a7cfe5aSMatthew Dillon else 2491a7cfe5aSMatthew Dillon #endif 25091abd410SMatthew Dillon /* 25191abd410SMatthew Dillon * Heuristic tracking index. We would like one for each distinct 25291abd410SMatthew Dillon * bref type if possible. heur_freemap[] has room for two classes 25391abd410SMatthew Dillon * for each type. At a minimum we have to break-up our heuristic 25491abd410SMatthew Dillon * by device block sizes. 25591abd410SMatthew Dillon */ 25691abd410SMatthew Dillon hindex = hammer2_devblkradix(radix) - HAMMER2_MINIORADIX; 25791abd410SMatthew Dillon KKASSERT(hindex < HAMMER2_FREEMAP_HEUR_NRADIX); 25891abd410SMatthew Dillon hindex += bref->type * HAMMER2_FREEMAP_HEUR_NRADIX; 25991abd410SMatthew Dillon hindex &= HAMMER2_FREEMAP_HEUR_TYPES * HAMMER2_FREEMAP_HEUR_NRADIX - 1; 26091abd410SMatthew Dillon KKASSERT(hindex < HAMMER2_FREEMAP_HEUR); 26191abd410SMatthew Dillon 26291abd410SMatthew Dillon iter.bpref = hmp->heur_freemap[hindex]; 2631a7cfe5aSMatthew Dillon 2641a7cfe5aSMatthew Dillon /* 2651a7cfe5aSMatthew Dillon * Make sure bpref is in-bounds. It's ok if bpref covers a zone's 2661a7cfe5aSMatthew Dillon * reserved area, the try code will iterate past it. 2671a7cfe5aSMatthew Dillon */ 26891abd410SMatthew Dillon if (iter.bpref > hmp->voldata.volu_size) 26991abd410SMatthew Dillon iter.bpref = hmp->voldata.volu_size - 1; 2701a7cfe5aSMatthew Dillon 2711a7cfe5aSMatthew Dillon /* 2721a7cfe5aSMatthew Dillon * Iterate the freemap looking for free space before and after. 2731a7cfe5aSMatthew Dillon */ 2741a7cfe5aSMatthew Dillon parent = &hmp->fchain; 2751a7cfe5aSMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 2761a7cfe5aSMatthew Dillon error = EAGAIN; 27791abd410SMatthew Dillon iter.bnext = iter.bpref; 27891abd410SMatthew Dillon iter.loops = 0; 2791a7cfe5aSMatthew Dillon 2801a7cfe5aSMatthew Dillon while (error == EAGAIN) { 2811a7cfe5aSMatthew Dillon error = hammer2_freemap_try_alloc(trans, &parent, bref, 28291abd410SMatthew Dillon radix, &iter); 2831a7cfe5aSMatthew Dillon } 28491abd410SMatthew Dillon hmp->heur_freemap[hindex] = iter.bnext; 2851a7cfe5aSMatthew Dillon hammer2_chain_unlock(parent); 2861a7cfe5aSMatthew Dillon 287a7720be7SMatthew Dillon atomic_clear_int(&trans->flags, HAMMER2_TRANS_ISALLOCATING); 288a4dc31e0SMatthew Dillon if (trans->flags & HAMMER2_TRANS_ISFLUSH) 289a4dc31e0SMatthew Dillon --trans->sync_tid; 290a7720be7SMatthew Dillon 2911a7cfe5aSMatthew Dillon return (error); 2921a7cfe5aSMatthew Dillon } 2931a7cfe5aSMatthew Dillon 2941a7cfe5aSMatthew Dillon static int 2951a7cfe5aSMatthew Dillon hammer2_freemap_try_alloc(hammer2_trans_t *trans, hammer2_chain_t **parentp, 2961a7cfe5aSMatthew Dillon hammer2_blockref_t *bref, int radix, 29791abd410SMatthew Dillon hammer2_fiterate_t *iter) 2981a7cfe5aSMatthew Dillon { 299a5913bdfSMatthew Dillon hammer2_mount_t *hmp = (*parentp)->hmp; 3001a7cfe5aSMatthew Dillon hammer2_off_t l0size; 30193f3933aSMatthew Dillon hammer2_off_t l1size; 30293f3933aSMatthew Dillon hammer2_off_t l1mask; 3031897c66eSMatthew Dillon hammer2_key_t key_dummy; 3041a7cfe5aSMatthew Dillon hammer2_chain_t *chain; 3051a7cfe5aSMatthew Dillon hammer2_off_t key; 3061a7cfe5aSMatthew Dillon size_t bytes; 30791abd410SMatthew Dillon uint16_t class; 3081a7cfe5aSMatthew Dillon int error = 0; 3091897c66eSMatthew Dillon int cache_index = -1; 31093f3933aSMatthew Dillon 3111a7cfe5aSMatthew Dillon 3121a7cfe5aSMatthew Dillon /* 3131a7cfe5aSMatthew Dillon * Calculate the number of bytes being allocated, the number 3141a7cfe5aSMatthew Dillon * of contiguous bits of bitmap being allocated, and the bitmap 3151a7cfe5aSMatthew Dillon * mask. 31601eabad4SMatthew Dillon * 3171a7cfe5aSMatthew Dillon * WARNING! cpu hardware may mask bits == 64 -> 0 and blow up the 3181a7cfe5aSMatthew Dillon * mask calculation. 3191a7cfe5aSMatthew Dillon */ 3201a7cfe5aSMatthew Dillon bytes = (size_t)1 << radix; 32191abd410SMatthew Dillon class = (bref->type << 8) | hammer2_devblkradix(radix); 322a98aa0b0SMatthew Dillon 3231a7cfe5aSMatthew Dillon /* 32493f3933aSMatthew Dillon * Lookup the level1 freemap chain, creating and initializing one 3251a7cfe5aSMatthew Dillon * if necessary. Intermediate levels will be created automatically 3261a7cfe5aSMatthew Dillon * when necessary by hammer2_chain_create(). 3271a7cfe5aSMatthew Dillon */ 32891abd410SMatthew Dillon key = H2FMBASE(iter->bnext, HAMMER2_FREEMAP_LEVEL1_RADIX); 3291a7cfe5aSMatthew Dillon l0size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX); 33093f3933aSMatthew Dillon l1size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX); 33193f3933aSMatthew Dillon l1mask = l1size - 1; 3321a7cfe5aSMatthew Dillon 3331897c66eSMatthew Dillon chain = hammer2_chain_lookup(parentp, &key_dummy, key, key + l1mask, 3341897c66eSMatthew Dillon &cache_index, 3351a7cfe5aSMatthew Dillon HAMMER2_LOOKUP_FREEMAP | 3361a7cfe5aSMatthew Dillon HAMMER2_LOOKUP_ALWAYS | 3371a7cfe5aSMatthew Dillon HAMMER2_LOOKUP_MATCHIND/*XXX*/); 3381a7cfe5aSMatthew Dillon if (chain == NULL) { 3391a7cfe5aSMatthew Dillon /* 3401a7cfe5aSMatthew Dillon * Create the missing leaf, be sure to initialize 3411a7cfe5aSMatthew Dillon * the auxillary freemap tracking information in 3421a7cfe5aSMatthew Dillon * the bref.check.freemap structure. 3431a7cfe5aSMatthew Dillon */ 3441a7cfe5aSMatthew Dillon #if 0 34593f3933aSMatthew Dillon kprintf("freemap create L1 @ %016jx bpref %016jx\n", 34691abd410SMatthew Dillon key, iter->bpref); 3471a7cfe5aSMatthew Dillon #endif 3481a7cfe5aSMatthew Dillon error = hammer2_chain_create(trans, parentp, &chain, 34993f3933aSMatthew Dillon key, HAMMER2_FREEMAP_LEVEL1_RADIX, 3501a7cfe5aSMatthew Dillon HAMMER2_BREF_TYPE_FREEMAP_LEAF, 35193f3933aSMatthew Dillon HAMMER2_FREEMAP_LEVELN_PSIZE); 3521a7cfe5aSMatthew Dillon if (error == 0) { 3531a7cfe5aSMatthew Dillon hammer2_chain_modify(trans, &chain, 0); 35493f3933aSMatthew Dillon bzero(&chain->data->bmdata[0], 35593f3933aSMatthew Dillon HAMMER2_FREEMAP_LEVELN_PSIZE); 35693f3933aSMatthew Dillon chain->bref.check.freemap.bigmask = (uint32_t)-1; 35793f3933aSMatthew Dillon chain->bref.check.freemap.avail = l1size; 35893f3933aSMatthew Dillon /* bref.methods should already be inherited */ 3591a7cfe5aSMatthew Dillon 360a5913bdfSMatthew Dillon hammer2_freemap_init(trans, hmp, key, chain); 3611a7cfe5aSMatthew Dillon } 36293f3933aSMatthew Dillon } else if ((chain->bref.check.freemap.bigmask & (1 << radix)) == 0) { 3631a7cfe5aSMatthew Dillon /* 3641a7cfe5aSMatthew Dillon * Already flagged as not having enough space 3651a7cfe5aSMatthew Dillon */ 3661a7cfe5aSMatthew Dillon error = ENOSPC; 3671a7cfe5aSMatthew Dillon } else { 3681a7cfe5aSMatthew Dillon /* 3691a7cfe5aSMatthew Dillon * Modify existing chain to setup for adjustment. 3701a7cfe5aSMatthew Dillon */ 3711a7cfe5aSMatthew Dillon hammer2_chain_modify(trans, &chain, 0); 3721a7cfe5aSMatthew Dillon } 3731a7cfe5aSMatthew Dillon 3741a7cfe5aSMatthew Dillon /* 37593f3933aSMatthew Dillon * Scan 2MB entries. 3761a7cfe5aSMatthew Dillon */ 37793f3933aSMatthew Dillon if (error == 0) { 37893f3933aSMatthew Dillon hammer2_bmap_data_t *bmap; 37993f3933aSMatthew Dillon hammer2_key_t base_key; 38093f3933aSMatthew Dillon int count; 38193f3933aSMatthew Dillon int start; 38293f3933aSMatthew Dillon int n; 3831a7cfe5aSMatthew Dillon 38491abd410SMatthew Dillon start = (int)((iter->bnext - key) >> 38591abd410SMatthew Dillon HAMMER2_FREEMAP_LEVEL0_RADIX); 38693f3933aSMatthew Dillon KKASSERT(start >= 0 && start < HAMMER2_FREEMAP_COUNT); 38793f3933aSMatthew Dillon hammer2_chain_modify(trans, &chain, 0); 3881a7cfe5aSMatthew Dillon 3891a7cfe5aSMatthew Dillon error = ENOSPC; 39093f3933aSMatthew Dillon for (count = 0; count < HAMMER2_FREEMAP_COUNT; ++count) { 39193f3933aSMatthew Dillon if (start + count >= HAMMER2_FREEMAP_COUNT && 39293f3933aSMatthew Dillon start - count < 0) { 39393f3933aSMatthew Dillon break; 39493f3933aSMatthew Dillon } 39593f3933aSMatthew Dillon n = start + count; 39693f3933aSMatthew Dillon bmap = &chain->data->bmdata[n]; 39793f3933aSMatthew Dillon if (n < HAMMER2_FREEMAP_COUNT && bmap->avail && 39891abd410SMatthew Dillon (bmap->class == 0 || bmap->class == class)) { 39993f3933aSMatthew Dillon base_key = key + n * l0size; 40091abd410SMatthew Dillon error = hammer2_bmap_alloc(trans, hmp, bmap, 40191abd410SMatthew Dillon class, n, radix, 40291abd410SMatthew Dillon &base_key); 40393f3933aSMatthew Dillon if (error != ENOSPC) { 40493f3933aSMatthew Dillon key = base_key; 40593f3933aSMatthew Dillon break; 40693f3933aSMatthew Dillon } 40793f3933aSMatthew Dillon } 40893f3933aSMatthew Dillon n = start - count; 40993f3933aSMatthew Dillon bmap = &chain->data->bmdata[n]; 41093f3933aSMatthew Dillon if (n >= 0 && bmap->avail && 41191abd410SMatthew Dillon (bmap->class == 0 || bmap->class == class)) { 41293f3933aSMatthew Dillon base_key = key + n * l0size; 41391abd410SMatthew Dillon error = hammer2_bmap_alloc(trans, hmp, bmap, 41491abd410SMatthew Dillon class, n, radix, 41591abd410SMatthew Dillon &base_key); 41693f3933aSMatthew Dillon if (error != ENOSPC) { 41793f3933aSMatthew Dillon key = base_key; 41893f3933aSMatthew Dillon break; 41993f3933aSMatthew Dillon } 42093f3933aSMatthew Dillon } 42193f3933aSMatthew Dillon } 42293f3933aSMatthew Dillon if (error == ENOSPC) 42393f3933aSMatthew Dillon chain->bref.check.freemap.bigmask &= ~(1 << radix); 42493f3933aSMatthew Dillon /* XXX also scan down from original count */ 42593f3933aSMatthew Dillon } 4261a7cfe5aSMatthew Dillon 4271a7cfe5aSMatthew Dillon if (error == 0) { 4281a7cfe5aSMatthew Dillon /* 4291a7cfe5aSMatthew Dillon * Assert validity. Must be beyond the static allocator used 4301a7cfe5aSMatthew Dillon * by newfs_hammer2 (and thus also beyond the aux area), 4311a7cfe5aSMatthew Dillon * not go past the volume size, and must not be in the 4321a7cfe5aSMatthew Dillon * reserved segment area for a zone. 4331a7cfe5aSMatthew Dillon */ 4341a7cfe5aSMatthew Dillon KKASSERT(key >= hmp->voldata.allocator_beg && 4351a7cfe5aSMatthew Dillon key + bytes <= hmp->voldata.volu_size); 4361a7cfe5aSMatthew Dillon KKASSERT((key & HAMMER2_ZONE_MASK64) >= HAMMER2_ZONE_SEG); 4371a7cfe5aSMatthew Dillon bref->data_off = key | radix; 4381a7cfe5aSMatthew Dillon 4391a7cfe5aSMatthew Dillon #if 0 44093f3933aSMatthew Dillon kprintf("alloc cp=%p %016jx %016jx using %016jx\n", 4411a7cfe5aSMatthew Dillon chain, 44293f3933aSMatthew Dillon bref->key, bref->data_off, chain->bref.data_off); 4431a7cfe5aSMatthew Dillon #endif 4441a7cfe5aSMatthew Dillon } else if (error == ENOSPC) { 4451a7cfe5aSMatthew Dillon /* 44691abd410SMatthew Dillon * Return EAGAIN with next iteration in iter->bnext, or 4471a7cfe5aSMatthew Dillon * return ENOSPC if the allocation map has been exhausted. 4481a7cfe5aSMatthew Dillon */ 44991abd410SMatthew Dillon error = hammer2_freemap_iterate(trans, parentp, &chain, iter); 4501a7cfe5aSMatthew Dillon } 4511a7cfe5aSMatthew Dillon 4521a7cfe5aSMatthew Dillon /* 4531a7cfe5aSMatthew Dillon * Cleanup 4541a7cfe5aSMatthew Dillon */ 4551a7cfe5aSMatthew Dillon if (chain) 4561a7cfe5aSMatthew Dillon hammer2_chain_unlock(chain); 4571a7cfe5aSMatthew Dillon return (error); 4581a7cfe5aSMatthew Dillon } 4591a7cfe5aSMatthew Dillon 46093f3933aSMatthew Dillon /* 46193f3933aSMatthew Dillon * Allocate (1<<radix) bytes from the bmap whos base data offset is (*basep). 46293f3933aSMatthew Dillon * 46393f3933aSMatthew Dillon * If the linear iterator is mid-block we use it directly (the bitmap should 46493f3933aSMatthew Dillon * already be marked allocated), otherwise we search for a block in the bitmap 46593f3933aSMatthew Dillon * that fits the allocation request. 46693f3933aSMatthew Dillon * 46793f3933aSMatthew Dillon * A partial bitmap allocation sets the minimum bitmap granularity (16KB) 46893f3933aSMatthew Dillon * to fully allocated and adjusts the linear allocator to allow the 46993f3933aSMatthew Dillon * remaining space to be allocated. 47093f3933aSMatthew Dillon */ 47193f3933aSMatthew Dillon static 47293f3933aSMatthew Dillon int 473a5913bdfSMatthew Dillon hammer2_bmap_alloc(hammer2_trans_t *trans, hammer2_mount_t *hmp, 474a5913bdfSMatthew Dillon hammer2_bmap_data_t *bmap, 47591abd410SMatthew Dillon uint16_t class, int n, int radix, hammer2_key_t *basep) 47693f3933aSMatthew Dillon { 477*fdf62707SMatthew Dillon hammer2_io_t *dio; 47893f3933aSMatthew Dillon size_t size; 47991abd410SMatthew Dillon size_t bsize; 48093f3933aSMatthew Dillon int bmradix; 48193f3933aSMatthew Dillon uint32_t bmmask; 48293f3933aSMatthew Dillon int offset; 483*fdf62707SMatthew Dillon int error; 48493f3933aSMatthew Dillon int i; 48593f3933aSMatthew Dillon int j; 48693f3933aSMatthew Dillon 48793f3933aSMatthew Dillon /* 48893f3933aSMatthew Dillon * Take into account 2-bits per block when calculating bmradix. 48993f3933aSMatthew Dillon */ 49093f3933aSMatthew Dillon size = (size_t)1 << radix; 49191abd410SMatthew Dillon 49293f3933aSMatthew Dillon if (radix <= HAMMER2_FREEMAP_BLOCK_RADIX) { 49393f3933aSMatthew Dillon bmradix = 2; 49491abd410SMatthew Dillon bsize = HAMMER2_FREEMAP_BLOCK_SIZE; 49593f3933aSMatthew Dillon /* (16K) 2 bits per allocation block */ 49693f3933aSMatthew Dillon } else { 49793f3933aSMatthew Dillon bmradix = 2 << (radix - HAMMER2_FREEMAP_BLOCK_RADIX); 49891abd410SMatthew Dillon bsize = size; 49993f3933aSMatthew Dillon /* (32K-256K) 4, 8, 16, 32 bits per allocation block */ 50093f3933aSMatthew Dillon } 50193f3933aSMatthew Dillon 50293f3933aSMatthew Dillon /* 50391abd410SMatthew Dillon * Use the linear iterator to pack small allocations, otherwise 50491abd410SMatthew Dillon * fall-back to finding a free 16KB chunk. The linear iterator 50591abd410SMatthew Dillon * is only valid when *NOT* on a freemap chunking boundary (16KB). 50691abd410SMatthew Dillon * If it is the bitmap must be scanned. It can become invalid 50791abd410SMatthew Dillon * once we pack to the boundary. We adjust it after a bitmap 50891abd410SMatthew Dillon * allocation only for sub-16KB allocations (so the perfectly good 50991abd410SMatthew Dillon * previous value can still be used for fragments when 16KB+ 51091abd410SMatthew Dillon * allocations are made). 51191abd410SMatthew Dillon * 51291abd410SMatthew Dillon * Beware of hardware artifacts when bmradix == 32 (intermediate 51391abd410SMatthew Dillon * result can wind up being '1' instead of '0' if hardware masks 51491abd410SMatthew Dillon * bit-count & 31). 51593f3933aSMatthew Dillon * 51693f3933aSMatthew Dillon * NOTE: j needs to be even in the j= calculation. As an artifact 51793f3933aSMatthew Dillon * of the /2 division, our bitmask has to clear bit 0. 51891abd410SMatthew Dillon * 51991abd410SMatthew Dillon * NOTE: TODO this can leave little unallocatable fragments lying 52091abd410SMatthew Dillon * around. 52193f3933aSMatthew Dillon */ 52291abd410SMatthew Dillon if (((uint32_t)bmap->linear & HAMMER2_FREEMAP_BLOCK_MASK) + size <= 52391abd410SMatthew Dillon HAMMER2_FREEMAP_BLOCK_SIZE && 5242c6f594dSMatthew Dillon (bmap->linear & HAMMER2_FREEMAP_BLOCK_MASK) && 52591abd410SMatthew Dillon bmap->linear < HAMMER2_SEGSIZE) { 52693f3933aSMatthew Dillon KKASSERT(bmap->linear >= 0 && 52791abd410SMatthew Dillon bmap->linear + size <= HAMMER2_SEGSIZE && 52891abd410SMatthew Dillon (bmap->linear & (HAMMER2_MIN_ALLOC - 1)) == 0); 52993f3933aSMatthew Dillon offset = bmap->linear; 53093f3933aSMatthew Dillon i = offset / (HAMMER2_SEGSIZE / 8); 53193f3933aSMatthew Dillon j = (offset / (HAMMER2_FREEMAP_BLOCK_SIZE / 2)) & 30; 53293f3933aSMatthew Dillon bmmask = (bmradix == 32) ? 53393f3933aSMatthew Dillon 0xFFFFFFFFU : (1 << bmradix) - 1; 53493f3933aSMatthew Dillon bmmask <<= j; 53591abd410SMatthew Dillon bmap->linear = offset + size; 53693f3933aSMatthew Dillon } else { 53793f3933aSMatthew Dillon for (i = 0; i < 8; ++i) { 53893f3933aSMatthew Dillon bmmask = (bmradix == 32) ? 53993f3933aSMatthew Dillon 0xFFFFFFFFU : (1 << bmradix) - 1; 54093f3933aSMatthew Dillon for (j = 0; j < 32; j += bmradix) { 54193f3933aSMatthew Dillon if ((bmap->bitmap[i] & bmmask) == 0) 54293f3933aSMatthew Dillon goto success; 54393f3933aSMatthew Dillon bmmask <<= bmradix; 54493f3933aSMatthew Dillon } 54593f3933aSMatthew Dillon } 54691abd410SMatthew Dillon /*fragments might remain*/ 54791abd410SMatthew Dillon /*KKASSERT(bmap->avail == 0);*/ 54893f3933aSMatthew Dillon return (ENOSPC); 54993f3933aSMatthew Dillon success: 55093f3933aSMatthew Dillon offset = i * (HAMMER2_SEGSIZE / 8) + 55193f3933aSMatthew Dillon (j * (HAMMER2_FREEMAP_BLOCK_SIZE / 2)); 55291abd410SMatthew Dillon if (size & HAMMER2_FREEMAP_BLOCK_MASK) 55391abd410SMatthew Dillon bmap->linear = offset + size; 55493f3933aSMatthew Dillon } 55593f3933aSMatthew Dillon 55693f3933aSMatthew Dillon KKASSERT(i >= 0 && i < 8); /* 8 x 16 -> 128 x 16K -> 2MB */ 55793f3933aSMatthew Dillon 55893f3933aSMatthew Dillon /* 55993f3933aSMatthew Dillon * Optimize the buffer cache to avoid unnecessary read-before-write 56093f3933aSMatthew Dillon * operations. 56191abd410SMatthew Dillon * 56291abd410SMatthew Dillon * The device block size could be larger than the allocation size 56391abd410SMatthew Dillon * so the actual bitmap test is somewhat more involved. We have 56491abd410SMatthew Dillon * to use a compatible buffer size for this operation. 56593f3933aSMatthew Dillon */ 5662c6f594dSMatthew Dillon if ((bmap->bitmap[i] & bmmask) == 0 && 5672c6f594dSMatthew Dillon hammer2_devblksize(size) != size) { 56891abd410SMatthew Dillon size_t psize = hammer2_devblksize(size); 56991abd410SMatthew Dillon hammer2_off_t pmask = (hammer2_off_t)psize - 1; 57091abd410SMatthew Dillon int pbmradix = 2 << (hammer2_devblkradix(radix) - 57191abd410SMatthew Dillon HAMMER2_FREEMAP_BLOCK_RADIX); 57291abd410SMatthew Dillon uint32_t pbmmask; 573*fdf62707SMatthew Dillon int pradix = hammer2_getradix(psize); 57491abd410SMatthew Dillon 57591abd410SMatthew Dillon pbmmask = (pbmradix == 32) ? 0xFFFFFFFFU : (1 << pbmradix) - 1; 57691abd410SMatthew Dillon while ((pbmmask & bmmask) == 0) 57791abd410SMatthew Dillon pbmmask <<= pbmradix; 57891abd410SMatthew Dillon 5792c6f594dSMatthew Dillon #if 0 5802c6f594dSMatthew Dillon kprintf("%016jx mask %08x %08x %08x (%zd/%zd)\n", 5812c6f594dSMatthew Dillon *basep + offset, bmap->bitmap[i], 5822c6f594dSMatthew Dillon pbmmask, bmmask, size, psize); 5832c6f594dSMatthew Dillon #endif 5842c6f594dSMatthew Dillon 58591abd410SMatthew Dillon if ((bmap->bitmap[i] & pbmmask) == 0) { 586*fdf62707SMatthew Dillon error = hammer2_io_newq(hmp, 587*fdf62707SMatthew Dillon (*basep + (offset & ~pmask)) | 588*fdf62707SMatthew Dillon pradix, 589*fdf62707SMatthew Dillon psize, &dio); 590*fdf62707SMatthew Dillon hammer2_io_bqrelse(&dio); 59193f3933aSMatthew Dillon } 59291abd410SMatthew Dillon } 59393f3933aSMatthew Dillon 59493f3933aSMatthew Dillon #if 0 59593f3933aSMatthew Dillon /* 59693f3933aSMatthew Dillon * When initializing a new inode segment also attempt to initialize 59793f3933aSMatthew Dillon * an adjacent segment. Be careful not to index beyond the array 59893f3933aSMatthew Dillon * bounds. 59993f3933aSMatthew Dillon * 60093f3933aSMatthew Dillon * We do this to try to localize inode accesses to improve 60193f3933aSMatthew Dillon * directory scan rates. XXX doesn't improve scan rates. 60293f3933aSMatthew Dillon */ 60393f3933aSMatthew Dillon if (size == HAMMER2_INODE_BYTES) { 60493f3933aSMatthew Dillon if (n & 1) { 60593f3933aSMatthew Dillon if (bmap[-1].radix == 0 && bmap[-1].avail) 60693f3933aSMatthew Dillon bmap[-1].radix = radix; 60793f3933aSMatthew Dillon } else { 60893f3933aSMatthew Dillon if (bmap[1].radix == 0 && bmap[1].avail) 60993f3933aSMatthew Dillon bmap[1].radix = radix; 61093f3933aSMatthew Dillon } 61193f3933aSMatthew Dillon } 61293f3933aSMatthew Dillon #endif 61393f3933aSMatthew Dillon 61493f3933aSMatthew Dillon /* 61593f3933aSMatthew Dillon * Adjust the linear iterator, set the radix if necessary (might as 61693f3933aSMatthew Dillon * well just set it unconditionally), adjust *basep to return the 61793f3933aSMatthew Dillon * allocated data offset. 61893f3933aSMatthew Dillon */ 61993f3933aSMatthew Dillon bmap->bitmap[i] |= bmmask; 62091abd410SMatthew Dillon bmap->class = class; 62193f3933aSMatthew Dillon bmap->avail -= size; 62293f3933aSMatthew Dillon *basep += offset; 62393f3933aSMatthew Dillon 6249f604b01SMatthew Dillon hammer2_voldata_lock(hmp); 6259f604b01SMatthew Dillon hmp->voldata.allocator_free -= size; /* XXX */ 6269f604b01SMatthew Dillon hammer2_voldata_unlock(hmp, 1); 6279f604b01SMatthew Dillon 62893f3933aSMatthew Dillon return(0); 62993f3933aSMatthew Dillon } 63093f3933aSMatthew Dillon 63193f3933aSMatthew Dillon static 63293f3933aSMatthew Dillon void 633a5913bdfSMatthew Dillon hammer2_freemap_init(hammer2_trans_t *trans, hammer2_mount_t *hmp, 634a5913bdfSMatthew Dillon hammer2_key_t key, hammer2_chain_t *chain) 63593f3933aSMatthew Dillon { 63693f3933aSMatthew Dillon hammer2_off_t l1size; 63793f3933aSMatthew Dillon hammer2_off_t lokey; 63893f3933aSMatthew Dillon hammer2_off_t hikey; 63993f3933aSMatthew Dillon hammer2_bmap_data_t *bmap; 64093f3933aSMatthew Dillon int count; 64193f3933aSMatthew Dillon 64293f3933aSMatthew Dillon l1size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX); 64393f3933aSMatthew Dillon 64493f3933aSMatthew Dillon /* 64593f3933aSMatthew Dillon * Calculate the portion of the 2GB map that should be initialized 64693f3933aSMatthew Dillon * as free. Portions below or after will be initialized as allocated. 64793f3933aSMatthew Dillon * SEGMASK-align the areas so we don't have to worry about sub-scans 64893f3933aSMatthew Dillon * or endianess when using memset. 64993f3933aSMatthew Dillon * 65093f3933aSMatthew Dillon * (1) Ensure that all statically allocated space from newfs_hammer2 65193f3933aSMatthew Dillon * is marked allocated. 65293f3933aSMatthew Dillon * 65393f3933aSMatthew Dillon * (2) Ensure that the reserved area is marked allocated (typically 65493f3933aSMatthew Dillon * the first 4MB of the 2GB area being represented). 65593f3933aSMatthew Dillon * 65693f3933aSMatthew Dillon * (3) Ensure that any trailing space at the end-of-volume is marked 65793f3933aSMatthew Dillon * allocated. 65893f3933aSMatthew Dillon * 65993f3933aSMatthew Dillon * WARNING! It is possible for lokey to be larger than hikey if the 66093f3933aSMatthew Dillon * entire 2GB segment is within the static allocation. 66193f3933aSMatthew Dillon */ 662a5913bdfSMatthew Dillon lokey = (hmp->voldata.allocator_beg + HAMMER2_SEGMASK64) & 66393f3933aSMatthew Dillon ~HAMMER2_SEGMASK64; 66493f3933aSMatthew Dillon 66593f3933aSMatthew Dillon if (lokey < H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX) + 66693f3933aSMatthew Dillon HAMMER2_ZONE_SEG64) { 66793f3933aSMatthew Dillon lokey = H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX) + 66893f3933aSMatthew Dillon HAMMER2_ZONE_SEG64; 66993f3933aSMatthew Dillon } 67093f3933aSMatthew Dillon 67193f3933aSMatthew Dillon hikey = key + H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX); 672a5913bdfSMatthew Dillon if (hikey > hmp->voldata.volu_size) { 673a5913bdfSMatthew Dillon hikey = hmp->voldata.volu_size & ~HAMMER2_SEGMASK64; 67493f3933aSMatthew Dillon } 67593f3933aSMatthew Dillon 67693f3933aSMatthew Dillon chain->bref.check.freemap.avail = 67793f3933aSMatthew Dillon H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX); 67893f3933aSMatthew Dillon bmap = &chain->data->bmdata[0]; 67993f3933aSMatthew Dillon 68093f3933aSMatthew Dillon for (count = 0; count < HAMMER2_FREEMAP_COUNT; ++count) { 68193f3933aSMatthew Dillon if (key < lokey || key >= hikey) { 68293f3933aSMatthew Dillon memset(bmap->bitmap, -1, 68393f3933aSMatthew Dillon sizeof(bmap->bitmap)); 68493f3933aSMatthew Dillon bmap->avail = 0; 6852c6f594dSMatthew Dillon bmap->linear = HAMMER2_SEGSIZE; 68693f3933aSMatthew Dillon chain->bref.check.freemap.avail -= 68793f3933aSMatthew Dillon H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX); 68893f3933aSMatthew Dillon } else { 68993f3933aSMatthew Dillon bmap->avail = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX); 69093f3933aSMatthew Dillon } 69193f3933aSMatthew Dillon key += H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX); 69293f3933aSMatthew Dillon ++bmap; 69393f3933aSMatthew Dillon } 69493f3933aSMatthew Dillon } 69593f3933aSMatthew Dillon 69693f3933aSMatthew Dillon /* 69793f3933aSMatthew Dillon * The current Level 1 freemap has been exhausted, iterate to the next 69893f3933aSMatthew Dillon * one, return ENOSPC if no freemaps remain. 69993f3933aSMatthew Dillon * 70093f3933aSMatthew Dillon * XXX this should rotate back to the beginning to handle freed-up space 70193f3933aSMatthew Dillon * XXX or use intermediate entries to locate free space. TODO 70293f3933aSMatthew Dillon */ 7031a7cfe5aSMatthew Dillon static int 7041a7cfe5aSMatthew Dillon hammer2_freemap_iterate(hammer2_trans_t *trans, hammer2_chain_t **parentp, 70591abd410SMatthew Dillon hammer2_chain_t **chainp, hammer2_fiterate_t *iter) 7061a7cfe5aSMatthew Dillon { 707a5913bdfSMatthew Dillon hammer2_mount_t *hmp = (*parentp)->hmp; 708a5913bdfSMatthew Dillon 70991abd410SMatthew Dillon iter->bnext &= ~(H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX) - 1); 71091abd410SMatthew Dillon iter->bnext += H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX); 71191abd410SMatthew Dillon if (iter->bnext >= hmp->voldata.volu_size) { 71291abd410SMatthew Dillon iter->bnext = 0; 71391abd410SMatthew Dillon if (++iter->loops == 2) 7141a7cfe5aSMatthew Dillon return (ENOSPC); 71591abd410SMatthew Dillon } 7161a7cfe5aSMatthew Dillon return(EAGAIN); 71737aa19dfSMatthew Dillon } 7181a7cfe5aSMatthew Dillon 71991abd410SMatthew Dillon /* 72091abd410SMatthew Dillon * Free the specified blockref. This code is only able to fully free 72191abd410SMatthew Dillon * blocks when (how) is non-zero, otherwise the block is marked for 72291abd410SMatthew Dillon * the bulk freeing pass to check. 72391abd410SMatthew Dillon * 72491abd410SMatthew Dillon * Normal use is to only mark inodes as possibly being free. The underlying 72591abd410SMatthew Dillon * file blocks are not necessarily marked. The bulk freescan can 72691abd410SMatthew Dillon * theoretically handle the case. 72791abd410SMatthew Dillon * 72891abd410SMatthew Dillon * XXX currently disabled when how == 0 (the normal real-time case). At 72991abd410SMatthew Dillon * the moment we depend on the bulk freescan to actually free blocks. It 73091abd410SMatthew Dillon * will still call this routine with a non-zero how to stage possible frees 73191abd410SMatthew Dillon * and to do the actual free. 732a4dc31e0SMatthew Dillon * 733a4dc31e0SMatthew Dillon * WARNING! When called from a flush we have to use the 'live' sync_tid 734a4dc31e0SMatthew Dillon * and not the flush sync_tid. The live sync_tid is the flush 735a4dc31e0SMatthew Dillon * sync_tid + 1. That is, freemap allocations which occur during 736a4dc31e0SMatthew Dillon * a flush are not part of the flush. Crash-recovery will restore 737a4dc31e0SMatthew Dillon * any lost allocations. 73891abd410SMatthew Dillon */ 739004f88b4SMatthew Dillon void 74091abd410SMatthew Dillon hammer2_freemap_free(hammer2_trans_t *trans, hammer2_mount_t *hmp, 74191abd410SMatthew Dillon hammer2_blockref_t *bref, int how) 742004f88b4SMatthew Dillon { 74391abd410SMatthew Dillon hammer2_off_t data_off = bref->data_off; 74491abd410SMatthew Dillon hammer2_chain_t *chain; 74591abd410SMatthew Dillon hammer2_chain_t *parent; 74691abd410SMatthew Dillon hammer2_bmap_data_t *bmap; 74791abd410SMatthew Dillon hammer2_key_t key; 7481897c66eSMatthew Dillon hammer2_key_t key_dummy; 74991abd410SMatthew Dillon hammer2_off_t l0size; 75091abd410SMatthew Dillon hammer2_off_t l1size; 75191abd410SMatthew Dillon hammer2_off_t l1mask; 75291abd410SMatthew Dillon uint32_t *bitmap; 75391abd410SMatthew Dillon const uint32_t bmmask00 = 0; 75491abd410SMatthew Dillon uint32_t bmmask01; 75591abd410SMatthew Dillon uint32_t bmmask10; 75691abd410SMatthew Dillon uint32_t bmmask11; 75791abd410SMatthew Dillon size_t bytes; 75891abd410SMatthew Dillon uint16_t class; 759004f88b4SMatthew Dillon int radix; 76091abd410SMatthew Dillon int start; 76191abd410SMatthew Dillon int count; 76291abd410SMatthew Dillon int modified = 0; 7631897c66eSMatthew Dillon int cache_index = -1; 764004f88b4SMatthew Dillon 765004f88b4SMatthew Dillon radix = (int)data_off & HAMMER2_OFF_MASK_RADIX; 766004f88b4SMatthew Dillon data_off &= ~HAMMER2_OFF_MASK_RADIX; 76791abd410SMatthew Dillon KKASSERT(radix <= HAMMER2_MAX_RADIX); 768004f88b4SMatthew Dillon 76991abd410SMatthew Dillon bytes = (size_t)1 << radix; 77091abd410SMatthew Dillon class = (bref->type << 8) | hammer2_devblkradix(radix); 771004f88b4SMatthew Dillon 7725c23d7f1SMatthew Dillon /* 773355d67fcSMatthew Dillon * We can't free data allocated by newfs_hammer2. 774355d67fcSMatthew Dillon * Assert validity. 775355d67fcSMatthew Dillon */ 776a4dc31e0SMatthew Dillon KKASSERT((data_off & HAMMER2_ZONE_MASK64) >= HAMMER2_ZONE_SEG); 777355d67fcSMatthew Dillon if (data_off < hmp->voldata.allocator_beg) 778355d67fcSMatthew Dillon return; 779355d67fcSMatthew Dillon 780a4dc31e0SMatthew Dillon KKASSERT((trans->flags & HAMMER2_TRANS_ISALLOCATING) == 0); 781a7720be7SMatthew Dillon atomic_set_int(&trans->flags, HAMMER2_TRANS_ISALLOCATING); 782a4dc31e0SMatthew Dillon if (trans->flags & HAMMER2_TRANS_ISFLUSH) 783a4dc31e0SMatthew Dillon ++trans->sync_tid; 784a7720be7SMatthew Dillon 785355d67fcSMatthew Dillon /* 78691abd410SMatthew Dillon * Lookup the level1 freemap chain. The chain must exist. 7875c23d7f1SMatthew Dillon */ 78891abd410SMatthew Dillon key = H2FMBASE(data_off, HAMMER2_FREEMAP_LEVEL1_RADIX); 78991abd410SMatthew Dillon l0size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX); 79091abd410SMatthew Dillon l1size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX); 79191abd410SMatthew Dillon l1mask = l1size - 1; 79291abd410SMatthew Dillon 79391abd410SMatthew Dillon parent = &hmp->fchain; 79491abd410SMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 79591abd410SMatthew Dillon 7961897c66eSMatthew Dillon chain = hammer2_chain_lookup(&parent, &key_dummy, key, key + l1mask, 7971897c66eSMatthew Dillon &cache_index, 79891abd410SMatthew Dillon HAMMER2_LOOKUP_FREEMAP | 79991abd410SMatthew Dillon HAMMER2_LOOKUP_ALWAYS | 80091abd410SMatthew Dillon HAMMER2_LOOKUP_MATCHIND/*XXX*/); 80191abd410SMatthew Dillon if (chain == NULL) { 80291abd410SMatthew Dillon kprintf("hammer2_freemap_free: %016jx: no chain\n", 80391abd410SMatthew Dillon (intmax_t)bref->data_off); 80491abd410SMatthew Dillon hammer2_chain_unlock(parent); 80591abd410SMatthew Dillon return; 80691abd410SMatthew Dillon } 80791abd410SMatthew Dillon KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF); 80891abd410SMatthew Dillon 80991abd410SMatthew Dillon /* 81091abd410SMatthew Dillon * Find the bmap entry (covering a 2MB swath) 81191abd410SMatthew Dillon * Find the bitmap array index 81291abd410SMatthew Dillon * Find the bitmap bit index (runs in 2-bit pairs) 81391abd410SMatthew Dillon */ 81491abd410SMatthew Dillon bmap = &chain->data->bmdata[(int)(data_off >> HAMMER2_SEGRADIX) & 81591abd410SMatthew Dillon (HAMMER2_FREEMAP_COUNT - 1)]; 81691abd410SMatthew Dillon bitmap = &bmap->bitmap[(int)(data_off >> (HAMMER2_SEGRADIX - 3)) & 7]; 81791abd410SMatthew Dillon 81891abd410SMatthew Dillon start = ((int)(data_off >> HAMMER2_FREEMAP_BLOCK_RADIX) & 15) * 2; 81991abd410SMatthew Dillon bmmask01 = 1 << start; 82091abd410SMatthew Dillon bmmask10 = 2 << start; 82191abd410SMatthew Dillon bmmask11 = 3 << start; 82291abd410SMatthew Dillon 82391abd410SMatthew Dillon /* 82491abd410SMatthew Dillon * Fixup the bitmap 82591abd410SMatthew Dillon */ 82691abd410SMatthew Dillon if (radix < HAMMER2_FREEMAP_BLOCK_RADIX) { 82791abd410SMatthew Dillon count = 1; 82891abd410SMatthew Dillon how = 0; /* partial block, cannot set to 00 */ 82991abd410SMatthew Dillon } else { 83091abd410SMatthew Dillon count = 1 << (radix - HAMMER2_FREEMAP_BLOCK_RADIX); 8315c23d7f1SMatthew Dillon } 8325c23d7f1SMatthew Dillon 83391abd410SMatthew Dillon while (count) { 83491abd410SMatthew Dillon KKASSERT(bmmask11); 83591abd410SMatthew Dillon KKASSERT((*bitmap & bmmask11) != bmmask00); 83691abd410SMatthew Dillon if ((*bitmap & bmmask11) == bmmask11) { 83791abd410SMatthew Dillon if (!modified) { 83891abd410SMatthew Dillon hammer2_chain_modify(trans, &chain, 0); 83991abd410SMatthew Dillon modified = 1; 84091abd410SMatthew Dillon bmap = &chain->data->bmdata[(int)(data_off >> HAMMER2_SEGRADIX) & 84191abd410SMatthew Dillon (HAMMER2_FREEMAP_COUNT - 1)]; 84291abd410SMatthew Dillon bitmap = &bmap->bitmap[(int)(data_off >> (HAMMER2_SEGRADIX - 3)) & 7]; 84391abd410SMatthew Dillon } 84491abd410SMatthew Dillon if (how) 84591abd410SMatthew Dillon *bitmap &= ~bmmask11; 84691abd410SMatthew Dillon else 84791abd410SMatthew Dillon *bitmap = (*bitmap & ~bmmask11) | bmmask10; 84891abd410SMatthew Dillon } else if ((*bitmap & bmmask11) == bmmask10) { 84991abd410SMatthew Dillon if (how) { 85091abd410SMatthew Dillon if (!modified) { 85191abd410SMatthew Dillon hammer2_chain_modify(trans, &chain, 0); 85291abd410SMatthew Dillon modified = 1; 85391abd410SMatthew Dillon bmap = &chain->data->bmdata[(int)(data_off >> HAMMER2_SEGRADIX) & 85491abd410SMatthew Dillon (HAMMER2_FREEMAP_COUNT - 1)]; 85591abd410SMatthew Dillon bitmap = &bmap->bitmap[(int)(data_off >> (HAMMER2_SEGRADIX - 3)) & 7]; 85691abd410SMatthew Dillon } 85791abd410SMatthew Dillon *bitmap &= ~bmmask11; 85891abd410SMatthew Dillon } 85991abd410SMatthew Dillon } else if ((*bitmap & bmmask11) == bmmask01) { 86091abd410SMatthew Dillon KKASSERT(0); 86191abd410SMatthew Dillon } 86291abd410SMatthew Dillon --count; 86391abd410SMatthew Dillon bmmask01 <<= 2; 86491abd410SMatthew Dillon bmmask10 <<= 2; 86591abd410SMatthew Dillon bmmask11 <<= 2; 86691abd410SMatthew Dillon } 86791abd410SMatthew Dillon if (how && modified) { 86891abd410SMatthew Dillon bmap->avail += 1 << radix; 86991abd410SMatthew Dillon KKASSERT(bmap->avail <= HAMMER2_SEGSIZE); 87091abd410SMatthew Dillon if (bmap->avail == HAMMER2_SEGSIZE && 87191abd410SMatthew Dillon bmap->bitmap[0] == 0 && 87291abd410SMatthew Dillon bmap->bitmap[1] == 0 && 87391abd410SMatthew Dillon bmap->bitmap[2] == 0 && 87491abd410SMatthew Dillon bmap->bitmap[3] == 0 && 87591abd410SMatthew Dillon bmap->bitmap[4] == 0 && 87691abd410SMatthew Dillon bmap->bitmap[5] == 0 && 87791abd410SMatthew Dillon bmap->bitmap[6] == 0 && 87891abd410SMatthew Dillon bmap->bitmap[7] == 0) { 87991abd410SMatthew Dillon key = H2FMBASE(data_off, HAMMER2_FREEMAP_LEVEL0_RADIX); 88091abd410SMatthew Dillon kprintf("Freeseg %016jx\n", (intmax_t)key); 88191abd410SMatthew Dillon bmap->class = 0; 88291abd410SMatthew Dillon } 88391abd410SMatthew Dillon } 88491abd410SMatthew Dillon 88591abd410SMatthew Dillon /* 88691abd410SMatthew Dillon * chain->bref.check.freemap.bigmask (XXX) 88791abd410SMatthew Dillon */ 88891abd410SMatthew Dillon if (modified) 88991abd410SMatthew Dillon chain->bref.check.freemap.bigmask |= 1 << radix; 89091abd410SMatthew Dillon 89191abd410SMatthew Dillon hammer2_chain_unlock(chain); 89291abd410SMatthew Dillon hammer2_chain_unlock(parent); 893a7720be7SMatthew Dillon 894a7720be7SMatthew Dillon atomic_clear_int(&trans->flags, HAMMER2_TRANS_ISALLOCATING); 895a4dc31e0SMatthew Dillon if (trans->flags & HAMMER2_TRANS_ISFLUSH) 896a4dc31e0SMatthew Dillon --trans->sync_tid; 89791abd410SMatthew Dillon } 898