/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of PerconaFT.


Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.

    PerconaFT is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License, version 2,
    as published by the Free Software Foundation.

    PerconaFT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.

----------------------------------------

    PerconaFT is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License, version 3,
    as published by the Free Software Foundation.

    PerconaFT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
======= */

#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."

#include <algorithm>

#include <string.h>

#include "toku_portability.h"
#include "portability/memory.h"
#include "portability/toku_assert.h"
#include "portability/toku_stdint.h"
#include "portability/toku_stdlib.h"

#include "ft/serialize/block_allocator.h"
#include "ft/serialize/rbtree_mhs.h"

#if defined(TOKU_DEBUG_PARANOID) && TOKU_DEBUG_PARANOID
#define VALIDATE() Validate()
#else
#define VALIDATE()
#endif

void BlockAllocator::CreateInternal(uint64_t reserve_at_beginning,
                                    uint64_t alignment) {
    // the alignment must be at least 512 and a multiple of 512 so that
    // direct I/O works
    invariant(alignment >= 512 && (alignment % 512) == 0);

    _reserve_at_beginning = reserve_at_beginning;
    _alignment = alignment;
    _n_blocks = 0;
    _n_bytes_in_use = reserve_at_beginning;
    _tree = new MhsRbTree::Tree(alignment);
}

void BlockAllocator::Create(uint64_t reserve_at_beginning, uint64_t alignment) {
    CreateInternal(reserve_at_beginning, alignment);
    _tree->Insert({reserve_at_beginning, MAX_BYTE});
    VALIDATE();
}
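
// Illustrative usage sketch (the values are hypothetical, not from this
// file): Create() seeds the free-space tree with a single unbounded segment
// that starts right after the reserved prefix, so the first allocation
// should land at the reserve boundary.
//
//     BlockAllocator ba;
//     ba.Create(8192 /* reserve_at_beginning */, 4096 /* alignment */);
//     uint64_t offset;
//     ba.AllocBlock(4096, &offset);  // expected: offset == 8192
//     ba.FreeBlock(offset, 4096);    // the segment returns to the free tree
//     ba.Destroy();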

void BlockAllocator::Destroy() {
    delete _tree;
}

void BlockAllocator::CreateFromBlockPairs(uint64_t reserve_at_beginning,
                                          uint64_t alignment,
                                          struct BlockPair *translation_pairs,
                                          uint64_t n_blocks) {
    CreateInternal(reserve_at_beginning, alignment);
    _n_blocks = n_blocks;

    struct BlockPair *XMALLOC_N(n_blocks, pairs);
    memcpy(pairs, translation_pairs, n_blocks * sizeof(struct BlockPair));
    std::sort(pairs, pairs + n_blocks);

    if (pairs[0]._offset > reserve_at_beginning) {
        _tree->Insert(
            {reserve_at_beginning, pairs[0]._offset - reserve_at_beginning});
    }
    for (uint64_t i = 0; i < _n_blocks; i++) {
        // The allocator does not support zero-sized blocks. See the comment
        // above FreeBlock().
        invariant(pairs[i]._size > 0);
        invariant(pairs[i]._offset >= _reserve_at_beginning);
        invariant(pairs[i]._offset % _alignment == 0);

        _n_bytes_in_use += pairs[i]._size;

        MhsRbTree::OUUInt64 free_size(MAX_BYTE);
        MhsRbTree::OUUInt64 free_offset(pairs[i]._offset + pairs[i]._size);
        if (i < n_blocks - 1) {
            MhsRbTree::OUUInt64 next_offset(pairs[i + 1]._offset);
            invariant(next_offset >= free_offset);
            free_size = next_offset - free_offset;
            if (free_size == 0)
                continue;
        }
        _tree->Insert({free_offset, free_size});
    }
    toku_free(pairs);
    VALIDATE();
}
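
// Worked example (hypothetical numbers): with reserve_at_beginning = 512,
// alignment = 512, and sorted pairs {offset = 512, size = 100} and
// {offset = 1536, size = 512}, the loop above inserts the gap {612, 924}
// between the two used blocks, then the unbounded tail {2048, MAX_BYTE}
// after the last one.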

// Effect: align a value by rounding up.
static inline uint64_t Align(uint64_t value, uint64_t ba_alignment) {
    return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment;
}
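
// For example, Align(1000, 512) == 1024, Align(1024, 512) == 1024, and
// Align(1, 512) == 512. Note that this assumes value + ba_alignment - 1
// does not overflow uint64_t.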

// Effect: Allocate a block. The resulting offset is aligned to _alignment
// (which, to keep direct I/O happy, must be a positive multiple of 512).
void BlockAllocator::AllocBlock(uint64_t size, uint64_t *offset) {
    // The allocator does not support zero-sized blocks. See the comment
    // above FreeBlock().
    invariant(size > 0);

    _n_bytes_in_use += size;
    *offset = _tree->Remove(size);

    _n_blocks++;
    VALIDATE();
}
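
// Illustrative sketch (hypothetical values): on a freshly Create()d
// allocator, consecutive allocations are carved out of the same unbounded
// tail segment, so the offsets should come back aligned and increasing.
//
//     uint64_t off_a, off_b;
//     ba.AllocBlock(1 << 20, &off_a);  // off_a is a multiple of _alignment
//     ba.AllocBlock(1 << 20, &off_b);  // expected: off_b >= off_a + (1 << 20)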

// To support 0-sized blocks, we need to include size as an input to this
// function. All 0-sized blocks at the same offset can be considered
// identical, but a 0-sized block can share its offset with a non-zero-sized
// block. The non-zero-sized block is not exchangeable with a zero-sized one
// (or vice versa), so inserting 0-sized blocks can cause corruption here.
void BlockAllocator::FreeBlock(uint64_t offset, uint64_t size) {
    VALIDATE();
    _n_bytes_in_use -= size;
    _tree->Insert({offset, size});
    _n_blocks--;
    VALIDATE();
}
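
// Illustrative sketch of the hazard described above (hypothetical calls):
//
//     uint64_t off;
//     ba.AllocBlock(4096, &off);
//     ba.FreeBlock(off, 0);     // would insert a zero-sized node at `off`
//     ba.FreeBlock(off, 4096);  // a second node at the same offset could
//                               // then corrupt the tree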

uint64_t BlockAllocator::AllocatedLimit() const {
    MhsRbTree::Node *max_node = _tree->MaxNode();
    return rbn_offset(max_node).ToInt();
}

// Effect: Consider the blocks in sorted order. The reserved block at the
// beginning is number 0, the next one is number 1, and so forth.
// Return the offset and size of the block with that number.
// Return 0 if such a block exists; return nonzero if b is too big.
int BlockAllocator::NthBlockInLayoutOrder(uint64_t b,
                                          uint64_t *offset,
                                          uint64_t *size) {
    MhsRbTree::Node *x, *y;
    if (b == 0) {
        *offset = 0;
        *size = _reserve_at_beginning;
        return 0;
    } else if (b > _n_blocks) {
        return -1;
    } else {
        x = _tree->MinNode();
        for (uint64_t i = 1; i <= b; i++) {
            y = x;
            x = _tree->Successor(x);
        }
        *size = (rbn_offset(x) - (rbn_offset(y) + rbn_size(y))).ToInt();
        *offset = (rbn_offset(y) + rbn_size(y)).ToInt();
        return 0;
    }
}
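
// Illustrative sketch: enumerate every block in layout order until the
// index runs off the end (block 0 is the reserved prefix).
//
//     uint64_t offset, size;
//     for (uint64_t b = 0; ba.NthBlockInLayoutOrder(b, &offset, &size) == 0;
//          b++) {
//         printf("block %" PRIu64 ": offset %" PRIu64 ", size %" PRIu64 "\n",
//                b, offset, size);
//     }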

struct VisUnusedExtra {
    TOKU_DB_FRAGMENTATION _report;
    uint64_t _align;
};

static void VisUnusedCollector(void *extra,
                               MhsRbTree::Node *node,
                               uint64_t UU(depth)) {
    struct VisUnusedExtra *v_e = (struct VisUnusedExtra *)extra;
    TOKU_DB_FRAGMENTATION report = v_e->_report;
    uint64_t alignm = v_e->_align;

    MhsRbTree::OUUInt64 offset = rbn_offset(node);
    MhsRbTree::OUUInt64 size = rbn_size(node);
    MhsRbTree::OUUInt64 answer_offset(Align(offset.ToInt(), alignm));
    uint64_t free_space = (offset + size - answer_offset).ToInt();
    if (free_space > 0) {
        report->unused_bytes += free_space;
        report->unused_blocks++;
        if (free_space > report->largest_unused_block) {
            report->largest_unused_block = free_space;
        }
    }
}
// Requires: report->file_size_bytes is filled in
// Requires: report->data_bytes is filled in
// Requires: report->checkpoint_bytes_additional is filled in
void BlockAllocator::UnusedStatistics(TOKU_DB_FRAGMENTATION report) {
    invariant(_n_bytes_in_use ==
              report->data_bytes + report->checkpoint_bytes_additional);

    report->unused_bytes = 0;
    report->unused_blocks = 0;
    report->largest_unused_block = 0;
    struct VisUnusedExtra extra = {report, _alignment};
    _tree->InOrderVisitor(VisUnusedCollector, &extra);
}

void BlockAllocator::Statistics(TOKU_DB_FRAGMENTATION report) {
    report->data_bytes = _n_bytes_in_use;
    report->data_blocks = _n_blocks;
    report->file_size_bytes = 0;
    report->checkpoint_bytes_additional = 0;
    UnusedStatistics(report);
}
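
// Illustrative sketch, assuming the TOKU_DB_FRAGMENTATION_S struct typedef
// that backs the TOKU_DB_FRAGMENTATION pointer:
//
//     TOKU_DB_FRAGMENTATION_S report;
//     ba.Statistics(&report);
//     // report.unused_bytes, report.unused_blocks, and
//     // report.largest_unused_block now describe the free space in the tree.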

struct ValidateExtra {
    uint64_t _bytes;
    MhsRbTree::Node *_pre_node;
};

static void VisUsedBlocksInOrder(void *extra,
                                 MhsRbTree::Node *cur_node,
                                 uint64_t UU(depth)) {
    struct ValidateExtra *v_e = (struct ValidateExtra *)extra;
    MhsRbTree::Node *pre_node = v_e->_pre_node;
    // verify no overlaps
    if (pre_node) {
        invariant(rbn_size(pre_node) > 0);
        invariant(rbn_offset(cur_node) >
                  rbn_offset(pre_node) + rbn_size(pre_node));
        MhsRbTree::OUUInt64 used_space =
            rbn_offset(cur_node) - (rbn_offset(pre_node) + rbn_size(pre_node));
        v_e->_bytes += used_space.ToInt();
    } else {
        v_e->_bytes += rbn_offset(cur_node).ToInt();
    }
    v_e->_pre_node = cur_node;
}

void BlockAllocator::Validate() const {
    _tree->ValidateBalance();
    _tree->ValidateMhs();
    struct ValidateExtra extra = {0, nullptr};
    _tree->InOrderVisitor(VisUsedBlocksInOrder, &extra);
    invariant(extra._bytes == _n_bytes_in_use);
}