1# Copyright (C) 2008-2012, 2016 Canonical Ltd 2# 3# This program is free software; you can redistribute it and/or modify 4# it under the terms of the GNU General Public License as published by 5# the Free Software Foundation; either version 2 of the License, or 6# (at your option) any later version. 7# 8# This program is distributed in the hope that it will be useful, 9# but WITHOUT ANY WARRANTY; without even the implied warranty of 10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11# GNU General Public License for more details. 12# 13# You should have received a copy of the GNU General Public License 14# along with this program; if not, write to the Free Software 15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16# 17 18"""Tests for btree indices.""" 19 20import pprint 21import zlib 22 23from ... import ( 24 errors, 25 fifo_cache, 26 lru_cache, 27 osutils, 28 tests, 29 transport, 30 ) 31from .. import ( 32 btree_index, 33 index as _mod_index, 34 ) 35from ...tests import ( 36 TestCaseWithTransport, 37 scenarios, 38 ) 39from ...tests import ( 40 features, 41 ) 42 43 44load_tests = scenarios.load_tests_apply_scenarios 45 46 47def btreeparser_scenarios(): 48 import breezy.bzr._btree_serializer_py as py_module 49 scenarios = [('python', {'parse_btree': py_module})] 50 if compiled_btreeparser_feature.available(): 51 scenarios.append(('C', 52 {'parse_btree': compiled_btreeparser_feature.module})) 53 return scenarios 54 55 56compiled_btreeparser_feature = features.ModuleAvailableFeature( 57 'breezy.bzr._btree_serializer_pyx') 58 59 60class BTreeTestCase(TestCaseWithTransport): 61 # test names here are suffixed by the key length and reference list count 62 # that they test. 63 64 def setUp(self): 65 super(BTreeTestCase, self).setUp() 66 self.overrideAttr(btree_index, '_RESERVED_HEADER_BYTES', 100) 67 68 def make_nodes(self, count, key_elements, reference_lists): 69 """Generate count*key_elements sample nodes.""" 70 def _pos_to_key(pos, lead=b""): 71 return (lead + (b"%d" % pos) * 40,) 72 keys = [] 73 for prefix_pos in range(key_elements): 74 if key_elements - 1: 75 prefix = _pos_to_key(prefix_pos) 76 else: 77 prefix = () 78 for pos in range(count): 79 # TODO: This creates odd keys. When count == 100,000, it 80 # creates a 240 byte key 81 key = prefix + _pos_to_key(pos) 82 value = b"value:%d" % pos 83 if reference_lists: 84 # generate some references 85 refs = [] 86 for list_pos in range(reference_lists): 87 # as many keys in each list as its index + the key depth 88 # mod 2 - this generates both 0 length lists and 89 # ones slightly longer than the number of lists. 90 # It also ensures we have non homogeneous lists. 91 refs.append([]) 92 for ref_pos in range(list_pos + pos % 2): 93 if pos % 2: 94 # refer to a nearby key 95 refs[-1].append(prefix 96 + _pos_to_key(pos - 1, b"ref")) 97 else: 98 # serial of this ref in the ref list 99 refs[-1].append(prefix 100 + _pos_to_key(ref_pos, b"ref")) 101 refs[-1] = tuple(refs[-1]) 102 refs = tuple(refs) 103 else: 104 refs = () 105 keys.append((key, value, refs)) 106 return keys 107 108 def shrink_page_size(self): 109 """Shrink the default page size so that less fits in a page.""" 110 self.overrideAttr(btree_index, '_PAGE_SIZE') 111 btree_index._PAGE_SIZE = 2048 112 113 def assertEqualApproxCompressed(self, expected, actual, slop=6): 114 """Check a count of compressed bytes is approximately as expected 115 116 Relying on compressed length being stable even with fixed inputs is 117 slightly bogus, but zlib is stable enough that this mostly works. 118 """ 119 if not expected - slop < actual < expected + slop: 120 self.fail("Expected around %d bytes compressed but got %d" % 121 (expected, actual)) 122 123 124class TestBTreeBuilder(BTreeTestCase): 125 126 def test_clear_cache(self): 127 builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1) 128 # This is a no-op, but we need the api to be consistent with other 129 # BTreeGraphIndex apis. 130 builder.clear_cache() 131 132 def test_empty_1_0(self): 133 builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0) 134 # NamedTemporaryFile dies on builder.finish().read(). weird. 135 temp_file = builder.finish() 136 content = temp_file.read() 137 del temp_file 138 self.assertEqual( 139 b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=0\n" 140 b"row_lengths=\n", 141 content) 142 143 def test_empty_2_1(self): 144 builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=1) 145 # NamedTemporaryFile dies on builder.finish().read(). weird. 146 temp_file = builder.finish() 147 content = temp_file.read() 148 del temp_file 149 self.assertEqual( 150 b"B+Tree Graph Index 2\nnode_ref_lists=1\nkey_elements=2\nlen=0\n" 151 b"row_lengths=\n", 152 content) 153 154 def test_root_leaf_1_0(self): 155 builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0) 156 nodes = self.make_nodes(5, 1, 0) 157 for node in nodes: 158 builder.add_node(*node) 159 # NamedTemporaryFile dies on builder.finish().read(). weird. 160 temp_file = builder.finish() 161 content = temp_file.read() 162 del temp_file 163 self.assertEqual(131, len(content)) 164 self.assertEqual( 165 b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n" 166 b"row_lengths=1\n", 167 content[:73]) 168 node_content = content[73:] 169 node_bytes = zlib.decompress(node_content) 170 expected_node = (b"type=leaf\n" 171 b"0000000000000000000000000000000000000000\x00\x00value:0\n" 172 b"1111111111111111111111111111111111111111\x00\x00value:1\n" 173 b"2222222222222222222222222222222222222222\x00\x00value:2\n" 174 b"3333333333333333333333333333333333333333\x00\x00value:3\n" 175 b"4444444444444444444444444444444444444444\x00\x00value:4\n") 176 self.assertEqual(expected_node, node_bytes) 177 178 def test_root_leaf_2_2(self): 179 builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2) 180 nodes = self.make_nodes(5, 2, 2) 181 for node in nodes: 182 builder.add_node(*node) 183 # NamedTemporaryFile dies on builder.finish().read(). weird. 184 temp_file = builder.finish() 185 content = temp_file.read() 186 del temp_file 187 self.assertEqual(238, len(content)) 188 self.assertEqual( 189 b"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n" 190 b"row_lengths=1\n", 191 content[:74]) 192 node_content = content[74:] 193 node_bytes = zlib.decompress(node_content) 194 expected_node = ( 195 b"type=leaf\n" 196 b"0000000000000000000000000000000000000000\x000000000000000000000000000000000000000000\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:0\n" 197 b"0000000000000000000000000000000000000000\x001111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\r0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:1\n" 198 b"0000000000000000000000000000000000000000\x002222222222222222222222222222222222222222\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:2\n" 199 b"0000000000000000000000000000000000000000\x003333333333333333333333333333333333333333\x000000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\t0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\r0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\x00value:3\n" 200 b"0000000000000000000000000000000000000000\x004444444444444444444444444444444444444444\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:4\n" 201 b"1111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:0\n" 202 b"1111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\r1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:1\n" 203 b"1111111111111111111111111111111111111111\x002222222222222222222222222222222222222222\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:2\n" 204 b"1111111111111111111111111111111111111111\x003333333333333333333333333333333333333333\x001111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\t1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\r1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\x00value:3\n" 205 b"1111111111111111111111111111111111111111\x004444444444444444444444444444444444444444\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:4\n" 206 b"" 207 ) 208 self.assertEqual(expected_node, node_bytes) 209 210 def test_2_leaves_1_0(self): 211 builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0) 212 nodes = self.make_nodes(400, 1, 0) 213 for node in nodes: 214 builder.add_node(*node) 215 # NamedTemporaryFile dies on builder.finish().read(). weird. 216 temp_file = builder.finish() 217 content = temp_file.read() 218 del temp_file 219 self.assertEqualApproxCompressed(9283, len(content)) 220 self.assertEqual( 221 b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n" 222 b"row_lengths=1,2\n", 223 content[:77]) 224 root = content[77:4096] 225 leaf1 = content[4096:8192] 226 leaf2 = content[8192:] 227 root_bytes = zlib.decompress(root) 228 expected_root = ( 229 b"type=internal\n" 230 b"offset=0\n" 231 ) + (b"307" * 40) + b"\n" 232 self.assertEqual(expected_root, root_bytes) 233 # We already know serialisation works for leaves, check key selection: 234 leaf1_bytes = zlib.decompress(leaf1) 235 sorted_node_keys = sorted(node[0] for node in nodes) 236 node = btree_index._LeafNode(leaf1_bytes, 1, 0) 237 self.assertEqual(231, len(node)) 238 self.assertEqual(sorted_node_keys[:231], node.all_keys()) 239 leaf2_bytes = zlib.decompress(leaf2) 240 node = btree_index._LeafNode(leaf2_bytes, 1, 0) 241 self.assertEqual(400 - 231, len(node)) 242 self.assertEqual(sorted_node_keys[231:], node.all_keys()) 243 244 def test_last_page_rounded_1_layer(self): 245 builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0) 246 nodes = self.make_nodes(10, 1, 0) 247 for node in nodes: 248 builder.add_node(*node) 249 # NamedTemporaryFile dies on builder.finish().read(). weird. 250 temp_file = builder.finish() 251 content = temp_file.read() 252 del temp_file 253 self.assertEqualApproxCompressed(155, len(content)) 254 self.assertEqual( 255 b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n" 256 b"row_lengths=1\n", 257 content[:74]) 258 # Check thelast page is well formed 259 leaf2 = content[74:] 260 leaf2_bytes = zlib.decompress(leaf2) 261 node = btree_index._LeafNode(leaf2_bytes, 1, 0) 262 self.assertEqual(10, len(node)) 263 sorted_node_keys = sorted(node[0] for node in nodes) 264 self.assertEqual(sorted_node_keys, node.all_keys()) 265 266 def test_last_page_not_rounded_2_layer(self): 267 builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0) 268 nodes = self.make_nodes(400, 1, 0) 269 for node in nodes: 270 builder.add_node(*node) 271 # NamedTemporaryFile dies on builder.finish().read(). weird. 272 temp_file = builder.finish() 273 content = temp_file.read() 274 del temp_file 275 self.assertEqualApproxCompressed(9283, len(content)) 276 self.assertEqual( 277 b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n" 278 b"row_lengths=1,2\n", 279 content[:77]) 280 # Check the last page is well formed 281 leaf2 = content[8192:] 282 leaf2_bytes = zlib.decompress(leaf2) 283 node = btree_index._LeafNode(leaf2_bytes, 1, 0) 284 self.assertEqual(400 - 231, len(node)) 285 sorted_node_keys = sorted(node[0] for node in nodes) 286 self.assertEqual(sorted_node_keys[231:], node.all_keys()) 287 288 def test_three_level_tree_details(self): 289 # The left most pointer in the second internal node in a row should 290 # pointer to the second node that the internal node is for, _not_ 291 # the first, otherwise the first node overlaps with the last node of 292 # the prior internal node on that row. 293 self.shrink_page_size() 294 builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2) 295 # 40K nodes is enough to create a two internal nodes on the second 296 # level, with a 2K page size 297 nodes = self.make_nodes(20000, 2, 2) 298 299 for node in nodes: 300 builder.add_node(*node) 301 t = transport.get_transport_from_url('trace+' + self.get_url('')) 302 size = t.put_file('index', self.time(builder.finish)) 303 del builder 304 index = btree_index.BTreeGraphIndex(t, 'index', size) 305 # Seed the metadata, we're using internal calls now. 306 index.key_count() 307 self.assertEqual(3, len(index._row_lengths), 308 "Not enough rows: %r" % index._row_lengths) 309 self.assertEqual(4, len(index._row_offsets)) 310 self.assertEqual(sum(index._row_lengths), index._row_offsets[-1]) 311 internal_nodes = index._get_internal_nodes([0, 1, 2]) 312 root_node = internal_nodes[0] 313 internal_node1 = internal_nodes[1] 314 internal_node2 = internal_nodes[2] 315 # The left most node node2 points at should be one after the right most 316 # node pointed at by node1. 317 self.assertEqual(internal_node2.offset, 1 + len(internal_node1.keys)) 318 # The left most key of the second node pointed at by internal_node2 319 # should be its first key. We can check this by looking for its first key 320 # in the second node it points at 321 pos = index._row_offsets[2] + internal_node2.offset + 1 322 leaf = index._get_leaf_nodes([pos])[pos] 323 self.assertTrue(internal_node2.keys[0] in leaf) 324 325 def test_2_leaves_2_2(self): 326 builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2) 327 nodes = self.make_nodes(100, 2, 2) 328 for node in nodes: 329 builder.add_node(*node) 330 # NamedTemporaryFile dies on builder.finish().read(). weird. 331 temp_file = builder.finish() 332 content = temp_file.read() 333 del temp_file 334 self.assertEqualApproxCompressed(12643, len(content)) 335 self.assertEqual( 336 b"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n" 337 b"row_lengths=1,3\n", 338 content[:77]) 339 root = content[77:4096] 340 leaf1 = content[4096:8192] 341 leaf2 = content[8192:12288] 342 leaf3 = content[12288:] 343 root_bytes = zlib.decompress(root) 344 expected_root = ( 345 b"type=internal\n" 346 b"offset=0\n" + 347 (b"0" * 40) + b"\x00" + (b"91" * 40) + b"\n" + 348 (b"1" * 40) + b"\x00" + (b"81" * 40) + b"\n" 349 ) 350 self.assertEqual(expected_root, root_bytes) 351 # We assume the other leaf nodes have been written correctly - layering 352 # FTW. 353 354 def test_spill_index_stress_1_1(self): 355 builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2) 356 nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)] 357 builder.add_node(*nodes[0]) 358 # Test the parts of the index that take up memory are doing so 359 # predictably. 360 self.assertEqual(1, len(builder._nodes)) 361 self.assertIs(None, builder._nodes_by_key) 362 builder.add_node(*nodes[1]) 363 self.assertEqual(0, len(builder._nodes)) 364 self.assertIs(None, builder._nodes_by_key) 365 self.assertEqual(1, len(builder._backing_indices)) 366 self.assertEqual(2, builder._backing_indices[0].key_count()) 367 # now back to memory 368 builder.add_node(*nodes[2]) 369 self.assertEqual(1, len(builder._nodes)) 370 self.assertIs(None, builder._nodes_by_key) 371 # And spills to a second backing index combing all 372 builder.add_node(*nodes[3]) 373 self.assertEqual(0, len(builder._nodes)) 374 self.assertIs(None, builder._nodes_by_key) 375 self.assertEqual(2, len(builder._backing_indices)) 376 self.assertEqual(None, builder._backing_indices[0]) 377 self.assertEqual(4, builder._backing_indices[1].key_count()) 378 # The next spills to the 2-len slot 379 builder.add_node(*nodes[4]) 380 builder.add_node(*nodes[5]) 381 self.assertEqual(0, len(builder._nodes)) 382 self.assertIs(None, builder._nodes_by_key) 383 self.assertEqual(2, len(builder._backing_indices)) 384 self.assertEqual(2, builder._backing_indices[0].key_count()) 385 self.assertEqual(4, builder._backing_indices[1].key_count()) 386 # Next spill combines 387 builder.add_node(*nodes[6]) 388 builder.add_node(*nodes[7]) 389 self.assertEqual(3, len(builder._backing_indices)) 390 self.assertEqual(None, builder._backing_indices[0]) 391 self.assertEqual(None, builder._backing_indices[1]) 392 self.assertEqual(8, builder._backing_indices[2].key_count()) 393 # And so forth - counting up in binary. 394 builder.add_node(*nodes[8]) 395 builder.add_node(*nodes[9]) 396 self.assertEqual(3, len(builder._backing_indices)) 397 self.assertEqual(2, builder._backing_indices[0].key_count()) 398 self.assertEqual(None, builder._backing_indices[1]) 399 self.assertEqual(8, builder._backing_indices[2].key_count()) 400 builder.add_node(*nodes[10]) 401 builder.add_node(*nodes[11]) 402 self.assertEqual(3, len(builder._backing_indices)) 403 self.assertEqual(None, builder._backing_indices[0]) 404 self.assertEqual(4, builder._backing_indices[1].key_count()) 405 self.assertEqual(8, builder._backing_indices[2].key_count()) 406 builder.add_node(*nodes[12]) 407 # Test that memory and disk are both used for query methods; and that 408 # None is skipped over happily. 409 self.assertEqual([(builder,) + node for node in sorted(nodes[:13])], 410 list(builder.iter_all_entries())) 411 # Two nodes - one memory one disk 412 self.assertEqual({(builder,) + node for node in nodes[11:13]}, 413 set(builder.iter_entries([nodes[12][0], nodes[11][0]]))) 414 self.assertEqual(13, builder.key_count()) 415 self.assertEqual({(builder,) + node for node in nodes[11:13]}, 416 set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]]))) 417 builder.add_node(*nodes[13]) 418 self.assertEqual(3, len(builder._backing_indices)) 419 self.assertEqual(2, builder._backing_indices[0].key_count()) 420 self.assertEqual(4, builder._backing_indices[1].key_count()) 421 self.assertEqual(8, builder._backing_indices[2].key_count()) 422 builder.add_node(*nodes[14]) 423 builder.add_node(*nodes[15]) 424 self.assertEqual(4, len(builder._backing_indices)) 425 self.assertEqual(None, builder._backing_indices[0]) 426 self.assertEqual(None, builder._backing_indices[1]) 427 self.assertEqual(None, builder._backing_indices[2]) 428 self.assertEqual(16, builder._backing_indices[3].key_count()) 429 # Now finish, and check we got a correctly ordered tree 430 t = self.get_transport('') 431 size = t.put_file('index', builder.finish()) 432 index = btree_index.BTreeGraphIndex(t, 'index', size) 433 nodes = list(index.iter_all_entries()) 434 self.assertEqual(sorted(nodes), nodes) 435 self.assertEqual(16, len(nodes)) 436 437 def test_spill_index_stress_1_1_no_combine(self): 438 builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2) 439 builder.set_optimize(for_size=False, combine_backing_indices=False) 440 nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)] 441 builder.add_node(*nodes[0]) 442 # Test the parts of the index that take up memory are doing so 443 # predictably. 444 self.assertEqual(1, len(builder._nodes)) 445 self.assertIs(None, builder._nodes_by_key) 446 builder.add_node(*nodes[1]) 447 self.assertEqual(0, len(builder._nodes)) 448 self.assertIs(None, builder._nodes_by_key) 449 self.assertEqual(1, len(builder._backing_indices)) 450 self.assertEqual(2, builder._backing_indices[0].key_count()) 451 # now back to memory 452 builder.add_node(*nodes[2]) 453 self.assertEqual(1, len(builder._nodes)) 454 self.assertIs(None, builder._nodes_by_key) 455 # And spills to a second backing index but doesn't combine 456 builder.add_node(*nodes[3]) 457 self.assertEqual(0, len(builder._nodes)) 458 self.assertIs(None, builder._nodes_by_key) 459 self.assertEqual(2, len(builder._backing_indices)) 460 for backing_index in builder._backing_indices: 461 self.assertEqual(2, backing_index.key_count()) 462 # The next spills to the 3rd slot 463 builder.add_node(*nodes[4]) 464 builder.add_node(*nodes[5]) 465 self.assertEqual(0, len(builder._nodes)) 466 self.assertIs(None, builder._nodes_by_key) 467 self.assertEqual(3, len(builder._backing_indices)) 468 for backing_index in builder._backing_indices: 469 self.assertEqual(2, backing_index.key_count()) 470 # Now spill a few more, and check that we don't combine 471 builder.add_node(*nodes[6]) 472 builder.add_node(*nodes[7]) 473 builder.add_node(*nodes[8]) 474 builder.add_node(*nodes[9]) 475 builder.add_node(*nodes[10]) 476 builder.add_node(*nodes[11]) 477 builder.add_node(*nodes[12]) 478 self.assertEqual(6, len(builder._backing_indices)) 479 for backing_index in builder._backing_indices: 480 self.assertEqual(2, backing_index.key_count()) 481 # Test that memory and disk are both used for query methods; and that 482 # None is skipped over happily. 483 self.assertEqual([(builder,) + node for node in sorted(nodes[:13])], 484 list(builder.iter_all_entries())) 485 # Two nodes - one memory one disk 486 self.assertEqual({(builder,) + node for node in nodes[11:13]}, 487 set(builder.iter_entries([nodes[12][0], nodes[11][0]]))) 488 self.assertEqual(13, builder.key_count()) 489 self.assertEqual({(builder,) + node for node in nodes[11:13]}, 490 set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]]))) 491 builder.add_node(*nodes[13]) 492 builder.add_node(*nodes[14]) 493 builder.add_node(*nodes[15]) 494 self.assertEqual(8, len(builder._backing_indices)) 495 for backing_index in builder._backing_indices: 496 self.assertEqual(2, backing_index.key_count()) 497 # Now finish, and check we got a correctly ordered tree 498 transport = self.get_transport('') 499 size = transport.put_file('index', builder.finish()) 500 index = btree_index.BTreeGraphIndex(transport, 'index', size) 501 nodes = list(index.iter_all_entries()) 502 self.assertEqual(sorted(nodes), nodes) 503 self.assertEqual(16, len(nodes)) 504 505 def test_set_optimize(self): 506 builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2) 507 builder.set_optimize(for_size=True) 508 self.assertTrue(builder._optimize_for_size) 509 builder.set_optimize(for_size=False) 510 self.assertFalse(builder._optimize_for_size) 511 # test that we can set combine_backing_indices without effecting 512 # _optimize_for_size 513 obj = object() 514 builder._optimize_for_size = obj 515 builder.set_optimize(combine_backing_indices=False) 516 self.assertFalse(builder._combine_backing_indices) 517 self.assertIs(obj, builder._optimize_for_size) 518 builder.set_optimize(combine_backing_indices=True) 519 self.assertTrue(builder._combine_backing_indices) 520 self.assertIs(obj, builder._optimize_for_size) 521 522 def test_spill_index_stress_2_2(self): 523 # test that references and longer keys don't confuse things. 524 builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2, 525 spill_at=2) 526 nodes = self.make_nodes(16, 2, 2) 527 builder.add_node(*nodes[0]) 528 # Test the parts of the index that take up memory are doing so 529 # predictably. 530 self.assertEqual(1, len(builder._nodes)) 531 self.assertIs(None, builder._nodes_by_key) 532 builder.add_node(*nodes[1]) 533 self.assertEqual(0, len(builder._nodes)) 534 self.assertIs(None, builder._nodes_by_key) 535 self.assertEqual(1, len(builder._backing_indices)) 536 self.assertEqual(2, builder._backing_indices[0].key_count()) 537 # now back to memory 538 # Build up the nodes by key dict 539 old = dict(builder._get_nodes_by_key()) 540 builder.add_node(*nodes[2]) 541 self.assertEqual(1, len(builder._nodes)) 542 self.assertIsNot(None, builder._nodes_by_key) 543 self.assertNotEqual({}, builder._nodes_by_key) 544 # We should have a new entry 545 self.assertNotEqual(old, builder._nodes_by_key) 546 # And spills to a second backing index combing all 547 builder.add_node(*nodes[3]) 548 self.assertEqual(0, len(builder._nodes)) 549 self.assertIs(None, builder._nodes_by_key) 550 self.assertEqual(2, len(builder._backing_indices)) 551 self.assertEqual(None, builder._backing_indices[0]) 552 self.assertEqual(4, builder._backing_indices[1].key_count()) 553 # The next spills to the 2-len slot 554 builder.add_node(*nodes[4]) 555 builder.add_node(*nodes[5]) 556 self.assertEqual(0, len(builder._nodes)) 557 self.assertIs(None, builder._nodes_by_key) 558 self.assertEqual(2, len(builder._backing_indices)) 559 self.assertEqual(2, builder._backing_indices[0].key_count()) 560 self.assertEqual(4, builder._backing_indices[1].key_count()) 561 # Next spill combines 562 builder.add_node(*nodes[6]) 563 builder.add_node(*nodes[7]) 564 self.assertEqual(3, len(builder._backing_indices)) 565 self.assertEqual(None, builder._backing_indices[0]) 566 self.assertEqual(None, builder._backing_indices[1]) 567 self.assertEqual(8, builder._backing_indices[2].key_count()) 568 # And so forth - counting up in binary. 569 builder.add_node(*nodes[8]) 570 builder.add_node(*nodes[9]) 571 self.assertEqual(3, len(builder._backing_indices)) 572 self.assertEqual(2, builder._backing_indices[0].key_count()) 573 self.assertEqual(None, builder._backing_indices[1]) 574 self.assertEqual(8, builder._backing_indices[2].key_count()) 575 builder.add_node(*nodes[10]) 576 builder.add_node(*nodes[11]) 577 self.assertEqual(3, len(builder._backing_indices)) 578 self.assertEqual(None, builder._backing_indices[0]) 579 self.assertEqual(4, builder._backing_indices[1].key_count()) 580 self.assertEqual(8, builder._backing_indices[2].key_count()) 581 builder.add_node(*nodes[12]) 582 # Test that memory and disk are both used for query methods; and that 583 # None is skipped over happily. 584 self.assertEqual([(builder,) + node for node in sorted(nodes[:13])], 585 list(builder.iter_all_entries())) 586 # Two nodes - one memory one disk 587 self.assertEqual({(builder,) + node for node in nodes[11:13]}, 588 set(builder.iter_entries([nodes[12][0], nodes[11][0]]))) 589 self.assertEqual(13, builder.key_count()) 590 self.assertEqual({(builder,) + node for node in nodes[11:13]}, 591 set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]]))) 592 builder.add_node(*nodes[13]) 593 self.assertEqual(3, len(builder._backing_indices)) 594 self.assertEqual(2, builder._backing_indices[0].key_count()) 595 self.assertEqual(4, builder._backing_indices[1].key_count()) 596 self.assertEqual(8, builder._backing_indices[2].key_count()) 597 builder.add_node(*nodes[14]) 598 builder.add_node(*nodes[15]) 599 self.assertEqual(4, len(builder._backing_indices)) 600 self.assertEqual(None, builder._backing_indices[0]) 601 self.assertEqual(None, builder._backing_indices[1]) 602 self.assertEqual(None, builder._backing_indices[2]) 603 self.assertEqual(16, builder._backing_indices[3].key_count()) 604 # Now finish, and check we got a correctly ordered tree 605 transport = self.get_transport('') 606 size = transport.put_file('index', builder.finish()) 607 index = btree_index.BTreeGraphIndex(transport, 'index', size) 608 nodes = list(index.iter_all_entries()) 609 self.assertEqual(sorted(nodes), nodes) 610 self.assertEqual(16, len(nodes)) 611 612 def test_spill_index_duplicate_key_caught_on_finish(self): 613 builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2) 614 nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)] 615 builder.add_node(*nodes[0]) 616 builder.add_node(*nodes[1]) 617 builder.add_node(*nodes[0]) 618 self.assertRaises(_mod_index.BadIndexDuplicateKey, builder.finish) 619 620 621class TestBTreeIndex(BTreeTestCase): 622 623 def make_index(self, ref_lists=0, key_elements=1, nodes=[]): 624 builder = btree_index.BTreeBuilder(reference_lists=ref_lists, 625 key_elements=key_elements) 626 for key, value, references in nodes: 627 builder.add_node(key, value, references) 628 stream = builder.finish() 629 trans = transport.get_transport_from_url('trace+' + self.get_url()) 630 size = trans.put_file('index', stream) 631 return btree_index.BTreeGraphIndex(trans, 'index', size) 632 633 def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[], 634 offset=0): 635 builder = btree_index.BTreeBuilder(key_elements=key_elements, 636 reference_lists=ref_lists) 637 builder.add_nodes(nodes) 638 transport = self.get_transport('') 639 # NamedTemporaryFile dies on builder.finish().read(). weird. 640 temp_file = builder.finish() 641 content = temp_file.read() 642 del temp_file 643 size = len(content) 644 transport.put_bytes('index', (b' ' * offset) + content) 645 return btree_index.BTreeGraphIndex(transport, 'index', size=size, 646 offset=offset) 647 648 def test_clear_cache(self): 649 nodes = self.make_nodes(160, 2, 2) 650 index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes) 651 self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]])))) 652 self.assertEqual([1, 4], index._row_lengths) 653 self.assertIsNot(None, index._root_node) 654 internal_node_pre_clear = set(index._internal_node_cache) 655 self.assertTrue(len(index._leaf_node_cache) > 0) 656 index.clear_cache() 657 # We don't touch _root_node or _internal_node_cache, both should be 658 # small, and can save a round trip or two 659 self.assertIsNot(None, index._root_node) 660 # NOTE: We don't want to affect the _internal_node_cache, as we expect 661 # it will be small, and if we ever do touch this index again, it 662 # will save round-trips. This assertion isn't very strong, 663 # becuase without a 3-level index, we don't have any internal 664 # nodes cached. 665 self.assertEqual(internal_node_pre_clear, 666 set(index._internal_node_cache)) 667 self.assertEqual(0, len(index._leaf_node_cache)) 668 669 def test_trivial_constructor(self): 670 t = transport.get_transport_from_url('trace+' + self.get_url('')) 671 index = btree_index.BTreeGraphIndex(t, 'index', None) 672 # Checks the page size at load, but that isn't logged yet. 673 self.assertEqual([], t._activity) 674 675 def test_with_size_constructor(self): 676 t = transport.get_transport_from_url('trace+' + self.get_url('')) 677 index = btree_index.BTreeGraphIndex(t, 'index', 1) 678 # Checks the page size at load, but that isn't logged yet. 679 self.assertEqual([], t._activity) 680 681 def test_empty_key_count_no_size(self): 682 builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0) 683 t = transport.get_transport_from_url('trace+' + self.get_url('')) 684 t.put_file('index', builder.finish()) 685 index = btree_index.BTreeGraphIndex(t, 'index', None) 686 del t._activity[:] 687 self.assertEqual([], t._activity) 688 self.assertEqual(0, index.key_count()) 689 # The entire index should have been requested (as we generally have the 690 # size available, and doing many small readvs is inappropriate). 691 # We can't tell how much was actually read here, but - check the code. 692 self.assertEqual([('get', 'index')], t._activity) 693 694 def test_empty_key_count(self): 695 builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0) 696 t = transport.get_transport_from_url('trace+' + self.get_url('')) 697 size = t.put_file('index', builder.finish()) 698 self.assertEqual(72, size) 699 index = btree_index.BTreeGraphIndex(t, 'index', size) 700 del t._activity[:] 701 self.assertEqual([], t._activity) 702 self.assertEqual(0, index.key_count()) 703 # The entire index should have been read, as 4K > size 704 self.assertEqual([('readv', 'index', [(0, 72)], False, None)], 705 t._activity) 706 707 def test_non_empty_key_count_2_2(self): 708 builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2) 709 nodes = self.make_nodes(35, 2, 2) 710 for node in nodes: 711 builder.add_node(*node) 712 t = transport.get_transport_from_url('trace+' + self.get_url('')) 713 size = t.put_file('index', builder.finish()) 714 index = btree_index.BTreeGraphIndex(t, 'index', size) 715 del t._activity[:] 716 self.assertEqual([], t._activity) 717 self.assertEqual(70, index.key_count()) 718 # The entire index should have been read, as it is one page long. 719 self.assertEqual([('readv', 'index', [(0, size)], False, None)], 720 t._activity) 721 self.assertEqualApproxCompressed(1173, size) 722 723 def test_with_offset_no_size(self): 724 index = self.make_index_with_offset(key_elements=1, ref_lists=1, 725 offset=1234, 726 nodes=self.make_nodes(200, 1, 1)) 727 index._size = None # throw away the size info 728 self.assertEqual(200, index.key_count()) 729 730 def test_with_small_offset(self): 731 index = self.make_index_with_offset(key_elements=1, ref_lists=1, 732 offset=1234, 733 nodes=self.make_nodes(200, 1, 1)) 734 self.assertEqual(200, index.key_count()) 735 736 def test_with_large_offset(self): 737 index = self.make_index_with_offset(key_elements=1, ref_lists=1, 738 offset=123456, 739 nodes=self.make_nodes(200, 1, 1)) 740 self.assertEqual(200, index.key_count()) 741 742 def test__read_nodes_no_size_one_page_reads_once(self): 743 self.make_index(nodes=[((b'key',), b'value', ())]) 744 trans = transport.get_transport_from_url('trace+' + self.get_url()) 745 index = btree_index.BTreeGraphIndex(trans, 'index', None) 746 del trans._activity[:] 747 nodes = dict(index._read_nodes([0])) 748 self.assertEqual({0}, set(nodes)) 749 node = nodes[0] 750 self.assertEqual([(b'key',)], node.all_keys()) 751 self.assertEqual([('get', 'index')], trans._activity) 752 753 def test__read_nodes_no_size_multiple_pages(self): 754 index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2)) 755 index.key_count() 756 num_pages = index._row_offsets[-1] 757 # Reopen with a traced transport and no size 758 trans = transport.get_transport_from_url('trace+' + self.get_url()) 759 index = btree_index.BTreeGraphIndex(trans, 'index', None) 760 del trans._activity[:] 761 nodes = dict(index._read_nodes([0])) 762 self.assertEqual(list(range(num_pages)), sorted(nodes)) 763 764 def test_2_levels_key_count_2_2(self): 765 builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2) 766 nodes = self.make_nodes(160, 2, 2) 767 for node in nodes: 768 builder.add_node(*node) 769 t = transport.get_transport_from_url('trace+' + self.get_url('')) 770 size = t.put_file('index', builder.finish()) 771 self.assertEqualApproxCompressed(17692, size) 772 index = btree_index.BTreeGraphIndex(t, 'index', size) 773 del t._activity[:] 774 self.assertEqual([], t._activity) 775 self.assertEqual(320, index.key_count()) 776 # The entire index should not have been read. 777 self.assertEqual([('readv', 'index', [(0, 4096)], False, None)], 778 t._activity) 779 780 def test_validate_one_page(self): 781 builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2) 782 nodes = self.make_nodes(45, 2, 2) 783 for node in nodes: 784 builder.add_node(*node) 785 t = transport.get_transport_from_url('trace+' + self.get_url('')) 786 size = t.put_file('index', builder.finish()) 787 index = btree_index.BTreeGraphIndex(t, 'index', size) 788 del t._activity[:] 789 self.assertEqual([], t._activity) 790 index.validate() 791 # The entire index should have been read linearly. 792 self.assertEqual([('readv', 'index', [(0, size)], False, None)], 793 t._activity) 794 self.assertEqualApproxCompressed(1488, size) 795 796 def test_validate_two_pages(self): 797 builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2) 798 nodes = self.make_nodes(80, 2, 2) 799 for node in nodes: 800 builder.add_node(*node) 801 t = transport.get_transport_from_url('trace+' + self.get_url('')) 802 size = t.put_file('index', builder.finish()) 803 # Root page, 2 leaf pages 804 self.assertEqualApproxCompressed(9339, size) 805 index = btree_index.BTreeGraphIndex(t, 'index', size) 806 del t._activity[:] 807 self.assertEqual([], t._activity) 808 index.validate() 809 rem = size - 8192 # Number of remaining bytes after second block 810 # The entire index should have been read linearly. 811 self.assertEqual( 812 [('readv', 'index', [(0, 4096)], False, None), 813 ('readv', 'index', [(4096, 4096), (8192, rem)], False, None)], 814 t._activity) 815 # XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong 816 # node and make validate find them. 817 818 def test_eq_ne(self): 819 # two indices are equal when constructed with the same parameters: 820 t1 = transport.get_transport_from_url('trace+' + self.get_url('')) 821 t2 = self.get_transport() 822 self.assertTrue( 823 btree_index.BTreeGraphIndex(t1, 'index', None) 824 == btree_index.BTreeGraphIndex(t1, 'index', None)) 825 self.assertTrue( 826 btree_index.BTreeGraphIndex(t1, 'index', 20) 827 == btree_index.BTreeGraphIndex(t1, 'index', 20)) 828 self.assertFalse( 829 btree_index.BTreeGraphIndex(t1, 'index', 20) 830 == btree_index.BTreeGraphIndex(t2, 'index', 20)) 831 self.assertFalse( 832 btree_index.BTreeGraphIndex(t1, 'inde1', 20) 833 == btree_index.BTreeGraphIndex(t1, 'inde2', 20)) 834 self.assertFalse( 835 btree_index.BTreeGraphIndex(t1, 'index', 10) 836 == btree_index.BTreeGraphIndex(t1, 'index', 20)) 837 self.assertFalse( 838 btree_index.BTreeGraphIndex(t1, 'index', None) 839 != btree_index.BTreeGraphIndex(t1, 'index', None)) 840 self.assertFalse( 841 btree_index.BTreeGraphIndex(t1, 'index', 20) 842 != btree_index.BTreeGraphIndex(t1, 'index', 20)) 843 self.assertTrue( 844 btree_index.BTreeGraphIndex(t1, 'index', 20) 845 != btree_index.BTreeGraphIndex(t2, 'index', 20)) 846 self.assertTrue( 847 btree_index.BTreeGraphIndex(t1, 'inde1', 20) 848 != btree_index.BTreeGraphIndex(t1, 'inde2', 20)) 849 self.assertTrue( 850 btree_index.BTreeGraphIndex(t1, 'index', 10) 851 != btree_index.BTreeGraphIndex(t1, 'index', 20)) 852 853 def test_key_too_big(self): 854 # the size that matters here is the _compressed_ size of the key, so we can't 855 # do a simple character repeat. 856 bigKey = b''.join(b'%d' % n for n in range(btree_index._PAGE_SIZE)) 857 self.assertRaises(_mod_index.BadIndexKey, 858 self.make_index, 859 nodes=[((bigKey,), b'value', ())]) 860 861 def test_iter_all_only_root_no_size(self): 862 self.make_index(nodes=[((b'key',), b'value', ())]) 863 t = transport.get_transport_from_url('trace+' + self.get_url('')) 864 index = btree_index.BTreeGraphIndex(t, 'index', None) 865 del t._activity[:] 866 self.assertEqual([((b'key',), b'value')], 867 [x[1:] for x in index.iter_all_entries()]) 868 self.assertEqual([('get', 'index')], t._activity) 869 870 def test_iter_all_entries_reads(self): 871 # iterating all entries reads the header, then does a linear 872 # read. 873 self.shrink_page_size() 874 builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2) 875 # 20k nodes is enough to create a two internal nodes on the second 876 # level, with a 2K page size 877 nodes = self.make_nodes(10000, 2, 2) 878 for node in nodes: 879 builder.add_node(*node) 880 t = transport.get_transport_from_url('trace+' + self.get_url('')) 881 size = t.put_file('index', builder.finish()) 882 page_size = btree_index._PAGE_SIZE 883 del builder 884 index = btree_index.BTreeGraphIndex(t, 'index', size) 885 del t._activity[:] 886 self.assertEqual([], t._activity) 887 found_nodes = self.time(list, index.iter_all_entries()) 888 bare_nodes = [] 889 for node in found_nodes: 890 self.assertTrue(node[0] is index) 891 bare_nodes.append(node[1:]) 892 self.assertEqual(3, len(index._row_lengths), 893 "Not enough rows: %r" % index._row_lengths) 894 # Should be as long as the nodes we supplied 895 self.assertEqual(20000, len(found_nodes)) 896 # Should have the same content 897 self.assertEqual(set(nodes), set(bare_nodes)) 898 # Should have done linear scan IO up the index, ignoring 899 # the internal nodes: 900 # The entire index should have been read 901 total_pages = sum(index._row_lengths) 902 self.assertEqual(total_pages, index._row_offsets[-1]) 903 self.assertEqualApproxCompressed(1303220, size) 904 # The start of the leaves 905 first_byte = index._row_offsets[-2] * page_size 906 readv_request = [] 907 for offset in range(first_byte, size, page_size): 908 readv_request.append((offset, page_size)) 909 # The last page is truncated 910 readv_request[-1] = (readv_request[-1][0], size % page_size) 911 expected = [('readv', 'index', [(0, page_size)], False, None), 912 ('readv', 'index', readv_request, False, None)] 913 if expected != t._activity: 914 self.assertEqualDiff(pprint.pformat(expected), 915 pprint.pformat(t._activity)) 916 917 def test_iter_entries_references_2_refs_resolved(self): 918 # iterating some entries reads just the pages needed. For now, to 919 # get it working and start measuring, only 4K pages are read. 920 builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2) 921 # 80 nodes is enough to create a two-level index. 922 nodes = self.make_nodes(160, 2, 2) 923 for node in nodes: 924 builder.add_node(*node) 925 t = transport.get_transport_from_url('trace+' + self.get_url('')) 926 size = t.put_file('index', builder.finish()) 927 del builder 928 index = btree_index.BTreeGraphIndex(t, 'index', size) 929 del t._activity[:] 930 self.assertEqual([], t._activity) 931 # search for one key 932 found_nodes = list(index.iter_entries([nodes[30][0]])) 933 bare_nodes = [] 934 for node in found_nodes: 935 self.assertTrue(node[0] is index) 936 bare_nodes.append(node[1:]) 937 # Should be as long as the nodes we supplied 938 self.assertEqual(1, len(found_nodes)) 939 # Should have the same content 940 self.assertEqual(nodes[30], bare_nodes[0]) 941 # Should have read the root node, then one leaf page: 942 self.assertEqual([('readv', 'index', [(0, 4096)], False, None), 943 ('readv', 'index', [(8192, 4096), ], False, None)], 944 t._activity) 945 946 def test_iter_key_prefix_1_element_key_None(self): 947 index = self.make_index() 948 self.assertRaises(_mod_index.BadIndexKey, list, 949 index.iter_entries_prefix([(None, )])) 950 951 def test_iter_key_prefix_wrong_length(self): 952 index = self.make_index() 953 self.assertRaises(_mod_index.BadIndexKey, list, 954 index.iter_entries_prefix([(b'foo', None)])) 955 index = self.make_index(key_elements=2) 956 self.assertRaises(_mod_index.BadIndexKey, list, 957 index.iter_entries_prefix([(b'foo', )])) 958 self.assertRaises(_mod_index.BadIndexKey, list, 959 index.iter_entries_prefix([(b'foo', None, None)])) 960 961 def test_iter_key_prefix_1_key_element_no_refs(self): 962 index = self.make_index(nodes=[ 963 ((b'name', ), b'data', ()), 964 ((b'ref', ), b'refdata', ())]) 965 self.assertEqual({(index, (b'name', ), b'data'), 966 (index, (b'ref', ), b'refdata')}, 967 set(index.iter_entries_prefix([(b'name', ), (b'ref', )]))) 968 969 def test_iter_key_prefix_1_key_element_refs(self): 970 index = self.make_index(1, nodes=[ 971 ((b'name', ), b'data', ([(b'ref', )], )), 972 ((b'ref', ), b'refdata', ([], ))]) 973 self.assertEqual({(index, (b'name', ), b'data', (((b'ref',),),)), 974 (index, (b'ref', ), b'refdata', ((), ))}, 975 set(index.iter_entries_prefix([(b'name', ), (b'ref', )]))) 976 977 def test_iter_key_prefix_2_key_element_no_refs(self): 978 index = self.make_index(key_elements=2, nodes=[ 979 ((b'name', b'fin1'), b'data', ()), 980 ((b'name', b'fin2'), b'beta', ()), 981 ((b'ref', b'erence'), b'refdata', ())]) 982 self.assertEqual({(index, (b'name', b'fin1'), b'data'), 983 (index, (b'ref', b'erence'), b'refdata')}, 984 set(index.iter_entries_prefix( 985 [(b'name', b'fin1'), (b'ref', b'erence')]))) 986 self.assertEqual({(index, (b'name', b'fin1'), b'data'), 987 (index, (b'name', b'fin2'), b'beta')}, 988 set(index.iter_entries_prefix([(b'name', None)]))) 989 990 def test_iter_key_prefix_2_key_element_refs(self): 991 index = self.make_index(1, key_elements=2, nodes=[ 992 ((b'name', b'fin1'), b'data', ([(b'ref', b'erence')], )), 993 ((b'name', b'fin2'), b'beta', ([], )), 994 ((b'ref', b'erence'), b'refdata', ([], ))]) 995 self.assertEqual( 996 {(index, (b'name', b'fin1'), b'data', (((b'ref', b'erence'),),)), 997 (index, (b'ref', b'erence'), b'refdata', ((), ))}, 998 set(index.iter_entries_prefix( 999 [(b'name', b'fin1'), (b'ref', b'erence')]))) 1000 self.assertEqual( 1001 {(index, (b'name', b'fin1'), b'data', (((b'ref', b'erence'),),)), 1002 (index, (b'name', b'fin2'), b'beta', ((), ))}, 1003 set(index.iter_entries_prefix([(b'name', None)]))) 1004 1005 # XXX: external_references tests are duplicated in test_index. We 1006 # probably should have per_graph_index tests... 1007 def test_external_references_no_refs(self): 1008 index = self.make_index(ref_lists=0, nodes=[]) 1009 self.assertRaises(ValueError, index.external_references, 0) 1010 1011 def test_external_references_no_results(self): 1012 index = self.make_index(ref_lists=1, nodes=[ 1013 ((b'key',), b'value', ([],))]) 1014 self.assertEqual(set(), index.external_references(0)) 1015 1016 def test_external_references_missing_ref(self): 1017 missing_key = (b'missing',) 1018 index = self.make_index(ref_lists=1, nodes=[ 1019 ((b'key',), b'value', ([missing_key],))]) 1020 self.assertEqual({missing_key}, index.external_references(0)) 1021 1022 def test_external_references_multiple_ref_lists(self): 1023 missing_key = (b'missing',) 1024 index = self.make_index(ref_lists=2, nodes=[ 1025 ((b'key',), b'value', ([], [missing_key]))]) 1026 self.assertEqual(set([]), index.external_references(0)) 1027 self.assertEqual({missing_key}, index.external_references(1)) 1028 1029 def test_external_references_two_records(self): 1030 index = self.make_index(ref_lists=1, nodes=[ 1031 ((b'key-1',), b'value', ([(b'key-2',)],)), 1032 ((b'key-2',), b'value', ([],)), 1033 ]) 1034 self.assertEqual(set([]), index.external_references(0)) 1035 1036 def test__find_ancestors_one_page(self): 1037 key1 = (b'key-1',) 1038 key2 = (b'key-2',) 1039 index = self.make_index(ref_lists=1, key_elements=1, nodes=[ 1040 (key1, b'value', ([key2],)), 1041 (key2, b'value', ([],)), 1042 ]) 1043 parent_map = {} 1044 missing_keys = set() 1045 search_keys = index._find_ancestors( 1046 [key1], 0, parent_map, missing_keys) 1047 self.assertEqual({key1: (key2,), key2: ()}, parent_map) 1048 self.assertEqual(set(), missing_keys) 1049 self.assertEqual(set(), search_keys) 1050 1051 def test__find_ancestors_one_page_w_missing(self): 1052 key1 = (b'key-1',) 1053 key2 = (b'key-2',) 1054 key3 = (b'key-3',) 1055 index = self.make_index(ref_lists=1, key_elements=1, nodes=[ 1056 (key1, b'value', ([key2],)), 1057 (key2, b'value', ([],)), 1058 ]) 1059 parent_map = {} 1060 missing_keys = set() 1061 search_keys = index._find_ancestors([key2, key3], 0, parent_map, 1062 missing_keys) 1063 self.assertEqual({key2: ()}, parent_map) 1064 # we know that key3 is missing because we read the page that it would 1065 # otherwise be on 1066 self.assertEqual({key3}, missing_keys) 1067 self.assertEqual(set(), search_keys) 1068 1069 def test__find_ancestors_one_parent_missing(self): 1070 key1 = (b'key-1',) 1071 key2 = (b'key-2',) 1072 key3 = (b'key-3',) 1073 index = self.make_index(ref_lists=1, key_elements=1, nodes=[ 1074 (key1, b'value', ([key2],)), 1075 (key2, b'value', ([key3],)), 1076 ]) 1077 parent_map = {} 1078 missing_keys = set() 1079 search_keys = index._find_ancestors([key1], 0, parent_map, 1080 missing_keys) 1081 self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map) 1082 self.assertEqual(set(), missing_keys) 1083 # all we know is that key3 wasn't present on the page we were reading 1084 # but if you look, the last key is key2 which comes before key3, so we 1085 # don't know whether key3 would land on this page or not. 1086 self.assertEqual({key3}, search_keys) 1087 search_keys = index._find_ancestors(search_keys, 0, parent_map, 1088 missing_keys) 1089 # passing it back in, we are sure it is 'missing' 1090 self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map) 1091 self.assertEqual({key3}, missing_keys) 1092 self.assertEqual(set([]), search_keys) 1093 1094 def test__find_ancestors_dont_search_known(self): 1095 key1 = (b'key-1',) 1096 key2 = (b'key-2',) 1097 key3 = (b'key-3',) 1098 index = self.make_index(ref_lists=1, key_elements=1, nodes=[ 1099 (key1, b'value', ([key2],)), 1100 (key2, b'value', ([key3],)), 1101 (key3, b'value', ([],)), 1102 ]) 1103 # We already know about key2, so we won't try to search for key3 1104 parent_map = {key2: (key3,)} 1105 missing_keys = set() 1106 search_keys = index._find_ancestors([key1], 0, parent_map, 1107 missing_keys) 1108 self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map) 1109 self.assertEqual(set(), missing_keys) 1110 self.assertEqual(set(), search_keys) 1111 1112 def test__find_ancestors_multiple_pages(self): 1113 # We need to use enough keys that we actually cause a split 1114 start_time = 1249671539 1115 email = "joebob@example.com" 1116 nodes = [] 1117 ref_lists = ((),) 1118 rev_keys = [] 1119 for i in range(400): 1120 rev_id = ('%s-%s-%s' % (email, 1121 osutils.compact_date(start_time + i), 1122 osutils.rand_chars(16))).encode('ascii') 1123 rev_key = (rev_id,) 1124 nodes.append((rev_key, b'value', ref_lists)) 1125 # We have a ref 'list' of length 1, with a list of parents, with 1 1126 # parent which is a key 1127 ref_lists = ((rev_key,),) 1128 rev_keys.append(rev_key) 1129 index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes) 1130 self.assertEqual(400, index.key_count()) 1131 self.assertEqual(3, len(index._row_offsets)) 1132 nodes = dict(index._read_nodes([1, 2])) 1133 l1 = nodes[1] 1134 l2 = nodes[2] 1135 min_l2_key = l2.min_key 1136 max_l1_key = l1.max_key 1137 self.assertTrue(max_l1_key < min_l2_key) 1138 parents_min_l2_key = l2[min_l2_key][1][0] 1139 self.assertEqual((l1.max_key,), parents_min_l2_key) 1140 # Now, whatever key we select that would fall on the second page, 1141 # should give us all the parents until the page break 1142 key_idx = rev_keys.index(min_l2_key) 1143 next_key = rev_keys[key_idx + 1] 1144 # So now when we get the parent map, we should get the key we are 1145 # looking for, min_l2_key, and then a reference to go look for the 1146 # parent of that key 1147 parent_map = {} 1148 missing_keys = set() 1149 search_keys = index._find_ancestors([next_key], 0, parent_map, 1150 missing_keys) 1151 self.assertEqual([min_l2_key, next_key], sorted(parent_map)) 1152 self.assertEqual(set(), missing_keys) 1153 self.assertEqual({max_l1_key}, search_keys) 1154 parent_map = {} 1155 search_keys = index._find_ancestors([max_l1_key], 0, parent_map, 1156 missing_keys) 1157 self.assertEqual(l1.all_keys(), sorted(parent_map)) 1158 self.assertEqual(set(), missing_keys) 1159 self.assertEqual(set(), search_keys) 1160 1161 def test__find_ancestors_empty_index(self): 1162 index = self.make_index(ref_lists=1, key_elements=1, nodes=[]) 1163 parent_map = {} 1164 missing_keys = set() 1165 search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map, 1166 missing_keys) 1167 self.assertEqual(set(), search_keys) 1168 self.assertEqual({}, parent_map) 1169 self.assertEqual({('one',), ('two',)}, missing_keys) 1170 1171 def test_supports_unlimited_cache(self): 1172 builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1) 1173 # We need enough nodes to cause a page split (so we have both an 1174 # internal node and a couple leaf nodes. 500 seems to be enough.) 1175 nodes = self.make_nodes(500, 1, 0) 1176 for node in nodes: 1177 builder.add_node(*node) 1178 stream = builder.finish() 1179 trans = self.get_transport() 1180 size = trans.put_file('index', stream) 1181 index = btree_index.BTreeGraphIndex(trans, 'index', size) 1182 self.assertEqual(500, index.key_count()) 1183 # We have an internal node 1184 self.assertEqual(2, len(index._row_lengths)) 1185 # We have at least 2 leaf nodes 1186 self.assertTrue(index._row_lengths[-1] >= 2) 1187 self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache) 1188 self.assertEqual(btree_index._NODE_CACHE_SIZE, 1189 index._leaf_node_cache._max_cache) 1190 self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache) 1191 self.assertEqual(100, index._internal_node_cache._max_cache) 1192 # No change if unlimited_cache=False is passed 1193 index = btree_index.BTreeGraphIndex(trans, 'index', size, 1194 unlimited_cache=False) 1195 self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache) 1196 self.assertEqual(btree_index._NODE_CACHE_SIZE, 1197 index._leaf_node_cache._max_cache) 1198 self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache) 1199 self.assertEqual(100, index._internal_node_cache._max_cache) 1200 index = btree_index.BTreeGraphIndex(trans, 'index', size, 1201 unlimited_cache=True) 1202 self.assertIsInstance(index._leaf_node_cache, dict) 1203 self.assertIs(type(index._internal_node_cache), dict) 1204 # Exercise the lookup code 1205 entries = set(index.iter_entries([n[0] for n in nodes])) 1206 self.assertEqual(500, len(entries)) 1207 1208 1209class TestBTreeNodes(BTreeTestCase): 1210 1211 scenarios = btreeparser_scenarios() 1212 1213 def setUp(self): 1214 super(TestBTreeNodes, self).setUp() 1215 self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree) 1216 1217 def test_LeafNode_1_0(self): 1218 node_bytes = (b"type=leaf\n" 1219 b"0000000000000000000000000000000000000000\x00\x00value:0\n" 1220 b"1111111111111111111111111111111111111111\x00\x00value:1\n" 1221 b"2222222222222222222222222222222222222222\x00\x00value:2\n" 1222 b"3333333333333333333333333333333333333333\x00\x00value:3\n" 1223 b"4444444444444444444444444444444444444444\x00\x00value:4\n") 1224 node = btree_index._LeafNode(node_bytes, 1, 0) 1225 # We do direct access, or don't care about order, to leaf nodes most of 1226 # the time, so a dict is useful: 1227 self.assertEqual({ 1228 (b"0000000000000000000000000000000000000000",): (b"value:0", ()), 1229 (b"1111111111111111111111111111111111111111",): (b"value:1", ()), 1230 (b"2222222222222222222222222222222222222222",): (b"value:2", ()), 1231 (b"3333333333333333333333333333333333333333",): (b"value:3", ()), 1232 (b"4444444444444444444444444444444444444444",): (b"value:4", ()), 1233 }, dict(node.all_items())) 1234 1235 def test_LeafNode_2_2(self): 1236 node_bytes = (b"type=leaf\n" 1237 b"00\x0000\x00\t00\x00ref00\x00value:0\n" 1238 b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n" 1239 b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n" 1240 b"11\x0044\x00\t11\x00ref00\x00value:4\n" 1241 b"" 1242 ) 1243 node = btree_index._LeafNode(node_bytes, 2, 2) 1244 # We do direct access, or don't care about order, to leaf nodes most of 1245 # the time, so a dict is useful: 1246 self.assertEqual({ 1247 (b'00', b'00'): (b'value:0', ((), ((b'00', b'ref00'),))), 1248 (b'00', b'11'): (b'value:1', (((b'00', b'ref00'),), 1249 ((b'00', b'ref00'), (b'01', b'ref01')))), 1250 (b'11', b'33'): (b'value:3', (((b'11', b'ref22'),), 1251 ((b'11', b'ref22'), (b'11', b'ref22')))), 1252 (b'11', b'44'): (b'value:4', ((), ((b'11', b'ref00'),))) 1253 }, dict(node.all_items())) 1254 1255 def test_InternalNode_1(self): 1256 node_bytes = (b"type=internal\n" 1257 b"offset=1\n" 1258 b"0000000000000000000000000000000000000000\n" 1259 b"1111111111111111111111111111111111111111\n" 1260 b"2222222222222222222222222222222222222222\n" 1261 b"3333333333333333333333333333333333333333\n" 1262 b"4444444444444444444444444444444444444444\n" 1263 ) 1264 node = btree_index._InternalNode(node_bytes) 1265 # We want to bisect to find the right children from this node, so a 1266 # vector is most useful. 1267 self.assertEqual([ 1268 (b"0000000000000000000000000000000000000000",), 1269 (b"1111111111111111111111111111111111111111",), 1270 (b"2222222222222222222222222222222222222222",), 1271 (b"3333333333333333333333333333333333333333",), 1272 (b"4444444444444444444444444444444444444444",), 1273 ], node.keys) 1274 self.assertEqual(1, node.offset) 1275 1276 def test_LeafNode_2_2(self): 1277 node_bytes = (b"type=leaf\n" 1278 b"00\x0000\x00\t00\x00ref00\x00value:0\n" 1279 b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n" 1280 b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n" 1281 b"11\x0044\x00\t11\x00ref00\x00value:4\n" 1282 b"" 1283 ) 1284 node = btree_index._LeafNode(node_bytes, 2, 2) 1285 # We do direct access, or don't care about order, to leaf nodes most of 1286 # the time, so a dict is useful: 1287 self.assertEqual({ 1288 (b'00', b'00'): (b'value:0', ((), ((b'00', b'ref00'),))), 1289 (b'00', b'11'): (b'value:1', (((b'00', b'ref00'),), 1290 ((b'00', b'ref00'), (b'01', b'ref01')))), 1291 (b'11', b'33'): (b'value:3', (((b'11', b'ref22'),), 1292 ((b'11', b'ref22'), (b'11', b'ref22')))), 1293 (b'11', b'44'): (b'value:4', ((), ((b'11', b'ref00'),))) 1294 }, dict(node.all_items())) 1295 1296 def assertFlattened(self, expected, key, value, refs): 1297 flat_key, flat_line = self.parse_btree._flatten_node( 1298 (None, key, value, refs), bool(refs)) 1299 self.assertEqual(b'\x00'.join(key), flat_key) 1300 self.assertEqual(expected, flat_line) 1301 1302 def test__flatten_node(self): 1303 self.assertFlattened(b'key\0\0value\n', (b'key',), b'value', []) 1304 self.assertFlattened(b'key\0tuple\0\0value str\n', 1305 (b'key', b'tuple'), b'value str', []) 1306 self.assertFlattened(b'key\0tuple\0triple\0\0value str\n', 1307 (b'key', b'tuple', b'triple'), b'value str', []) 1308 self.assertFlattened(b'k\0t\0s\0ref\0value str\n', 1309 (b'k', b't', b's'), b'value str', [[(b'ref',)]]) 1310 self.assertFlattened(b'key\0tuple\0ref\0key\0value str\n', 1311 (b'key', b'tuple'), b'value str', [[(b'ref', b'key')]]) 1312 self.assertFlattened(b"00\x0000\x00\t00\x00ref00\x00value:0\n", 1313 (b'00', b'00'), b'value:0', ((), ((b'00', b'ref00'),))) 1314 self.assertFlattened( 1315 b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n", 1316 (b'00', b'11'), b'value:1', 1317 (((b'00', b'ref00'),), ((b'00', b'ref00'), (b'01', b'ref01')))) 1318 self.assertFlattened( 1319 b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n", 1320 (b'11', b'33'), b'value:3', 1321 (((b'11', b'ref22'),), ((b'11', b'ref22'), (b'11', b'ref22')))) 1322 self.assertFlattened( 1323 b"11\x0044\x00\t11\x00ref00\x00value:4\n", 1324 (b'11', b'44'), b'value:4', ((), ((b'11', b'ref00'),))) 1325 1326 1327class TestCompiledBtree(tests.TestCase): 1328 1329 def test_exists(self): 1330 # This is just to let the user know if they don't have the feature 1331 # available 1332 self.requireFeature(compiled_btreeparser_feature) 1333 1334 1335class TestMultiBisectRight(tests.TestCase): 1336 1337 def assertMultiBisectRight(self, offsets, search_keys, fixed_keys): 1338 self.assertEqual(offsets, 1339 btree_index.BTreeGraphIndex._multi_bisect_right( 1340 search_keys, fixed_keys)) 1341 1342 def test_after(self): 1343 self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a']) 1344 self.assertMultiBisectRight([(3, ['e', 'f', 'g'])], 1345 ['e', 'f', 'g'], ['a', 'b', 'c']) 1346 1347 def test_before(self): 1348 self.assertMultiBisectRight([(0, ['a'])], ['a'], ['b']) 1349 self.assertMultiBisectRight([(0, ['a', 'b', 'c', 'd'])], 1350 ['a', 'b', 'c', 'd'], ['e', 'f', 'g']) 1351 1352 def test_exact(self): 1353 self.assertMultiBisectRight([(1, ['a'])], ['a'], ['a']) 1354 self.assertMultiBisectRight( 1355 [(1, ['a']), (2, ['b'])], ['a', 'b'], ['a', 'b']) 1356 self.assertMultiBisectRight([(1, ['a']), (3, ['c'])], 1357 ['a', 'c'], ['a', 'b', 'c']) 1358 1359 def test_inbetween(self): 1360 self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a', 'c']) 1361 self.assertMultiBisectRight([(1, ['b', 'c', 'd']), (2, ['f', 'g'])], 1362 ['b', 'c', 'd', 'f', 'g'], ['a', 'e', 'h']) 1363 1364 def test_mixed(self): 1365 self.assertMultiBisectRight([(0, ['a', 'b']), (2, ['d', 'e']), 1366 (4, ['g', 'h'])], 1367 ['a', 'b', 'd', 'e', 'g', 'h'], 1368 ['c', 'd', 'f', 'g']) 1369 1370 1371class TestExpandOffsets(tests.TestCase): 1372 1373 def make_index(self, size, recommended_pages=None): 1374 """Make an index with a generic size. 1375 1376 This doesn't actually create anything on disk, it just primes a 1377 BTreeGraphIndex with the recommended information. 1378 """ 1379 index = btree_index.BTreeGraphIndex( 1380 transport.get_transport_from_url('memory:///'), 1381 'test-index', size=size) 1382 if recommended_pages is not None: 1383 index._recommended_pages = recommended_pages 1384 return index 1385 1386 def set_cached_offsets(self, index, cached_offsets): 1387 """Monkeypatch to give a canned answer for _get_offsets_for...().""" 1388 def _get_offsets_to_cached_pages(): 1389 cached = set(cached_offsets) 1390 return cached 1391 index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages 1392 1393 def prepare_index(self, index, node_ref_lists, key_length, key_count, 1394 row_lengths, cached_offsets): 1395 """Setup the BTreeGraphIndex with some pre-canned information.""" 1396 index.node_ref_lists = node_ref_lists 1397 index._key_length = key_length 1398 index._key_count = key_count 1399 index._row_lengths = row_lengths 1400 index._compute_row_offsets() 1401 index._root_node = btree_index._InternalNode(b'internal\noffset=0\n') 1402 self.set_cached_offsets(index, cached_offsets) 1403 1404 def make_100_node_index(self): 1405 index = self.make_index(4096 * 100, 6) 1406 # Consider we've already made a single request at the middle 1407 self.prepare_index(index, node_ref_lists=0, key_length=1, 1408 key_count=1000, row_lengths=[1, 99], 1409 cached_offsets=[0, 50]) 1410 return index 1411 1412 def make_1000_node_index(self): 1413 index = self.make_index(4096 * 1000, 6) 1414 # Pretend we've already made a single request in the middle 1415 self.prepare_index(index, node_ref_lists=0, key_length=1, 1416 key_count=90000, row_lengths=[1, 9, 990], 1417 cached_offsets=[0, 5, 500]) 1418 return index 1419 1420 def assertNumPages(self, expected_pages, index, size): 1421 index._size = size 1422 self.assertEqual(expected_pages, index._compute_total_pages_in_index()) 1423 1424 def assertExpandOffsets(self, expected, index, offsets): 1425 self.assertEqual(expected, index._expand_offsets(offsets), 1426 'We did not get the expected value after expanding' 1427 ' %s' % (offsets,)) 1428 1429 def test_default_recommended_pages(self): 1430 index = self.make_index(None) 1431 # local transport recommends 4096 byte reads, which is 1 page 1432 self.assertEqual(1, index._recommended_pages) 1433 1434 def test__compute_total_pages_in_index(self): 1435 index = self.make_index(None) 1436 self.assertNumPages(1, index, 1024) 1437 self.assertNumPages(1, index, 4095) 1438 self.assertNumPages(1, index, 4096) 1439 self.assertNumPages(2, index, 4097) 1440 self.assertNumPages(2, index, 8192) 1441 self.assertNumPages(76, index, 4096 * 75 + 10) 1442 1443 def test__find_layer_start_and_stop(self): 1444 index = self.make_1000_node_index() 1445 self.assertEqual((0, 1), index._find_layer_first_and_end(0)) 1446 self.assertEqual((1, 10), index._find_layer_first_and_end(1)) 1447 self.assertEqual((1, 10), index._find_layer_first_and_end(9)) 1448 self.assertEqual((10, 1000), index._find_layer_first_and_end(10)) 1449 self.assertEqual((10, 1000), index._find_layer_first_and_end(99)) 1450 self.assertEqual((10, 1000), index._find_layer_first_and_end(999)) 1451 1452 def test_unknown_size(self): 1453 # We should not expand if we don't know the file size 1454 index = self.make_index(None, 10) 1455 self.assertExpandOffsets([0], index, [0]) 1456 self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9]) 1457 1458 def test_more_than_recommended(self): 1459 index = self.make_index(4096 * 100, 2) 1460 self.assertExpandOffsets([1, 10], index, [1, 10]) 1461 self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20]) 1462 1463 def test_read_all_from_root(self): 1464 index = self.make_index(4096 * 10, 20) 1465 self.assertExpandOffsets(list(range(10)), index, [0]) 1466 1467 def test_read_all_when_cached(self): 1468 # We've read enough that we can grab all the rest in a single request 1469 index = self.make_index(4096 * 10, 5) 1470 self.prepare_index(index, node_ref_lists=0, key_length=1, 1471 key_count=1000, row_lengths=[1, 9], 1472 cached_offsets=[0, 1, 2, 5, 6]) 1473 # It should fill the remaining nodes, regardless of the one requested 1474 self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3]) 1475 self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8]) 1476 self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9]) 1477 1478 def test_no_root_node(self): 1479 index = self.make_index(4096 * 10, 5) 1480 self.assertExpandOffsets([0], index, [0]) 1481 1482 def test_include_neighbors(self): 1483 index = self.make_100_node_index() 1484 # We expand in both directions, until we have at least 'recommended' 1485 # pages 1486 self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12]) 1487 self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91]) 1488 # If we hit an 'edge' we continue in the other direction 1489 self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2]) 1490 self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98]) 1491 1492 # Requesting many nodes will expand all locations equally 1493 self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81]) 1494 self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index, 1495 [2, 10, 81]) 1496 1497 def test_stop_at_cached(self): 1498 index = self.make_100_node_index() 1499 self.set_cached_offsets(index, [0, 10, 19]) 1500 self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11]) 1501 self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12]) 1502 self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15]) 1503 self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16]) 1504 self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17]) 1505 self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18]) 1506 1507 def test_cannot_fully_expand(self): 1508 index = self.make_100_node_index() 1509 self.set_cached_offsets(index, [0, 10, 12]) 1510 # We don't go into an endless loop if we are bound by cached nodes 1511 self.assertExpandOffsets([11], index, [11]) 1512 1513 def test_overlap(self): 1514 index = self.make_100_node_index() 1515 self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13]) 1516 self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14]) 1517 1518 def test_stay_within_layer(self): 1519 index = self.make_1000_node_index() 1520 # When expanding a request, we won't read nodes from the next layer 1521 self.assertExpandOffsets([1, 2, 3, 4], index, [2]) 1522 self.assertExpandOffsets([6, 7, 8, 9], index, [6]) 1523 self.assertExpandOffsets([6, 7, 8, 9], index, [9]) 1524 self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10]) 1525 self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13]) 1526 1527 self.set_cached_offsets(index, [0, 4, 12]) 1528 self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7]) 1529 self.assertExpandOffsets([10, 11], index, [11]) 1530 1531 def test_small_requests_unexpanded(self): 1532 index = self.make_100_node_index() 1533 self.set_cached_offsets(index, [0]) 1534 self.assertExpandOffsets([1], index, [1]) 1535 self.assertExpandOffsets([50], index, [50]) 1536 # If we request more than one node, then we'll expand 1537 self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60]) 1538 1539 # The first pass does not expand 1540 index = self.make_1000_node_index() 1541 self.set_cached_offsets(index, [0]) 1542 self.assertExpandOffsets([1], index, [1]) 1543 self.set_cached_offsets(index, [0, 1]) 1544 self.assertExpandOffsets([100], index, [100]) 1545 self.set_cached_offsets(index, [0, 1, 100]) 1546 # But after the first depth, we will expand 1547 self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2]) 1548 self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4]) 1549 self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100]) 1550 self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index, 1551 [105]) 1552